mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-19 20:01:08 -05:00
feat: add Azure AI integration with deployment configuration
This commit is contained in:
337
azure-ai-inference-provider-plan.md
Normal file
@@ -0,0 +1,337 @@
# Azure AI Inference Provider Implementation Plan

## Overview

This document outlines the implementation plan for adding Azure AI Inference support as a new provider in `src/api/providers/`. While Azure AI uses OpenAI's API format as a base, there are significant differences in implementation that need to be accounted for.

## Key Differences from OpenAI

### Endpoint Structure

- OpenAI: `https://api.openai.com/v1/chat/completions`
- Azure: `https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version={api-version}`

### Authentication

- OpenAI: Uses `Authorization: Bearer sk-...`
- Azure: Uses `api-key: {key}`

### Request Format

- OpenAI: Requires a `model` field in the request body
- Azure: Omits `model` from the body (uses the deployment name in the URL instead)

### Special Considerations

- The API version is required as a URL query parameter
- Model-Mesh deployments require an additional header: `x-ms-model-mesh-model-name`
- Different API versions are needed for different features (e.g., 2023-12-01-preview, 2024-02-15-preview)
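To make the contrast concrete, here is a minimal sketch of how the same chat request is shaped for each service. The resource, deployment, and key values are placeholders, not real configuration:

```typescript
const baseBody = {
	messages: [{ role: "user", content: "Hello" }],
	temperature: 0,
}

// OpenAI: fixed host, bearer auth, model selected in the body
const openAiRequest = {
	url: "https://api.openai.com/v1/chat/completions",
	headers: { Authorization: "Bearer sk-...", "Content-Type": "application/json" },
	body: { ...baseBody, model: "gpt-4" },
}

// Azure: deployment and api-version in the URL, api-key auth, no model in the body
const resource = "my-resource" // placeholder
const deployment = "my-gpt4-deployment" // placeholder
const apiVersion = "2024-02-15-preview"
const azureRequest = {
	url: `https://${resource}.openai.azure.com/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`,
	headers: { "api-key": "<key>", "Content-Type": "application/json" },
	body: baseBody,
}
```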

## Dependencies

```typescript
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI, { AzureOpenAI } from "openai"
import {
	ApiHandlerOptions,
	ModelInfo,
	azureAiDefaultModelId,
	AzureAiModelId,
	azureAiModels
} from "../../shared/api"
import { ApiHandler, SingleCompletionHandler } from "../index"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
```

## Configuration (shared/api.ts)

```typescript
export type AzureAiModelId = "gpt-35-turbo" | "gpt-4" | "gpt-4-turbo"

export interface AzureDeploymentConfig {
	name: string
	apiVersion: string
	modelMeshName?: string // For Model-Mesh deployments
}

export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = {
	"gpt-35-turbo": {
		maxTokens: 4096,
		contextWindow: 16385,
		supportsPromptCache: true,
		inputPrice: 0.0015,
		outputPrice: 0.002,
		defaultDeployment: {
			name: "gpt-35-turbo",
			apiVersion: "2024-02-15-preview"
		}
	},
	"gpt-4": {
		maxTokens: 8192,
		contextWindow: 8192,
		supportsPromptCache: true,
		inputPrice: 0.03,
		outputPrice: 0.06,
		defaultDeployment: {
			name: "gpt-4",
			apiVersion: "2024-02-15-preview"
		}
	},
	"gpt-4-turbo": {
		maxTokens: 4096,
		contextWindow: 128000,
		supportsPromptCache: true,
		inputPrice: 0.01,
		outputPrice: 0.03,
		defaultDeployment: {
			name: "gpt-4-turbo",
			apiVersion: "2024-02-15-preview"
		}
	}
}

export const azureAiDefaultModelId: AzureAiModelId = "gpt-35-turbo"
```

## Implementation (src/api/providers/azure-ai.ts)

```typescript
export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
	private options: ApiHandlerOptions
	private client: AzureOpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options

		if (!options.azureAiEndpoint) {
			throw new Error("Azure AI endpoint is required")
		}

		if (!options.azureAiKey) {
			throw new Error("Azure AI key is required")
		}

		const deployment = this.getDeploymentConfig()

		this.client = new AzureOpenAI({
			apiKey: options.azureAiKey,
			endpoint: options.azureAiEndpoint,
			deployment: deployment.name,
			apiVersion: deployment.apiVersion,
			// The OpenAI SDK takes extra headers via `defaultHeaders`
			defaultHeaders: deployment.modelMeshName
				? { "x-ms-model-mesh-model-name": deployment.modelMeshName }
				: undefined
		})
	}

	private getDeploymentConfig(): AzureDeploymentConfig {
		const model = this.getModel()
		const defaultConfig = azureAiModels[model.id].defaultDeployment

		// Override with user-provided deployment settings if available
		const deploymentName = this.options.azureAiDeployments?.[model.id]?.name || defaultConfig.name
		const apiVersion = this.options.azureAiDeployments?.[model.id]?.apiVersion || defaultConfig.apiVersion
		const modelMeshName = this.options.azureAiDeployments?.[model.id]?.modelMeshName

		return {
			name: deploymentName,
			apiVersion,
			modelMeshName
		}
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		const modelInfo = this.getModel().info

		const systemMessage = {
			role: "system" as const,
			content: systemPrompt
		}

		// Note: the model parameter is omitted because the deployment selects the model
		const requestOptions: Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, "model"> = {
			messages: [systemMessage, ...convertToOpenAiMessages(messages)],
			temperature: 0,
			stream: true,
			max_tokens: modelInfo.maxTokens
		}

		try {
			const stream = await this.client.chat.completions.create(requestOptions as any)

			for await (const chunk of stream) {
				const delta = chunk.choices[0]?.delta

				if (delta?.content) {
					yield {
						type: "text",
						text: delta.content
					}
				}

				if (chunk.usage) {
					yield {
						type: "usage",
						inputTokens: chunk.usage.prompt_tokens || 0,
						outputTokens: chunk.usage.completion_tokens || 0
					}
				}
			}
		} catch (error) {
			// Handle the Azure-specific error format
			if (error instanceof Error) {
				const azureError = error as any
				throw new Error(`Azure AI error (${azureError.code || "Unknown"}): ${azureError.message}`)
			}
			throw error
		}
	}

	getModel(): { id: AzureAiModelId; info: ModelInfo } {
		const modelId = this.options.apiModelId
		if (modelId && modelId in azureAiModels) {
			const id = modelId as AzureAiModelId
			return { id, info: azureAiModels[id] }
		}
		return { id: azureAiDefaultModelId, info: azureAiModels[azureAiDefaultModelId] }
	}

	async completePrompt(prompt: string): Promise<string> {
		try {
			// Note: the model parameter is omitted because the deployment selects the model
			const response = await this.client.chat.completions.create({
				messages: [{ role: "user", content: prompt }],
				temperature: 0
			} as any)

			return response.choices[0]?.message.content || ""
		} catch (error) {
			// Handle the Azure-specific error format
			if (error instanceof Error) {
				const azureError = error as any
				throw new Error(`Azure AI completion error (${azureError.code || "Unknown"}): ${azureError.message}`)
			}
			throw error
		}
	}
}
```
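For context, a rough sketch of how a handler built from this plan might be driven. The option values are placeholders, and the option names assume the `ApiHandlerOptions` additions described in the next section:

```typescript
const handler = new AzureAiHandler({
	apiModelId: "gpt-4",
	azureAiEndpoint: "https://your-resource.openai.azure.com", // placeholder
	azureAiKey: "your-api-key", // placeholder
	azureAiDeployments: {
		"gpt-4": { name: "your-gpt4-deployment", apiVersion: "2024-02-15-preview" }
	}
})

// Stream a response; chunks are either text deltas or usage updates
for await (const chunk of handler.createMessage("You are a helpful assistant.", [])) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text)
	}
}
```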

## Required Updates to ApiHandlerOptions

Add the following to the `ApiHandlerOptions` interface in `shared/api.ts`:

```typescript
azureAiEndpoint?: string
azureAiKey?: string
azureAiDeployments?: {
	[key in AzureAiModelId]?: {
		name: string
		apiVersion: string
		modelMeshName?: string
	}
}
```

## Testing Plan

1. Create a `__tests__` directory with `azure-ai.test.ts`:

```typescript
describe("AzureAiHandler", () => {
	// Test URL construction
	test("constructs correct Azure endpoint URL", () => {})

	// Test authentication
	test("sets correct authentication headers", () => {})

	// Test deployment configuration
	test("uses correct deployment names", () => {})
	test("handles Model-Mesh configuration", () => {})

	// Test error handling
	test("handles Azure-specific error format", () => {})

	// Test request/response format
	test("omits model from request body", () => {})
	test("handles Azure response format", () => {})
})
```

## Integration Steps

1. Add Azure AI models and types to `shared/api.ts`
2. Create the `azure-ai.ts` provider implementation
3. Add provider tests
4. Update the API handler options
5. Add deployment configuration support
6. Implement Azure-specific error handling
7. Test against real Azure AI endpoints

## Error Handling

Azure returns errors in a specific format:

```typescript
interface AzureError {
	code: string // e.g., "InternalServerError", "InvalidRequest"
	message: string
	target?: string
	details?: Array<{
		code: string
		message: string
	}>
}
```

The implementation should:

- Parse the Azure error format
- Include error codes in surfaced messages
- Handle deployment-specific errors
- Provide clear upgrade paths for API version issues

A sketch of an error formatter along these lines follows this list.
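As an illustration, a minimal formatter for the interface above might look like the following. This is a sketch, not the final implementation; the helper name and fallback wording are assumptions:

```typescript
interface AzureError {
	code: string
	message: string
	target?: string
	details?: Array<{ code: string; message: string }>
}

// Sketch: turn an unknown caught value into a readable message
function formatAzureError(error: unknown): string {
	const err = error as Partial<AzureError> | undefined
	if (err && typeof err.message === "string") {
		const code = typeof err.code === "string" ? err.code : "Unknown"
		const details = err.details?.map((d) => `${d.code}: ${d.message}`).join("; ")
		return `Azure AI error (${code}): ${err.message}${details ? ` [${details}]` : ""}`
	}
	return `Azure AI error: ${String(error)}`
}
```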

## Documentation Updates

1. Add an Azure AI configuration section to README.md:
    - Endpoint configuration
    - Authentication setup
    - Deployment mapping
    - API version selection
    - Model-Mesh support

2. Document configuration examples:

```typescript
{
	azureAiEndpoint: "https://your-resource.openai.azure.com",
	azureAiKey: "your-api-key",
	azureAiDeployments: {
		"gpt-4": {
			name: "your-gpt4-deployment",
			apiVersion: "2024-02-15-preview",
			modelMeshName: "optional-model-mesh-name"
		}
	}
}
```

## Future Improvements

1. Support for Azure-specific features:
    - Fine-tuning endpoints
    - Custom deployment configurations
    - Managed identity authentication

2. Performance optimizations:
    - Connection pooling
    - Regional endpoint selection
    - Automatic API version negotiation

3. Advanced features:
    - Response format control
    - Function calling support
    - Vision model support
@@ -271,6 +271,8 @@
	"@anthropic-ai/bedrock-sdk": "^0.10.2",
	"@anthropic-ai/sdk": "^0.26.0",
	"@anthropic-ai/vertex-sdk": "^0.4.1",
	"@azure-rest/ai-inference": "^1.0.0",
	"@azure/core-auth": "^1.5.0",
	"@aws-sdk/client-bedrock-runtime": "^3.706.0",
	"@google/generative-ai": "^0.18.0",
	"@mistralai/mistralai": "^1.3.6",
@@ -14,6 +14,7 @@ import { DeepSeekHandler } from "./providers/deepseek"
import { MistralHandler } from "./providers/mistral"
import { VsCodeLmHandler } from "./providers/vscode-lm"
import { ApiStream } from "./transform/stream"
import { AzureAiHandler } from "./providers/azure-ai"
import { UnboundHandler } from "./providers/unbound"

export interface SingleCompletionHandler {
@@ -56,7 +57,9 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
			return new MistralHandler(options)
		case "unbound":
			return new UnboundHandler(options)
		case "azure-ai":
			return new AzureAiHandler(options)
		default:
			return new AnthropicHandler(options)
	}
}
171
src/api/providers/__tests__/azure-ai.test.ts
Normal file
@@ -0,0 +1,171 @@
import { AzureAiHandler } from "../azure-ai"
import { ApiConfiguration } from "../../../shared/api"
import { Readable } from "stream"
import ModelClient from "@azure-rest/ai-inference"

// Mock the Azure AI client. The path/post mocks are created inside the factory
// (jest.mock is hoisted, so out-of-scope variables cannot be referenced) and
// retrieved through the mocked default export so individual tests can configure them.
jest.mock("@azure-rest/ai-inference", () => {
	const post = jest.fn()
	const path = jest.fn().mockReturnValue({ post })
	return {
		__esModule: true,
		default: jest.fn().mockImplementation(() => ({ path })),
		isUnexpected: jest.fn()
	}
})

// Helper to reach the shared path/post mocks created in the factory above
const getClientMocks = () => {
	const client = (ModelClient as unknown as jest.Mock)()
	const path = client.path as jest.Mock
	const post = path("/chat/completions").post as jest.Mock
	return { path, post }
}

describe("AzureAiHandler", () => {
	const mockOptions: ApiConfiguration = {
		apiProvider: "azure-ai",
		apiModelId: "azure-gpt-35",
		azureAiEndpoint: "https://test-resource.inference.azure.com",
		azureAiKey: "test-key",
		azureAiDeployments: {
			"azure-gpt-35": {
				name: "custom-gpt35",
				apiVersion: "2024-02-15-preview",
				modelMeshName: "test-mesh-model"
			}
		}
	}

	beforeEach(() => {
		jest.clearAllMocks()
	})

	test("constructs with required options", () => {
		const handler = new AzureAiHandler(mockOptions)
		expect(handler).toBeInstanceOf(AzureAiHandler)
	})

	test("throws error without endpoint", () => {
		const invalidOptions = { ...mockOptions }
		delete invalidOptions.azureAiEndpoint
		expect(() => new AzureAiHandler(invalidOptions)).toThrow("Azure AI endpoint is required")
	})

	test("throws error without API key", () => {
		const invalidOptions = { ...mockOptions }
		delete invalidOptions.azureAiKey
		expect(() => new AzureAiHandler(invalidOptions)).toThrow("Azure AI key is required")
	})

	test("creates chat completion correctly", async () => {
		const handler = new AzureAiHandler(mockOptions)
		const { path, post } = getClientMocks()
		post.mockResolvedValue({
			status: "200",
			body: {
				choices: [{ message: { content: "test response" } }]
			}
		})

		const result = await handler.completePrompt("test prompt")
		expect(result).toBe("test response")

		expect(path).toHaveBeenCalledWith("/chat/completions")
		expect(post).toHaveBeenCalledWith({
			body: {
				messages: [{ role: "user", content: "test prompt" }],
				temperature: 0
			}
		})
	})

	test("handles streaming responses correctly", async () => {
		const handler = new AzureAiHandler(mockOptions)
		const mockStream = Readable.from([
			'data: {"choices":[{"delta":{"content":"Hello"},"finish_reason":null}]}\n\n',
			'data: {"choices":[{"delta":{"content":" world"},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":2}}\n\n',
			"data: [DONE]\n\n"
		])

		const { post } = getClientMocks()
		// createMessage consumes the response via .asNodeStream()
		post.mockReturnValue({
			asNodeStream: jest.fn().mockResolvedValue({ status: "200", body: mockStream })
		})

		const messages = []
		for await (const message of handler.createMessage("system prompt", [])) {
			messages.push(message)
		}

		expect(messages).toEqual([
			{ type: "text", text: "Hello" },
			{ type: "text", text: " world" },
			{ type: "usage", inputTokens: 10, outputTokens: 2 }
		])

		expect(post).toHaveBeenCalledWith({
			body: {
				messages: [{ role: "system", content: "system prompt" }],
				temperature: 0,
				stream: true,
				max_tokens: expect.any(Number)
			}
		})
	})

	test("handles rate limit errors", async () => {
		const handler = new AzureAiHandler(mockOptions)
		const mockError = new Error("Rate limit exceeded")
		Object.assign(mockError, { status: 429 })

		const { post } = getClientMocks()
		post.mockRejectedValue(mockError)

		await expect(handler.completePrompt("test")).rejects.toThrow(
			"Azure AI rate limit exceeded. Please try again later."
		)
	})

	test("handles content safety errors", async () => {
		const handler = new AzureAiHandler(mockOptions)
		const mockError = {
			status: 400,
			body: {
				error: {
					code: "ContentFilterError",
					message: "Content was flagged by content safety filters"
				}
			}
		}

		const { post } = getClientMocks()
		post.mockRejectedValue(mockError)

		await expect(handler.completePrompt("test")).rejects.toThrow(
			"Azure AI completion error: Content was flagged by content safety filters"
		)
	})

	test("falls back to default model configuration", async () => {
		const options = { ...mockOptions }
		delete options.azureAiDeployments

		const handler = new AzureAiHandler(options)
		const model = handler.getModel()

		expect(model.id).toBe("azure-gpt-35")
		expect(model.info).toBeDefined()
		expect(model.info.defaultDeployment.name).toBe("azure-gpt-35")
	})
})
147
src/api/providers/azure-ai.ts
Normal file
@@ -0,0 +1,147 @@
import { Anthropic } from "@anthropic-ai/sdk"
import ModelClient from "@azure-rest/ai-inference"
import { isUnexpected } from "@azure-rest/ai-inference"
import { AzureKeyCredential } from "@azure/core-auth"
import {
	ApiHandlerOptions,
	ModelInfo,
	azureAiDefaultModelId,
	AzureAiModelId,
	azureAiModels,
	AzureDeploymentConfig
} from "../../shared/api"
import { ApiHandler, SingleCompletionHandler } from "../index"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"

export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
	private options: ApiHandlerOptions
	private client: ReturnType<typeof ModelClient>

	constructor(options: ApiHandlerOptions) {
		this.options = options

		if (!options.azureAiEndpoint) {
			throw new Error("Azure AI endpoint is required")
		}

		if (!options.azureAiKey) {
			throw new Error("Azure AI key is required")
		}

		// ModelClient is a factory function, not a constructor
		this.client = ModelClient(options.azureAiEndpoint, new AzureKeyCredential(options.azureAiKey))
	}

	private getDeploymentConfig(): AzureDeploymentConfig {
		const model = this.getModel()
		const defaultConfig = azureAiModels[model.id].defaultDeployment

		return {
			name: this.options.azureAiDeployments?.[model.id]?.name || defaultConfig.name,
			apiVersion: this.options.azureAiDeployments?.[model.id]?.apiVersion || defaultConfig.apiVersion,
			modelMeshName: this.options.azureAiDeployments?.[model.id]?.modelMeshName
		}
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		const modelInfo = this.getModel().info
		const chatMessages = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages)
		]

		try {
			const response = await this.client
				.path("/chat/completions")
				.post({
					body: {
						messages: chatMessages,
						temperature: 0,
						stream: true,
						max_tokens: modelInfo.maxTokens
					}
				})
				.asNodeStream()

			const stream = response.body
			if (!stream) {
				throw new Error(`Failed to get chat completions with status: ${response.status}`)
			}

			// The REST pipeline reports the status code as a string
			if (response.status !== "200") {
				throw new Error(`Failed to get chat completions with status: ${response.status}`)
			}

			for await (const chunk of stream) {
				// Each chunk is expected to be one SSE frame such as `data: {...}\n\n`;
				// partial or multi-frame chunks fail to parse and are skipped below
				const text = chunk.toString().trim()
				if (text === "data: [DONE]") {
					return
				}

				try {
					const data = JSON.parse(text.replace("data: ", ""))
					const delta = data.choices[0]?.delta

					if (delta?.content) {
						yield {
							type: "text",
							text: delta.content
						}
					}

					if (data.usage) {
						yield {
							type: "usage",
							inputTokens: data.usage.prompt_tokens || 0,
							outputTokens: data.usage.completion_tokens || 0
						}
					}
				} catch (e) {
					// Ignore parse errors from incomplete chunks
					continue
				}
			}
		} catch (error) {
			throw this.toProviderError(error, "Azure AI error")
		}
	}

	getModel(): { id: AzureAiModelId; info: ModelInfo & { defaultDeployment: AzureDeploymentConfig } } {
		const modelId = this.options.apiModelId
		if (modelId && modelId in azureAiModels) {
			const id = modelId as AzureAiModelId
			return { id, info: azureAiModels[id] }
		}
		return { id: azureAiDefaultModelId, info: azureAiModels[azureAiDefaultModelId] }
	}

	async completePrompt(prompt: string): Promise<string> {
		try {
			const response = await this.client.path("/chat/completions").post({
				body: {
					messages: [{ role: "user", content: prompt }],
					temperature: 0
				}
			})

			if (isUnexpected(response)) {
				throw response.body.error
			}

			return response.body.choices[0]?.message?.content || ""
		} catch (error) {
			throw this.toProviderError(error, "Azure AI completion error")
		}
	}

	// Normalize SDK errors and raw Azure error bodies into readable Error instances
	private toProviderError(error: unknown, prefix: string): Error {
		const azureError = error as any
		if (azureError?.status === 429) {
			return new Error("Azure AI rate limit exceeded. Please try again later.")
		}
		const message = azureError?.body?.error?.message || azureError?.message
		if (message) {
			return new Error(`${prefix}: ${message}`)
		}
		return error instanceof Error ? error : new Error(`${prefix}: ${String(error)}`)
	}
}
@@ -86,6 +86,7 @@ type GlobalStateKey =
	| "lmStudioBaseUrl"
	| "anthropicBaseUrl"
	| "azureApiVersion"
	| "azureAiDeployments"
	| "openAiStreamingEnabled"
	| "openRouterModelId"
	| "openRouterModelInfo"
@@ -1074,6 +1075,16 @@ export class ClineProvider implements vscode.WebviewViewProvider {
				await this.updateGlobalState("autoApprovalEnabled", message.bool ?? false)
				await this.postStateToWebview()
				break
			case "updateAzureAiDeployment":
				if (message.azureAiDeployment) {
					const deployments = (await this.getGlobalState("azureAiDeployments")) || {}
					deployments[message.azureAiDeployment.modelId] = {
						...message.azureAiDeployment,
					}
					await this.updateGlobalState("azureAiDeployments", deployments)
					await this.postStateToWebview()
				}
				break
			case "enhancePrompt":
				if (message.text) {
					try {
@@ -1506,6 +1517,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
		await this.storeSecret("openAiNativeApiKey", openAiNativeApiKey)
		await this.storeSecret("deepSeekApiKey", deepSeekApiKey)
		await this.updateGlobalState("azureApiVersion", azureApiVersion)
		await this.updateGlobalState("azureAiDeployments", apiConfiguration.azureAiDeployments)
		await this.updateGlobalState("openAiStreamingEnabled", openAiStreamingEnabled)
		await this.updateGlobalState("openRouterModelId", openRouterModelId)
		await this.updateGlobalState("openRouterModelInfo", openRouterModelInfo)
@@ -2147,6 +2159,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			openAiNativeApiKey,
			deepSeekApiKey,
			mistralApiKey,
			azureAiDeployments,
			azureApiVersion,
			openAiStreamingEnabled,
			openRouterModelId,
@@ -2221,6 +2234,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			this.getSecret("openAiNativeApiKey") as Promise<string | undefined>,
			this.getSecret("deepSeekApiKey") as Promise<string | undefined>,
			this.getSecret("mistralApiKey") as Promise<string | undefined>,
			this.getGlobalState("azureAiDeployments") as Promise<Record<string, any> | undefined>,
			this.getGlobalState("azureApiVersion") as Promise<string | undefined>,
			this.getGlobalState("openAiStreamingEnabled") as Promise<boolean | undefined>,
			this.getGlobalState("openRouterModelId") as Promise<string | undefined>,
@@ -2313,6 +2327,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			deepSeekApiKey,
			mistralApiKey,
			azureApiVersion,
			azureAiDeployments,
			openAiStreamingEnabled,
			openRouterModelId,
			openRouterModelInfo,
@@ -1,6 +1,4 @@
// type that represents json data that is sent from extension to webview, called ExtensionMessage and has 'type' enum which can be 'plusButtonClicked' or 'settingsButtonClicked' or 'hello'

import { ApiConfiguration, ApiProvider, ModelInfo, AzureDeploymentConfig } from "./api"
import { HistoryItem } from "./HistoryItem"
import { McpServer } from "./mcp"
import { GitCommit } from "../utils/git"
@@ -9,221 +7,220 @@ import { CustomSupportPrompts } from "./support-prompt"
import { ExperimentId } from "./experiments"

export interface LanguageModelChatSelector {
	vendor?: string
	family?: string
	version?: string
	id?: string
}

// webview will hold state
export interface ExtensionMessage {
	type:
		| "action"
		| "state"
		| "selectedImages"
		| "ollamaModels"
		| "lmStudioModels"
		| "theme"
		| "workspaceUpdated"
		| "invoke"
		| "partialMessage"
		| "glamaModels"
		| "openRouterModels"
		| "openAiModels"
		| "mcpServers"
		| "enhancedPrompt"
		| "commitSearchResults"
		| "listApiConfig"
		| "vsCodeLmModels"
		| "vsCodeLmApiAvailable"
		| "requestVsCodeLmModels"
		| "updatePrompt"
		| "systemPrompt"
		| "autoApprovalEnabled"
		| "updateCustomMode"
		| "deleteCustomMode"
	text?: string
	action?:
		| "chatButtonClicked"
		| "mcpButtonClicked"
		| "settingsButtonClicked"
		| "historyButtonClicked"
		| "promptsButtonClicked"
		| "didBecomeVisible"
	invoke?: "sendMessage" | "primaryButtonClick" | "secondaryButtonClick" | "setChatBoxMessage"
	state?: ExtensionState
	images?: string[]
	ollamaModels?: string[]
	lmStudioModels?: string[]
	vsCodeLmModels?: { vendor?: string; family?: string; version?: string; id?: string }[]
	filePaths?: string[]
	openedTabs?: Array<{
		label: string
		isActive: boolean
		path?: string
	}>
	partialMessage?: ClineMessage
	glamaModels?: Record<string, ModelInfo>
	openRouterModels?: Record<string, ModelInfo>
	openAiModels?: string[]
	mcpServers?: McpServer[]
	commits?: GitCommit[]
	listApiConfig?: ApiConfigMeta[]
	mode?: Mode
	customMode?: ModeConfig
	slug?: string
}

export interface ApiConfigMeta {
	id: string
	name: string
	apiProvider?: ApiProvider
}

export interface ExtensionState {
	version: string
	clineMessages: ClineMessage[]
	taskHistory: HistoryItem[]
	shouldShowAnnouncement: boolean
	apiConfiguration?: ApiConfiguration
	currentApiConfigName?: string
	listApiConfigMeta?: ApiConfigMeta[]
	customInstructions?: string
	customModePrompts?: CustomModePrompts
	customSupportPrompts?: CustomSupportPrompts
	alwaysAllowReadOnly?: boolean
	alwaysAllowWrite?: boolean
	alwaysAllowExecute?: boolean
	alwaysAllowBrowser?: boolean
	alwaysAllowMcp?: boolean
	alwaysApproveResubmit?: boolean
	alwaysAllowModeSwitch?: boolean
	requestDelaySeconds: number
	rateLimitSeconds: number // Minimum time between successive requests (0 = disabled)
	uriScheme?: string
	allowedCommands?: string[]
	soundEnabled?: boolean
	soundVolume?: number
	diffEnabled?: boolean
	browserViewportSize?: string
	screenshotQuality?: number
	fuzzyMatchThreshold?: number
	preferredLanguage: string
	writeDelayMs: number
	terminalOutputLineLimit?: number
	mcpEnabled: boolean
	enableMcpServerCreation: boolean
	mode: Mode
	modeApiConfigs?: Record<Mode, string>
	enhancementApiConfigId?: string
	experiments: Record<ExperimentId, boolean> // Map of experiment IDs to their enabled state
	autoApprovalEnabled?: boolean
	customModes: ModeConfig[]
	toolRequirements?: Record<string, boolean> // Map of tool names to their requirements (e.g. {"apply_diff": true} if diffEnabled)
	azureAiDeployments?: Record<string, AzureDeploymentConfig>
}

export interface ClineMessage {
	ts: number
	type: "ask" | "say"
	ask?: ClineAsk
	say?: ClineSay
	text?: string
	images?: string[]
	partial?: boolean
	reasoning?: string
}

export type ClineAsk =
	| "followup"
	| "command"
	| "command_output"
	| "completion_result"
	| "tool"
	| "api_req_failed"
	| "resume_task"
	| "resume_completed_task"
	| "mistake_limit_reached"
	| "browser_action_launch"
	| "use_mcp_server"

export type ClineSay =
	| "task"
	| "error"
	| "api_req_started"
	| "api_req_finished"
	| "text"
	| "reasoning"
	| "completion_result"
	| "user_feedback"
	| "user_feedback_diff"
	| "api_req_retried"
	| "api_req_retry_delayed"
	| "command_output"
	| "tool"
	| "shell_integration_warning"
	| "browser_action"
	| "browser_action_result"
	| "command"
	| "mcp_server_request_started"
	| "mcp_server_response"
	| "new_task_started"
	| "new_task"

export interface ClineSayTool {
	tool:
		| "editedExistingFile"
		| "appliedDiff"
		| "newFileCreated"
		| "readFile"
		| "listFilesTopLevel"
		| "listFilesRecursive"
		| "listCodeDefinitionNames"
		| "searchFiles"
		| "switchMode"
		| "newTask"
	path?: string
	diff?: string
	content?: string
	regex?: string
	filePattern?: string
	mode?: string
	reason?: string
}

// must keep in sync with system prompt
export const browserActions = ["launch", "click", "type", "scroll_down", "scroll_up", "close"] as const
export type BrowserAction = (typeof browserActions)[number]

export interface ClineSayBrowserAction {
	action: BrowserAction
	coordinate?: string
	text?: string
}

export type BrowserActionResult = {
	screenshot?: string
	logs?: string
	currentUrl?: string
	currentMousePosition?: string
}

export interface ClineAskUseMcpServer {
	serverName: string
	type: "use_mcp_tool" | "access_mcp_resource"
	toolName?: string
	arguments?: string
	uri?: string
}

export interface ClineApiReqInfo {
	request?: string
	tokensIn?: number
	tokensOut?: number
	cacheWrites?: number
	cacheReads?: number
	cost?: number
	cancelReason?: ClineApiReqCancelReason
	streamingFailedMessage?: string
}

export type ClineApiReqCancelReason = "streaming_failed" | "user_cancelled"
@@ -6,104 +6,114 @@ export type PromptMode = Mode | "enhance"
export type AudioType = "notification" | "celebration" | "progress_loop"

export interface WebviewMessage {
	type:
		| "apiConfiguration"
		| "currentApiConfigName"
		| "upsertApiConfiguration"
		| "deleteApiConfiguration"
		| "loadApiConfiguration"
		| "renameApiConfiguration"
		| "getListApiConfiguration"
		| "customInstructions"
		| "allowedCommands"
		| "alwaysAllowReadOnly"
		| "alwaysAllowWrite"
		| "alwaysAllowExecute"
		| "alwaysAllowBrowser"
		| "alwaysAllowMcp"
		| "alwaysAllowModeSwitch"
		| "webviewDidLaunch"
		| "newTask"
		| "askResponse"
		| "clearTask"
		| "didShowAnnouncement"
		| "selectImages"
		| "exportCurrentTask"
		| "showTaskWithId"
		| "deleteTaskWithId"
		| "exportTaskWithId"
		| "resetState"
		| "requestOllamaModels"
		| "requestLmStudioModels"
		| "openImage"
		| "openFile"
		| "openMention"
		| "cancelTask"
		| "refreshGlamaModels"
		| "refreshOpenRouterModels"
		| "refreshOpenAiModels"
		| "playSound"
		| "soundEnabled"
		| "soundVolume"
		| "diffEnabled"
		| "browserViewportSize"
		| "screenshotQuality"
		| "openMcpSettings"
		| "restartMcpServer"
		| "toggleToolAlwaysAllow"
		| "toggleMcpServer"
		| "updateMcpTimeout"
		| "fuzzyMatchThreshold"
		| "preferredLanguage"
		| "writeDelayMs"
		| "enhancePrompt"
		| "enhancedPrompt"
		| "draggedImages"
		| "deleteMessage"
		| "terminalOutputLineLimit"
		| "mcpEnabled"
		| "enableMcpServerCreation"
		| "searchCommits"
		| "alwaysApproveResubmit"
		| "requestDelaySeconds"
		| "rateLimitSeconds"
		| "setApiConfigPassword"
		| "requestVsCodeLmModels"
		| "mode"
		| "updatePrompt"
		| "updateSupportPrompt"
		| "resetSupportPrompt"
		| "getSystemPrompt"
		| "systemPrompt"
		| "enhancementApiConfigId"
		| "updateExperimental"
		| "autoApprovalEnabled"
		| "updateCustomMode"
		| "deleteCustomMode"
		| "setopenAiCustomModelInfo"
		| "openCustomModesSettings"
		| "updateAzureAiDeployment"
	text?: string
	disabled?: boolean
	askResponse?: ClineAskResponse
	apiConfiguration?: ApiConfiguration
	images?: string[]
	bool?: boolean
	value?: number
	commands?: string[]
	audioType?: AudioType
	serverName?: string
	toolName?: string
	alwaysAllow?: boolean
	mode?: Mode
	promptMode?: PromptMode
	customPrompt?: PromptComponent
	dataUrls?: string[]
	values?: Record<string, any>
	query?: string
	slug?: string
	modeConfig?: ModeConfig
	timeout?: number
	azureAiDeployment?: {
		modelId: string
		name: string
		apiVersion: string
		modelMeshName?: string
	}
}

export type ClineAskResponse = "yesButtonClicked" | "noButtonClicked" | "messageResponse"

@@ -15,6 +15,7 @@ export type ApiProvider =
	| "vscode-lm"
	| "mistral"
	| "unbound"
	| "azure-ai"

export interface ApiHandlerOptions {
	apiModelId?: string
@@ -60,6 +61,15 @@ export interface ApiHandlerOptions {
	includeMaxTokens?: boolean
	unboundApiKey?: string
	unboundModelId?: string
	azureAiEndpoint?: string
	azureAiKey?: string
	azureAiDeployments?: {
		[key in AzureAiModelId]?: {
			name: string
			apiVersion: string
			modelMeshName?: string
		}
	}
}

export type ApiConfiguration = ApiHandlerOptions & {
@@ -635,3 +645,50 @@ export const unboundModels = {
	"deepseek/deepseek-reasoner": deepSeekModels["deepseek-reasoner"],
	"mistral/codestral-latest": mistralModels["codestral-latest"],
} as const satisfies Record<string, ModelInfo>

// Azure AI
export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"

export interface AzureDeploymentConfig {
	name: string
	apiVersion: string
	modelMeshName?: string // For Model-Mesh deployments
}

export const azureAiModels = {
	"azure-gpt-35": {
		maxTokens: 4096,
		contextWindow: 16385,
		supportsPromptCache: true,
		inputPrice: 0.0015,
		outputPrice: 0.002,
		defaultDeployment: {
			name: "azure-gpt-35",
			apiVersion: "2024-02-15-preview"
		}
	},
	"azure-gpt-4": {
		maxTokens: 8192,
		contextWindow: 8192,
		supportsPromptCache: true,
		inputPrice: 0.03,
		outputPrice: 0.06,
		defaultDeployment: {
			name: "azure-gpt-4",
			apiVersion: "2024-02-15-preview"
		}
	},
	"azure-gpt-4-turbo": {
		maxTokens: 4096,
		contextWindow: 128000,
		supportsPromptCache: true,
		inputPrice: 0.01,
		outputPrice: 0.03,
		defaultDeployment: {
			name: "azure-gpt-4-turbo",
			apiVersion: "2024-02-15-preview"
		}
	}
} as const satisfies Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }>

export const azureAiDefaultModelId: AzureAiModelId = "azure-gpt-35"