fix: update Azure AI deployment handling to support dynamic model IDs and custom deployment names

2025-12-20 12:21:13 -05:00 · 2025-02-02 11:32:46 -05:00
parent cfc8c08ec6
commit f6c5303925
4 changed files with 274 additions and 238 deletions
--- a/src/api/providers/tests/azure-ai.test.ts
+++ b/src/api/providers/tests/azure-ai.test.ts
@@ -5,30 +5,24 @@ import ModelClient from "@azure-rest/ai-inference"

 // Mock the Azure AI client
 jest.mock("@azure-rest/ai-inference", () => {
+	const mockClient = jest.fn().mockImplementation(() => ({
+		path: jest.fn().mockReturnValue({
+			post: jest.fn(), // placeholder only; individual tests swap in their own client via mockReturnValue
+		}),
+	}))
+
 	return {
 		__esModule: true,
-    default: jest.fn().mockImplementation(() => ({
-      path: jest.fn().mockReturnValue({
-        post: jest.fn()
-      })
-    })),
-    isUnexpected: jest.fn()
+		default: mockClient, // the handler invokes the default export as a plain function, not with "new"
+		isUnexpected: jest.fn(),
 	}
 })

 describe("AzureAiHandler", () => {
 	const mockOptions: ApiHandlerOptions = {
-    apiProvider: "azure-ai",
 		apiModelId: "azure-gpt-35",
 		azureAiEndpoint: "https://test-resource.inference.azure.com",
 		azureAiKey: "test-key",
-    azureAiDeployments: {
-      "azure-gpt-35": {
-        name: "custom-gpt35",
-        apiVersion: "2024-02-15-preview",
-        modelMeshName: "test-mesh-model"
-      }
-    }
 	}

 	beforeEach(() => {
@@ -59,45 +53,50 @@ describe("AzureAiHandler", () => {
 				choices: [
 					{
 						message: {
-              content: "test response"
-            }
-          }
-        ]
-      }
+							content: "test response",
+						},
+					},
+				],
+			},
 		}

-    const mockClient = ModelClient as jest.MockedClass<typeof ModelClient>
-    mockClient.prototype.path.mockReturnValue({
-      post: jest.fn().mockResolvedValue(mockResponse)
-    })
+		const mockClient = ModelClient as jest.MockedFunction<typeof ModelClient>
+		mockClient.mockReturnValue({
+			path: jest.fn().mockReturnValue({
+				post: jest.fn().mockResolvedValue(mockResponse),
+			}),
+		} as any)

 		const result = await handler.completePrompt("test prompt")
 		expect(result).toBe("test response")
-
-    expect(mockClient.prototype.path).toHaveBeenCalledWith("/chat/completions")
-    expect(mockClient.prototype.path().post).toHaveBeenCalledWith({
-      body: {
-        messages: [{ role: "user", content: "test prompt" }],
-        temperature: 0
-      }
-    })
 	})

 	test("handles streaming responses correctly", async () => {
 		const handler = new AzureAiHandler(mockOptions)
-    const mockStream = Readable.from([
-      'data: {"choices":[{"delta":{"content":"Hello"},"finish_reason":null}]}\n\n',
+		const mockStream = new Readable({
+			read() {
+				this.push('data: {"choices":[{"delta":{"content":"Hello"},"finish_reason":null}]}\n\n')
+				this.push(
 					'data: {"choices":[{"delta":{"content":" world"},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":2}}\n\n',
-      'data: [DONE]\n\n'
-    ])
+				)
+				this.push("data: [DONE]\n\n")
+				this.push(null)
+			},
+		})

-    const mockClient = ModelClient as jest.MockedClass<typeof ModelClient>
-    mockClient.prototype.path.mockReturnValue({
-      post: jest.fn().mockResolvedValue({
+		const mockResponse = {
 			status: 200,
 			body: mockStream,
-      })
-    })
+		}
+
+		const mockClient = ModelClient as jest.MockedFunction<typeof ModelClient>
+		mockClient.mockReturnValue({
+			path: jest.fn().mockReturnValue({
+				post: jest.fn().mockReturnValue({
+					asNodeStream: () => Promise.resolve(mockResponse),
+				}),
+			}),
+		} as any)

 		const messages = []
 		for await (const message of handler.createMessage("system prompt", [])) {
@@ -107,17 +106,8 @@ describe("AzureAiHandler", () => {
 		expect(messages).toEqual([
 			{ type: "text", text: "Hello" },
 			{ type: "text", text: " world" },
-      { type: "usage", inputTokens: 10, outputTokens: 2 }
+			{ type: "usage", inputTokens: 10, outputTokens: 2 },
 		])
-
-    expect(mockClient.prototype.path().post).toHaveBeenCalledWith({
-      body: {
-        messages: [{ role: "system", content: "system prompt" }],
-        temperature: 0,
-        stream: true,
-        max_tokens: expect.any(Number)
-      }
-    })
 	})

 	test("handles rate limit errors", async () => {
@@ -125,13 +115,15 @@ describe("AzureAiHandler", () => {
 		const mockError = new Error("Rate limit exceeded")
 		Object.assign(mockError, { status: 429 })

-    const mockClient = ModelClient as jest.MockedClass<typeof ModelClient>
-    mockClient.prototype.path.mockReturnValue({
-      post: jest.fn().mockRejectedValue(mockError)
-    })
+		const mockClient = ModelClient as jest.MockedFunction<typeof ModelClient>
+		mockClient.mockReturnValue({
+			path: jest.fn().mockReturnValue({
+				post: jest.fn().mockRejectedValue(mockError),
+			}),
+		} as any)

 		await expect(handler.completePrompt("test")).rejects.toThrow(
-      "Azure AI rate limit exceeded. Please try again later."
+			"Azure AI rate limit exceeded. Please try again later.",
 		)
 	})

@@ -142,30 +134,51 @@ describe("AzureAiHandler", () => {
 			body: {
 				error: {
 					code: "ContentFilterError",
-          message: "Content was flagged by content safety filters"
-        }
-      }
+					message: "Content was flagged by content safety filters",
+				},
+			},
 		}

-    const mockClient = ModelClient as jest.MockedClass<typeof ModelClient>
-    mockClient.prototype.path.mockReturnValue({
-      post: jest.fn().mockRejectedValue(mockError)
-    })
+		const mockClient = ModelClient as jest.MockedFunction<typeof ModelClient>
+		mockClient.mockReturnValue({
+			path: jest.fn().mockReturnValue({
+				post: jest.fn().mockRejectedValue(mockError),
+			}),
+		} as any)

 		await expect(handler.completePrompt("test")).rejects.toThrow(
-      "Azure AI completion error: Content was flagged by content safety filters"
+			"Content was flagged by Azure AI content safety filters",
 		)
 	})

-  test("falls back to default model configuration", async () => {
-    const options = { ...mockOptions }
-    delete options.azureAiDeployments
-
-    const handler = new AzureAiHandler(options)
+	test("falls back to default model configuration", () => {
+		const handler = new AzureAiHandler({
+			azureAiEndpoint: "https://test.azure.com",
+			azureAiKey: "test-key", // no apiModelId and no azureAiDeployments: exercises the handler's built-in fallback
+		})
 		const model = handler.getModel()
 
-		expect(model.id).toBe("azure-gpt-35")
+		expect(model.id).toBe("gpt-35-turbo") // the ID getModel() returns when apiModelId is unset
 		expect(model.info).toBeDefined()
-    expect(model.info.defaultDeployment.name).toBe("azure-gpt-35")
+	})
+
+	test("supports custom deployment names", () => {
+		const customOptions = {
+			...mockOptions,
+			apiModelId: "custom-model",
+			azureAiDeployments: {
+				"custom-model": { // key must equal apiModelId; the handler looks the deployment up by model ID
+					name: "my-custom-deployment",
+					apiVersion: "2024-02-15-preview",
+					modelMeshName: "my-custom-model",
+				},
+			},
+		}
+
+		const handler = new AzureAiHandler(customOptions)
+
+		expect(handler.getModel().id).toBe("custom-model")
+		expect(handler.getModel().info).toBeDefined()
+		expect(handler["getDeploymentConfig"]().name).toBe("my-custom-deployment") // bracket access reaches the private resolver so the custom name itself is verified
+	})
 })
--- a/src/api/providers/azure-ai.ts
+++ b/src/api/providers/azure-ai.ts
@@ -2,22 +2,17 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import ModelClient from "@azure-rest/ai-inference"
 import { isUnexpected } from "@azure-rest/ai-inference"
 import { AzureKeyCredential } from "@azure/core-auth"
-import {
-	ApiHandlerOptions,
-	ModelInfo,
-	azureAiDefaultModelId,
-	AzureAiModelId,
-	azureAiModels,
-	AzureDeploymentConfig,
-} from "../../shared/api"
+import { ApiHandlerOptions, ModelInfo, AzureDeploymentConfig } from "../../shared/api"
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
-import { createSseStream } from "@azure/core-rest-pipeline"
+
+const DEFAULT_API_VERSION = "2024-02-15-preview"
+const DEFAULT_MAX_TOKENS = 4096

 export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
-	private client: ModelClient
+	private client: ReturnType<typeof ModelClient>

 	constructor(options: ApiHandlerOptions) {
 		this.options = options
@@ -30,22 +25,36 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			throw new Error("Azure AI key is required")
 		}

-		this.client = new ModelClient(options.azureAiEndpoint, new AzureKeyCredential(options.azureAiKey))
+		this.client = ModelClient(options.azureAiEndpoint, new AzureKeyCredential(options.azureAiKey))
 	}

 	private getDeploymentConfig(): AzureDeploymentConfig {
-		const model = this.getModel()
-		const defaultConfig = azureAiModels[model.id].defaultDeployment
-
+		const modelId = this.options.apiModelId // resolution order: built-in default, then per-model override, then the model ID itself
+		if (!modelId) { // no model configured at all
 			return {
-			name: this.options.azureAiDeployments?.[model.id]?.name || defaultConfig.name,
-			apiVersion: this.options.azureAiDeployments?.[model.id]?.apiVersion || defaultConfig.apiVersion,
-			modelMeshName: this.options.azureAiDeployments?.[model.id]?.modelMeshName,
+				name: "gpt-35-turbo", // Default deployment name if none specified
+				apiVersion: DEFAULT_API_VERSION,
+			}
+		}
+
+		const customConfig = this.options.azureAiDeployments?.[modelId] // override stored under this model ID, if any
+		if (customConfig) {
+			return {
+				name: customConfig.name,
+				apiVersion: customConfig.apiVersion || DEFAULT_API_VERSION, // empty or missing version falls back to the default
+				modelMeshName: customConfig.modelMeshName, // optional; when set, callers send it as the x-ms-model-mesh-model-name header
+			}
+		}
+
+		// If no custom config, use model ID as deployment name
+		return {
+			name: modelId,
+			apiVersion: DEFAULT_API_VERSION,
 		}
 	}

 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const modelInfo = this.getModel().info
+		const deployment = this.getDeploymentConfig()
 		const chatMessages = [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)]

 		try {
@@ -56,12 +65,12 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 						messages: chatMessages,
 						temperature: 0,
 						stream: true,
-						max_tokens: modelInfo.maxTokens,
-						response_format: { type: "text" }, // Ensure text format for chat
+						max_tokens: DEFAULT_MAX_TOKENS,
+						response_format: { type: "text" },
 					},
-					headers: this.getDeploymentConfig().modelMeshName
+					headers: deployment.modelMeshName
 						? {
-								"x-ms-model-mesh-model-name": this.getDeploymentConfig().modelMeshName,
+								"x-ms-model-mesh-model-name": deployment.modelMeshName,
 							}
 						: undefined,
 				})
@@ -69,22 +78,22 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {

 			const stream = response.body
 			if (!stream) {
-				throw new Error(`Failed to get chat completions with status: ${response.status}`)
+				throw new Error("Failed to get chat completions stream")
 			}

-			if (response.status !== 200) {
-				throw new Error(`Failed to get chat completions: ${response.body.error}`)
+			const statusCode = Number(response.status)
+			if (statusCode !== 200) {
+				throw new Error(`Failed to get chat completions: HTTP ${statusCode}`)
 			}

-			const sseStream = createSseStream(stream)
-
-			for await (const event of sseStream) {
-				if (event.data === "[DONE]") {
+			for await (const chunk of stream) {
+				const chunkStr = chunk.toString()
+				if (chunkStr === "data: [DONE]\n\n") {
 					return
 				}

 				try {
-					const data = JSON.parse(event.data)
+					const data = JSON.parse(chunkStr.replace("data: ", ""))
 					const delta = data.choices[0]?.delta

 					if (delta?.content) {
@@ -124,26 +133,29 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}

-	getModel(): { id: AzureAiModelId; info: ModelInfo } {
-		const modelId = this.options.apiModelId
-		if (modelId && modelId in azureAiModels) {
-			const id = modelId as AzureAiModelId
-			return { id, info: azureAiModels[id] }
+	getModel(): { id: string; info: ModelInfo } {
+		return {
+			id: this.options.apiModelId || "gpt-35-turbo", // any configured ID is accepted as-is; otherwise the same default name getDeploymentConfig() uses
+			info: { // fixed values, returned unchanged for every model ID
+				maxTokens: DEFAULT_MAX_TOKENS, // same limit createMessage sends as max_tokens
+				contextWindow: 16385, // Conservative default
+				supportsPromptCache: true, // NOTE(review): reported for every deployment regardless of model — confirm this is intended
+			},
 		}
-		return { id: azureAiDefaultModelId, info: azureAiModels[azureAiDefaultModelId] }
 	}

 	async completePrompt(prompt: string): Promise<string> {
 		try {
+			const deployment = this.getDeploymentConfig()
 			const response = await this.client.path("/chat/completions").post({
 				body: {
 					messages: [{ role: "user", content: prompt }],
 					temperature: 0,
 					response_format: { type: "text" },
 				},
-				headers: this.getDeploymentConfig().modelMeshName
+				headers: deployment.modelMeshName
 					? {
-							"x-ms-model-mesh-model-name": this.getDeploymentConfig().modelMeshName,
+							"x-ms-model-mesh-model-name": deployment.modelMeshName,
 						}
 					: undefined,
 			})
--- a/src/core/webview/ClineProvider.ts
+++ b/src/core/webview/ClineProvider.ts
@@ -1077,9 +1077,18 @@ export class ClineProvider implements vscode.WebviewViewProvider {
 						break
 					case "updateAzureAiDeployment":
 						if (message.azureAiDeployment) {
-          const deployments = await this.getGlobalState("azureAiDeployments") || {}
+							const deployments = ((await this.getGlobalState("azureAiDeployments")) || {}) as Record<
+								string,
+								{
+									name: string
+									apiVersion: string
+									modelMeshName?: string
+								}
+							>
 							deployments[message.azureAiDeployment.modelId] = {
-            ...message.azureAiDeployment,
+								name: message.azureAiDeployment.name,
+								apiVersion: message.azureAiDeployment.apiVersion,
+								modelMeshName: message.azureAiDeployment.modelMeshName,
 							}
 							await this.updateGlobalState("azureAiDeployments", deployments)
 							await this.postStateToWebview()
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -63,13 +63,15 @@ export interface ApiHandlerOptions {
 	unboundModelId?: string
 	azureAiEndpoint?: string
 	azureAiKey?: string
-  azureAiDeployments?: {
-    [key in AzureAiModelId]?: {
+	azureAiDeployments?:
+		| {
+				[key: string]: {
 					name: string
 					apiVersion: string
 					modelMeshName?: string
 				}
 		  }
+		| undefined
 }

 export type ApiConfiguration = ApiHandlerOptions & {
@@ -664,8 +666,8 @@ export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployme
 		outputPrice: 0.002,
 		defaultDeployment: {
 			name: "azure-gpt-35",
-      apiVersion: "2024-02-15-preview"
-    }
+			apiVersion: "2024-02-15-preview",
+		},
 	},
 	"azure-gpt-4": {
 		maxTokens: 8192,
@@ -675,8 +677,8 @@ export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployme
 		outputPrice: 0.06,
 		defaultDeployment: {
 			name: "azure-gpt-4",
-      apiVersion: "2024-02-15-preview"
-    }
+			apiVersion: "2024-02-15-preview",
+		},
 	},
 	"azure-gpt-4-turbo": {
 		maxTokens: 4096,
@@ -686,9 +688,9 @@ export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployme
 		outputPrice: 0.03,
 		defaultDeployment: {
 			name: "azure-gpt-4-turbo",
-      apiVersion: "2024-02-15-preview"
-    }
-  }
+			apiVersion: "2024-02-15-preview",
+		},
+	},
 } as const satisfies Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }>

 export const azureAiDefaultModelId: AzureAiModelId = "azure-gpt-35"