feat: add default model information for Azure AI Model Inference configuration

2025-12-20 04:11:10 -05:00 · 2025-02-02 20:29:46 -05:00
parent 53e307c8f3
commit b6e8db8145
2 changed files with 46 additions and 30 deletions
--- a/src/api/providers/azure-ai.ts
+++ b/src/api/providers/azure-ai.ts
@@ -9,6 +9,16 @@ import { ApiStream } from "../transform/stream"

 const DEFAULT_API_VERSION = "2024-05-01-preview"

+interface AzureErrorResponse { // shape the catch blocks cast a caught error to; the cast is unchecked, so nothing here is validated at runtime
+	status: number // compared against 429 and 422 by the handlers — presumably the HTTP status code; confirm against the SDK
+	body?: {
+		error?: {
+			message?: string // preferred over the top-level message when building the 422 error text
+		}
+	}
+	message: string // fallback text; also used for the generic "Azure AI error" / "Azure AI completion error" messages
+}
+
 export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: ReturnType<typeof ModelClient>
@@ -117,21 +127,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 				}
 			}
 		} catch (error) {
-			if (error instanceof Error) {
-				// Handle Azure-specific error cases
-				if (isUnexpected(error) && error.status === 429) {
-					throw new Error("Azure AI rate limit exceeded. Please try again later.")
-				}
-				if (isUnexpected(error)) {
-					// Use proper Model Inference error handling
-					const message = error.body?.error?.message || error.message
-					if (error.status === 422) {
-						throw new Error(`Request validation failed: ${message}`)
-					}
-				}
-				throw new Error(`Azure AI error: ${error.message}`)
+			const azureError = error as AzureErrorResponse
+
+			// Check for specific error status codes
+			if (azureError.status === 429) {
+				throw new Error("Azure AI rate limit exceeded. Please try again later")
 			}
-			throw error
+			if (azureError.status === 422) {
+				const message = azureError.body?.error?.message || azureError.message
+				throw new Error(`Request validation failed: ${message}`)
+			}
+
+			throw new Error(`Azure AI error: ${azureError.message}`)
 		}
 	}

@@ -167,20 +174,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {

 			return response.body.choices[0]?.message?.content || ""
 		} catch (error) {
-			if (error instanceof Error) {
-				// Handle Azure-specific error cases
-				if (isUnexpected(error) && error.status === 429) {
-					throw new Error("Azure AI rate limit exceeded. Please try again later.")
-				}
-				if (isUnexpected(error)) {
-					const message = error.body?.error?.message || error.message
-					if (error.status === 422) {
-						throw new Error(`Request validation failed: ${message}`)
-					}
-				}
-				throw new Error(`Azure AI completion error: ${error.message}`)
+			const azureError = error as AzureErrorResponse
+
+			// Check for specific error status codes
+			if (azureError.status === 429) {
+				throw new Error("Azure AI rate limit exceeded. Please try again later")
 			}
-			throw error
+			if (azureError.status === 422) {
+				const message = azureError.body?.error?.message || azureError.message
+				throw new Error(`Request validation failed: ${message}`)
+			}
+
+			throw new Error(`Azure AI completion error: ${azureError.message}`)
 		}
 	}
 }
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -621,6 +621,16 @@ export const deepSeekModels = {
 // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
 export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
+export const azureAiModelInfoSaneDefaults: ModelInfo = { // default ModelInfo for Azure AI Model Inference deployments
+	maxTokens: -1, // Dynamic based on model; -1 looks like a "no fixed limit" sentinel — TODO confirm how consumers treat it
+	contextWindow: 128_000, // Conservative default
+	supportsImages: true, // NOTE(review): assumed for every deployment — confirm, since any model can be deployed
+	supportsComputerUse: true, // NOTE(review): same assumption as supportsImages — confirm
+	supportsPromptCache: false,
+	inputPrice: 0, // presumably unknown rather than free, as pricing depends on the deployed model — verify
+	outputPrice: 0, // see inputPrice
+	description: "Azure AI Model Inference allows you to deploy and use any model through Azure's inference service.",
+}

 // Mistral
 // https://docs.mistral.ai/getting-started/models/models_overview/
@@ -648,15 +658,16 @@ export const unboundModels = {
 	"mistral/codestral-latest": mistralModels["codestral-latest"],
 } as const satisfies Record<string, ModelInfo>

-// Azure AI
-export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
-
+// Azure AI Model Inference Configuration
 export interface AzureDeploymentConfig {
 	name: string
 	apiVersion: string
 	modelMeshName?: string // For Model-Mesh deployments
 }

+// Azure OpenAI Models
+export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
+
 export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = {
 	"azure-gpt-35": {
 		maxTokens: 4096,