feat: add default model information for Azure AI Model Inference configuration

This commit is contained in:
pacnpal
2025-02-02 20:29:46 -05:00
parent 53e307c8f3
commit b6e8db8145
2 changed files with 46 additions and 30 deletions

View File

@@ -9,6 +9,16 @@ import { ApiStream } from "../transform/stream"
const DEFAULT_API_VERSION = "2024-05-01-preview" const DEFAULT_API_VERSION = "2024-05-01-preview"
/**
 * Minimal shape of an error surfaced by the Azure AI Model Inference client.
 *
 * `status` carries the HTTP status code (e.g. 429, 422) and `message` a
 * top-level description; a more specific message may be nested under
 * `body.error.message` when the service returns a structured error body.
 */
interface AzureErrorResponse {
	// HTTP status code of the failed response.
	status: number
	// Optional structured error body; the nested message, when present,
	// is preferred over the top-level one.
	body?: { error?: { message?: string } }
	// Top-level error description (always present).
	message: string
}
export class AzureAiHandler implements ApiHandler, SingleCompletionHandler { export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
private options: ApiHandlerOptions private options: ApiHandlerOptions
private client: ReturnType<typeof ModelClient> private client: ReturnType<typeof ModelClient>
@@ -117,21 +127,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
} }
} }
} catch (error) { } catch (error) {
if (error instanceof Error) { const azureError = error as AzureErrorResponse
// Handle Azure-specific error cases
if (isUnexpected(error) && error.status === 429) { // Check for specific error status codes
throw new Error("Azure AI rate limit exceeded. Please try again later.") if (azureError.status === 429) {
throw new Error("Azure AI rate limit exceeded. Please try again later")
} }
if (isUnexpected(error)) { if (azureError.status === 422) {
// Use proper Model Inference error handling const message = azureError.body?.error?.message || azureError.message
const message = error.body?.error?.message || error.message
if (error.status === 422) {
throw new Error(`Request validation failed: ${message}`) throw new Error(`Request validation failed: ${message}`)
} }
}
throw new Error(`Azure AI error: ${error.message}`) throw new Error(`Azure AI error: ${azureError.message}`)
}
throw error
} }
} }
@@ -167,20 +174,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
return response.body.choices[0]?.message?.content || "" return response.body.choices[0]?.message?.content || ""
} catch (error) { } catch (error) {
if (error instanceof Error) { const azureError = error as AzureErrorResponse
// Handle Azure-specific error cases
if (isUnexpected(error) && error.status === 429) { // Check for specific error status codes
throw new Error("Azure AI rate limit exceeded. Please try again later.") if (azureError.status === 429) {
throw new Error("Azure AI rate limit exceeded. Please try again later")
} }
if (isUnexpected(error)) { if (azureError.status === 422) {
const message = error.body?.error?.message || error.message const message = azureError.body?.error?.message || azureError.message
if (error.status === 422) {
throw new Error(`Request validation failed: ${message}`) throw new Error(`Request validation failed: ${message}`)
} }
}
throw new Error(`Azure AI completion error: ${error.message}`) throw new Error(`Azure AI completion error: ${azureError.message}`)
}
throw error
} }
} }
} }

View File

@@ -621,6 +621,16 @@ export const deepSeekModels = {
// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
/**
 * Fallback {@link ModelInfo} used when a user configures Azure AI Model
 * Inference with a model we have no specific metadata for.
 *
 * NOTE(review): these are optimistic defaults — `supportsImages` and
 * `supportsComputerUse` are assumed true for an arbitrary deployment; confirm
 * callers degrade gracefully when the underlying model lacks these
 * capabilities.
 */
export const azureAiModelInfoSaneDefaults: ModelInfo = {
	maxTokens: -1, // Dynamic based on model
	contextWindow: 128_000, // Conservative default
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: false,
	// Pricing intentionally zeroed: actual cost depends on the user's Azure
	// deployment and cannot be known here.
	inputPrice: 0,
	outputPrice: 0,
	description: "Azure AI Model Inference allows you to deploy and use any model through Azure's inference service.",
}
// Mistral // Mistral
// https://docs.mistral.ai/getting-started/models/models_overview/ // https://docs.mistral.ai/getting-started/models/models_overview/
@@ -648,15 +658,16 @@ export const unboundModels = {
"mistral/codestral-latest": mistralModels["codestral-latest"], "mistral/codestral-latest": mistralModels["codestral-latest"],
} as const satisfies Record<string, ModelInfo> } as const satisfies Record<string, ModelInfo>
// Azure AI // Azure AI Model Inference Configuration
export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
export interface AzureDeploymentConfig { export interface AzureDeploymentConfig {
name: string name: string
apiVersion: string apiVersion: string
modelMeshName?: string // For Model-Mesh deployments modelMeshName?: string // For Model-Mesh deployments
} }
// Azure OpenAI Models
export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = { export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = {
"azure-gpt-35": { "azure-gpt-35": {
maxTokens: 4096, maxTokens: 4096,