feat: add default model information for Azure AI Model Inference configuration

pacnpal
2025-02-02 20:29:46 -05:00
parent 53e307c8f3
commit b6e8db8145
2 changed files with 46 additions and 30 deletions


@@ -9,6 +9,16 @@ import { ApiStream } from "../transform/stream"
 const DEFAULT_API_VERSION = "2024-05-01-preview"
 
+interface AzureErrorResponse {
+	status: number
+	body?: {
+		error?: {
+			message?: string
+		}
+	}
+	message: string
+}
+
 export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: ReturnType<typeof ModelClient>
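
For context, a value matching the new AzureErrorResponse shape could look like the following; the concrete status code and message strings are illustrative, not taken from the commit:

// Hypothetical 422 payload, shaped the way the interface models it.
const validationFailure: AzureErrorResponse = {
	status: 422,
	body: {
		error: {
			message: "Extra inputs are not permitted",
		},
	},
	message: "Unprocessable Entity",
}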
@@ -117,21 +127,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			}
 		}
 	} catch (error) {
-		if (error instanceof Error) {
-			// Handle Azure-specific error cases
-			if (isUnexpected(error) && error.status === 429) {
-				throw new Error("Azure AI rate limit exceeded. Please try again later.")
-			}
-			if (isUnexpected(error)) {
-				// Use proper Model Inference error handling
-				const message = error.body?.error?.message || error.message
-				if (error.status === 422) {
-					throw new Error(`Request validation failed: ${message}`)
-				}
-			}
-			throw new Error(`Azure AI error: ${error.message}`)
-		}
-		throw error
+		const azureError = error as AzureErrorResponse
+
+		// Check for specific error status codes
+		if (azureError.status === 429) {
+			throw new Error("Azure AI rate limit exceeded. Please try again later")
+		}
+
+		if (azureError.status === 422) {
+			const message = azureError.body?.error?.message || azureError.message
+			throw new Error(`Request validation failed: ${message}`)
+		}
+
+		throw new Error(`Azure AI error: ${azureError.message}`)
 	}
 }
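
Both catch blocks in this commit apply the same status-code mapping. A minimal sketch of that mapping factored into a helper; the toUserFacingError name is hypothetical, and the commit keeps the logic inline:

// Sketch: map an unknown thrown value to a user-facing Error.
function toUserFacingError(error: unknown, prefix: string): Error {
	const azureError = error as AzureErrorResponse
	if (azureError.status === 429) {
		return new Error("Azure AI rate limit exceeded. Please try again later")
	}
	if (azureError.status === 422) {
		const message = azureError.body?.error?.message || azureError.message
		return new Error(`Request validation failed: ${message}`)
	}
	return new Error(`${prefix}: ${azureError.message}`)
}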
@@ -167,20 +174,18 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 		return response.body.choices[0]?.message?.content || ""
 	} catch (error) {
-		if (error instanceof Error) {
-			// Handle Azure-specific error cases
-			if (isUnexpected(error) && error.status === 429) {
-				throw new Error("Azure AI rate limit exceeded. Please try again later.")
-			}
-			if (isUnexpected(error)) {
-				const message = error.body?.error?.message || error.message
-				if (error.status === 422) {
-					throw new Error(`Request validation failed: ${message}`)
-				}
-			}
-			throw new Error(`Azure AI completion error: ${error.message}`)
-		}
-		throw error
+		const azureError = error as AzureErrorResponse
+
+		// Check for specific error status codes
+		if (azureError.status === 429) {
+			throw new Error("Azure AI rate limit exceeded. Please try again later")
+		}
+
+		if (azureError.status === 422) {
+			const message = azureError.body?.error?.message || azureError.message
+			throw new Error(`Request validation failed: ${message}`)
+		}
+
+		throw new Error(`Azure AI completion error: ${azureError.message}`)
 	}
 }
 }
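
After this change a caller only ever sees plain Error instances carrying user-facing messages. A sketch of how that might be consumed, assuming the method wrapping the second catch block is completePrompt and that the handler has already been constructed:

// Hypothetical caller of the completion path.
async function demo(handler: AzureAiHandler): Promise<void> {
	try {
		console.log(await handler.completePrompt("Hello"))
	} catch (err) {
		if (err instanceof Error && err.message.startsWith("Azure AI rate limit")) {
			// 429 was mapped to this message above; back off before retrying.
		}
		throw err
	}
}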


@@ -621,6 +621,16 @@ export const deepSeekModels = {
 // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
 export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
 
+export const azureAiModelInfoSaneDefaults: ModelInfo = {
+	maxTokens: -1, // Dynamic based on model
+	contextWindow: 128_000, // Conservative default
+	supportsImages: true,
+	supportsComputerUse: true,
+	supportsPromptCache: false,
+	inputPrice: 0,
+	outputPrice: 0,
+	description: "Azure AI Model Inference allows you to deploy and use any model through Azure's inference service.",
+}
 // Mistral
 // https://docs.mistral.ai/getting-started/models/models_overview/
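
One way these defaults might be consumed is as a fallback when a configured deployment is not one of the statically known models defined below; the getAzureAiModelInfo helper is hypothetical, not part of this commit:

// Hypothetical lookup with fallback to the sane defaults.
function getAzureAiModelInfo(modelId: string): ModelInfo {
	return modelId in azureAiModels ? azureAiModels[modelId as AzureAiModelId] : azureAiModelInfoSaneDefaults
}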
@@ -648,15 +658,16 @@ export const unboundModels = {
"mistral/codestral-latest": mistralModels["codestral-latest"],
} as const satisfies Record<string, ModelInfo>
// Azure AI
export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
// Azure AI Model Inference Configuration
export interface AzureDeploymentConfig {
name: string
apiVersion: string
modelMeshName?: string // For Model-Mesh deployments
}
// Azure OpenAI Models
export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo"
export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = {
"azure-gpt-35": {
maxTokens: 4096,