fix: update Azure AI handler to improve error handling and support new response format

pacnpal
2025-02-02 11:32:31 -05:00
parent bc4ac4f2f8
commit cfc8c08ec6
4 changed files with 401 additions and 463 deletions


@@ -1,337 +0,0 @@
# Azure AI Inference Provider Implementation Plan
## Overview
This document outlines the implementation plan for adding Azure AI Inference support as a new provider in `src/api/providers/`. Although Azure AI uses OpenAI's API format as a base, there are significant implementation differences that must be accounted for.
## Key Differences from OpenAI
### Endpoint Structure
- OpenAI: `https://api.openai.com/v1/chat/completions`
- Azure: `https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version={api-version}`
### Authentication
- OpenAI: Uses `Authorization: Bearer sk-...`
- Azure: Uses `api-key: {key}`
### Request Format
- OpenAI: Requires `model` field in request body
- Azure: Omits `model` from body (uses deployment name in URL instead)
### Special Considerations
- The API version is required as a URL query parameter
- Model-Mesh deployments require an additional header: `x-ms-model-mesh-model-name`
- Different features require different API versions (e.g., 2023-12-01-preview, 2024-02-15-preview)
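To make these differences concrete, here is a minimal sketch of a raw Azure request using Node's built-in `fetch`. It is illustrative only — the resource name, deployment name, and `AZURE_AI_KEY` environment variable are placeholders, not part of this codebase:
```typescript
// Illustrative only — resource, deployment, and key are placeholders.
const endpoint = "https://my-resource.openai.azure.com"
const deployment = "gpt-4"
const apiVersion = "2024-02-15-preview"

async function azureChat(prompt: string): Promise<string> {
	// Azure: deployment in the path, api-version as a query parameter, `api-key` header,
	// and no `model` field in the body (OpenAI instead uses a Bearer token and a `model` field).
	const url = `${endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
	const res = await fetch(url, {
		method: "POST",
		headers: { "api-key": process.env.AZURE_AI_KEY ?? "", "Content-Type": "application/json" },
		body: JSON.stringify({ messages: [{ role: "user", content: prompt }] }),
	})
	if (!res.ok) {
		throw new Error(`Azure AI request failed with status ${res.status}`)
	}
	const data = (await res.json()) as any
	return data.choices[0]?.message?.content ?? ""
}
```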
## Dependencies
```typescript
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI, { AzureOpenAI } from "openai"
import {
	ApiHandlerOptions,
	ModelInfo,
	azureAiDefaultModelId,
	AzureAiModelId,
	azureAiModels,
} from "../../shared/api"
import { ApiHandler, SingleCompletionHandler } from "../index"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
```
## Configuration (shared/api.ts)
```typescript
export type AzureAiModelId = "gpt-35-turbo" | "gpt-4" | "gpt-4-turbo"

export interface AzureDeploymentConfig {
	name: string
	apiVersion: string
	modelMeshName?: string // For Model-Mesh deployments
}

export const azureAiModels: Record<AzureAiModelId, ModelInfo & { defaultDeployment: AzureDeploymentConfig }> = {
	"gpt-35-turbo": {
		maxTokens: 4096,
		contextWindow: 16385,
		supportsPromptCache: true,
		inputPrice: 0.0015,
		outputPrice: 0.002,
		defaultDeployment: {
			name: "gpt-35-turbo",
			apiVersion: "2024-02-15-preview",
		},
	},
	"gpt-4": {
		maxTokens: 8192,
		contextWindow: 8192,
		supportsPromptCache: true,
		inputPrice: 0.03,
		outputPrice: 0.06,
		defaultDeployment: {
			name: "gpt-4",
			apiVersion: "2024-02-15-preview",
		},
	},
	"gpt-4-turbo": {
		maxTokens: 4096,
		contextWindow: 128000,
		supportsPromptCache: true,
		inputPrice: 0.01,
		outputPrice: 0.03,
		defaultDeployment: {
			name: "gpt-4-turbo",
			apiVersion: "2024-02-15-preview",
		},
	},
}

export const azureAiDefaultModelId: AzureAiModelId = "gpt-35-turbo"
```
## Implementation (src/api/providers/azure-ai.ts)
```typescript
export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
	private options: ApiHandlerOptions
	private client: AzureOpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options

		if (!options.azureAiEndpoint) {
			throw new Error("Azure AI endpoint is required")
		}
		if (!options.azureAiKey) {
			throw new Error("Azure AI key is required")
		}

		const deployment = this.getDeploymentConfig()
		this.client = new AzureOpenAI({
			apiKey: options.azureAiKey,
			endpoint: options.azureAiEndpoint,
			deployment: deployment.name,
			apiVersion: deployment.apiVersion,
			// The OpenAI SDK accepts custom headers via `defaultHeaders`
			defaultHeaders: deployment.modelMeshName
				? { "x-ms-model-mesh-model-name": deployment.modelMeshName }
				: undefined,
		})
	}

	private getDeploymentConfig(): AzureDeploymentConfig {
		const model = this.getModel()
		const defaultConfig = azureAiModels[model.id].defaultDeployment

		// Override with user-provided deployment settings if available
		const deploymentName = this.options.azureAiDeployments?.[model.id]?.name || defaultConfig.name
		const apiVersion = this.options.azureAiDeployments?.[model.id]?.apiVersion || defaultConfig.apiVersion
		const modelMeshName = this.options.azureAiDeployments?.[model.id]?.modelMeshName

		return {
			name: deploymentName,
			apiVersion,
			modelMeshName,
		}
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		const modelInfo = this.getModel().info
		const systemMessage = { role: "system" as const, content: systemPrompt }

		// Note: the model parameter is omitted because it is determined by the deployment
		const requestOptions: Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, "model"> = {
			messages: [systemMessage, ...convertToOpenAiMessages(messages)],
			temperature: 0,
			stream: true,
			max_tokens: modelInfo.maxTokens,
		}

		try {
			const stream = await this.client.chat.completions.create(requestOptions as any)

			for await (const chunk of stream) {
				const delta = chunk.choices[0]?.delta
				if (delta?.content) {
					yield {
						type: "text",
						text: delta.content,
					}
				}

				if (chunk.usage) {
					yield {
						type: "usage",
						inputTokens: chunk.usage.prompt_tokens || 0,
						outputTokens: chunk.usage.completion_tokens || 0,
					}
				}
			}
		} catch (error) {
			// Surface the Azure error code alongside the message
			if (error instanceof Error) {
				const azureError = error as any
				throw new Error(`Azure AI error (${azureError.code || "Unknown"}): ${azureError.message}`)
			}
			throw error
		}
	}

	getModel(): { id: AzureAiModelId; info: ModelInfo } {
		const modelId = this.options.apiModelId
		if (modelId && modelId in azureAiModels) {
			const id = modelId as AzureAiModelId
			return { id, info: azureAiModels[id] }
		}
		return { id: azureAiDefaultModelId, info: azureAiModels[azureAiDefaultModelId] }
	}

	async completePrompt(prompt: string): Promise<string> {
		try {
			// Note: the model parameter is omitted because it is determined by the deployment
			const response = await this.client.chat.completions.create({
				messages: [{ role: "user", content: prompt }],
				temperature: 0,
			} as any)
			return response.choices[0]?.message?.content || ""
		} catch (error) {
			// Surface the Azure error code alongside the message
			if (error instanceof Error) {
				const azureError = error as any
				throw new Error(`Azure AI completion error (${azureError.code || "Unknown"}): ${azureError.message}`)
			}
			throw error
		}
	}
}
```
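For orientation, here is a hedged sketch of a call site (the endpoint, key, and prompt values are placeholders; the chunk shapes match the `yield`s above):
```typescript
// Hypothetical call site — option values are placeholders.
const handler = new AzureAiHandler({
	apiModelId: "gpt-4",
	azureAiEndpoint: "https://my-resource.openai.azure.com",
	azureAiKey: process.env.AZURE_AI_KEY ?? "",
})

for await (const chunk of handler.createMessage("You are a concise assistant.", [
	{ role: "user", content: "Summarize the Azure AI provider plan." },
])) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text)
	} else if (chunk.type === "usage") {
		console.log(`\n[tokens] in=${chunk.inputTokens} out=${chunk.outputTokens}`)
	}
}
```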
## Required Updates to ApiHandlerOptions
Add the following to the `ApiHandlerOptions` interface in `shared/api.ts`:
```typescript
azureAiEndpoint?: string
azureAiKey?: string
azureAiDeployments?: {
	[key in AzureAiModelId]?: {
		name: string
		apiVersion: string
		modelMeshName?: string
	}
}
```
## Testing Plan
1. Create a `__tests__` directory with `azure-ai.test.ts` (a fleshed-out example follows the skeleton below):
```typescript
describe("AzureAiHandler", () => {
	// Test URL construction
	test("constructs correct Azure endpoint URL", () => {})

	// Test authentication
	test("sets correct authentication headers", () => {})

	// Test deployment configuration
	test("uses correct deployment names", () => {})
	test("handles Model-Mesh configuration", () => {})

	// Test error handling
	test("handles Azure-specific error format", () => {})

	// Test request/response format
	test("omits model from request body", () => {})
	test("handles Azure response format", () => {})
})
```
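As a sketch of how one of these could be fleshed out — assuming Jest, with the mock reaching into the handler's private `client` via index access (illustrative, not the final test):
```typescript
import { AzureAiHandler } from "../azure-ai"

test("handles Azure-specific error format", async () => {
	const handler = new AzureAiHandler({
		apiModelId: "gpt-4",
		azureAiEndpoint: "https://my-resource.openai.azure.com",
		azureAiKey: "test-key",
	})

	// Illustrative: force the underlying client call to reject with an Azure-style error.
	const azureError = Object.assign(new Error("Invalid request"), { code: "InvalidRequest" })
	jest.spyOn(handler["client"].chat.completions, "create").mockRejectedValue(azureError as never)

	await expect(handler.completePrompt("hi")).rejects.toThrow(
		"Azure AI completion error (InvalidRequest): Invalid request",
	)
})
```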
## Integration Steps
1. Add Azure AI models and types to shared/api.ts
2. Create azure-ai.ts provider implementation
3. Add provider tests
4. Update API handler options
5. Add deployment configuration support
6. Implement Azure-specific error handling
7. Test with real Azure AI endpoints
## Error Handling
Azure returns errors in a specific format:
```typescript
interface AzureError {
	code: string // e.g., "InternalServerError", "InvalidRequest"
	message: string
	target?: string
	details?: Array<{
		code: string
		message: string
	}>
}
```
Implementation should:
- Parse Azure error format
- Include error codes in messages
- Handle deployment-specific errors
- Provide clear upgrade paths for API version issues
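As a sketch, error formatting along these lines would satisfy the first two points (the helper name is ours, not existing code):
```typescript
// Hypothetical helper — formats the AzureError shape defined above into a single message.
function formatAzureError(error: AzureError): string {
	const target = error.target ? ` (target: ${error.target})` : ""
	const details = error.details?.map((d) => `${d.code}: ${d.message}`).join("; ")
	return `Azure AI error (${error.code})${target}: ${error.message}${details ? ` [${details}]` : ""}`
}
```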
## Documentation Updates
1. Add an Azure AI configuration section to `README.md` covering:
- Endpoint configuration
- Authentication setup
- Deployment mapping
- API version selection
- Model-Mesh support
2. Document configuration examples:
```typescript
{
	azureAiEndpoint: "https://your-resource.openai.azure.com",
	azureAiKey: "your-api-key",
	azureAiDeployments: {
		"gpt-4": {
			name: "your-gpt4-deployment",
			apiVersion: "2024-02-15-preview",
			modelMeshName: "optional-model-mesh-name"
		}
	}
}
```
## Future Improvements
1. Support for Azure-specific features:
- Fine-tuning endpoints
- Custom deployment configurations
- Managed identity authentication
2. Performance optimizations:
- Connection pooling
- Regional endpoint selection
- Automatic API version negotiation
3. Advanced features:
- Response format control
- Function calling support
- Vision model support

package-lock.json (generated, 249 lines changed)

@@ -12,6 +12,8 @@
 				"@anthropic-ai/sdk": "^0.26.0",
 				"@anthropic-ai/vertex-sdk": "^0.4.1",
 				"@aws-sdk/client-bedrock-runtime": "^3.706.0",
+				"@azure-rest/ai-inference": "^1.0.0-beta.5",
+				"@azure/core-auth": "^1.5.0",
 				"@google/generative-ai": "^0.18.0",
 				"@mistralai/mistralai": "^1.3.6",
 				"@modelcontextprotocol/sdk": "^1.0.1",
@@ -2222,6 +2224,253 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="
},
"node_modules/@azure-rest/ai-inference": {
"version": "1.0.0-beta.5",
"resolved": "https://registry.npmjs.org/@azure-rest/ai-inference/-/ai-inference-1.0.0-beta.5.tgz",
"integrity": "sha512-G6tAWR7DGHTfWx5+N5csTWX304lWNWeePXHx1LBYKLhTeonNTY4OrpqC6DD12oPxLuK0WbEJ3JXK/A3HdKj+BA==",
"license": "MIT",
"dependencies": {
"@azure-rest/core-client": "^2.1.0",
"@azure/abort-controller": "^1.0.0",
"@azure/core-auth": "^1.7.2",
"@azure/core-lro": "^2.6.0",
"@azure/core-rest-pipeline": "^1.14.0",
"@azure/core-tracing": "^1.2.0",
"@azure/logger": "^1.0.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure-rest/ai-inference/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure-rest/core-client": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/@azure-rest/core-client/-/core-client-2.3.2.tgz",
"integrity": "sha512-rS8Z6iNCaGYQZz96SdUpRw75j3b5vRpEJqocSJwnuByrydirubjUkY54pThm7GshRBgh7GdMK4hGOZA6BSeRaw==",
"license": "MIT",
"dependencies": {
"@azure/abort-controller": "^2.0.0",
"@azure/core-auth": "^1.3.0",
"@azure/core-rest-pipeline": "^1.5.0",
"@azure/core-tracing": "^1.0.1",
"@azure/core-util": "^1.0.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure-rest/core-client/node_modules/@azure/abort-controller": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
"integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure-rest/core-client/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/abort-controller": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-1.1.0.tgz",
"integrity": "sha512-TrRLIoSQVzfAJX9H1JeFjzAoDGcoK1IYX1UImfceTZpsyYfWr09Ss1aHW1y5TrrR3iq6RZLBwJ3E24uwPhwahw==",
"license": "MIT",
"dependencies": {
"tslib": "^2.2.0"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/@azure/abort-controller/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/core-auth": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.9.0.tgz",
"integrity": "sha512-FPwHpZywuyasDSLMqJ6fhbOK3TqUdviZNF8OqRGA4W5Ewib2lEEZ+pBsYcBa88B2NGO/SEnYPGhyBqNlE8ilSw==",
"license": "MIT",
"dependencies": {
"@azure/abort-controller": "^2.0.0",
"@azure/core-util": "^1.11.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-auth/node_modules/@azure/abort-controller": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
"integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-auth/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/core-lro": {
"version": "2.7.2",
"resolved": "https://registry.npmjs.org/@azure/core-lro/-/core-lro-2.7.2.tgz",
"integrity": "sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw==",
"license": "MIT",
"dependencies": {
"@azure/abort-controller": "^2.0.0",
"@azure/core-util": "^1.2.0",
"@azure/logger": "^1.0.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-lro/node_modules/@azure/abort-controller": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
"integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-lro/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/core-rest-pipeline": {
"version": "1.18.2",
"resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.18.2.tgz",
"integrity": "sha512-IkTf/DWKyCklEtN/WYW3lqEsIaUDshlzWRlZNNwSYtFcCBQz++OtOjxNpm8rr1VcbMS6RpjybQa3u6B6nG0zNw==",
"license": "MIT",
"dependencies": {
"@azure/abort-controller": "^2.0.0",
"@azure/core-auth": "^1.8.0",
"@azure/core-tracing": "^1.0.1",
"@azure/core-util": "^1.11.0",
"@azure/logger": "^1.0.0",
"http-proxy-agent": "^7.0.0",
"https-proxy-agent": "^7.0.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-rest-pipeline/node_modules/@azure/abort-controller": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
"integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-rest-pipeline/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/core-tracing": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.2.0.tgz",
"integrity": "sha512-UKTiEJPkWcESPYJz3X5uKRYyOcJD+4nYph+KpfdPRnQJVrZfk0KJgdnaAWKfhsBBtAf/D58Az4AvCJEmWgIBAg==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-tracing/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/core-util": {
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/@azure/core-util/-/core-util-1.11.0.tgz",
"integrity": "sha512-DxOSLua+NdpWoSqULhjDyAZTXFdP/LKkqtYuxxz1SCN289zk3OG8UOpnCQAz/tygyACBtWp/BoO72ptK7msY8g==",
"license": "MIT",
"dependencies": {
"@azure/abort-controller": "^2.0.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-util/node_modules/@azure/abort-controller": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
"integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/core-util/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@azure/logger": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.1.4.tgz",
"integrity": "sha512-4IXXzcCdLdlXuCG+8UKEwLA1T1NHqUfanhXYHiQTn+6sfWCZXduqbtXDGceg3Ce5QxTGo7EqmbV6Bi+aqKuClQ==",
"license": "MIT",
"dependencies": {
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@azure/logger/node_modules/tslib": {
"version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/@babel/code-frame": {
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",

package.json

@@ -271,7 +271,7 @@
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.26.0",
 		"@anthropic-ai/vertex-sdk": "^0.4.1",
-		"@azure-rest/ai-inference": "^1.0.0",
+		"@azure-rest/ai-inference": "^1.0.0-beta.5",
 		"@azure/core-auth": "^1.5.0",
 		"@aws-sdk/client-bedrock-runtime": "^3.706.0",
 		"@google/generative-ai": "^0.18.0",

src/api/providers/azure-ai.ts

@@ -8,11 +8,12 @@ import {
 	azureAiDefaultModelId,
 	AzureAiModelId,
 	azureAiModels,
-	AzureDeploymentConfig
+	AzureDeploymentConfig,
 } from "../../shared/api"
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { createSseStream } from "@azure/core-rest-pipeline"
 
 export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
@@ -29,10 +30,7 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			throw new Error("Azure AI key is required")
 		}
 
-		this.client = new ModelClient(
-			options.azureAiEndpoint,
-			new AzureKeyCredential(options.azureAiKey)
-		)
+		this.client = new ModelClient(options.azureAiEndpoint, new AzureKeyCredential(options.azureAiKey))
 	}
 
 	private getDeploymentConfig(): AzureDeploymentConfig {
@@ -42,26 +40,32 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 		return {
 			name: this.options.azureAiDeployments?.[model.id]?.name || defaultConfig.name,
 			apiVersion: this.options.azureAiDeployments?.[model.id]?.apiVersion || defaultConfig.apiVersion,
-			modelMeshName: this.options.azureAiDeployments?.[model.id]?.modelMeshName
+			modelMeshName: this.options.azureAiDeployments?.[model.id]?.modelMeshName,
 		}
 	}
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelInfo = this.getModel().info
-		const chatMessages = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages)
-		]
+		const chatMessages = [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)]
 
 		try {
-			const response = await this.client.path("/chat/completions").post({
+			const response = await this.client
+				.path("/chat/completions")
+				.post({
 				body: {
 					messages: chatMessages,
 					temperature: 0,
 					stream: true,
-					max_tokens: modelInfo.maxTokens
-				}
-			}).asNodeStream()
+					max_tokens: modelInfo.maxTokens,
+					response_format: { type: "text" }, // Ensure text format for chat
+				},
+				headers: this.getDeploymentConfig().modelMeshName
+					? {
+							"x-ms-model-mesh-model-name": this.getDeploymentConfig().modelMeshName,
+						}
+					: undefined,
+			})
+			.asNodeStream()
 
 			const stream = response.body
 			if (!stream) {
@@ -72,19 +76,21 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 				throw new Error(`Failed to get chat completions: ${response.body.error}`)
 			}
 
-			for await (const chunk of stream) {
-				if (chunk.toString() === 'data: [DONE]') {
+			const sseStream = createSseStream(stream)
+
+			for await (const event of sseStream) {
+				if (event.data === "[DONE]") {
 					return
 				}
 
 				try {
-					const data = JSON.parse(chunk.toString().replace('data: ', ''))
+					const data = JSON.parse(event.data)
 					const delta = data.choices[0]?.delta
 
 					if (delta?.content) {
 						yield {
 							type: "text",
-							text: delta.content
+							text: delta.content,
 						}
 					}
 
@@ -92,7 +98,7 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 						yield {
 							type: "usage",
 							inputTokens: data.usage.prompt_tokens || 0,
-							outputTokens: data.usage.completion_tokens || 0
+							outputTokens: data.usage.completion_tokens || 0,
 						}
 					}
 				} catch (e) {
@@ -102,9 +108,16 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			}
 		} catch (error) {
 			if (error instanceof Error) {
-				if ('status' in error && error.status === 429) {
+				// Handle Azure-specific error cases
+				if ("status" in error && error.status === 429) {
 					throw new Error("Azure AI rate limit exceeded. Please try again later.")
 				}
+				if ("status" in error && error.status === 400) {
+					const azureError = error as any
+					if (azureError.body?.error?.code === "ContentFilterError") {
+						throw new Error("Content was flagged by Azure AI content safety filters")
+					}
+				}
 				throw new Error(`Azure AI error: ${error.message}`)
 			}
 			throw error
@@ -125,8 +138,14 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			const response = await this.client.path("/chat/completions").post({
 				body: {
 					messages: [{ role: "user", content: prompt }],
-					temperature: 0
-				}
+					temperature: 0,
+					response_format: { type: "text" },
+				},
+				headers: this.getDeploymentConfig().modelMeshName
+					? {
+							"x-ms-model-mesh-model-name": this.getDeploymentConfig().modelMeshName,
+						}
+					: undefined,
 			})
 
 			if (isUnexpected(response)) {
@@ -136,9 +155,16 @@ export class AzureAiHandler implements ApiHandler, SingleCompletionHandler {
 			return response.body.choices[0]?.message?.content || ""
 		} catch (error) {
 			if (error instanceof Error) {
-				if ('status' in error && error.status === 429) {
+				// Handle Azure-specific error cases
+				if ("status" in error && error.status === 429) {
 					throw new Error("Azure AI rate limit exceeded. Please try again later.")
 				}
+				if ("status" in error && error.status === 400) {
+					const azureError = error as any
+					if (azureError.body?.error?.code === "ContentFilterError") {
+						throw new Error("Content was flagged by Azure AI content safety filters")
+					}
+				}
 				throw new Error(`Azure AI completion error: ${error.message}`)
 			}
 			throw error