diff --git a/src/shared/api.ts b/src/shared/api.ts index f54c54e..704e7f3 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -64,15 +64,13 @@ export interface ApiHandlerOptions { azureAiEndpoint?: string azureAiKey?: string azureAiModelConfig?: ModelInfo - azureAiDeployments?: - | { - [key: string]: { - name: string - apiVersion: string - modelMeshName?: string - } - } - | undefined + azureAiDeployments?: { + [key: string]: { + name: string + apiVersion: string + modelMeshName?: string + } + } } export type ApiConfiguration = ApiHandlerOptions & { @@ -81,13 +79,12 @@ export type ApiConfiguration = ApiHandlerOptions & { } // Models - export interface ModelInfo { maxTokens?: number contextWindow: number supportsImages?: boolean supportsComputerUse?: boolean - supportsPromptCache: boolean // this value is hardcoded for now + supportsPromptCache: boolean inputPrice?: number outputPrice?: number cacheWritesPrice?: number @@ -96,532 +93,17 @@ export interface ModelInfo { reasoningEffort?: "low" | "medium" | "high" } -// Anthropic -// https://docs.anthropic.com/en/docs/about-claude/models -export type AnthropicModelId = keyof typeof anthropicModels -export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-20241022" -export const anthropicModels = { - "claude-3-5-sonnet-20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, // $3 per million input tokens - outputPrice: 15.0, // $15 per million output tokens - cacheWritesPrice: 3.75, // $3.75 per million tokens - cacheReadsPrice: 0.3, // $0.30 per million tokens - }, - "claude-3-5-haiku-20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: true, - inputPrice: 1.0, - outputPrice: 5.0, - cacheWritesPrice: 1.25, - cacheReadsPrice: 0.1, - }, - "claude-3-opus-20240229": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 15.0, - outputPrice: 75.0, - cacheWritesPrice: 18.75, - cacheReadsPrice: 1.5, - }, - "claude-3-haiku-20240307": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: true, - inputPrice: 0.25, - outputPrice: 1.25, - cacheWritesPrice: 0.3, - cacheReadsPrice: 0.03, - }, -} as const satisfies Record // as const assertion makes the object deeply readonly - -// AWS Bedrock -// https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html -export interface MessageContent { - type: "text" | "image" | "video" | "tool_use" | "tool_result" - text?: string - source?: { - type: "base64" - data: string | Uint8Array // string for Anthropic, Uint8Array for Bedrock - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - } - // Video specific fields - format?: string - s3Location?: { - uri: string - bucketOwner?: string - } - // Tool use and result fields - toolUseId?: string - name?: string - input?: any - output?: any // Used for tool_result type +// Azure AI Model Inference Configuration +export interface AzureDeploymentConfig { + name: string + apiVersion: string // Azure AI Inference API version (e.g. 2024-05-01-preview) + modelMeshName?: string // Model-Mesh deployment name if using Model-Mesh } -export type BedrockModelId = keyof typeof bedrockModels -export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0" -export const bedrockModels = { - "amazon.nova-pro-v1:0": { - maxTokens: 5000, - contextWindow: 300_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.8, - outputPrice: 3.2, - cacheWritesPrice: 0.8, // per million tokens - cacheReadsPrice: 0.2, // per million tokens - }, - "amazon.nova-lite-v1:0": { - maxTokens: 5000, - contextWindow: 300_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.06, - outputPrice: 0.024, - cacheWritesPrice: 0.06, // per million tokens - cacheReadsPrice: 0.015, // per million tokens - }, - "amazon.nova-micro-v1:0": { - maxTokens: 5000, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.035, - outputPrice: 0.14, - cacheWritesPrice: 0.035, // per million tokens - cacheReadsPrice: 0.00875, // per million tokens - }, - "anthropic.claude-3-5-sonnet-20241022-v2:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, // per million tokens - cacheReadsPrice: 0.3, // per million tokens - }, - "anthropic.claude-3-5-haiku-20241022-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.0, - outputPrice: 5.0, - cacheWritesPrice: 1.0, - cacheReadsPrice: 0.08, - }, - "anthropic.claude-3-5-sonnet-20240620-v1:0": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "anthropic.claude-3-opus-20240229-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 15.0, - outputPrice: 75.0, - }, - "anthropic.claude-3-sonnet-20240229-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "anthropic.claude-3-haiku-20240307-v1:0": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.25, - outputPrice: 1.25, - }, - "meta.llama3-3-70b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - }, - "meta.llama3-2-90b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - }, - "meta.llama3-2-11b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: true, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.16, - outputPrice: 0.16, - }, - "meta.llama3-2-3b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.15, - }, - "meta.llama3-2-1b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.1, - outputPrice: 0.1, - }, - "meta.llama3-1-405b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 2.4, - outputPrice: 2.4, - }, - "meta.llama3-1-70b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 128_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.72, - outputPrice: 0.72, - }, - "meta.llama3-1-8b-instruct-v1:0": { - maxTokens: 8192, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.22, - outputPrice: 0.22, - }, - "meta.llama3-70b-instruct-v1:0": { - maxTokens: 2048, - contextWindow: 8_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 2.65, - outputPrice: 3.5, - }, - "meta.llama3-8b-instruct-v1:0": { - maxTokens: 2048, - contextWindow: 4_000, - supportsImages: false, - supportsComputerUse: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.6, - }, -} as const satisfies Record - -// Glama -// https://glama.ai/models -export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet" -export const glamaDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - description: - "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", -} - -// OpenRouter -// https://openrouter.ai/models?order=newest&supported_parameters=tools -export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels -export const openRouterDefaultModelInfo: ModelInfo = { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: true, - inputPrice: 3.0, - outputPrice: 15.0, - cacheWritesPrice: 3.75, - cacheReadsPrice: 0.3, - description: - "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._", -} - -// Vertex AI -// https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude -export type VertexModelId = keyof typeof vertexModels -export const vertexDefaultModelId: VertexModelId = "claude-3-5-sonnet-v2@20241022" -export const vertexModels = { - "claude-3-5-sonnet-v2@20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsComputerUse: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "claude-3-5-sonnet@20240620": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 3.0, - outputPrice: 15.0, - }, - "claude-3-5-haiku@20241022": { - maxTokens: 8192, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.0, - outputPrice: 5.0, - }, - "claude-3-opus@20240229": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 15.0, - outputPrice: 75.0, - }, - "claude-3-haiku@20240307": { - maxTokens: 4096, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.25, - outputPrice: 1.25, - }, -} as const satisfies Record - -export const openAiModelInfoSaneDefaults: ModelInfo = { - maxTokens: -1, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, -} - -// Gemini -// https://ai.google.dev/gemini-api/docs/models/gemini -export type GeminiModelId = keyof typeof geminiModels -export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-thinking-exp-01-21" -export const geminiModels = { - "gemini-2.0-flash-thinking-exp-01-21": { - maxTokens: 65_536, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-thinking-exp-1219": { - maxTokens: 8192, - contextWindow: 32_767, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-2.0-flash-exp": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-002": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-exp-0827": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-flash-8b-exp-0827": { - maxTokens: 8192, - contextWindow: 1_048_576, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-pro-002": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-1.5-pro-exp-0827": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, - "gemini-exp-1206": { - maxTokens: 8192, - contextWindow: 2_097_152, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0, - outputPrice: 0, - }, -} as const satisfies Record - -// OpenAI Native -// https://openai.com/api/pricing/ -export type OpenAiNativeModelId = keyof typeof openAiNativeModels -export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o" -export const openAiNativeModels = { - // don't support tool use yet - "o3-mini": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.1, - outputPrice: 4.4, - reasoningEffort: "medium", - }, - "o3-mini-high": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.1, - outputPrice: 4.4, - reasoningEffort: "high", - }, - "o3-mini-low": { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 1.1, - outputPrice: 4.4, - reasoningEffort: "low", - }, - o1: { - maxTokens: 100_000, - contextWindow: 200_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 15, - outputPrice: 60, - }, - "o1-preview": { - maxTokens: 32_768, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 15, - outputPrice: 60, - }, - "o1-mini": { - maxTokens: 65_536, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 1.1, - outputPrice: 4.4, - }, - "gpt-4o": { - maxTokens: 4_096, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 5, - outputPrice: 15, - }, - "gpt-4o-mini": { - maxTokens: 16_384, - contextWindow: 128_000, - supportsImages: true, - supportsPromptCache: false, - inputPrice: 0.15, - outputPrice: 0.6, - }, -} as const satisfies Record - -// DeepSeek -// https://platform.deepseek.com/docs/api -export type DeepSeekModelId = keyof typeof deepSeekModels -export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat" -export const deepSeekModels = { - "deepseek-chat": { - maxTokens: 8192, - contextWindow: 64_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.014, // $0.014 per million tokens - outputPrice: 0.28, // $0.28 per million tokens - description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`, - }, - "deepseek-reasoner": { - maxTokens: 8192, - contextWindow: 64_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.55, // $0.55 per million tokens - outputPrice: 2.19, // $2.19 per million tokens - description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`, - }, -} as const satisfies Record - -// Azure OpenAI -// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation -// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs +// Azure OpenAI API Version export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" + +// Azure AI Model Inference Defaults export const azureAiModelInfoSaneDefaults: ModelInfo = { maxTokens: -1, // Dynamic based on model contextWindow: 128_000, // Conservative default @@ -632,77 +114,3 @@ export const azureAiModelInfoSaneDefaults: ModelInfo = { outputPrice: 0, description: "Azure AI Model Inference allows you to deploy and use any model through Azure's inference service.", } - -// Mistral -// https://docs.mistral.ai/getting-started/models/models_overview/ -export type MistralModelId = keyof typeof mistralModels -export const mistralDefaultModelId: MistralModelId = "codestral-latest" -export const mistralModels = { - "codestral-latest": { - maxTokens: 32_768, - contextWindow: 256_000, - supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.3, - outputPrice: 0.9, - }, -} as const satisfies Record - -// Unbound Security -export type UnboundModelId = keyof typeof unboundModels -export const unboundDefaultModelId = "openai/gpt-4o" -export const unboundModels = { - "anthropic/claude-3-5-sonnet-20241022": anthropicModels["claude-3-5-sonnet-20241022"], - "openai/gpt-4o": openAiNativeModels["gpt-4o"], - "deepseek/deepseek-chat": deepSeekModels["deepseek-chat"], - "deepseek/deepseek-reasoner": deepSeekModels["deepseek-reasoner"], - "mistral/codestral-latest": mistralModels["codestral-latest"], -} as const satisfies Record - -// Azure AI Model Inference Configuration -export interface AzureDeploymentConfig { - name: string - apiVersion: string - modelMeshName?: string // For Model-Mesh deployments -} - -// Azure OpenAI Models -export type AzureAiModelId = "azure-gpt-35" | "azure-gpt-4" | "azure-gpt-4-turbo" - -export const azureAiModels: Record = { - "azure-gpt-35": { - maxTokens: 4096, - contextWindow: 16385, - supportsPromptCache: true, - inputPrice: 0.0015, - outputPrice: 0.002, - defaultDeployment: { - name: "azure-gpt-35", - apiVersion: "2024-02-15-preview", - }, - }, - "azure-gpt-4": { - maxTokens: 8192, - contextWindow: 8192, - supportsPromptCache: true, - inputPrice: 0.03, - outputPrice: 0.06, - defaultDeployment: { - name: "azure-gpt-4", - apiVersion: "2024-02-15-preview", - }, - }, - "azure-gpt-4-turbo": { - maxTokens: 4096, - contextWindow: 128000, - supportsPromptCache: true, - inputPrice: 0.01, - outputPrice: 0.03, - defaultDeployment: { - name: "azure-gpt-4-turbo", - apiVersion: "2024-02-15-preview", - }, - }, -} as const satisfies Record - -export const azureAiDefaultModelId: AzureAiModelId = "azure-gpt-35"