From dc617a92a9c4380a538b363a22a6c2ba9c2217a1 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Fri, 13 Sep 2024 00:49:04 -0400
Subject: [PATCH] Add prompt caching to OpenRouter; remove o1 models since they don't support tool use

---
 src/ClaudeDev.ts                         | 17 +++--
 src/api/openrouter.ts                    | 54 +++++++++++++
 src/shared/api.ts                        | 96 +++++++++++-------------
 webview-ui/src/components/ApiOptions.tsx |  7 +-
 webview-ui/src/components/TaskHeader.tsx |  4 +-
 5 files changed, 115 insertions(+), 63 deletions(-)

diff --git a/src/ClaudeDev.ts b/src/ClaudeDev.ts
index 59b9f4f..b0fe05c 100644
--- a/src/ClaudeDev.ts
+++ b/src/ClaudeDev.ts
@@ -1656,6 +1656,9 @@ ${this.customInstructions.trim()}
 		let cacheReadInputTokens =
 			(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
 				.cache_read_input_tokens || undefined
+		// @ts-ignore-next-line
+		let totalCost = response.usage.total_cost
+
 		await this.say(
 			"api_req_finished",
 			JSON.stringify({
@@ -1663,12 +1666,14 @@ ${this.customInstructions.trim()}
 				tokensOut: outputTokens,
 				cacheWrites: cacheCreationInputTokens,
 				cacheReads: cacheReadInputTokens,
-				cost: this.calculateApiCost(
-					inputTokens,
-					outputTokens,
-					cacheCreationInputTokens,
-					cacheReadInputTokens
-				),
+				cost:
+					totalCost ||
+					this.calculateApiCost(
+						inputTokens,
+						outputTokens,
+						cacheCreationInputTokens,
+						cacheReadInputTokens
+					),
 			})
 		)
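
The hunk above prefers the exact, provider-reported cost from OpenRouter and keeps the local estimate as a fallback. `calculateApiCost` itself is not part of this diff; the sketch below is a minimal reconstruction of that kind of estimate, assuming the per-million-token pricing fields defined in src/shared/api.ts (inputPrice, outputPrice, cacheWritesPrice, cacheReadsPrice). It is an illustration, not necessarily the repository's exact implementation.

    // Minimal sketch (assumed names): estimate request cost from a ModelInfo entry.
    // Prices are USD per million tokens; cache-write and cache-read tokens are billed
    // at their own rates instead of the plain input rate.
    import type { ModelInfo } from "./shared/api"

    function estimateApiCost(
        model: ModelInfo,
        inputTokens: number,
        outputTokens: number,
        cacheWriteTokens = 0,
        cacheReadTokens = 0
    ): number {
        const per = (pricePerMillion?: number) => (pricePerMillion ?? 0) / 1_000_000
        return (
            per(model.inputPrice) * inputTokens +
            per(model.outputPrice) * outputTokens +
            per(model.cacheWritesPrice) * cacheWriteTokens +
            per(model.cacheReadsPrice) * cacheReadTokens
        )
    }

    // e.g. Claude 3.5 Sonnet (3.0 in / 15.0 out / 3.75 cache write / 0.3 cache read):
    // estimateApiCost(info, 1_000, 500, 2_000, 10_000)
    //   = 0.003 + 0.0075 + 0.0075 + 0.003 = $0.021
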
diff --git a/src/api/openrouter.ts b/src/api/openrouter.ts
index 6e43ea6..3bec297 100644
--- a/src/api/openrouter.ts
+++ b/src/api/openrouter.ts
@@ -9,6 +9,7 @@ import {
 	openRouterModels,
 } from "../shared/api"
 import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import axios from "axios"
 
 export class OpenRouterHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -37,6 +38,44 @@ export class OpenRouterHandler implements ApiHandler {
 			...convertToOpenAiMessages(messages),
 		]
 
+		// prompt caching: https://openrouter.ai/docs/prompt-caching
+		switch (this.getModel().id) {
+			case "anthropic/claude-3.5-sonnet:beta":
+			case "anthropic/claude-3-haiku:beta":
+			case "anthropic/claude-3-opus:beta":
+				openAiMessages[0] = {
+					role: "system",
+					content: [
+						{
+							type: "text",
+							text: systemPrompt,
+							// @ts-ignore-next-line
+							cache_control: { type: "ephemeral" },
+						},
+					],
+				}
+				// Add cache_control to the last two user messages
+				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+				lastTwoUserMessages.forEach((msg) => {
+					if (typeof msg.content === "string") {
+						msg.content = [{ type: "text", text: msg.content }]
+					}
+					if (Array.isArray(msg.content)) {
+						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+						if (!lastTextPart) {
+							lastTextPart = { type: "text", text: "..." }
+							msg.content.push(lastTextPart)
+						}
+						// @ts-ignore-next-line
+						lastTextPart["cache_control"] = { type: "ephemeral" }
+					}
+				})
+				break
+			default:
+				break
+		}
+
 		// Convert Anthropic tools to OpenAI tools
 		const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
 			type: "function",
@@ -91,6 +130,21 @@ export class OpenRouterHandler implements ApiHandler {
 		// 	break
 		// }
 
+		const genId = completion.id
+		// Log the generation details from OpenRouter API
+		try {
+			const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
+				headers: {
+					Authorization: `Bearer ${this.options.openRouterApiKey}`,
+				},
+			})
+			// @ts-ignore-next-line
+			anthropicMessage.usage.total_cost = response.data?.data?.total_cost
+			console.log("OpenRouter generation details:", response.data)
+		} catch (error) {
+			console.error("Error fetching OpenRouter generation details:", error)
+		}
+
 		return { message: anthropicMessage }
 	}
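
The switch above applies Anthropic prompt caching as exposed through OpenRouter: one `ephemeral` breakpoint on the system prompt and one on each of the last two user messages, so the long, stable prefix is written to the cache once and read back on later turns (marking the previous user turn as well keeps the prior breakpoint warm across consecutive requests). The follow-up GET to /api/v1/generation is what supplies `total_cost`, since the chat completion response itself carries no cost figure. A worked example of the transformed message array, with all contents invented for illustration:

    // Illustrative shape of openAiMessages after the switch above (values made up).
    const example = [
        {
            role: "system",
            content: [
                {
                    type: "text",
                    text: "You are Claude Dev...", // long, stable system prompt
                    cache_control: { type: "ephemeral" },
                },
            ],
        },
        {
            role: "user", // second-to-last user message: keeps last turn's breakpoint warm
            content: [{ type: "text", text: "Fix the failing test", cache_control: { type: "ephemeral" } }],
        },
        { role: "assistant", content: "Looking at the test output..." },
        {
            role: "user", // latest user message: the new breakpoint for this turn
            content: [{ type: "text", text: "Run it again", cache_control: { type: "ephemeral" } }],
        },
    ]
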
diff --git a/src/shared/api.ts b/src/shared/api.ts
index d4dfc5c..9697bcb 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -70,14 +70,6 @@ export const anthropicModels = {
 		cacheWritesPrice: 18.75,
 		cacheReadsPrice: 1.5,
 	},
-	"claude-3-sonnet-20240229": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"claude-3-haiku-20240307": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -111,14 +103,6 @@ export const bedrockModels = {
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 	},
-	"anthropic.claude-3-sonnet-20240229-v1:0": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"anthropic.claude-3-haiku-20240307-v1:0": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -138,34 +122,49 @@ export const openRouterModels = {
 		maxTokens: 8192,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
+		cacheWritesPrice: 3.75,
+		cacheReadsPrice: 0.3,
 	},
 	"anthropic/claude-3-opus:beta": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 15,
 		outputPrice: 75,
-	},
-	"anthropic/claude-3-sonnet:beta": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3,
-		outputPrice: 15,
+		cacheWritesPrice: 18.75,
+		cacheReadsPrice: 1.5,
 	},
 	"anthropic/claude-3-haiku:beta": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.25,
 		outputPrice: 1.25,
+		cacheWritesPrice: 0.3,
+		cacheReadsPrice: 0.03,
 	},
+	// Doesn't support tool use (yet)
+	// "openai/o1-preview": {
+	// 	maxTokens: 32_768,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 15,
+	// 	outputPrice: 60,
+	// },
+	// "openai/o1-mini": {
+	// 	maxTokens: 65_536,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 3,
+	// 	outputPrice: 12,
+	// },
 	"openai/gpt-4o-2024-08-06": {
 		maxTokens: 16384,
 		contextWindow: 128_000,
@@ -294,14 +293,6 @@ export const vertexModels = {
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 	},
-	"claude-3-sonnet@20240229": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"claude-3-haiku@20240307": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -347,24 +338,25 @@ export const geminiModels = {
 // OpenAI Native
 // https://openai.com/api/pricing/
 export type OpenAiNativeModelId = keyof typeof openAiNativeModels
-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "o1-preview"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
 export const openAiNativeModels = {
-	"o1-preview": {
-		maxTokens: 32_768,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-mini": {
-		maxTokens: 65_536,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3,
-		outputPrice: 12,
-	},
+	// don't support tool use yet
+	// "o1-preview": {
+	// 	maxTokens: 32_768,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 15,
+	// 	outputPrice: 60,
+	// },
+	// "o1-mini": {
+	// 	maxTokens: 65_536,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 3,
+	// 	outputPrice: 12,
+	// },
 	"gpt-4o": {
 		maxTokens: 4_096,
 		contextWindow: 128_000,
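
The new cache prices track Anthropic's published price sheet: cache writes cost roughly 1.25x the input rate and cache reads roughly 0.1x (Sonnet 3.5: 3.0 becomes 3.75 / 0.3; Opus: 15 becomes 18.75 / 1.5; Haiku is rounded on the sheet to 0.3 / 0.03 rather than 0.3125 / 0.025). A quick worked example, using the Sonnet figures from the table above, of when that trade pays off:

    // Break-even check for a 50k-token stable prefix on Claude 3.5 Sonnet.
    const M = 1_000_000
    const prefixTokens = 50_000

    const uncachedPerTurn = (prefixTokens * 3.0) / M // $0.15 every turn
    const cacheWriteOnce = (prefixTokens * 3.75) / M // $0.1875 on the first turn
    const cachedPerTurn = (prefixTokens * 0.3) / M // $0.015 on later turns

    // Two turns: $0.30 uncached vs $0.1875 + $0.015 = $0.2025 cached, so the
    // 25% write premium is already recovered on the first cache hit.
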
diff --git a/webview-ui/src/components/ApiOptions.tsx b/webview-ui/src/components/ApiOptions.tsx
index d2c809b..62c6c0b 100644
--- a/webview-ui/src/components/ApiOptions.tsx
+++ b/webview-ui/src/components/ApiOptions.tsx
@@ -12,7 +12,6 @@ import { useEvent, useInterval } from "react-use"
 import {
 	ApiConfiguration,
 	ModelInfo,
-	OpenAiNativeModelId,
 	anthropicDefaultModelId,
 	anthropicModels,
 	bedrockDefaultModelId,
@@ -115,11 +114,11 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 					onChange={handleInputChange("apiProvider")}
 					style={{ minWidth: 130 }}>
 					<VSCodeOption value="anthropic">Anthropic</VSCodeOption>
-					<VSCodeOption value="openai-native">OpenAI</VSCodeOption>
 					<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
 					<VSCodeOption value="gemini">Google Gemini</VSCodeOption>
-					<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
 					<VSCodeOption value="vertex">GCP Vertex AI</VSCodeOption>
+					<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
+					<VSCodeOption value="openai-native">OpenAI</VSCodeOption>
 					<VSCodeOption value="openai">OpenAI Compatible</VSCodeOption>
 					<VSCodeOption value="ollama">Ollama</VSCodeOption>
 				</VSCodeDropdown>
@@ -547,7 +546,7 @@ export const formatPrice = (price: number) => {
 
 const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
 	const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-	const isO1 = (["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
+	const isO1 = false //(["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
 
 	return (
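
In the webview, the o1 special-casing is stubbed out (`isO1 = false`) rather than deleted, mirroring the commented-out table entries above, and ModelInfoView keeps rendering prices with `formatPrice`. A small sketch of how the new cache prices could surface there; the component, labels, and import path are assumptions for illustration, not code from this patch:

    // Hypothetical fragment: show cache pricing only for models that support it.
    // ModelInfo comes from src/shared/api.ts; formatPrice is exported from this file.
    import { ModelInfo } from "../../../src/shared/api"

    const CachePricing = ({ modelInfo }: { modelInfo: ModelInfo }) =>
        modelInfo.supportsPromptCache ? (
            <p>
                Cache writes: {formatPrice(modelInfo.cacheWritesPrice ?? 0)}/million tokens,
                cache reads: {formatPrice(modelInfo.cacheReadsPrice ?? 0)}/million tokens
            </p>
        ) : null
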
diff --git a/webview-ui/src/components/TaskHeader.tsx b/webview-ui/src/components/TaskHeader.tsx
--- a/webview-ui/src/components/TaskHeader.tsx
+++ b/webview-ui/src/components/TaskHeader.tsx
@@ ... @@ = ({
 	}, [apiConfiguration?.apiProvider])
 
+	const shouldShowPromptCacheInfo = doesModelSupportPromptCache && apiConfiguration?.apiProvider !== "openrouter"
+
 	return (
@@ ... @@ = ({
 						{!isCostAvailable && <ExportButton />}
 					</div>
-					{(doesModelSupportPromptCache || cacheReads !== undefined || cacheWrites !== undefined) && (
+					{(shouldShowPromptCacheInfo || cacheReads !== undefined || cacheWrites !== undefined) && (
 							<span style={{ fontWeight: "bold" }}>Cache:</span>
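
The JSX in these last two hunks was mangled in transit (angle-bracketed elements were stripped), so the hunk headers and some element names above are reconstructed. The logic the +/- lines carry is intact, though: cache statistics stay visible whenever a response actually reported cache reads or writes, but the per-model hint is now suppressed for OpenRouter, presumably because its exact, provider-reported cost already reflects caching. A self-contained sketch of that gating, with names assumed where the patch is unreadable:

    // Reconstructed gating from the hunks above (parameter names assumed).
    function shouldShowCacheRow(
        doesModelSupportPromptCache: boolean,
        apiProvider: string | undefined,
        cacheReads?: number,
        cacheWrites?: number
    ): boolean {
        const shouldShowPromptCacheInfo =
            doesModelSupportPromptCache && apiProvider !== "openrouter"
        return shouldShowPromptCacheInfo || cacheReads !== undefined || cacheWrites !== undefined
    }
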