From dc617a92a9c4380a538b363a22a6c2ba9c2217a1 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Fri, 13 Sep 2024 00:49:04 -0400
Subject: [PATCH] Add prompt caching to OpenRouter; remove o1 models since
 they don't support tool use
---
src/ClaudeDev.ts | 17 +++--
src/api/openrouter.ts | 54 +++++++++++++
src/shared/api.ts | 96 +++++++++++-------------
webview-ui/src/components/ApiOptions.tsx | 7 +-
webview-ui/src/components/TaskHeader.tsx | 4 +-
5 files changed, 115 insertions(+), 63 deletions(-)
diff --git a/src/ClaudeDev.ts b/src/ClaudeDev.ts
index 59b9f4f..b0fe05c 100644
--- a/src/ClaudeDev.ts
+++ b/src/ClaudeDev.ts
@@ -1656,6 +1656,9 @@ ${this.customInstructions.trim()}
let cacheReadInputTokens =
(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
.cache_read_input_tokens || undefined
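+			// Prefer the exact cost reported by the provider when available (the
+			// OpenRouter handler below attaches usage.total_cost); otherwise fall
+			// back to the locally calculated estimate.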
+ // @ts-ignore-next-line
+ let totalCost = response.usage.total_cost
+
await this.say(
"api_req_finished",
JSON.stringify({
@@ -1663,12 +1666,14 @@ ${this.customInstructions.trim()}
tokensOut: outputTokens,
cacheWrites: cacheCreationInputTokens,
cacheReads: cacheReadInputTokens,
- cost: this.calculateApiCost(
- inputTokens,
- outputTokens,
- cacheCreationInputTokens,
- cacheReadInputTokens
- ),
+ cost:
+ totalCost ||
+ this.calculateApiCost(
+ inputTokens,
+ outputTokens,
+ cacheCreationInputTokens,
+ cacheReadInputTokens
+ ),
})
)
diff --git a/src/api/openrouter.ts b/src/api/openrouter.ts
index 6e43ea6..3bec297 100644
--- a/src/api/openrouter.ts
+++ b/src/api/openrouter.ts
@@ -9,6 +9,7 @@ import {
openRouterModels,
} from "../shared/api"
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import axios from "axios"
export class OpenRouterHandler implements ApiHandler {
private options: ApiHandlerOptions
@@ -37,6 +38,44 @@ export class OpenRouterHandler implements ApiHandler {
...convertToOpenAiMessages(messages),
]
+ // prompt caching: https://openrouter.ai/docs/prompt-caching
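+		// Anthropic models allow up to 4 cache breakpoints with a short-lived
+		// ("ephemeral") TTL. Marking the system prompt plus the two most recent
+		// user messages lets each request read the previous turn's cached prefix
+		// and write a new breakpoint for the next one.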
+ switch (this.getModel().id) {
+ case "anthropic/claude-3.5-sonnet:beta":
+ case "anthropic/claude-3-haiku:beta":
+ case "anthropic/claude-3-opus:beta":
+ openAiMessages[0] = {
+ role: "system",
+ content: [
+ {
+ type: "text",
+ text: systemPrompt,
+ // @ts-ignore-next-line
+ cache_control: { type: "ephemeral" },
+ },
+ ],
+ }
+ // Add cache_control to the last two user messages
+ const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+ lastTwoUserMessages.forEach((msg) => {
+ if (typeof msg.content === "string") {
+ msg.content = [{ type: "text", text: msg.content }]
+ }
+ if (Array.isArray(msg.content)) {
+ let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+ if (!lastTextPart) {
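+							// Image-only messages have no text block to carry cache_control,
+							// so append a placeholder text part to anchor the breakpoint.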
+ lastTextPart = { type: "text", text: "..." }
+ msg.content.push(lastTextPart)
+ }
+ // @ts-ignore-next-line
+ lastTextPart["cache_control"] = { type: "ephemeral" }
+ }
+ })
+ break
+ default:
+ break
+ }
+
// Convert Anthropic tools to OpenAI tools
const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
type: "function",
@@ -91,6 +130,21 @@ export class OpenRouterHandler implements ApiHandler {
// break
// }
+ const genId = completion.id
+		// Fetch the generation details from OpenRouter; its stats include the
+		// exact cost charged for this request, which the chat completion
+		// response itself doesn't report.
+ try {
+ const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
+ headers: {
+ Authorization: `Bearer ${this.options.openRouterApiKey}`,
+ },
+ })
+ // @ts-ignore-next-line
+ anthropicMessage.usage.total_cost = response.data?.data?.total_cost
+ console.log("OpenRouter generation details:", response.data)
+ } catch (error) {
+ console.error("Error fetching OpenRouter generation details:", error)
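+			// Non-fatal: without total_cost, ClaudeDev falls back to its local
+			// cost calculation.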
+ }
+
return { message: anthropicMessage }
}
diff --git a/src/shared/api.ts b/src/shared/api.ts
index d4dfc5c..9697bcb 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -70,14 +70,6 @@ export const anthropicModels = {
cacheWritesPrice: 18.75,
cacheReadsPrice: 1.5,
},
- "claude-3-sonnet-20240229": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- },
"claude-3-haiku-20240307": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -111,14 +103,6 @@ export const bedrockModels = {
inputPrice: 15.0,
outputPrice: 75.0,
},
- "anthropic.claude-3-sonnet-20240229-v1:0": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- },
"anthropic.claude-3-haiku-20240307-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -138,34 +122,49 @@ export const openRouterModels = {
maxTokens: 8192,
contextWindow: 200_000,
supportsImages: true,
- supportsPromptCache: false,
+ supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
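+		// Prices are per million tokens; cache prices mirror Anthropic's direct
+		// API (writes cost ~25% more than input, reads ~90% less).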
+ cacheWritesPrice: 3.75,
+ cacheReadsPrice: 0.3,
},
"anthropic/claude-3-opus:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
- supportsPromptCache: false,
+ supportsPromptCache: true,
inputPrice: 15,
outputPrice: 75,
- },
- "anthropic/claude-3-sonnet:beta": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3,
- outputPrice: 15,
+ cacheWritesPrice: 18.75,
+ cacheReadsPrice: 1.5,
},
"anthropic/claude-3-haiku:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
- supportsPromptCache: false,
+ supportsPromptCache: true,
inputPrice: 0.25,
outputPrice: 1.25,
+ cacheWritesPrice: 0.3,
+ cacheReadsPrice: 0.03,
},
+	// o1 models don't support tool use (yet)
+ // "openai/o1-preview": {
+ // maxTokens: 32_768,
+ // contextWindow: 128_000,
+ // supportsImages: true,
+ // supportsPromptCache: false,
+ // inputPrice: 15,
+ // outputPrice: 60,
+ // },
+ // "openai/o1-mini": {
+ // maxTokens: 65_536,
+ // contextWindow: 128_000,
+ // supportsImages: true,
+ // supportsPromptCache: false,
+ // inputPrice: 3,
+ // outputPrice: 12,
+ // },
"openai/gpt-4o-2024-08-06": {
maxTokens: 16384,
contextWindow: 128_000,
@@ -294,14 +293,6 @@ export const vertexModels = {
inputPrice: 15.0,
outputPrice: 75.0,
},
- "claude-3-sonnet@20240229": {
- maxTokens: 4096,
- contextWindow: 200_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3.0,
- outputPrice: 15.0,
- },
"claude-3-haiku@20240307": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -347,24 +338,25 @@ export const geminiModels = {
// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "o1-preview"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
export const openAiNativeModels = {
- "o1-preview": {
- maxTokens: 32_768,
- contextWindow: 128_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 15,
- outputPrice: 60,
- },
- "o1-mini": {
- maxTokens: 65_536,
- contextWindow: 128_000,
- supportsImages: true,
- supportsPromptCache: false,
- inputPrice: 3,
- outputPrice: 12,
- },
+	// o1 models don't support tool use yet
+ // "o1-preview": {
+ // maxTokens: 32_768,
+ // contextWindow: 128_000,
+ // supportsImages: true,
+ // supportsPromptCache: false,
+ // inputPrice: 15,
+ // outputPrice: 60,
+ // },
+ // "o1-mini": {
+ // maxTokens: 65_536,
+ // contextWindow: 128_000,
+ // supportsImages: true,
+ // supportsPromptCache: false,
+ // inputPrice: 3,
+ // outputPrice: 12,
+ // },
"gpt-4o": {
maxTokens: 4_096,
contextWindow: 128_000,
diff --git a/webview-ui/src/components/ApiOptions.tsx b/webview-ui/src/components/ApiOptions.tsx
index d2c809b..62c6c0b 100644
--- a/webview-ui/src/components/ApiOptions.tsx
+++ b/webview-ui/src/components/ApiOptions.tsx
@@ -12,7 +12,6 @@ import { useEvent, useInterval } from "react-use"
import {
ApiConfiguration,
ModelInfo,
- OpenAiNativeModelId,
anthropicDefaultModelId,
anthropicModels,
bedrockDefaultModelId,
@@ -115,11 +114,11 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
onChange={handleInputChange("apiProvider")}
style={{ minWidth: 130 }}>