Add prompt caching to openrouter; remove o1 since it doesn't support tool use
@@ -1656,6 +1656,9 @@ ${this.customInstructions.trim()}
|
||||
let cacheReadInputTokens =
|
||||
(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
|
||||
.cache_read_input_tokens || undefined
|
||||
// @ts-ignore-next-line
|
||||
let totalCost = response.usage.total_cost
|
||||
|
||||
await this.say(
|
||||
"api_req_finished",
|
||||
JSON.stringify({
|
||||
@@ -1663,12 +1666,14 @@ ${this.customInstructions.trim()}
                 tokensOut: outputTokens,
                 cacheWrites: cacheCreationInputTokens,
                 cacheReads: cacheReadInputTokens,
-                cost: this.calculateApiCost(
-                    inputTokens,
-                    outputTokens,
-                    cacheCreationInputTokens,
-                    cacheReadInputTokens
-                ),
+                cost:
+                    totalCost ||
+                    this.calculateApiCost(
+                        inputTokens,
+                        outputTokens,
+                        cacheCreationInputTokens,
+                        cacheReadInputTokens
+                    ),
             })
         )
 
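Why the fallback: OpenRouter reports an exact total_cost per generation (fetched below in the OpenRouter handler), so the locally computed estimate is only used when that figure is missing. For reference, a cost computation along the lines of calculateApiCost could look like the sketch below. The standalone signature, the ModelPricing shape, and the body are assumptions for illustration, not the repository's actual implementation; prices are read as USD per million tokens.

    // Sketch only: each price field is USD per million tokens.
    interface ModelPricing {
        inputPrice?: number
        outputPrice?: number
        cacheWritesPrice?: number
        cacheReadsPrice?: number
    }

    // Charge cache writes/reads at their own rates, everything else at the
    // normal input/output rates.
    function calculateApiCost(
        pricing: ModelPricing,
        inputTokens: number,
        outputTokens: number,
        cacheCreationInputTokens?: number,
        cacheReadInputTokens?: number
    ): number {
        const per = (price: number = 0, tokens: number = 0) => (price / 1_000_000) * tokens
        return (
            per(pricing.cacheWritesPrice, cacheCreationInputTokens) +
            per(pricing.cacheReadsPrice, cacheReadInputTokens) +
            per(pricing.inputPrice, inputTokens) +
            per(pricing.outputPrice, outputTokens)
        )
    }

With the OpenRouter Claude 3.5 Sonnet prices below (cacheWritesPrice 3.75, cacheReadsPrice 0.3), writing 10,000 tokens to the cache would add 0.0375 USD on top of the regular input/output charges.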
@@ -9,6 +9,7 @@ import {
     openRouterModels,
 } from "../shared/api"
 import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import axios from "axios"
 
 export class OpenRouterHandler implements ApiHandler {
     private options: ApiHandlerOptions
@@ -37,6 +38,44 @@ export class OpenRouterHandler implements ApiHandler {
         ...convertToOpenAiMessages(messages),
     ]
 
+    // prompt caching: https://openrouter.ai/docs/prompt-caching
+    switch (this.getModel().id) {
+        case "anthropic/claude-3.5-sonnet:beta":
+        case "anthropic/claude-3-haiku:beta":
+        case "anthropic/claude-3-opus:beta":
+            openAiMessages[0] = {
+                role: "system",
+                content: [
+                    {
+                        type: "text",
+                        text: systemPrompt,
+                        // @ts-ignore-next-line
+                        cache_control: { type: "ephemeral" },
+                    },
+                ],
+            }
+            // Add cache_control to the last two user messages
+            const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+            lastTwoUserMessages.forEach((msg) => {
+                if (typeof msg.content === "string") {
+                    msg.content = [{ type: "text", text: msg.content }]
+                }
+                if (Array.isArray(msg.content)) {
+                    let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+                    if (!lastTextPart) {
+                        lastTextPart = { type: "text", text: "..." }
+                        msg.content.push(lastTextPart)
+                    }
+                    // @ts-ignore-next-line
+                    lastTextPart["cache_control"] = { type: "ephemeral" }
+                }
+            })
+            break
+        default:
+            break
+    }
+
     // Convert Anthropic tools to OpenAI tools
     const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
         type: "function",
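Concretely, for the caching-capable models the switch rewrites the payload so that the system prompt and the last two user turns each carry a cache_control breakpoint. A hand-written illustration of the result (the texts are made up, only the shape matches the code above):

    // Illustrative only: shape as produced by the switch above.
    const systemPrompt = "You are a coding assistant..."
    const openAiMessages = [
        {
            role: "system",
            content: [
                // Breakpoint 1: the large, stable system prompt gets cached.
                { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } },
            ],
        },
        // ...older turns pass through unchanged...
        {
            role: "user",
            // Breakpoint 2: the previous user turn, so the next request can still hit it.
            content: [{ type: "text", text: "earlier message", cache_control: { type: "ephemeral" } }],
        },
        {
            role: "user",
            // Breakpoint 3: the latest user turn.
            content: [{ type: "text", text: "latest message", cache_control: { type: "ephemeral" } }],
        },
    ]

Note how string contents are first wrapped into text-part arrays, and a placeholder "..." text part is appended when a user message has no text block at all (for example, images only), since the breakpoint has to sit on a text part.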
@@ -91,6 +130,21 @@ export class OpenRouterHandler implements ApiHandler {
     // 	break
     // }
 
+    const genId = completion.id
+    // Log the generation details from OpenRouter API
+    try {
+        const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
+            headers: {
+                Authorization: `Bearer ${this.options.openRouterApiKey}`,
+            },
+        })
+        // @ts-ignore-next-line
+        anthropicMessage.usage.total_cost = response.data?.data?.total_cost
+        console.log("OpenRouter generation details:", response.data)
+    } catch (error) {
+        console.error("Error fetching OpenRouter generation details:", error)
+    }
+
     return { message: anthropicMessage }
 }
 
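The same lookup could be factored into a small helper. A sketch under the assumptions the code above already makes, namely that the generation endpoint nests its payload under data.data and that total_cost is the only field read:

    import axios from "axios"

    // Hypothetical helper; the response is typed only as far as the caller reads it.
    interface GenerationStats {
        total_cost?: number
    }

    async function fetchGenerationStats(genId: string, apiKey: string): Promise<GenerationStats | undefined> {
        try {
            const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
                headers: { Authorization: `Bearer ${apiKey}` },
            })
            return response.data?.data
        } catch (error) {
            console.error("Error fetching OpenRouter generation details:", error)
            return undefined
        }
    }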
@@ -70,14 +70,6 @@ export const anthropicModels = {
         cacheWritesPrice: 18.75,
         cacheReadsPrice: 1.5,
     },
-    "claude-3-sonnet-20240229": {
-        maxTokens: 4096,
-        contextWindow: 200_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 3.0,
-        outputPrice: 15.0,
-    },
     "claude-3-haiku-20240307": {
         maxTokens: 4096,
         contextWindow: 200_000,
@@ -111,14 +103,6 @@ export const bedrockModels = {
         inputPrice: 15.0,
         outputPrice: 75.0,
     },
-    "anthropic.claude-3-sonnet-20240229-v1:0": {
-        maxTokens: 4096,
-        contextWindow: 200_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 3.0,
-        outputPrice: 15.0,
-    },
     "anthropic.claude-3-haiku-20240307-v1:0": {
         maxTokens: 4096,
         contextWindow: 200_000,
@@ -138,34 +122,49 @@ export const openRouterModels = {
         maxTokens: 8192,
         contextWindow: 200_000,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         inputPrice: 3.0,
         outputPrice: 15.0,
+        cacheWritesPrice: 3.75,
+        cacheReadsPrice: 0.3,
     },
     "anthropic/claude-3-opus:beta": {
         maxTokens: 4096,
         contextWindow: 200_000,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         inputPrice: 15,
         outputPrice: 75,
+        cacheWritesPrice: 18.75,
+        cacheReadsPrice: 1.5,
     },
-    "anthropic/claude-3-sonnet:beta": {
-        maxTokens: 4096,
-        contextWindow: 200_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 3,
-        outputPrice: 15,
-    },
     "anthropic/claude-3-haiku:beta": {
         maxTokens: 4096,
         contextWindow: 200_000,
         supportsImages: true,
-        supportsPromptCache: false,
+        supportsPromptCache: true,
         inputPrice: 0.25,
         outputPrice: 1.25,
+        cacheWritesPrice: 0.3,
+        cacheReadsPrice: 0.03,
     },
+    // Doesn't support tool use (yet)
+    // "openai/o1-preview": {
+    // 	maxTokens: 32_768,
+    // 	contextWindow: 128_000,
+    // 	supportsImages: true,
+    // 	supportsPromptCache: false,
+    // 	inputPrice: 15,
+    // 	outputPrice: 60,
+    // },
+    // "openai/o1-mini": {
+    // 	maxTokens: 65_536,
+    // 	contextWindow: 128_000,
+    // 	supportsImages: true,
+    // 	supportsPromptCache: false,
+    // 	inputPrice: 3,
+    // 	outputPrice: 12,
+    // },
     "openai/gpt-4o-2024-08-06": {
         maxTokens: 16384,
         contextWindow: 128_000,
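Every entry in these model tables shares one shape. The following interface is inferred from the fields the tables use (the actual ModelInfo declared in shared/api.ts may differ), with prices in USD per million tokens:

    // Inferred from the fields used above, not the repository's declaration.
    interface ModelInfo {
        maxTokens: number
        contextWindow: number
        supportsImages: boolean
        supportsPromptCache: boolean
        inputPrice: number
        outputPrice: number
        cacheWritesPrice?: number // set only on entries with supportsPromptCache: true
        cacheReadsPrice?: number
    }

The cache prices track Anthropic's published multipliers, roughly 1.25x the input price for cache writes and 0.1x for cache reads: Sonnet's 3.0 becomes 3.75 and 0.30, Opus's 15 becomes 18.75 and 1.5, Haiku's 0.25 rounds to 0.3 and 0.03.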
@@ -294,14 +293,6 @@ export const vertexModels = {
         inputPrice: 15.0,
         outputPrice: 75.0,
     },
-    "claude-3-sonnet@20240229": {
-        maxTokens: 4096,
-        contextWindow: 200_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 3.0,
-        outputPrice: 15.0,
-    },
     "claude-3-haiku@20240307": {
         maxTokens: 4096,
         contextWindow: 200_000,
@@ -347,24 +338,25 @@ export const geminiModels = {
 // OpenAI Native
 // https://openai.com/api/pricing/
 export type OpenAiNativeModelId = keyof typeof openAiNativeModels
-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "o1-preview"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
 export const openAiNativeModels = {
-    "o1-preview": {
-        maxTokens: 32_768,
-        contextWindow: 128_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 15,
-        outputPrice: 60,
-    },
-    "o1-mini": {
-        maxTokens: 65_536,
-        contextWindow: 128_000,
-        supportsImages: true,
-        supportsPromptCache: false,
-        inputPrice: 3,
-        outputPrice: 12,
-    },
+    // don't support tool use yet
+    // "o1-preview": {
+    // 	maxTokens: 32_768,
+    // 	contextWindow: 128_000,
+    // 	supportsImages: true,
+    // 	supportsPromptCache: false,
+    // 	inputPrice: 15,
+    // 	outputPrice: 60,
+    // },
+    // "o1-mini": {
+    // 	maxTokens: 65_536,
+    // 	contextWindow: 128_000,
+    // 	supportsImages: true,
+    // 	supportsPromptCache: false,
+    // 	inputPrice: 3,
+    // 	outputPrice: 12,
+    // },
     "gpt-4o": {
         maxTokens: 4_096,
         contextWindow: 128_000,
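Note the keyof typeof pattern here: commenting the o1 entries out also removes their ids from the OpenAiNativeModelId union, which is presumably why the default model id moves to "gpt-4o" in the same hunk. A minimal illustration of the mechanism (not the real table):

    const models = {
        "gpt-4o": { maxTokens: 4_096 },
        // "o1-preview": { maxTokens: 32_768 }, // commented out => id leaves the union
    }
    type ModelId = keyof typeof models // now just "gpt-4o"
    const defaultId: ModelId = "gpt-4o" // "o1-preview" would be a compile error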
@@ -12,7 +12,6 @@ import { useEvent, useInterval } from "react-use"
 import {
     ApiConfiguration,
     ModelInfo,
-    OpenAiNativeModelId,
     anthropicDefaultModelId,
     anthropicModels,
     bedrockDefaultModelId,
@@ -115,11 +114,11 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
                 onChange={handleInputChange("apiProvider")}
                 style={{ minWidth: 130 }}>
                 <VSCodeOption value="anthropic">Anthropic</VSCodeOption>
-                <VSCodeOption value="openai-native">OpenAI</VSCodeOption>
                 <VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
                 <VSCodeOption value="gemini">Google Gemini</VSCodeOption>
-                <VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
                 <VSCodeOption value="vertex">GCP Vertex AI</VSCodeOption>
+                <VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
+                <VSCodeOption value="openai-native">OpenAI</VSCodeOption>
                 <VSCodeOption value="openai">OpenAI Compatible</VSCodeOption>
                 <VSCodeOption value="ollama">Ollama</VSCodeOption>
             </VSCodeDropdown>
@@ -547,7 +546,7 @@ export const formatPrice = (price: number) => {
 
 const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
     const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-    const isO1 = (["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
+    const isO1 = false //(["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
     return (
         <p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
             <ModelInfoSupportsItem
@@ -98,6 +98,8 @@ const TaskHeader: React.FC<TaskHeaderProps> = ({
         )
     }, [apiConfiguration?.apiProvider])
 
+    const shouldShowPromptCacheInfo = doesModelSupportPromptCache && apiConfiguration?.apiProvider !== "openrouter"
+
     return (
         <div style={{ padding: "10px 13px 10px 13px" }}>
             <div
@@ -265,7 +267,7 @@ const TaskHeader: React.FC<TaskHeaderProps> = ({
                     {!isCostAvailable && <ExportButton />}
                 </div>
 
-                {(doesModelSupportPromptCache || cacheReads !== undefined || cacheWrites !== undefined) && (
+                {(shouldShowPromptCacheInfo || cacheReads !== undefined || cacheWrites !== undefined) && (
                     <div style={{ display: "flex", alignItems: "center", gap: "4px", flexWrap: "wrap" }}>
                         <span style={{ fontWeight: "bold" }}>Cache:</span>
                         <span style={{ display: "flex", alignItems: "center", gap: "3px" }}>