Add prompt caching to OpenRouter; remove o1 since it doesn't support tool use

Saoud Rizwan
2024-09-13 00:49:04 -04:00
parent c29fdaa520
commit dc617a92a9
5 changed files with 115 additions and 63 deletions

View File

@@ -1656,6 +1656,9 @@ ${this.customInstructions.trim()}
let cacheReadInputTokens =
(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
.cache_read_input_tokens || undefined
// @ts-ignore-next-line
let totalCost = response.usage.total_cost
await this.say(
"api_req_finished",
JSON.stringify({
@@ -1663,7 +1666,9 @@ ${this.customInstructions.trim()}
tokensOut: outputTokens,
cacheWrites: cacheCreationInputTokens,
cacheReads: cacheReadInputTokens,
-cost: this.calculateApiCost(
+cost:
+	totalCost ||
+	this.calculateApiCost(
inputTokens,
outputTokens,
cacheCreationInputTokens,
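
Note: the change above makes the task loop prefer a provider-reported cost and only fall back to the local estimate. A minimal sketch of that fallback, with an illustrative function name and parameters that are not part of the diff:

// Sketch: prefer the cost reported by the provider (OpenRouter's total_cost),
// falling back to the locally computed estimate when it is missing.
// With `||` (as used above), a reported cost of 0 also falls back to the estimate.
function resolveApiCost(reportedTotalCost: number | undefined, localEstimate: number): number {
	return reportedTotalCost || localEstimate
}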

View File

@@ -9,6 +9,7 @@ import {
openRouterModels,
} from "../shared/api"
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
import axios from "axios"
export class OpenRouterHandler implements ApiHandler {
private options: ApiHandlerOptions
@@ -37,6 +38,44 @@ export class OpenRouterHandler implements ApiHandler {
...convertToOpenAiMessages(messages),
]
// prompt caching: https://openrouter.ai/docs/prompt-caching
switch (this.getModel().id) {
case "anthropic/claude-3.5-sonnet:beta":
case "anthropic/claude-3-haiku:beta":
case "anthropic/claude-3-opus:beta":
openAiMessages[0] = {
role: "system",
content: [
{
type: "text",
text: systemPrompt,
// @ts-ignore-next-line
cache_control: { type: "ephemeral" },
},
],
}
// Add cache_control to the last two user messages
const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
lastTwoUserMessages.forEach((msg) => {
if (typeof msg.content === "string") {
msg.content = [{ type: "text", text: msg.content }]
}
if (Array.isArray(msg.content)) {
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
if (!lastTextPart) {
lastTextPart = { type: "text", text: "..." }
msg.content.push(lastTextPart)
}
// @ts-ignore-next-line
lastTextPart["cache_control"] = { type: "ephemeral" }
}
})
break
default:
break
}
// Convert Anthropic tools to OpenAI tools
const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
type: "function",
@@ -91,6 +130,21 @@ export class OpenRouterHandler implements ApiHandler {
// break
// }
const genId = completion.id
// Log the generation details from OpenRouter API
try {
const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
headers: {
Authorization: `Bearer ${this.options.openRouterApiKey}`,
},
})
// @ts-ignore-next-line
anthropicMessage.usage.total_cost = response.data?.data?.total_cost
console.log("OpenRouter generation details:", response.data)
} catch (error) {
console.error("Error fetching OpenRouter generation details:", error)
}
return { message: anthropicMessage }
}
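
Note: per OpenRouter's prompt-caching docs for Anthropic models, cache breakpoints are set via `cache_control` on text content parts. The switch above rewrites the request so it ends up shaped roughly like this (a hand-written sketch with illustrative values, not output captured from the handler):

// Rough shape of the messages after the cache_control rewrite above:
// the system prompt and the last two user messages each carry an
// ephemeral cache breakpoint on their final text part.
const exampleMessages = [
	{
		role: "system",
		content: [{ type: "text", text: "<system prompt>", cache_control: { type: "ephemeral" } }],
	},
	{ role: "user", content: "an older user message is left untouched" },
	{
		role: "user",
		content: [{ type: "text", text: "<second-to-last user message>", cache_control: { type: "ephemeral" } }],
	},
	{ role: "assistant", content: "assistant turns are not modified" },
	{
		role: "user",
		content: [{ type: "text", text: "<latest user message>", cache_control: { type: "ephemeral" } }],
	},
]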

View File

@@ -70,14 +70,6 @@ export const anthropicModels = {
cacheWritesPrice: 18.75,
cacheReadsPrice: 1.5,
},
"claude-3-sonnet-20240229": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
outputPrice: 15.0,
},
"claude-3-haiku-20240307": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -111,14 +103,6 @@ export const bedrockModels = {
inputPrice: 15.0,
outputPrice: 75.0,
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
outputPrice: 15.0,
},
"anthropic.claude-3-haiku-20240307-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -138,34 +122,49 @@ export const openRouterModels = {
maxTokens: 8192,
contextWindow: 200_000,
supportsImages: true,
-supportsPromptCache: false,
+supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
+cacheWritesPrice: 3.75,
+cacheReadsPrice: 0.3,
},
"anthropic/claude-3-opus:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
-supportsPromptCache: false,
+supportsPromptCache: true,
inputPrice: 15,
outputPrice: 75,
-},
-"anthropic/claude-3-sonnet:beta": {
-maxTokens: 4096,
-contextWindow: 200_000,
-supportsImages: true,
-supportsPromptCache: false,
-inputPrice: 3,
-outputPrice: 15,
+cacheWritesPrice: 18.75,
+cacheReadsPrice: 1.5,
},
"anthropic/claude-3-haiku:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
-supportsPromptCache: false,
+supportsPromptCache: true,
inputPrice: 0.25,
outputPrice: 1.25,
+cacheWritesPrice: 0.3,
+cacheReadsPrice: 0.03,
},
// Doesn't support tool use (yet)
// "openai/o1-preview": {
// maxTokens: 32_768,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 15,
// outputPrice: 60,
// },
// "openai/o1-mini": {
// maxTokens: 65_536,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 3,
// outputPrice: 12,
// },
"openai/gpt-4o-2024-08-06": {
maxTokens: 16384,
contextWindow: 128_000,
@@ -294,14 +293,6 @@ export const vertexModels = {
inputPrice: 15.0,
outputPrice: 75.0,
},
"claude-3-sonnet@20240229": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
outputPrice: 15.0,
},
"claude-3-haiku@20240307": {
maxTokens: 4096,
contextWindow: 200_000,
@@ -347,24 +338,25 @@ export const geminiModels = {
// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "o1-preview"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
export const openAiNativeModels = {
"o1-preview": {
maxTokens: 32_768,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 15,
outputPrice: 60,
},
"o1-mini": {
maxTokens: 65_536,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3,
outputPrice: 12,
},
// don't support tool use yet
// "o1-preview": {
// maxTokens: 32_768,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 15,
// outputPrice: 60,
// },
// "o1-mini": {
// maxTokens: 65_536,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 3,
// outputPrice: 12,
// },
"gpt-4o": {
maxTokens: 4_096,
contextWindow: 128_000,
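
Note: the price fields above read as USD per million tokens (e.g. $3/M input and $15/M output for Claude 3.5 Sonnet), with cache writes billed at a premium over normal input and cache reads at a steep discount. A rough cost estimate under that assumption; this is an illustrative helper, not the extension's calculateApiCost:

interface ModelPricing {
	inputPrice: number // $ per million input tokens
	outputPrice: number // $ per million output tokens
	cacheWritesPrice?: number // $ per million tokens written to the prompt cache
	cacheReadsPrice?: number // $ per million tokens read from the prompt cache
}

// Illustrative estimate assuming per-million-token pricing; cached tokens are
// billed at the cache rates instead of the normal input rate.
function estimateCostUSD(
	p: ModelPricing,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0
): number {
	const perM = (price: number, tokens: number) => (price / 1_000_000) * tokens
	return (
		perM(p.inputPrice, inputTokens) +
		perM(p.outputPrice, outputTokens) +
		perM(p.cacheWritesPrice ?? 0, cacheWriteTokens) +
		perM(p.cacheReadsPrice ?? 0, cacheReadTokens)
	)
}

// e.g. 1M cache-read tokens on Claude 3.5 Sonnet cost about $0.30 instead of $3.00 of fresh input.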

View File

@@ -12,7 +12,6 @@ import { useEvent, useInterval } from "react-use"
import {
ApiConfiguration,
ModelInfo,
-OpenAiNativeModelId,
anthropicDefaultModelId,
anthropicModels,
bedrockDefaultModelId,
@@ -115,11 +114,11 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
onChange={handleInputChange("apiProvider")}
style={{ minWidth: 130 }}>
<VSCodeOption value="anthropic">Anthropic</VSCodeOption>
<VSCodeOption value="openai-native">OpenAI</VSCodeOption>
<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
<VSCodeOption value="gemini">Google Gemini</VSCodeOption>
<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
<VSCodeOption value="vertex">GCP Vertex AI</VSCodeOption>
<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
<VSCodeOption value="openai-native">OpenAI</VSCodeOption>
<VSCodeOption value="openai">OpenAI Compatible</VSCodeOption>
<VSCodeOption value="ollama">Ollama</VSCodeOption>
</VSCodeDropdown>
@@ -547,7 +546,7 @@ export const formatPrice = (price: number) => {
const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-const isO1 = (["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
+const isO1 = false //(["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
return (
<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
<ModelInfoSupportsItem

View File

@@ -98,6 +98,8 @@ const TaskHeader: React.FC<TaskHeaderProps> = ({
)
}, [apiConfiguration?.apiProvider])
const shouldShowPromptCacheInfo = doesModelSupportPromptCache && apiConfiguration?.apiProvider !== "openrouter"
return (
<div style={{ padding: "10px 13px 10px 13px" }}>
<div
@@ -265,7 +267,7 @@ const TaskHeader: React.FC<TaskHeaderProps> = ({
{!isCostAvailable && <ExportButton />}
</div>
-{(doesModelSupportPromptCache || cacheReads !== undefined || cacheWrites !== undefined) && (
+{(shouldShowPromptCacheInfo || cacheReads !== undefined || cacheWrites !== undefined) && (
<div style={{ display: "flex", alignItems: "center", gap: "4px", flexWrap: "wrap" }}>
<span style={{ fontWeight: "bold" }}>Cache:</span>
<span style={{ display: "flex", alignItems: "center", gap: "3px" }}>