mirror of https://github.com/pacnpal/Roo-Code.git (synced 2025-12-22 21:31:08 -05:00)

Add prompt caching to openrouter; remove o1 since it doesn't support tool use
@@ -1656,6 +1656,9 @@ ${this.customInstructions.trim()}
 		let cacheReadInputTokens =
 			(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
 				.cache_read_input_tokens || undefined
+		// @ts-ignore-next-line
+		let totalCost = response.usage.total_cost
+
 		await this.say(
 			"api_req_finished",
 			JSON.stringify({
@@ -1663,12 +1666,14 @@ ${this.customInstructions.trim()}
 				tokensOut: outputTokens,
 				cacheWrites: cacheCreationInputTokens,
 				cacheReads: cacheReadInputTokens,
-				cost: this.calculateApiCost(
-					inputTokens,
-					outputTokens,
-					cacheCreationInputTokens,
-					cacheReadInputTokens
-				),
+				cost:
+					totalCost ||
+					this.calculateApiCost(
+						inputTokens,
+						outputTokens,
+						cacheCreationInputTokens,
+						cacheReadInputTokens
+					),
 			})
 		)
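The two hunks above change how request cost is reported: the total_cost that OpenRouter returns for the generation now takes precedence, and the locally computed estimate becomes the fallback (note that totalCost || ... also falls through when OpenRouter reports a cost of exactly 0). The diff does not show calculateApiCost itself; what follows is a minimal sketch of what the fallback arithmetic plausibly looks like, assuming prices are quoted in USD per million tokens (consistent with the inputPrice/outputPrice fields later in this diff). The ModelInfo shape and the explicit model parameter are assumptions for illustration, not the project's actual types.

// Hedged sketch only: field names beyond those visible in this diff are assumptions.
interface ModelInfo {
	inputPrice: number // USD per million normal input tokens
	outputPrice: number // USD per million output tokens
	cacheWritesPrice?: number // USD per million tokens written to the prompt cache
	cacheReadsPrice?: number // USD per million tokens read back from the cache
}

function calculateApiCost(
	model: ModelInfo,
	inputTokens: number,
	outputTokens: number,
	cacheCreationInputTokens?: number,
	cacheReadInputTokens?: number
): number {
	// Each component is (price per token) * (token count); prices are per million tokens.
	const cacheWritesCost = ((model.cacheWritesPrice ?? 0) / 1_000_000) * (cacheCreationInputTokens ?? 0)
	const cacheReadsCost = ((model.cacheReadsPrice ?? 0) / 1_000_000) * (cacheReadInputTokens ?? 0)
	const inputCost = (model.inputPrice / 1_000_000) * inputTokens
	const outputCost = (model.outputPrice / 1_000_000) * outputTokens
	return cacheWritesCost + cacheReadsCost + inputCost + outputCost
}

// Mirroring the hunk above, the provider-reported figure wins when present:
// cost: totalCost || calculateApiCost(model, inputTokens, outputTokens, cacheCreationInputTokens, cacheReadInputTokens)
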
@@ -9,6 +9,7 @@ import {
 	openRouterModels,
 } from "../shared/api"
 import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import axios from "axios"

 export class OpenRouterHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -37,6 +38,44 @@ export class OpenRouterHandler implements ApiHandler {
 			...convertToOpenAiMessages(messages),
 		]

+		// prompt caching: https://openrouter.ai/docs/prompt-caching
+		switch (this.getModel().id) {
+			case "anthropic/claude-3.5-sonnet:beta":
+			case "anthropic/claude-3-haiku:beta":
+			case "anthropic/claude-3-opus:beta":
+				openAiMessages[0] = {
+					role: "system",
+					content: [
+						{
+							type: "text",
+							text: systemPrompt,
+							// @ts-ignore-next-line
+							cache_control: { type: "ephemeral" },
+						},
+					],
+				}
+				// Add cache_control to the last two user messages
+				const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+				lastTwoUserMessages.forEach((msg) => {
+					if (typeof msg.content === "string") {
+						msg.content = [{ type: "text", text: msg.content }]
+					}
+					if (Array.isArray(msg.content)) {
+						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+						if (!lastTextPart) {
+							lastTextPart = { type: "text", text: "..." }
+							msg.content.push(lastTextPart)
+						}
+						// @ts-ignore-next-line
+						lastTextPart["cache_control"] = { type: "ephemeral" }
+					}
+				})
+				break
+			default:
+				break
+		}
+
 		// Convert Anthropic tools to OpenAI tools
 		const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
 			type: "function",
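For reference, this is roughly the message array the switch above produces for a caching-capable Claude model: the system prompt and the last text part of each of the two most recent user messages get an ephemeral cache_control breakpoint, so the next request can read the prefix that this one wrote (see the OpenRouter prompt-caching docs linked in the diff). All message text below is illustrative, not from the source.

// Illustrative shape only; the transform above mutates openAiMessages in place.
const transformed = [
	{
		role: "system",
		content: [
			{
				type: "text",
				text: "(the system prompt)",
				cache_control: { type: "ephemeral" }, // breakpoint 1
			},
		],
	},
	{
		role: "user",
		content: [
			{
				type: "text",
				text: "(second-to-last user turn)",
				cache_control: { type: "ephemeral" }, // breakpoint 2
			},
		],
	},
	{ role: "assistant", content: "(assistant turn, left untouched)" },
	{
		role: "user",
		content: [
			{
				type: "text",
				text: "(latest user turn)",
				cache_control: { type: "ephemeral" }, // breakpoint 3
			},
		],
	},
]
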
@@ -91,6 +130,21 @@ export class OpenRouterHandler implements ApiHandler {
 		// 	break
 		// }

+		const genId = completion.id
+		// Log the generation details from OpenRouter API
+		try {
+			const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
+				headers: {
+					Authorization: `Bearer ${this.options.openRouterApiKey}`,
+				},
+			})
+			// @ts-ignore-next-line
+			anthropicMessage.usage.total_cost = response.data?.data?.total_cost
+			console.log("OpenRouter generation details:", response.data)
+		} catch (error) {
+			console.error("Error fetching OpenRouter generation details:", error)
+		}
+
 		return { message: anthropicMessage }
 	}
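The generation-stats lookup above can also be exercised on its own. A minimal standalone sketch, assuming only what the diff itself relies on, namely that the stats sit under response.data.data and include total_cost; any other response fields are not guaranteed here.

import axios from "axios"

// Fetch the cost OpenRouter recorded for a completed generation.
async function fetchGenerationCost(genId: string, apiKey: string): Promise<number | undefined> {
	try {
		const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
			headers: { Authorization: `Bearer ${apiKey}` },
		})
		// The diff reads total_cost from the nested data.data object.
		return response.data?.data?.total_cost
	} catch (error) {
		console.error("Error fetching OpenRouter generation details:", error)
		return undefined
	}
}
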
@@ -70,14 +70,6 @@ export const anthropicModels = {
 		cacheWritesPrice: 18.75,
 		cacheReadsPrice: 1.5,
 	},
-	"claude-3-sonnet-20240229": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"claude-3-haiku-20240307": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -111,14 +103,6 @@ export const bedrockModels = {
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 	},
-	"anthropic.claude-3-sonnet-20240229-v1:0": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"anthropic.claude-3-haiku-20240307-v1:0": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -138,34 +122,49 @@ export const openRouterModels = {
 		maxTokens: 8192,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
+		cacheWritesPrice: 3.75,
+		cacheReadsPrice: 0.3,
 	},
 	"anthropic/claude-3-opus:beta": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 15,
 		outputPrice: 75,
 	},
 	"anthropic/claude-3-sonnet:beta": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3,
 		outputPrice: 15,
 		cacheWritesPrice: 18.75,
 		cacheReadsPrice: 1.5,
 	},
 	"anthropic/claude-3-haiku:beta": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.25,
 		outputPrice: 1.25,
+		cacheWritesPrice: 0.3,
+		cacheReadsPrice: 0.03,
 	},
+	// Doesn't support tool use (yet)
+	// "openai/o1-preview": {
+	// 	maxTokens: 32_768,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 15,
+	// 	outputPrice: 60,
+	// },
+	// "openai/o1-mini": {
+	// 	maxTokens: 65_536,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 3,
+	// 	outputPrice: 12,
+	// },
 	"openai/gpt-4o-2024-08-06": {
 		maxTokens: 16384,
 		contextWindow: 128_000,
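To make the new pricing fields concrete: read as USD per million tokens, a cache write on anthropic/claude-3.5-sonnet:beta is billed at a 25% premium over normal input (3.75 vs 3.0), while a cache read costs 90% less (0.3 vs 3.0). A worked example under that per-million-token assumption:

// Worked example with the claude-3.5-sonnet:beta prices above,
// for a 10,000-token prompt prefix.
const MTOK = 1_000_000
const inputPrice = 3.0 // normal input
const cacheWritesPrice = 3.75 // first request writes the cache (+25%)
const cacheReadsPrice = 0.3 // subsequent requests read it (-90%)

const prefixTokens = 10_000
const uncached = (inputPrice / MTOK) * prefixTokens // $0.03 on every request
const firstCached = (cacheWritesPrice / MTOK) * prefixTokens // $0.0375, paid once
const laterCached = (cacheReadsPrice / MTOK) * prefixTokens // $0.003 per request thereafter
console.log({ uncached, firstCached, laterCached })
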
@@ -294,14 +293,6 @@ export const vertexModels = {
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 	},
-	"claude-3-sonnet@20240229": {
-		maxTokens: 4096,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-	},
 	"claude-3-haiku@20240307": {
 		maxTokens: 4096,
 		contextWindow: 200_000,
@@ -347,24 +338,25 @@ export const geminiModels = {
 // OpenAI Native
 // https://openai.com/api/pricing/
 export type OpenAiNativeModelId = keyof typeof openAiNativeModels
-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "o1-preview"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
 export const openAiNativeModels = {
-	"o1-preview": {
-		maxTokens: 32_768,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-mini": {
-		maxTokens: 65_536,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 3,
-		outputPrice: 12,
-	},
+	// don't support tool use yet
+	// "o1-preview": {
+	// 	maxTokens: 32_768,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 15,
+	// 	outputPrice: 60,
+	// },
+	// "o1-mini": {
+	// 	maxTokens: 65_536,
+	// 	contextWindow: 128_000,
+	// 	supportsImages: true,
+	// 	supportsPromptCache: false,
+	// 	inputPrice: 3,
+	// 	outputPrice: 12,
+	// },
 	"gpt-4o": {
 		maxTokens: 4_096,
 		contextWindow: 128_000,