From 5dcf836d814a429d6e6829e967d243a338f8b7c5 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Sat, 24 Aug 2024 22:08:02 -0400
Subject: [PATCH] Enable prompt caching for Opus

---
 src/api/anthropic.ts | 1 +
 src/api/kodu.ts      | 1 +
 src/shared/api.ts    | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/api/anthropic.ts b/src/api/anthropic.ts
index 5e686b0..ff74608 100644
--- a/src/api/anthropic.ts
+++ b/src/api/anthropic.ts
@@ -19,6 +19,7 @@ export class AnthropicHandler implements ApiHandler {
 		const modelId = this.getModel().id
 		switch (modelId) {
 			case "claude-3-5-sonnet-20240620":
+			case "claude-3-opus-20240229":
 			case "claude-3-haiku-20240307":
 				/* The latest message will be the new user message, one before will be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second to last user message as ephemeral to let the server know the last message to retrieve from the cache for the current request..
diff --git a/src/api/kodu.ts b/src/api/kodu.ts
index 2eb49a7..f406946 100644
--- a/src/api/kodu.ts
+++ b/src/api/kodu.ts
@@ -41,6 +41,7 @@ export class KoduHandler implements ApiHandler {
 		let requestBody: Anthropic.Beta.PromptCaching.Messages.MessageCreateParamsNonStreaming
 		switch (modelId) {
 			case "claude-3-5-sonnet-20240620":
+			case "claude-3-opus-20240229":
 			case "claude-3-haiku-20240307":
 				const userMsgIndices = messages.reduce(
 					(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
diff --git a/src/shared/api.ts b/src/shared/api.ts
index fa7d4be..5223c38 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -46,7 +46,7 @@ export const anthropicModels = {
 	"claude-3-opus-20240229": {
 		maxTokens: 4096,
 		supportsImages: true,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 15.0,
 		outputPrice: 75.0,
 		cacheWritesPrice: 18.75,
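
The two handler changes route "claude-3-opus-20240229" into the same case arms that Sonnet and Haiku already use, so Opus requests get the two ephemeral cache breakpoints described in the comment above: the latest user message is marked so the server caches it for the next request, and the second-to-last user message is marked so the server knows where the previously cached prefix ends. For illustration, here is a minimal self-contained sketch of that breakpoint logic; the Message and ContentBlock types and the addCacheBreakpoints helper are simplified assumptions for this sketch, not the SDK's actual types — only the cache_control: { type: "ephemeral" } marker matches the real Anthropic API.

// Simplified stand-ins for the SDK's message types (assumption: text-only blocks).
type ContentBlock = {
	type: "text"
	text: string
	cache_control?: { type: "ephemeral" }
}

type Message = {
	role: "user" | "assistant"
	content: string | ContentBlock[]
}

// Hypothetical helper: mark the last and second-to-last user messages as
// ephemeral, mirroring the strategy the patch extends to Opus.
function addCacheBreakpoints(messages: Message[]): Message[] {
	// Collect the indices of all user messages (same reduce as in kodu.ts).
	const userMsgIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[]
	)
	const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
	const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

	return messages.map((message, index) => {
		if (index !== lastUserMsgIndex && index !== secondLastUserMsgIndex) {
			return message
		}
		// Normalize string content into a block array so the marker can sit
		// on the message's final content block.
		const blocks: ContentBlock[] =
			typeof message.content === "string"
				? [{ type: "text", text: message.content }]
				: message.content
		return {
			...message,
			content: blocks.map((block, i) =>
				i === blocks.length - 1
					? { ...block, cache_control: { type: "ephemeral" } }
					: block
			),
		}
	})
}

The src/shared/api.ts change is what actually surfaces the capability by flipping supportsPromptCache to true; the pricing fields already anticipated caching, since the cacheWritesPrice of 18.75 is 1.25x the 15.0 input price, matching Anthropic's 25% surcharge for cache writes.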