Enable prompt caching for Opus

This commit is contained in:
Saoud Rizwan
2024-08-24 22:08:02 -04:00
parent 6739e4536a
commit 5dcf836d81
3 changed files with 3 additions and 1 deletion

View File

@@ -19,6 +19,7 @@ export class AnthropicHandler implements ApiHandler {
const modelId = this.getModel().id
switch (modelId) {
case "claude-3-5-sonnet-20240620":
case "claude-3-opus-20240229":
case "claude-3-haiku-20240307":
/*
The latest message will be the new user message. The one before it will be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second-to-last user message as ephemeral to let the server know which is the last message to retrieve from the cache for the current request.

View File

@@ -41,6 +41,7 @@ export class KoduHandler implements ApiHandler {
let requestBody: Anthropic.Beta.PromptCaching.Messages.MessageCreateParamsNonStreaming
switch (modelId) {
case "claude-3-5-sonnet-20240620":
case "claude-3-opus-20240229":
case "claude-3-haiku-20240307":
const userMsgIndices = messages.reduce(
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),