From cfd4cbfa368b8f2f84d2ea86303abe604f8c6aac Mon Sep 17 00:00:00 2001 From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com> Date: Sun, 25 Aug 2024 00:53:00 -0400 Subject: [PATCH] Fix sliding window taking into account message order --- src/utils/context-management.ts | 55 ++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/src/utils/context-management.ts b/src/utils/context-management.ts index 07e56ac..c2cf273 100644 --- a/src/utils/context-management.ts +++ b/src/utils/context-management.ts @@ -7,7 +7,7 @@ export function slidingWindowContextManagement( messages: Anthropic.Messages.MessageParam[], tools: Anthropic.Messages.Tool[] ): Anthropic.Messages.MessageParam[] { - const adjustedContextWindow = contextWindow - 10000 // Buffer to account for tokenizer differences + const adjustedContextWindow = contextWindow - 10_000 // Buffer to account for tokenizer differences const systemPromptTokens = countTokens(systemPrompt) const toolsTokens = countTokens(JSON.stringify(tools)) let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens @@ -19,17 +19,50 @@ export function slidingWindowContextManagement( // If over limit, remove messages starting from the third message onwards (task and claude's step-by-step thought process are important to keep in context) const newMessages = [...messages] - while (totalMessageTokens > availableTokens && newMessages.length > 2) { - const removedMessage = newMessages.splice(2, 1)[0] - const removedTokens = countMessageTokens(removedMessage) - totalMessageTokens -= removedTokens + let index = 2 + while (totalMessageTokens > availableTokens && index < newMessages.length) { + const messageToEmpty = newMessages[index] + const originalTokens = countMessageTokens(messageToEmpty) + // Empty the content of the message (messages must be in a specific order so we can't just remove) + if (typeof messageToEmpty.content === "string") { + messageToEmpty.content = "" + } else if (Array.isArray(messageToEmpty.content)) { + messageToEmpty.content = messageToEmpty.content.map((item) => { + if (typeof item === "string") { + return { + type: "text", + text: "(truncated due to context window)", + } as Anthropic.Messages.TextBlockParam + } else if (item.type === "text") { + return { + type: "text", + text: "(truncated due to context window)", + } as Anthropic.Messages.TextBlockParam + } else if (item.type === "image") { + return { ...item, source: { type: "base64", data: "" } } as Anthropic.Messages.ImageBlockParam + } else if (item.type === "tool_use") { + return { ...item, input: {} } as Anthropic.Messages.ToolUseBlockParam + } else if (item.type === "tool_result") { + return { + ...item, + content: Array.isArray(item.content) + ? item.content.map((contentItem) => + contentItem.type === "text" + ? { ...contentItem, text: "(truncated due to context window)" } + : contentItem.type === "image" + ? { ...contentItem, source: { type: "base64", data: "" } } + : contentItem + ) + : "", + } as Anthropic.Messages.ToolResultBlockParam + } + return item + }) + } + const newTokens = countMessageTokens(messageToEmpty) + totalMessageTokens -= originalTokens - newTokens + index++ } - - if (totalMessageTokens > availableTokens) { - // Over the limit due to the first two messages - throw new Error("Task exceeds available context window") - } - return newMessages }