From 3e58160d9994b7800538f81be3c1b9b922070698 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Fri, 30 Aug 2024 22:29:18 -0400
Subject: [PATCH] Replace token estimation with using last API response token
 usage

---
 esbuild.js                      |  6 ---
 package-lock.json               | 51 ++---------------------
 package.json                    |  2 -
 src/ClaudeDev.ts                | 31 +++++++++-----
 src/utils/array-helpers.ts      |  5 +++
 src/utils/context-management.ts | 73 ---------------------------------
 6 files changed, 28 insertions(+), 140 deletions(-)

diff --git a/esbuild.js b/esbuild.js
index c3b613c..8b20307 100644
--- a/esbuild.js
+++ b/esbuild.js
@@ -29,12 +29,6 @@ const copyWasmFiles = {
 	name: "copy-wasm-files",
 	setup(build) {
 		build.onEnd(() => {
-			// tiktoken
-			fs.copyFileSync(
-				path.join(__dirname, "node_modules", "tiktoken", "tiktoken_bg.wasm"),
-				path.join(__dirname, "dist", "tiktoken_bg.wasm")
-			)
-
 			// tree sitter
 			const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
 			const targetDir = path.join(__dirname, "dist")
diff --git a/package-lock.json b/package-lock.json
index 89ca1bf..b16f6ab 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,17 +1,16 @@
 {
   "name": "claude-dev",
-  "version": "1.4.24",
+  "version": "1.5.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "claude-dev",
-      "version": "1.4.24",
+      "version": "1.5.3",
       "license": "MIT",
       "dependencies": {
         "@anthropic-ai/bedrock-sdk": "^0.10.2",
         "@anthropic-ai/sdk": "^0.26.0",
-        "@anthropic-ai/tokenizer": "^0.0.4",
         "@anthropic-ai/vertex-sdk": "^0.4.1",
         "@types/clone-deep": "^4.0.4",
         "@vscode/codicons": "^0.0.36",
@@ -22,7 +21,6 @@
         "diff": "^5.2.0",
         "execa": "^9.3.0",
         "globby": "^14.0.2",
-        "image-size": "^1.1.1",
         "openai": "^4.54.0",
         "os-name": "^6.0.0",
         "p-wait-for": "^5.0.2",
@@ -90,23 +88,6 @@
         "undici-types": "~5.26.4"
       }
     },
-    "node_modules/@anthropic-ai/tokenizer": {
-      "version": "0.0.4",
-      "resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
-      "integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
-      "dependencies": {
-        "@types/node": "^18.11.18",
-        "tiktoken": "^1.0.10"
-      }
-    },
-    "node_modules/@anthropic-ai/tokenizer/node_modules/@types/node": {
-      "version": "18.19.45",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.45.tgz",
-      "integrity": "sha512-VZxPKNNhjKmaC1SUYowuXSRSMGyQGmQjvvA1xE4QZ0xce2kLtEhPDS+kqpCPBZYgqblCLQ2DAjSzmgCM5auvhA==",
-      "dependencies": {
-        "undici-types": "~5.26.4"
-      }
-    },
     "node_modules/@anthropic-ai/vertex-sdk": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.4.1.tgz",
@@ -6918,20 +6899,6 @@
         "node": ">= 4"
       }
     },
-    "node_modules/image-size": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz",
-      "integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==",
-      "dependencies": {
-        "queue": "6.0.2"
-      },
-      "bin": {
-        "image-size": "bin/image-size.js"
-      },
-      "engines": {
-        "node": ">=16.x"
-      }
-    },
     "node_modules/immediate": {
       "version": "3.0.6",
       "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
@@ -6982,6 +6949,7 @@
       "version": "2.0.4",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
+      "dev": true,
       "license": "ISC"
     },
     "node_modules/internal-slot": {
@@ -8772,14 +8740,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/queue": {
-      "version": "6.0.2",
-      "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
-      "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
-      "dependencies": {
-        "inherits": "~2.0.3"
-      }
-    },
     "node_modules/queue-microtask": {
       "version": "1.2.3",
       "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -9643,11 +9603,6 @@
       "dev": true,
       "license": "MIT"
     },
-    "node_modules/tiktoken": {
-      "version": "1.0.16",
-      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.16.tgz",
-      "integrity": "sha512-hRcORIGF2YlAgWx3nzrGJOrKSJwLoc81HpXmMQk89632XAgURc7IeV2FgQ2iXo9z/J96fCvpsHg2kWoHcbj9fg=="
-    },
     "node_modules/to-regex-range": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
diff --git a/package.json b/package.json
index 9f6d533..a2fc4fe 100644
--- a/package.json
+++ b/package.json
@@ -134,7 +134,6 @@
 	"dependencies": {
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.26.0",
-		"@anthropic-ai/tokenizer": "^0.0.4",
 		"@anthropic-ai/vertex-sdk": "^0.4.1",
 		"@types/clone-deep": "^4.0.4",
 		"@vscode/codicons": "^0.0.36",
@@ -145,7 +144,6 @@
 		"diff": "^5.2.0",
 		"execa": "^9.3.0",
 		"globby": "^14.0.2",
-		"image-size": "^1.1.1",
 		"openai": "^4.54.0",
 		"os-name": "^6.0.0",
 		"p-wait-for": "^5.0.2",
diff --git a/src/ClaudeDev.ts b/src/ClaudeDev.ts
index 71bdc70..5fe2637 100644
--- a/src/ClaudeDev.ts
+++ b/src/ClaudeDev.ts
@@ -24,8 +24,8 @@ import { getApiMetrics } from "./shared/getApiMetrics"
 import { HistoryItem } from "./shared/HistoryItem"
 import { Tool, ToolName } from "./shared/Tool"
 import { ClaudeAskResponse } from "./shared/WebviewMessage"
-import { findLastIndex } from "./utils"
-import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management"
+import { findLast, findLastIndex } from "./utils"
+import { truncateHalfConversation } from "./utils/context-management"
 import { regexSearchFiles } from "./utils/ripgrep"
 
 const SYSTEM_PROMPT =
@@ -1304,15 +1304,24 @@ The following additional instructions are provided by the user. They should be f
 ${this.customInstructions.trim()}
 `
 		}
-		const isPromptWithinContextWindow = isWithinContextWindow(
-			this.api.getModel().info.contextWindow,
-			systemPrompt,
-			tools,
-			this.apiConversationHistory
-		)
-		if (!isPromptWithinContextWindow) {
-			const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-			await this.overwriteApiConversationHistory(truncatedMessages)
-		}
+
+		// Check last API request metrics to see if we need to truncate
+		const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
+		if (lastApiReqFinished && lastApiReqFinished.text) {
+			const {
+				tokensIn,
+				tokensOut,
+				cacheWrites,
+				cacheReads,
+			}: { tokensIn?: number; tokensOut?: number; cacheWrites?: number; cacheReads?: number } = JSON.parse(
+				lastApiReqFinished.text
+			)
+			const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+			const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
+			if (isCloseToContextWindowLimit) {
+				const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
+				await this.overwriteApiConversationHistory(truncatedMessages)
+			}
+		}
 		const { message, userCredits } = await this.api.createMessage(
 			systemPrompt,
diff --git a/src/utils/array-helpers.ts b/src/utils/array-helpers.ts
index c20e327..b87c458 100644
--- a/src/utils/array-helpers.ts
+++ b/src/utils/array-helpers.ts
@@ -15,3 +15,8 @@ export function findLastIndex<T>(array: Array<T>, predicate: (value: T, index
 	}
 	return -1
 }
+
+export function findLast<T>(array: Array<T>, predicate: (value: T, index: number, obj: T[]) => boolean): T | undefined {
+	const index = findLastIndex(array, predicate)
+	return index === -1 ? undefined : array[index]
+}
diff --git a/src/utils/context-management.ts b/src/utils/context-management.ts
index f2442b1..708e9a6 100644
--- a/src/utils/context-management.ts
+++ b/src/utils/context-management.ts
@@ -1,26 +1,4 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import { countTokens } from "@anthropic-ai/tokenizer"
-import { Buffer } from "buffer"
-import sizeOf from "image-size"
-
-export function isWithinContextWindow(
-	contextWindow: number,
-	systemPrompt: string,
-	tools: Anthropic.Messages.Tool[],
-	messages: Anthropic.Messages.MessageParam[]
-): boolean {
-	const adjustedContextWindow = contextWindow * 0.75 // Buffer to account for tokenizer differences
-	// counting tokens is expensive, so we first try to estimate before doing a more accurate calculation
-	const estimatedTotalMessageTokens = countTokens(systemPrompt + JSON.stringify(tools) + JSON.stringify(messages))
-	if (estimatedTotalMessageTokens <= adjustedContextWindow) {
-		return true
-	}
-	const systemPromptTokens = countTokens(systemPrompt)
-	const toolsTokens = countTokens(JSON.stringify(tools))
-	let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
-	let accurateTotalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
-	return accurateTotalMessageTokens <= availableTokens
-}
 
 /*
 We can't implement a dynamically updating sliding window as it would break prompt cache
@@ -46,54 +24,3 @@ export function truncateHalfConversation(
 
 	return truncatedMessages
 }
-
-function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
-	if (typeof message.content === "string") {
-		return countTokens(message.content)
-	} else if (Array.isArray(message.content)) {
-		return message.content.reduce((sum, item) => {
-			if (typeof item === "string") {
-				return sum + countTokens(item)
-			} else if (item.type === "text") {
-				return sum + countTokens(item.text)
-			} else if (item.type === "image") {
-				return sum + estimateImageTokens(item.source.data)
-			} else if (item.type === "tool_use") {
-				return sum + countTokens(JSON.stringify(item.input))
-			} else if (item.type === "tool_result") {
-				if (Array.isArray(item.content)) {
-					return (
-						sum +
-						item.content.reduce((contentSum, contentItem) => {
-							if (contentItem.type === "text") {
-								return contentSum + countTokens(contentItem.text)
-							} else if (contentItem.type === "image") {
-								return contentSum + estimateImageTokens(contentItem.source.data)
-							}
-							return contentSum + countTokens(JSON.stringify(contentItem))
-						}, 0)
-					)
-				} else {
-					return sum + countTokens(item.content || "")
-				}
-			} else {
-				return sum + countTokens(JSON.stringify(item))
-			}
-		}, 0)
-	} else {
-		return countTokens(JSON.stringify(message.content))
-	}
-}
-
-function estimateImageTokens(base64: string): number {
-	const base64Data = base64.split(";base64,").pop()
-	if (base64Data) {
-		const buffer = Buffer.from(base64Data, "base64")
-		const dimensions = sizeOf(buffer)
-		if (dimensions.width && dimensions.height) {
-			// "you can estimate the number of tokens used through this algorithm: tokens = (width px * height px)/750"
-			return Math.ceil((dimensions.width * dimensions.height) / 750)
-		}
-	}
-	return countTokens(base64)
-}