diff --git a/package-lock.json b/package-lock.json
index 3c1126f..01b47bb 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -11,6 +11,7 @@
       "dependencies": {
         "@anthropic-ai/bedrock-sdk": "^0.10.2",
         "@anthropic-ai/sdk": "^0.26.0",
+        "@anthropic-ai/tokenizer": "^0.0.4",
         "@kodu-ai/cloud-api": "^1.0.1",
         "@vscode/codicons": "^0.0.36",
         "axios": "^1.7.4",
@@ -19,7 +20,6 @@
         "diff": "^5.2.0",
         "execa": "^9.3.0",
         "globby": "^14.0.2",
-        "gpt-tokenizer": "^2.2.1",
         "openai": "^4.54.0",
         "os-name": "^6.0.0",
         "p-wait-for": "^5.0.2",
@@ -88,6 +88,23 @@
         "undici-types": "~5.26.4"
       }
     },
+    "node_modules/@anthropic-ai/tokenizer": {
+      "version": "0.0.4",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
+      "integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
+      "dependencies": {
+        "@types/node": "^18.11.18",
+        "tiktoken": "^1.0.10"
+      }
+    },
+    "node_modules/@anthropic-ai/tokenizer/node_modules/@types/node": {
+      "version": "18.19.45",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.45.tgz",
+      "integrity": "sha512-VZxPKNNhjKmaC1SUYowuXSRSMGyQGmQjvvA1xE4QZ0xce2kLtEhPDS+kqpCPBZYgqblCLQ2DAjSzmgCM5auvhA==",
+      "dependencies": {
+        "undici-types": "~5.26.4"
+      }
+    },
     "node_modules/@aws-crypto/crc32": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
@@ -6625,14 +6642,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/gpt-tokenizer": {
-      "version": "2.2.1",
-      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.2.1.tgz",
-      "integrity": "sha512-JYvLWTpPtFGz7eS7uixHslv3L96zka0n18MlQeH5YVl5F6mNhssxzSBTeqwNfW8A0AQIMYEaOfbSr+MaoCUvpg==",
-      "dependencies": {
-        "rfc4648": "^1.5.2"
-      }
-    },
     "node_modules/graceful-fs": {
       "version": "4.2.11",
       "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -8811,11 +8820,6 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/rfc4648": {
-      "version": "1.5.3",
-      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.3.tgz",
-      "integrity": "sha512-MjOWxM065+WswwnmNONOT+bD1nXzY9Km6u3kzvnx8F8/HXGZdz3T6e6vZJ8Q/RIMUSp/nxqjH3GwvJDy8ijeQQ=="
-    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -9502,6 +9506,11 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/tiktoken": {
+      "version": "1.0.16",
+      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.16.tgz",
+      "integrity": "sha512-hRcORIGF2YlAgWx3nzrGJOrKSJwLoc81HpXmMQk89632XAgURc7IeV2FgQ2iXo9z/J96fCvpsHg2kWoHcbj9fg=="
+    },
     "node_modules/to-regex-range": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
diff --git a/package.json b/package.json
index bf51466..c220b8c 100644
--- a/package.json
+++ b/package.json
@@ -134,6 +134,7 @@
 	"dependencies": {
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.26.0",
+		"@anthropic-ai/tokenizer": "^0.0.4",
 		"@kodu-ai/cloud-api": "^1.0.1",
 		"@vscode/codicons": "^0.0.36",
 		"axios": "^1.7.4",
@@ -142,7 +143,6 @@
 		"diff": "^5.2.0",
 		"execa": "^9.3.0",
 		"globby": "^14.0.2",
-		"gpt-tokenizer": "^2.2.1",
 		"openai": "^4.54.0",
 		"os-name": "^6.0.0",
 		"p-wait-for": "^5.0.2",
diff --git a/src/ClaudeDev.ts b/src/ClaudeDev.ts
index b9523e8..8a8f609 100644
--- a/src/ClaudeDev.ts
+++ b/src/ClaudeDev.ts
@@ -25,6 +25,7 @@ import { HistoryItem } from "./shared/HistoryItem"
 import { combineApiRequests } from "./shared/combineApiRequests"
 import { combineCommandSequences } from "./shared/combineCommandSequences"
 import { findLastIndex } from "./utils"
+import { slidingWindowContextManagement } from "./utils/context-management"
 
 const SYSTEM_PROMPT = () => `You are Claude Dev, a highly skilled software developer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
 
@@ -1217,7 +1218,13 @@ The following additional instructions are provided by the user. They should be f
 ${this.customInstructions.trim()}
 `
 			}
-			return await this.api.createMessage(systemPrompt, this.apiConversationHistory, tools)
+			const adjustedMessages = slidingWindowContextManagement(
+				this.api.getModel().info.contextWindow,
+				systemPrompt,
+				this.apiConversationHistory,
+				tools
+			)
+			return await this.api.createMessage(systemPrompt, adjustedMessages, tools)
 		} catch (error) {
 			const { response } = await this.ask(
 				"api_req_failed",
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 5223c38..7113935 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -19,6 +19,7 @@ export type ApiConfiguration = ApiHandlerOptions & {
 
 export interface ModelInfo {
 	maxTokens: number
+	contextWindow: number
 	supportsImages: boolean
 	supportsPromptCache: boolean
 	inputPrice: number
@@ -36,6 +37,7 @@ export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-2024
 export const anthropicModels = {
 	"claude-3-5-sonnet-20240620": {
 		maxTokens: 8192,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 3.0, // $3 per million input tokens
@@ -45,6 +47,7 @@ export const anthropicModels = {
 	},
 	"claude-3-opus-20240229": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 15.0,
@@ -54,6 +57,7 @@ export const anthropicModels = {
 	},
 	"claude-3-sonnet-20240229": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3.0,
@@ -61,6 +65,7 @@ export const anthropicModels = {
 	},
 	"claude-3-haiku-20240307": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 0.25,
@@ -77,6 +82,7 @@ export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonne
 export const bedrockModels = {
 	"anthropic.claude-3-5-sonnet-20240620-v1:0": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3.0,
@@ -84,6 +90,7 @@ export const bedrockModels = {
 	},
 	"anthropic.claude-3-opus-20240229-v1:0": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 15.0,
@@ -91,6 +98,7 @@ export const bedrockModels = {
 	},
 	"anthropic.claude-3-sonnet-20240229-v1:0": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3.0,
@@ -98,6 +106,7 @@ export const bedrockModels = {
 	},
 	"anthropic.claude-3-haiku-20240307-v1:0": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0.25,
@@ -112,6 +121,7 @@ export const openRouterDefaultModelId: OpenRouterModelId = "anthropic/claude-3.5
 export const openRouterModels = {
 	"anthropic/claude-3.5-sonnet:beta": {
 		maxTokens: 8192,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3.0,
@@ -119,6 +129,7 @@ export const openRouterModels = {
 	},
 	"anthropic/claude-3-opus:beta": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 15,
@@ -126,6 +137,7 @@ export const openRouterModels = {
 	},
 	"anthropic/claude-3-sonnet:beta": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 3,
@@ -133,6 +145,7 @@ export const openRouterModels = {
 	},
 	"anthropic/claude-3-haiku:beta": {
 		maxTokens: 4096,
+		contextWindow: 200_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0.25,
@@ -140,6 +153,7 @@ export const openRouterModels = {
 	},
 	"openai/gpt-4o-2024-08-06": {
 		maxTokens: 16384,
+		contextWindow: 128_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 2.5,
@@ -147,6 +161,7 @@ export const openRouterModels = {
 	},
 	"openai/gpt-4o-mini-2024-07-18": {
 		maxTokens: 16384,
+		contextWindow: 128_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0.15,
@@ -154,6 +169,7 @@ export const openRouterModels = {
 	},
 	"openai/gpt-4-turbo": {
 		maxTokens: 4096,
+		contextWindow: 128_000,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 10,
@@ -200,6 +216,7 @@ export const openRouterModels = {
 	// while deepseek coder can use tools, it may sometimes send tool invocation as a text block
 	"deepseek/deepseek-coder": {
 		maxTokens: 4096,
+		contextWindow: 128_000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0.14,
@@ -208,6 +225,7 @@ export const openRouterModels = {
 	// mistral models can use tools but aren't great at going step-by-step and proceeding to the next step
 	"mistralai/mistral-large": {
 		maxTokens: 8192,
+		contextWindow: 128_000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 3,
diff --git a/src/utils/context-management.ts b/src/utils/context-management.ts
new file mode 100644
index 0000000..07e56ac
--- /dev/null
+++ b/src/utils/context-management.ts
@@ -0,0 +1,42 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import { countTokens } from "@anthropic-ai/tokenizer"
+
+export function slidingWindowContextManagement(
+	contextWindow: number,
+	systemPrompt: string,
+	messages: Anthropic.Messages.MessageParam[],
+	tools: Anthropic.Messages.Tool[]
+): Anthropic.Messages.MessageParam[] {
+	const adjustedContextWindow = contextWindow - 10000 // Buffer to account for tokenizer differences
+	const systemPromptTokens = countTokens(systemPrompt)
+	const toolsTokens = countTokens(JSON.stringify(tools))
+	let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
+	let totalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
+
+	if (totalMessageTokens <= availableTokens) {
+		return messages
+	}
+
+	// If over limit, remove messages starting from the third message onwards (task and claude's step-by-step thought process are important to keep in context)
+	const newMessages = [...messages]
+	while (totalMessageTokens > availableTokens && newMessages.length > 2) {
+		const removedMessage = newMessages.splice(2, 1)[0]
+		const removedTokens = countMessageTokens(removedMessage)
+		totalMessageTokens -= removedTokens
+	}
+
+	if (totalMessageTokens > availableTokens) {
+		// Over the limit due to the first two messages
+		throw new Error("Task exceeds available context window")
+	}
+
+	return newMessages
+}
+
+function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
+	if (typeof message.content === "string") {
+		return countTokens(message.content)
+	} else {
+		return countTokens(JSON.stringify(message.content))
+	}
+}
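Usage sketch of the new helper, tying the pieces of the diff together. This is illustrative only: the conversation history, system prompt text, and repeat count below are hypothetical stand-ins, not values from the extension.

import { Anthropic } from "@anthropic-ai/sdk"
import { slidingWindowContextManagement } from "./utils/context-management"

// Hypothetical conversation history. The first two messages (the task and the
// assistant's initial step-by-step plan) are always kept; when the total would
// overflow the window, messages are dropped from index 2 onwards, oldest first.
const history: Anthropic.Messages.MessageParam[] = [
	{ role: "user", content: "Task: refactor the auth module" },
	{ role: "assistant", content: "Here is my step-by-step plan: ..." },
	{ role: "user", content: "file contents ".repeat(200_000) }, // stand-in for a huge file read
	{ role: "assistant", content: "I've read the file." },
	{ role: "user", content: "Now update the tests." },
]

const adjusted = slidingWindowContextManagement(
	200_000, // what this.api.getModel().info.contextWindow returns for Claude 3.5 Sonnet
	"You are Claude Dev, ...", // the system prompt's tokens are subtracted from the window
	history,
	[] // tools are counted too (via JSON.stringify); empty here for brevity
)

// With the oversized third message, `adjusted` keeps history[0] and history[1]
// and removes middle messages until the remainder fits under the window minus
// the 10k-token buffer. If even the first two messages overflow, the helper
// throws "Task exceeds available context window" rather than truncating them.
console.log(adjusted.length)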