Add sliding window context management

This commit is contained in:
Saoud Rizwan
2024-08-24 23:38:38 -04:00
parent 6a7ab87f68
commit 650283a11f
5 changed files with 92 additions and 16 deletions

View File

@@ -25,6 +25,7 @@ import { HistoryItem } from "./shared/HistoryItem"
import { combineApiRequests } from "./shared/combineApiRequests"
import { combineCommandSequences } from "./shared/combineCommandSequences"
import { findLastIndex } from "./utils"
import { slidingWindowContextManagement } from "./utils/context-management"
const SYSTEM_PROMPT =
() => `You are Claude Dev, a highly skilled software developer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
@@ -1217,7 +1218,13 @@ The following additional instructions are provided by the user. They should be f
${this.customInstructions.trim()}
`
}
return await this.api.createMessage(systemPrompt, this.apiConversationHistory, tools)
const adjustedMessages = slidingWindowContextManagement(
this.api.getModel().info.contextWindow,
systemPrompt,
this.apiConversationHistory,
tools
)
return await this.api.createMessage(systemPrompt, adjustedMessages, tools)
} catch (error) {
const { response } = await this.ask(
"api_req_failed",

View File

@@ -19,6 +19,7 @@ export type ApiConfiguration = ApiHandlerOptions & {
export interface ModelInfo {
maxTokens: number
contextWindow: number
supportsImages: boolean
supportsPromptCache: boolean
inputPrice: number
@@ -36,6 +37,7 @@ export const anthropicDefaultModelId: AnthropicModelId = "claude-3-5-sonnet-2024
export const anthropicModels = {
"claude-3-5-sonnet-20240620": {
maxTokens: 8192,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 3.0, // $3 per million input tokens
@@ -45,6 +47,7 @@ export const anthropicModels = {
},
"claude-3-opus-20240229": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 15.0,
@@ -54,6 +57,7 @@ export const anthropicModels = {
},
"claude-3-sonnet-20240229": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
@@ -61,6 +65,7 @@ export const anthropicModels = {
},
"claude-3-haiku-20240307": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 0.25,
@@ -77,6 +82,7 @@ export const bedrockDefaultModelId: BedrockModelId = "anthropic.claude-3-5-sonne
export const bedrockModels = {
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
@@ -84,6 +90,7 @@ export const bedrockModels = {
},
"anthropic.claude-3-opus-20240229-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 15.0,
@@ -91,6 +98,7 @@ export const bedrockModels = {
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
@@ -98,6 +106,7 @@ export const bedrockModels = {
},
"anthropic.claude-3-haiku-20240307-v1:0": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0.25,
@@ -112,6 +121,7 @@ export const openRouterDefaultModelId: OpenRouterModelId = "anthropic/claude-3.5
export const openRouterModels = {
"anthropic/claude-3.5-sonnet:beta": {
maxTokens: 8192,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3.0,
@@ -119,6 +129,7 @@ export const openRouterModels = {
},
"anthropic/claude-3-opus:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 15,
@@ -126,6 +137,7 @@ export const openRouterModels = {
},
"anthropic/claude-3-sonnet:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3,
@@ -133,6 +145,7 @@ export const openRouterModels = {
},
"anthropic/claude-3-haiku:beta": {
maxTokens: 4096,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0.25,
@@ -140,6 +153,7 @@ export const openRouterModels = {
},
"openai/gpt-4o-2024-08-06": {
maxTokens: 16384,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 2.5,
@@ -147,6 +161,7 @@ export const openRouterModels = {
},
"openai/gpt-4o-mini-2024-07-18": {
maxTokens: 16384,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0.15,
@@ -154,6 +169,7 @@ export const openRouterModels = {
},
"openai/gpt-4-turbo": {
maxTokens: 4096,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 10,
@@ -200,6 +216,7 @@ export const openRouterModels = {
// while deepseek coder can use tools, it may sometimes send tool invocation as a text block
"deepseek/deepseek-coder": {
maxTokens: 4096,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.14,
@@ -208,6 +225,7 @@ export const openRouterModels = {
// mistral models can use tools but aren't great at going step-by-step and proceeding to the next step
"mistralai/mistral-large": {
maxTokens: 8192,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 3,

View File

@@ -0,0 +1,42 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { countTokens } from "@anthropic-ai/tokenizer"
/**
 * Truncates the conversation history so that the system prompt, tool
 * definitions, and messages all fit within the model's context window.
 *
 * The first two messages (the user's original task and the assistant's
 * initial step-by-step plan) are always preserved; when over budget, the
 * oldest messages after them are dropped first and the newest are kept.
 *
 * @param contextWindow - total context window of the model, in tokens
 * @param systemPrompt - system prompt sent with every request
 * @param messages - full conversation history (alternating user/assistant)
 * @param tools - tool definitions sent with every request
 * @returns the (possibly truncated) message list to send to the API
 * @throws Error when even the two protected messages exceed the budget
 */
export function slidingWindowContextManagement(
	contextWindow: number,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	tools: Anthropic.Messages.Tool[]
): Anthropic.Messages.MessageParam[] {
	// Reserve headroom: the local tokenizer only approximates the API's count.
	const adjustedContextWindow = contextWindow - 10_000
	const systemPromptTokens = countTokens(systemPrompt)
	const toolsTokens = countTokens(JSON.stringify(tools))
	const availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
	let totalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
	if (totalMessageTokens <= availableTokens) {
		return messages
	}
	// Over budget: remove messages starting from the third message onwards
	// (the task and Claude's step-by-step thought process are important to
	// keep in context). Remove a user/assistant *pair* at a time so the
	// remaining history still alternates roles — removing a single message
	// would leave two same-role messages adjacent, which the Anthropic
	// Messages API rejects.
	const newMessages = [...messages]
	while (totalMessageTokens > availableTokens && newMessages.length > 2) {
		const removed = newMessages.splice(2, Math.min(2, newMessages.length - 2))
		totalMessageTokens -= removed.reduce((sum, message) => sum + countMessageTokens(message), 0)
	}
	if (totalMessageTokens > availableTokens) {
		// Even the two protected messages are over the limit.
		throw new Error("Task exceeds available context window")
	}
	return newMessages
}
/**
 * Approximates the token count of a single message. String content is
 * tokenized directly; structured content blocks are tokenized via their
 * JSON serialization (slightly overestimates due to JSON syntax, which is
 * acceptable for budgeting purposes).
 */
function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
	const text = typeof message.content === "string" ? message.content : JSON.stringify(message.content)
	return countTokens(text)
}