mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-19 20:01:08 -05:00
Replace token estimation with using last API response token usage
This commit is contained in:
@@ -29,12 +29,6 @@ const copyWasmFiles = {
|
|||||||
name: "copy-wasm-files",
|
name: "copy-wasm-files",
|
||||||
setup(build) {
|
setup(build) {
|
||||||
build.onEnd(() => {
|
build.onEnd(() => {
|
||||||
// tiktoken
|
|
||||||
fs.copyFileSync(
|
|
||||||
path.join(__dirname, "node_modules", "tiktoken", "tiktoken_bg.wasm"),
|
|
||||||
path.join(__dirname, "dist", "tiktoken_bg.wasm")
|
|
||||||
)
|
|
||||||
|
|
||||||
// tree sitter
|
// tree sitter
|
||||||
const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
|
const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
|
||||||
const targetDir = path.join(__dirname, "dist")
|
const targetDir = path.join(__dirname, "dist")
|
||||||
|
|||||||
51
package-lock.json
generated
51
package-lock.json
generated
@@ -1,17 +1,16 @@
|
|||||||
{
|
{
|
||||||
"name": "claude-dev",
|
"name": "claude-dev",
|
||||||
"version": "1.4.24",
|
"version": "1.5.3",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "claude-dev",
|
"name": "claude-dev",
|
||||||
"version": "1.4.24",
|
"version": "1.5.3",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/bedrock-sdk": "^0.10.2",
|
"@anthropic-ai/bedrock-sdk": "^0.10.2",
|
||||||
"@anthropic-ai/sdk": "^0.26.0",
|
"@anthropic-ai/sdk": "^0.26.0",
|
||||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
||||||
"@anthropic-ai/vertex-sdk": "^0.4.1",
|
"@anthropic-ai/vertex-sdk": "^0.4.1",
|
||||||
"@types/clone-deep": "^4.0.4",
|
"@types/clone-deep": "^4.0.4",
|
||||||
"@vscode/codicons": "^0.0.36",
|
"@vscode/codicons": "^0.0.36",
|
||||||
@@ -22,7 +21,6 @@
|
|||||||
"diff": "^5.2.0",
|
"diff": "^5.2.0",
|
||||||
"execa": "^9.3.0",
|
"execa": "^9.3.0",
|
||||||
"globby": "^14.0.2",
|
"globby": "^14.0.2",
|
||||||
"image-size": "^1.1.1",
|
|
||||||
"openai": "^4.54.0",
|
"openai": "^4.54.0",
|
||||||
"os-name": "^6.0.0",
|
"os-name": "^6.0.0",
|
||||||
"p-wait-for": "^5.0.2",
|
"p-wait-for": "^5.0.2",
|
||||||
@@ -90,23 +88,6 @@
|
|||||||
"undici-types": "~5.26.4"
|
"undici-types": "~5.26.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@anthropic-ai/tokenizer": {
|
|
||||||
"version": "0.0.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
|
|
||||||
"integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/node": "^18.11.18",
|
|
||||||
"tiktoken": "^1.0.10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@anthropic-ai/tokenizer/node_modules/@types/node": {
|
|
||||||
"version": "18.19.45",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.45.tgz",
|
|
||||||
"integrity": "sha512-VZxPKNNhjKmaC1SUYowuXSRSMGyQGmQjvvA1xE4QZ0xce2kLtEhPDS+kqpCPBZYgqblCLQ2DAjSzmgCM5auvhA==",
|
|
||||||
"dependencies": {
|
|
||||||
"undici-types": "~5.26.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@anthropic-ai/vertex-sdk": {
|
"node_modules/@anthropic-ai/vertex-sdk": {
|
||||||
"version": "0.4.1",
|
"version": "0.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.4.1.tgz",
|
||||||
@@ -6918,20 +6899,6 @@
|
|||||||
"node": ">= 4"
|
"node": ">= 4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/image-size": {
|
|
||||||
"version": "1.1.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz",
|
|
||||||
"integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"queue": "6.0.2"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"image-size": "bin/image-size.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=16.x"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/immediate": {
|
"node_modules/immediate": {
|
||||||
"version": "3.0.6",
|
"version": "3.0.6",
|
||||||
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
|
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
|
||||||
@@ -6982,6 +6949,7 @@
|
|||||||
"version": "2.0.4",
|
"version": "2.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
||||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||||
|
"dev": true,
|
||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
"node_modules/internal-slot": {
|
"node_modules/internal-slot": {
|
||||||
@@ -8772,14 +8740,6 @@
|
|||||||
"node": ">=6"
|
"node": ">=6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/queue": {
|
|
||||||
"version": "6.0.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
|
|
||||||
"integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
|
|
||||||
"dependencies": {
|
|
||||||
"inherits": "~2.0.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/queue-microtask": {
|
"node_modules/queue-microtask": {
|
||||||
"version": "1.2.3",
|
"version": "1.2.3",
|
||||||
"resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
|
||||||
@@ -9643,11 +9603,6 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/tiktoken": {
|
|
||||||
"version": "1.0.16",
|
|
||||||
"resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.16.tgz",
|
|
||||||
"integrity": "sha512-hRcORIGF2YlAgWx3nzrGJOrKSJwLoc81HpXmMQk89632XAgURc7IeV2FgQ2iXo9z/J96fCvpsHg2kWoHcbj9fg=="
|
|
||||||
},
|
|
||||||
"node_modules/to-regex-range": {
|
"node_modules/to-regex-range": {
|
||||||
"version": "5.0.1",
|
"version": "5.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
|
||||||
|
|||||||
@@ -134,7 +134,6 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/bedrock-sdk": "^0.10.2",
|
"@anthropic-ai/bedrock-sdk": "^0.10.2",
|
||||||
"@anthropic-ai/sdk": "^0.26.0",
|
"@anthropic-ai/sdk": "^0.26.0",
|
||||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
||||||
"@anthropic-ai/vertex-sdk": "^0.4.1",
|
"@anthropic-ai/vertex-sdk": "^0.4.1",
|
||||||
"@types/clone-deep": "^4.0.4",
|
"@types/clone-deep": "^4.0.4",
|
||||||
"@vscode/codicons": "^0.0.36",
|
"@vscode/codicons": "^0.0.36",
|
||||||
@@ -145,7 +144,6 @@
|
|||||||
"diff": "^5.2.0",
|
"diff": "^5.2.0",
|
||||||
"execa": "^9.3.0",
|
"execa": "^9.3.0",
|
||||||
"globby": "^14.0.2",
|
"globby": "^14.0.2",
|
||||||
"image-size": "^1.1.1",
|
|
||||||
"openai": "^4.54.0",
|
"openai": "^4.54.0",
|
||||||
"os-name": "^6.0.0",
|
"os-name": "^6.0.0",
|
||||||
"p-wait-for": "^5.0.2",
|
"p-wait-for": "^5.0.2",
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ import { getApiMetrics } from "./shared/getApiMetrics"
|
|||||||
import { HistoryItem } from "./shared/HistoryItem"
|
import { HistoryItem } from "./shared/HistoryItem"
|
||||||
import { Tool, ToolName } from "./shared/Tool"
|
import { Tool, ToolName } from "./shared/Tool"
|
||||||
import { ClaudeAskResponse } from "./shared/WebviewMessage"
|
import { ClaudeAskResponse } from "./shared/WebviewMessage"
|
||||||
import { findLastIndex } from "./utils"
|
import { findLast, findLastIndex } from "./utils"
|
||||||
import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management"
|
import { truncateHalfConversation } from "./utils/context-management"
|
||||||
import { regexSearchFiles } from "./utils/ripgrep"
|
import { regexSearchFiles } from "./utils/ripgrep"
|
||||||
|
|
||||||
const SYSTEM_PROMPT =
|
const SYSTEM_PROMPT =
|
||||||
@@ -1304,15 +1304,24 @@ The following additional instructions are provided by the user. They should be f
|
|||||||
${this.customInstructions.trim()}
|
${this.customInstructions.trim()}
|
||||||
`
|
`
|
||||||
}
|
}
|
||||||
const isPromptWithinContextWindow = isWithinContextWindow(
|
|
||||||
this.api.getModel().info.contextWindow,
|
// Check last API request metrics to see if we need to truncate
|
||||||
systemPrompt,
|
const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
|
||||||
tools,
|
if (lastApiReqFinished && lastApiReqFinished.text) {
|
||||||
this.apiConversationHistory
|
const {
|
||||||
)
|
tokensIn,
|
||||||
if (!isPromptWithinContextWindow) {
|
tokensOut,
|
||||||
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
|
cacheWrites,
|
||||||
await this.overwriteApiConversationHistory(truncatedMessages)
|
cacheReads,
|
||||||
|
}: { tokensIn?: number; tokensOut?: number; cacheWrites?: number; cacheReads?: number } = JSON.parse(
|
||||||
|
lastApiReqFinished.text
|
||||||
|
)
|
||||||
|
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
|
||||||
|
const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
|
||||||
|
if (isCloseToContextWindowLimit) {
|
||||||
|
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
|
||||||
|
await this.overwriteApiConversationHistory(truncatedMessages)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const { message, userCredits } = await this.api.createMessage(
|
const { message, userCredits } = await this.api.createMessage(
|
||||||
systemPrompt,
|
systemPrompt,
|
||||||
|
|||||||
@@ -15,3 +15,8 @@ export function findLastIndex<T>(array: Array<T>, predicate: (value: T, index: n
|
|||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function findLast<T>(array: Array<T>, predicate: (value: T, index: number, obj: T[]) => boolean): T | undefined {
|
||||||
|
const index = findLastIndex(array, predicate)
|
||||||
|
return index === -1 ? undefined : array[index]
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,26 +1,4 @@
|
|||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import { countTokens } from "@anthropic-ai/tokenizer"
|
|
||||||
import { Buffer } from "buffer"
|
|
||||||
import sizeOf from "image-size"
|
|
||||||
|
|
||||||
export function isWithinContextWindow(
|
|
||||||
contextWindow: number,
|
|
||||||
systemPrompt: string,
|
|
||||||
tools: Anthropic.Messages.Tool[],
|
|
||||||
messages: Anthropic.Messages.MessageParam[]
|
|
||||||
): boolean {
|
|
||||||
const adjustedContextWindow = contextWindow * 0.75 // Buffer to account for tokenizer differences
|
|
||||||
// counting tokens is expensive, so we first try to estimate before doing a more accurate calculation
|
|
||||||
const estimatedTotalMessageTokens = countTokens(systemPrompt + JSON.stringify(tools) + JSON.stringify(messages))
|
|
||||||
if (estimatedTotalMessageTokens <= adjustedContextWindow) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
const systemPromptTokens = countTokens(systemPrompt)
|
|
||||||
const toolsTokens = countTokens(JSON.stringify(tools))
|
|
||||||
let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
|
|
||||||
let accurateTotalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
|
|
||||||
return accurateTotalMessageTokens <= availableTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
We can't implement a dynamically updating sliding window as it would break prompt cache
|
We can't implement a dynamically updating sliding window as it would break prompt cache
|
||||||
@@ -46,54 +24,3 @@ export function truncateHalfConversation(
|
|||||||
|
|
||||||
return truncatedMessages
|
return truncatedMessages
|
||||||
}
|
}
|
||||||
|
|
||||||
function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
|
|
||||||
if (typeof message.content === "string") {
|
|
||||||
return countTokens(message.content)
|
|
||||||
} else if (Array.isArray(message.content)) {
|
|
||||||
return message.content.reduce((sum, item) => {
|
|
||||||
if (typeof item === "string") {
|
|
||||||
return sum + countTokens(item)
|
|
||||||
} else if (item.type === "text") {
|
|
||||||
return sum + countTokens(item.text)
|
|
||||||
} else if (item.type === "image") {
|
|
||||||
return sum + estimateImageTokens(item.source.data)
|
|
||||||
} else if (item.type === "tool_use") {
|
|
||||||
return sum + countTokens(JSON.stringify(item.input))
|
|
||||||
} else if (item.type === "tool_result") {
|
|
||||||
if (Array.isArray(item.content)) {
|
|
||||||
return (
|
|
||||||
sum +
|
|
||||||
item.content.reduce((contentSum, contentItem) => {
|
|
||||||
if (contentItem.type === "text") {
|
|
||||||
return contentSum + countTokens(contentItem.text)
|
|
||||||
} else if (contentItem.type === "image") {
|
|
||||||
return contentSum + estimateImageTokens(contentItem.source.data)
|
|
||||||
}
|
|
||||||
return contentSum + countTokens(JSON.stringify(contentItem))
|
|
||||||
}, 0)
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
return sum + countTokens(item.content || "")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return sum + countTokens(JSON.stringify(item))
|
|
||||||
}
|
|
||||||
}, 0)
|
|
||||||
} else {
|
|
||||||
return countTokens(JSON.stringify(message.content))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function estimateImageTokens(base64: string): number {
|
|
||||||
const base64Data = base64.split(";base64,").pop()
|
|
||||||
if (base64Data) {
|
|
||||||
const buffer = Buffer.from(base64Data, "base64")
|
|
||||||
const dimensions = sizeOf(buffer)
|
|
||||||
if (dimensions.width && dimensions.height) {
|
|
||||||
// "you can estimate the number of tokens used through this algorithm: tokens = (width px * height px)/750"
|
|
||||||
return Math.ceil((dimensions.width * dimensions.height) / 750)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return countTokens(base64)
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user