Mirror of https://github.com/pacnpal/Roo-Code.git (synced 2025-12-23 05:41:10 -05:00)
Replace token estimation with last API response token usage
@@ -24,8 +24,8 @@ import { getApiMetrics } from "./shared/getApiMetrics"
import { HistoryItem } from "./shared/HistoryItem"
import { Tool, ToolName } from "./shared/Tool"
import { ClaudeAskResponse } from "./shared/WebviewMessage"
import { findLastIndex } from "./utils"
import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management"
import { findLast, findLastIndex } from "./utils"
import { truncateHalfConversation } from "./utils/context-management"
import { regexSearchFiles } from "./utils/ripgrep"

const SYSTEM_PROMPT =

@@ -1304,15 +1304,24 @@ The following additional instructions are provided by the user. They should be f
${this.customInstructions.trim()}
`
}
const isPromptWithinContextWindow = isWithinContextWindow(
this.api.getModel().info.contextWindow,
systemPrompt,
tools,
this.apiConversationHistory
)
if (!isPromptWithinContextWindow) {
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
await this.overwriteApiConversationHistory(truncatedMessages)

// Check last API request metrics to see if we need to truncate
const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
if (lastApiReqFinished && lastApiReqFinished.text) {
const {
tokensIn,
tokensOut,
cacheWrites,
cacheReads,
}: { tokensIn?: number; tokensOut?: number; cacheWrites?: number; cacheReads?: number } = JSON.parse(
lastApiReqFinished.text
)
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
if (isCloseToContextWindowLimit) {
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
await this.overwriteApiConversationHistory(truncatedMessages)
}
}
const { message, userCredits } = await this.api.createMessage(
systemPrompt,

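For readers skimming the diff, here is a minimal standalone sketch of the new truncation decision. Only the findLast lookup of the last "api_req_finished" message, the summed usage fields, and the 80%-of-context-window threshold come from the change above; the message shape, the helper name, and the guard structure are illustrative assumptions.

// Sketch only (not part of the repository): shapes and helper names are assumed.
import { findLast } from "./utils"

interface ClaudeMessage {
	say?: string
	text?: string
}

function shouldTruncateConversation(claudeMessages: ClaudeMessage[], contextWindow: number): boolean {
	const lastApiReqFinished = findLast(claudeMessages, (m) => m.say === "api_req_finished")
	if (!lastApiReqFinished || !lastApiReqFinished.text) {
		return false
	}
	const { tokensIn, tokensOut, cacheWrites, cacheReads }: {
		tokensIn?: number
		tokensOut?: number
		cacheWrites?: number
		cacheReads?: number
	} = JSON.parse(lastApiReqFinished.text)
	// Sum every usage figure reported for the last request, as the diff does.
	const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
	// Truncate once the last request consumed at least 80% of the context window.
	return totalTokens >= contextWindow * 0.8
}

When this returns true, the caller truncates half the conversation with truncateHalfConversation and overwrites the stored history, exactly as the hunk above does.
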
@@ -15,3 +15,8 @@ export function findLastIndex<T>(array: Array<T>, predicate: (value: T, index: n
}
return -1
}

export function findLast<T>(array: Array<T>, predicate: (value: T, index: number, obj: T[]) => boolean): T | undefined {
const index = findLastIndex(array, predicate)
return index === -1 ? undefined : array[index]
}

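A quick usage note for the new helper (the sample data is illustrative): findLast returns the last element satisfying the predicate, or undefined if none matches.

const sampleMessages = [
	{ say: "text", text: "hello" },
	{ say: "api_req_finished", text: '{"tokensIn":100,"tokensOut":50}' },
	{ say: "text", text: "world" },
]
const lastFinished = findLast(sampleMessages, (m) => m.say === "api_req_finished")
// lastFinished?.text === '{"tokensIn":100,"tokensOut":50}'
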
@@ -1,26 +1,4 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { countTokens } from "@anthropic-ai/tokenizer"
import { Buffer } from "buffer"
import sizeOf from "image-size"

export function isWithinContextWindow(
contextWindow: number,
systemPrompt: string,
tools: Anthropic.Messages.Tool[],
messages: Anthropic.Messages.MessageParam[]
): boolean {
const adjustedContextWindow = contextWindow * 0.75 // Buffer to account for tokenizer differences
// counting tokens is expensive, so we first try to estimate before doing a more accurate calculation
const estimatedTotalMessageTokens = countTokens(systemPrompt + JSON.stringify(tools) + JSON.stringify(messages))
if (estimatedTotalMessageTokens <= adjustedContextWindow) {
return true
}
const systemPromptTokens = countTokens(systemPrompt)
const toolsTokens = countTokens(JSON.stringify(tools))
let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
let accurateTotalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
return accurateTotalMessageTokens <= availableTokens
}

/*
We can't implement a dynamically updating sliding window as it would break prompt cache

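For comparison with the new usage-based check, a small sketch of the kind of local pre-check the removed code performed with countTokens from @anthropic-ai/tokenizer; the 200,000-token context window and the prompt string are illustrative values, while the 75% buffer is taken from the removed function.

import { countTokens } from "@anthropic-ai/tokenizer"

// Count tokens locally over the serialized prompt and keep a 25% buffer for
// tokenizer differences, as the removed isWithinContextWindow did.
const contextWindow = 200_000 // illustrative value
const estimated = countTokens("You are a helpful assistant." + JSON.stringify([]))
const fits = estimated <= contextWindow * 0.75
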
@@ -46,54 +24,3 @@ export function truncateHalfConversation(

return truncatedMessages
}

function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
if (typeof message.content === "string") {
return countTokens(message.content)
} else if (Array.isArray(message.content)) {
return message.content.reduce((sum, item) => {
if (typeof item === "string") {
return sum + countTokens(item)
} else if (item.type === "text") {
return sum + countTokens(item.text)
} else if (item.type === "image") {
return sum + estimateImageTokens(item.source.data)
} else if (item.type === "tool_use") {
return sum + countTokens(JSON.stringify(item.input))
} else if (item.type === "tool_result") {
if (Array.isArray(item.content)) {
return (
sum +
item.content.reduce((contentSum, contentItem) => {
if (contentItem.type === "text") {
return contentSum + countTokens(contentItem.text)
} else if (contentItem.type === "image") {
return contentSum + estimateImageTokens(contentItem.source.data)
}
return contentSum + countTokens(JSON.stringify(contentItem))
}, 0)
)
} else {
return sum + countTokens(item.content || "")
}
} else {
return sum + countTokens(JSON.stringify(item))
}
}, 0)
} else {
return countTokens(JSON.stringify(message.content))
}
}

function estimateImageTokens(base64: string): number {
const base64Data = base64.split(";base64,").pop()
if (base64Data) {
const buffer = Buffer.from(base64Data, "base64")
const dimensions = sizeOf(buffer)
if (dimensions.width && dimensions.height) {
// "you can estimate the number of tokens used through this algorithm: tokens = (width px * height px)/750"
return Math.ceil((dimensions.width * dimensions.height) / 750)
}
}
return countTokens(base64)
}

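As a worked example of the quoted estimate (the dimensions are illustrative): a 1024×768 image works out to Math.ceil((1024 * 768) / 750) = Math.ceil(1048.576) = 1049 tokens.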