Set better max token size

This commit is contained in:
Saoud Rizwan
2024-08-30 22:37:58 -04:00
parent 3e58160d99
commit 88e4217878

View File

@@ -1305,7 +1305,7 @@ ${this.customInstructions.trim()}
` `
} }
- // Check last API request metrics to see if we need to truncate
+ // If the last API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished") const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
if (lastApiReqFinished && lastApiReqFinished.text) { if (lastApiReqFinished && lastApiReqFinished.text) {
const { const {
@@ -1317,8 +1317,9 @@ ${this.customInstructions.trim()}
lastApiReqFinished.text lastApiReqFinished.text
) )
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0) const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
- const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
- if (isCloseToContextWindowLimit) {
+ const contextWindow = this.api.getModel().info.contextWindow
+ const maxAllowedSize = Math.max(contextWindow - 20_000, contextWindow * 0.8)
+ if (totalTokens >= maxAllowedSize) {
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory) const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
await this.overwriteApiConversationHistory(truncatedMessages) await this.overwriteApiConversationHistory(truncatedMessages)
} }