Fix non-first chunk error handling

2025-12-20 04:11:10 -05:00 · 2024-10-11 02:27:05 -04:00
parent b7027464f9
commit 2b97775c96
1 changed files with 29 additions and 25 deletions
--- a/src/core/Cline.ts
+++ b/src/core/Cline.ts
@@ -740,38 +740,36 @@ export class Cline {
 	}

 	async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
-		try {
-			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
-			if (this.customInstructions && this.customInstructions.trim()) {
-				// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
-				systemPrompt += addCustomInstructions(this.customInstructions)
-			}
+		let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
+		if (this.customInstructions && this.customInstructions.trim()) {
+			// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
+			systemPrompt += addCustomInstructions(this.customInstructions)
+		}

-			// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-			if (previousApiReqIndex >= 0) {
-				const previousRequest = this.clineMessages[previousApiReqIndex]
-				if (previousRequest && previousRequest.text) {
-					const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
-						previousRequest.text
-					)
-					const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-					const contextWindow = this.api.getModel().info.contextWindow || 128_000
-					const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
-					if (totalTokens >= maxAllowedSize) {
-						const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-						await this.overwriteApiConversationHistory(truncatedMessages)
-					}
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = this.clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
+					previousRequest.text
+				)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				const contextWindow = this.api.getModel().info.contextWindow || 128_000
+				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
+				if (totalTokens >= maxAllowedSize) {
+					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
+					await this.overwriteApiConversationHistory(truncatedMessages)
 				}
 			}
+		}

-			const stream = this.api.createMessage(systemPrompt, this.apiConversationHistory)
-			const iterator = stream[Symbol.asyncIterator]()
+		const stream = this.api.createMessage(systemPrompt, this.apiConversationHistory)
+		const iterator = stream[Symbol.asyncIterator]()
+
+		try {
 			// awaiting first chunk to see if it will throw an error
 			const firstChunk = await iterator.next()
 			yield firstChunk.value
-			// no error, so we can continue to yield all remaining chunks
-			// this delegates to another generator or iterable object. In this case, it's saying "yield all remaining values from this iterator". This effectively passes along all subsequent chunks from the original stream.
-			yield* iterator
 		} catch (error) {
 			// note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (i.e. it fails on the first chunk), as it would allow them to hit a retry button. However if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
 			const { response } = await this.ask(
@@ -785,7 +783,13 @@ export class Cline {
 			await this.say("api_req_retried")
 			// delegate generator output from the recursive call
 			yield* this.attemptApiRequest(previousApiReqIndex)
+			return
 		}
+
+		// no error, so we can continue to yield all remaining chunks
+		// (needs to be placed outside of try/catch since we want the caller to handle errors not with api_req_failed, as that is reserved for first chunk failures only)
+		// this delegates to another generator or iterable object. In this case, it's saying "yield all remaining values from this iterator". This effectively passes along all subsequent chunks from the original stream.
+		yield* iterator
 	}

 	async presentAssistantMessage() {