Add streaming fallback to mitigate failed OpenRouter requests

Saoud Rizwan
2024-08-12 23:18:45 -04:00
parent a3d345d1af
commit b2ced9b484
3 changed files with 131 additions and 28 deletions


@@ -46,13 +46,21 @@ export class OpenRouterHandler implements ApiHandler {
},
}))
const completion = await this.client.chat.completions.create({
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
})
}
let completion: OpenAI.Chat.Completions.ChatCompletion
try {
completion = await this.client.chat.completions.create(createParams)
} catch (error) {
console.error("Error creating message from normal request. Using streaming fallback...", error)
completion = await this.streamCompletion(createParams)
}
const errorMessage = (completion as any).error?.message // openrouter returns an error object instead of the openai sdk throwing an error
if (errorMessage) {
@@ -72,7 +80,19 @@ export class OpenRouterHandler implements ApiHandler {
},
],
model: completion.model,
stop_reason: this.mapFinishReason(completion.choices[0].finish_reason),
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
@@ -102,23 +122,6 @@ export class OpenRouterHandler implements ApiHandler {
return anthropicMessage
}
private mapFinishReason(
finishReason: OpenAI.Chat.ChatCompletion.Choice["finish_reason"]
): Anthropic.Messages.Message["stop_reason"] {
switch (finishReason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter":
return null // Anthropic doesn't have an exact equivalent
default:
return null
}
}
convertToOpenAiMessages(
anthropicMessages: Anthropic.Messages.MessageParam[]
): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -261,6 +264,110 @@ export class OpenRouterHandler implements ApiHandler {
return openAiMessages
}
/*
Streaming the completion is a fallback behavior for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input"). This usually happens when the model makes tool calls with large arguments. Per the OpenRouter folks, streaming mitigates this issue for now until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer")
*/
async streamCompletion(
createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
): Promise<OpenAI.Chat.Completions.ChatCompletion> {
const stream = await this.client.chat.completions.create({
...createParams,
stream: true,
})
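// accumulate the streamed response locally: text deltas are appended to textContent, and tool call deltas are stitched together by index into complete tool calls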
let textContent: string = ""
let toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
try {
let currentToolCall: (OpenAI.Chat.ChatCompletionMessageToolCall & { index?: number }) | null = null
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta
if (delta?.content) {
textContent += delta.content
}
if (delta?.tool_calls) {
for (const toolCallDelta of delta.tool_calls) {
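// each delta identifies the tool call it extends by index; deltas without an index can't be attributed, so skip them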
if (toolCallDelta.index === undefined) {
continue
}
if (!currentToolCall || currentToolCall.index !== toolCallDelta.index) {
// new index means new tool call, so add the previous one to the list
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
currentToolCall = {
index: toolCallDelta.index,
id: toolCallDelta.id || "",
type: "function",
function: { name: "", arguments: "" },
}
}
if (toolCallDelta.id) {
currentToolCall.id = toolCallDelta.id
}
if (toolCallDelta.type) {
currentToolCall.type = toolCallDelta.type
}
if (toolCallDelta.function) {
if (toolCallDelta.function.name) {
currentToolCall.function.name = toolCallDelta.function.name
}
if (toolCallDelta.function.arguments) {
currentToolCall.function.arguments =
(currentToolCall.function.arguments || "") + toolCallDelta.function.arguments
}
}
}
}
}
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
} catch (error) {
console.error("Error streaming completion:", error)
throw error
}
// Usage information is not available in streaming responses, so we need to estimate token counts
function approximateTokenCount(text: string): number {
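// rough heuristic: ~4 UTF-8 bytes per token, in line with the common "1 token ≈ 4 characters" rule of thumb for English text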
return Math.ceil(new TextEncoder().encode(text).length / 4)
}
const promptTokens = approximateTokenCount(
createParams.messages
.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
.join(" ")
)
const completionTokens = approximateTokenCount(
textContent + toolCalls.map((toolCall) => toolCall.function.arguments || "").join(" ")
)
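// assemble a synthetic non-streaming ChatCompletion so the caller can handle the streamed fallback exactly like a normal response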
const completion: OpenAI.Chat.Completions.ChatCompletion = {
created: Math.floor(Date.now() / 1000), // ChatCompletion.created is a Unix timestamp in seconds
object: "chat.completion",
id: `openrouter-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // this ID won't be traceable back to OpenRouter's systems if you need to debug issues
choices: [
{
message: {
role: "assistant",
content: textContent,
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
},
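// the stream's own finish_reason isn't tracked above, so infer one from whether any tool calls were assembled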
finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
index: 0,
logprobs: null,
},
],
model: this.getModel().id,
usage: {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: promptTokens + completionTokens,
},
}
return completion
}
createUserReadableRequest(
userContent: Array<
| Anthropic.TextBlockParam