Add streaming fallback to mitigate openrouter failed requests
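In outline, the change builds the request parameters once, tries the normal (non-streaming) request, and on failure re-issues it as a stream and reassembles a ChatCompletion from the chunks. A minimal sketch of that pattern, before the full diff below (the function and parameter names here are illustrative stand-ins, not the committed code; `streamCompletion` stands in for the method the diff adds):

import OpenAI from "openai"

// Try the plain request first; fall back to streaming if it throws.
async function createWithStreamingFallback(
	client: OpenAI,
	params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
	streamCompletion: (
		p: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
	) => Promise<OpenAI.Chat.Completions.ChatCompletion>
): Promise<OpenAI.Chat.Completions.ChatCompletion> {
	try {
		return await client.chat.completions.create(params)
	} catch (error) {
		// Per the commit's rationale, OpenRouter can return truncated JSON
		// ("Unexpected end of JSON input") on large tool-call responses;
		// streaming the same request sidesteps that failure mode.
		console.error("Non-streaming request failed, using streaming fallback", error)
		return await streamCompletion(params)
	}
}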
@@ -46,13 +46,21 @@ export class OpenRouterHandler implements ApiHandler {
 			},
 		}))

-		const completion = await this.client.chat.completions.create({
+		const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 			model: this.getModel().id,
 			max_tokens: this.getModel().info.maxTokens,
 			messages: openAiMessages,
 			tools: openAiTools,
 			tool_choice: "auto",
-		})
+		}
+
+		let completion: OpenAI.Chat.Completions.ChatCompletion
+		try {
+			completion = await this.client.chat.completions.create(createParams)
+		} catch (error) {
+			console.error("Error creating message from normal request. Using streaming fallback...", error)
+			completion = await this.streamCompletion(createParams)
+		}

 		const errorMessage = (completion as any).error?.message // openrouter returns an error object instead of the openai sdk throwing an error
 		if (errorMessage) {
@@ -72,7 +80,19 @@ export class OpenRouterHandler implements ApiHandler {
 				},
 			],
 			model: completion.model,
-			stop_reason: this.mapFinishReason(completion.choices[0].finish_reason),
+			stop_reason: (() => {
+				switch (completion.choices[0].finish_reason) {
+					case "stop":
+						return "end_turn"
+					case "length":
+						return "max_tokens"
+					case "tool_calls":
+						return "tool_use"
+					case "content_filter": // Anthropic doesn't have an exact equivalent
+					default:
+						return null
+				}
+			})(),
 			stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
 			usage: {
 				input_tokens: completion.usage?.prompt_tokens || 0,
@@ -102,23 +122,6 @@ export class OpenRouterHandler implements ApiHandler {
 		return anthropicMessage
 	}

-	private mapFinishReason(
-		finishReason: OpenAI.Chat.ChatCompletion.Choice["finish_reason"]
-	): Anthropic.Messages.Message["stop_reason"] {
-		switch (finishReason) {
-			case "stop":
-				return "end_turn"
-			case "length":
-				return "max_tokens"
-			case "tool_calls":
-				return "tool_use"
-			case "content_filter":
-				return null // Anthropic doesn't have an exact equivalent
-			default:
-				return null
-		}
-	}
-
 	convertToOpenAiMessages(
 		anthropicMessages: Anthropic.Messages.MessageParam[]
 	): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -261,6 +264,110 @@ export class OpenRouterHandler implements ApiHandler {
 		return openAiMessages
 	}

+	/*
+	Streaming the completion is a fallback behavior for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input"). This would usually happen in cases where the model makes tool calls with large arguments. After talking with OpenRouter folks, streaming mitigates this issue for now until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer")
+	*/
+	async streamCompletion(
+		createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
+	): Promise<OpenAI.Chat.Completions.ChatCompletion> {
+		const stream = await this.client.chat.completions.create({
+			...createParams,
+			stream: true,
+		})
+
+		let textContent: string = ""
+		let toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
+
+		try {
+			let currentToolCall: (OpenAI.Chat.ChatCompletionMessageToolCall & { index?: number }) | null = null
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+				if (delta?.content) {
+					textContent += delta.content
+				}
+				if (delta?.tool_calls) {
+					for (const toolCallDelta of delta.tool_calls) {
+						if (toolCallDelta.index === undefined) {
+							continue
+						}
+						if (!currentToolCall || currentToolCall.index !== toolCallDelta.index) {
+							// new index means new tool call, so add the previous one to the list
+							if (currentToolCall) {
+								toolCalls.push(currentToolCall)
+							}
+							currentToolCall = {
+								index: toolCallDelta.index,
+								id: toolCallDelta.id || "",
+								type: "function",
+								function: { name: "", arguments: "" },
+							}
+						}
+						if (toolCallDelta.id) {
+							currentToolCall.id = toolCallDelta.id
+						}
+						if (toolCallDelta.type) {
+							currentToolCall.type = toolCallDelta.type
+						}
+						if (toolCallDelta.function) {
+							if (toolCallDelta.function.name) {
+								currentToolCall.function.name = toolCallDelta.function.name
+							}
+							if (toolCallDelta.function.arguments) {
+								currentToolCall.function.arguments =
+									(currentToolCall.function.arguments || "") + toolCallDelta.function.arguments
+							}
+						}
+					}
+				}
+			}
+			if (currentToolCall) {
+				toolCalls.push(currentToolCall)
+			}
+		} catch (error) {
+			console.error("Error streaming completion:", error)
+			throw error
+		}
+
+		// Usage information is not available in streaming responses, so we need to estimate token counts
+		function approximateTokenCount(text: string): number {
+			return Math.ceil(new TextEncoder().encode(text).length / 4)
+		}
+		const promptTokens = approximateTokenCount(
+			createParams.messages
+				.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
+				.join(" ")
+		)
+		const completionTokens = approximateTokenCount(
+			textContent + toolCalls.map((toolCall) => toolCall.function.arguments || "").join(" ")
+		)
+
+		const completion: OpenAI.Chat.Completions.ChatCompletion = {
+			created: Date.now(),
+			object: "chat.completion",
+			id: `openrouter-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // this ID won't be traceable back to OpenRouter's systems if you need to debug issues
+			choices: [
+				{
+					message: {
+						role: "assistant",
+						content: textContent,
+						tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
+					},
+					finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
+					index: 0,
+					logprobs: null,
+				},
+			],
+			model: this.getModel().id,
+			usage: {
+				prompt_tokens: promptTokens,
+				completion_tokens: completionTokens,
+				total_tokens: promptTokens + completionTokens,
+			},
+		}
+
+		return completion
+	}
+
 	createUserReadableRequest(
 		userContent: Array<
 			| Anthropic.TextBlockParam
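Because streamed responses carry no usage block, the new streamCompletion method estimates token counts by dividing UTF-8 byte length by four. A quick sanity check of that heuristic (a sketch only; the four-bytes-per-token ratio is a rough approximation, and a real tokenizer would report different counts for non-English or code-heavy text):

// Same bytes/4 heuristic as in the diff above; approximate by design.
function approximateTokenCount(text: string): number {
	return Math.ceil(new TextEncoder().encode(text).length / 4)
}

console.log(approximateTokenCount("Hello, world!")) // 13 bytes -> ceil(13 / 4) = 4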
@@ -457,8 +457,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 			if (apiKey) {
 				apiProvider = "anthropic"
 			} else {
-				// New users should default to anthropic (openrouter doesn't perform well with large files)
-				apiProvider = "anthropic"
+				// New users should default to openrouter (better rate limits and wider model selection)
+				apiProvider = "openrouter"
 			}
 		}

@@ -66,9 +66,9 @@ const ApiOptions: React.FC<ApiOptionsProps> = ({ showModelOptions, apiConfigurat
 				<span style={{ fontWeight: 500 }}>API Provider</span>
 			</label>
 			<VSCodeDropdown id="api-provider" value={selectedProvider} onChange={handleInputChange("apiProvider")}>
+				<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
 				<VSCodeOption value="anthropic">Anthropic</VSCodeOption>
 				<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
-				<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
 			</VSCodeDropdown>
 		</div>

@@ -113,11 +113,7 @@ const ApiOptions: React.FC<ApiOptionsProps> = ({ showModelOptions, apiConfigurat
 						This key is stored locally and only used to make API requests from this extension.
 						<VSCodeLink href="https://openrouter.ai/" style={{ display: "inline" }}>
 							You can get an OpenRouter API key by signing up here.
-						</VSCodeLink>{" "}
-						<span style={{ color: "var(--vscode-errorForeground)" }}>
-							(<b>Note:</b> OpenRouter support is experimental and may not work well with tool use or
-							large outputs.)
-						</span>
+						</VSCodeLink>
 					</p>
 				</div>
 			)}