From b2ced9b48462c8451c6d748a7440b9a17a98c407 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Mon, 12 Aug 2024 23:18:45 -0400
Subject: [PATCH] Add streaming fallback to mitigate openrouter failed
 requests

---
 src/api/openrouter.ts                    | 147 ++++++++++++++++++++---
 src/providers/ClaudeDevProvider.ts       |   4 +-
 webview-ui/src/components/ApiOptions.tsx |   8 +-
 3 files changed, 131 insertions(+), 28 deletions(-)

diff --git a/src/api/openrouter.ts b/src/api/openrouter.ts
index 3171c15..bb354e4 100644
--- a/src/api/openrouter.ts
+++ b/src/api/openrouter.ts
@@ -46,13 +46,21 @@ export class OpenRouterHandler implements ApiHandler {
             },
         }))
 
-        const completion = await this.client.chat.completions.create({
+        const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
             model: this.getModel().id,
             max_tokens: this.getModel().info.maxTokens,
             messages: openAiMessages,
             tools: openAiTools,
             tool_choice: "auto",
-        })
+        }
+
+        let completion: OpenAI.Chat.Completions.ChatCompletion
+        try {
+            completion = await this.client.chat.completions.create(createParams)
+        } catch (error) {
+            console.error("Error creating message from normal request. Using streaming fallback...", error)
+            completion = await this.streamCompletion(createParams)
+        }
 
         const errorMessage = (completion as any).error?.message // openrouter returns an error object instead of the openai sdk throwing an error
         if (errorMessage) {
@@ -72,7 +80,19 @@
                 },
             ],
             model: completion.model,
-            stop_reason: this.mapFinishReason(completion.choices[0].finish_reason),
+            stop_reason: (() => {
+                switch (completion.choices[0].finish_reason) {
+                    case "stop":
+                        return "end_turn"
+                    case "length":
+                        return "max_tokens"
+                    case "tool_calls":
+                        return "tool_use"
+                    case "content_filter": // Anthropic doesn't have an exact equivalent
+                    default:
+                        return null
+                }
+            })(),
             stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
             usage: {
                 input_tokens: completion.usage?.prompt_tokens || 0,
@@ -102,23 +122,6 @@
         return anthropicMessage
     }
 
-    private mapFinishReason(
-        finishReason: OpenAI.Chat.ChatCompletion.Choice["finish_reason"]
-    ): Anthropic.Messages.Message["stop_reason"] {
-        switch (finishReason) {
-            case "stop":
-                return "end_turn"
-            case "length":
-                return "max_tokens"
-            case "tool_calls":
-                return "tool_use"
-            case "content_filter":
-                return null // Anthropic doesn't have an exact equivalent
-            default:
-                return null
-        }
-    }
-
     convertToOpenAiMessages(
         anthropicMessages: Anthropic.Messages.MessageParam[]
     ): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -261,6 +264,110 @@
         return openAiMessages
     }
 
+    /*
+    Streaming the completion is a fallback behavior for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input"). This would usually happen in cases where the model makes tool calls with large arguments.
+    After talking with OpenRouter folks, streaming mitigates this issue for now until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer")
+    */
+    async streamCompletion(
+        createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
+    ): Promise<OpenAI.Chat.Completions.ChatCompletion> {
+        const stream = await this.client.chat.completions.create({
+            ...createParams,
+            stream: true,
+        })
+
+        let textContent: string = ""
+        let toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
+
+        try {
+            let currentToolCall: (OpenAI.Chat.ChatCompletionMessageToolCall & { index?: number }) | null = null
+            for await (const chunk of stream) {
+                const delta = chunk.choices[0]?.delta
+                if (delta?.content) {
+                    textContent += delta.content
+                }
+                if (delta?.tool_calls) {
+                    for (const toolCallDelta of delta.tool_calls) {
+                        if (toolCallDelta.index === undefined) {
+                            continue
+                        }
+                        if (!currentToolCall || currentToolCall.index !== toolCallDelta.index) {
+                            // a new index means a new tool call, so add the previous one to the list
+                            if (currentToolCall) {
+                                toolCalls.push(currentToolCall)
+                            }
+                            currentToolCall = {
+                                index: toolCallDelta.index,
+                                id: toolCallDelta.id || "",
+                                type: "function",
+                                function: { name: "", arguments: "" },
+                            }
+                        }
+                        if (toolCallDelta.id) {
+                            currentToolCall.id = toolCallDelta.id
+                        }
+                        if (toolCallDelta.type) {
+                            currentToolCall.type = toolCallDelta.type
+                        }
+                        if (toolCallDelta.function) {
+                            if (toolCallDelta.function.name) {
+                                currentToolCall.function.name = toolCallDelta.function.name
+                            }
+                            if (toolCallDelta.function.arguments) {
+                                currentToolCall.function.arguments =
+                                    (currentToolCall.function.arguments || "") + toolCallDelta.function.arguments
+                            }
+                        }
+                    }
+                }
+            }
+            if (currentToolCall) {
+                toolCalls.push(currentToolCall)
+            }
+        } catch (error) {
+            console.error("Error streaming completion:", error)
+            throw error
+        }
+
+        // Usage information is not available in streaming responses, so we need to estimate token counts
+        function approximateTokenCount(text: string): number {
+            return Math.ceil(new TextEncoder().encode(text).length / 4)
+        }
+        const promptTokens = approximateTokenCount(
+            createParams.messages
+                .map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
+                .join(" ")
+        )
+        const completionTokens = approximateTokenCount(
+            textContent + toolCalls.map((toolCall) => toolCall.function.arguments || "").join(" ")
+        )
+
+        const completion: OpenAI.Chat.Completions.ChatCompletion = {
+            created: Math.floor(Date.now() / 1000), // created is a Unix timestamp in seconds (Date.now() alone would be milliseconds)
+            object: "chat.completion",
+            id: `openrouter-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // this ID won't be traceable back to OpenRouter's systems if you need to debug issues
+            choices: [
+                {
+                    message: {
+                        role: "assistant",
+                        content: textContent,
+                        tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
+                    },
"tool_calls" : "stop", + index: 0, + logprobs: null, + }, + ], + model: this.getModel().id, + usage: { + prompt_tokens: promptTokens, + completion_tokens: completionTokens, + total_tokens: promptTokens + completionTokens, + }, + } + + return completion + } + createUserReadableRequest( userContent: Array< | Anthropic.TextBlockParam diff --git a/src/providers/ClaudeDevProvider.ts b/src/providers/ClaudeDevProvider.ts index 8098866..3cce982 100644 --- a/src/providers/ClaudeDevProvider.ts +++ b/src/providers/ClaudeDevProvider.ts @@ -457,8 +457,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider { if (apiKey) { apiProvider = "anthropic" } else { - // New users should default to anthropic (openrouter doesn't perform well with large files) - apiProvider = "anthropic" + // New users should default to openrouter (better rate limits and wider model selection) + apiProvider = "openrouter" } } diff --git a/webview-ui/src/components/ApiOptions.tsx b/webview-ui/src/components/ApiOptions.tsx index b0b59a3..f993644 100644 --- a/webview-ui/src/components/ApiOptions.tsx +++ b/webview-ui/src/components/ApiOptions.tsx @@ -66,9 +66,9 @@ const ApiOptions: React.FC = ({ showModelOptions, apiConfigurat API Provider + OpenRouter Anthropic AWS Bedrock - OpenRouter @@ -113,11 +113,7 @@ const ApiOptions: React.FC = ({ showModelOptions, apiConfigurat This key is stored locally and only used to make API requests from this extension. You can get an OpenRouter API key by signing up here. - {" "} - - (Note: OpenRouter support is experimental and may not work well with tool use or - large outputs.) - +

)}
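
Why the streaming fallback helps: per the comment in streamCompletion, the failure mode is OpenRouter's response buffer choking while assembling one large JSON body. Streaming appears to sidestep that path because each SSE chunk arrives as a small, self-contained JSON object that the client reassembles itself. Below is a minimal sketch of the pattern in isolation; the helper name requestWithStreamingFallback and the model slug are illustrative placeholders, and unlike the real OpenRouterHandler.streamCompletion above it only reassembles text content, not tool-call deltas.

import OpenAI from "openai"

async function requestWithStreamingFallback(
    client: OpenAI,
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
): Promise<string> {
    try {
        // Normal request first: this is the call that can fail with "Unexpected end of JSON input"
        const completion = await client.chat.completions.create(params)
        return completion.choices[0]?.message.content ?? ""
    } catch (error) {
        console.error("Non-streaming request failed, retrying as a stream...", error)
        // Same params with stream: true, concatenating content deltas as they arrive
        const stream = await client.chat.completions.create({ ...params, stream: true })
        let text = ""
        for await (const chunk of stream) {
            text += chunk.choices[0]?.delta?.content ?? ""
        }
        return text
    }
}

With a client constructed against https://openrouter.ai/api/v1, this behaves identically to a plain create() call on success and only pays the streaming overhead on the failure path.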
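
On the usage estimate: streamed responses carry no usage object, so streamCompletion falls back to a bytes/4 heuristic. A worked example of the exact function from the patch, runnable as-is:

// Roughly 4 UTF-8 bytes per token
function approximateTokenCount(text: string): number {
    return Math.ceil(new TextEncoder().encode(text).length / 4)
}

console.log(approximateTokenCount("hello world")) // 11 bytes -> ceil(11 / 4) = 3
console.log(approximateTokenCount("naïve")) // 6 bytes ("ï" encodes to 2) -> ceil(6 / 4) = 2

Real tokenizers vary by model, so treat these as estimates; where OpenRouter supports OpenAI's stream_options: { include_usage: true }, requesting usage on the stream would replace the heuristic with exact counts.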