Add streaming fallback to mitigate failed OpenRouter requests

Saoud Rizwan
2024-08-12 23:18:45 -04:00
parent a3d345d1af
commit b2ced9b484
3 changed files with 131 additions and 28 deletions
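In outline, the change wraps the existing non-streaming request in a try/catch and re-issues the same parameters as a streaming request when the first attempt throws. A minimal sketch of that pattern as a standalone helper (createWithFallback is a hypothetical name used only for illustration; the actual change lives inside OpenRouterHandler in the first file below):

import OpenAI from "openai"

// Hypothetical helper sketching the fallback pattern this commit adds (not the literal diff code).
async function createWithFallback(
    client: OpenAI,
    createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
    streamCompletion: (
        params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
    ) => Promise<OpenAI.Chat.Completions.ChatCompletion>
): Promise<OpenAI.Chat.Completions.ChatCompletion> {
    try {
        // Normal (non-streaming) request first
        return await client.chat.completions.create(createParams)
    } catch (error) {
        // Some OpenRouter responses fail to parse ("Unexpected end of JSON input"); streaming works around it
        console.error("Error creating message from normal request. Using streaming fallback...", error)
        return streamCompletion(createParams)
    }
}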

View File

@@ -46,13 +46,21 @@ export class OpenRouterHandler implements ApiHandler {
},
}))
const completion = await this.client.chat.completions.create({
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
})
}
let completion: OpenAI.Chat.Completions.ChatCompletion
try {
completion = await this.client.chat.completions.create(createParams)
} catch (error) {
console.error("Error creating message from normal request. Using streaming fallback...", error)
completion = await this.streamCompletion(createParams)
}
const errorMessage = (completion as any).error?.message // openrouter returns an error object instead of the openai sdk throwing an error
if (errorMessage) {
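As the inline comment above notes, OpenRouter can report a failure inside an otherwise successful response body rather than through an exception from the OpenAI SDK, which is why the error check runs even after a resolved request. A minimal sketch of that handling, assuming a body along the lines of { error: { message: "..." } } (both the exact error shape and the branch body are assumptions for illustration; the hunk truncates before the original handling):

// Assumed failure shape, e.g. { "error": { "message": "Provider returned error" } }, returned with a resolved promise.
const errorMessage = (completion as any).error?.message
if (errorMessage) {
    // Surface the provider-reported error instead of reading choices/usage from an error payload.
    throw new Error(`OpenRouter error: ${errorMessage}`)
}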
@@ -72,7 +80,19 @@ export class OpenRouterHandler implements ApiHandler {
},
],
model: completion.model,
stop_reason: this.mapFinishReason(completion.choices[0].finish_reason),
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
@@ -102,23 +122,6 @@ export class OpenRouterHandler implements ApiHandler {
return anthropicMessage
}
private mapFinishReason(
finishReason: OpenAI.Chat.ChatCompletion.Choice["finish_reason"]
): Anthropic.Messages.Message["stop_reason"] {
switch (finishReason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter":
return null // Anthropic doesn't have an exact equivalent
default:
return null
}
}
convertToOpenAiMessages(
anthropicMessages: Anthropic.Messages.MessageParam[]
): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -261,6 +264,110 @@ export class OpenRouterHandler implements ApiHandler {
return openAiMessages
}
/*
Streaming the completion is a fallback for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input").
This usually happens when the model makes tool calls with large arguments.
According to the OpenRouter folks, streaming mitigates the issue for now, until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer").
*/
async streamCompletion(
createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
): Promise<OpenAI.Chat.Completions.ChatCompletion> {
const stream = await this.client.chat.completions.create({
...createParams,
stream: true,
})
let textContent: string = ""
let toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
try {
let currentToolCall: (OpenAI.Chat.ChatCompletionMessageToolCall & { index?: number }) | null = null
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta
if (delta?.content) {
textContent += delta.content
}
if (delta?.tool_calls) {
for (const toolCallDelta of delta.tool_calls) {
if (toolCallDelta.index === undefined) {
continue
}
if (!currentToolCall || currentToolCall.index !== toolCallDelta.index) {
// new index means new tool call, so add the previous one to the list
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
currentToolCall = {
index: toolCallDelta.index,
id: toolCallDelta.id || "",
type: "function",
function: { name: "", arguments: "" },
}
}
if (toolCallDelta.id) {
currentToolCall.id = toolCallDelta.id
}
if (toolCallDelta.type) {
currentToolCall.type = toolCallDelta.type
}
if (toolCallDelta.function) {
if (toolCallDelta.function.name) {
currentToolCall.function.name = toolCallDelta.function.name
}
if (toolCallDelta.function.arguments) {
currentToolCall.function.arguments =
(currentToolCall.function.arguments || "") + toolCallDelta.function.arguments
}
}
}
}
}
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
} catch (error) {
console.error("Error streaming completion:", error)
throw error
}
// Usage information is not available in streaming responses, so we need to estimate token counts
function approximateTokenCount(text: string): number {
return Math.ceil(new TextEncoder().encode(text).length / 4)
}
const promptTokens = approximateTokenCount(
createParams.messages
.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
.join(" ")
)
const completionTokens = approximateTokenCount(
textContent + toolCalls.map((toolCall) => toolCall.function.arguments || "").join(" ")
)
const completion: OpenAI.Chat.Completions.ChatCompletion = {
created: Math.floor(Date.now() / 1000), // OpenAI's created field is a Unix timestamp in seconds, not milliseconds
object: "chat.completion",
id: `openrouter-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // this ID won't be traceable back to OpenRouter's systems if you need to debug issues
choices: [
{
message: {
role: "assistant",
content: textContent,
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
},
finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
index: 0,
logprobs: null,
},
],
model: this.getModel().id,
usage: {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: promptTokens + completionTokens,
},
}
return completion
}
createUserReadableRequest(
userContent: Array<
| Anthropic.TextBlockParam
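Because streaming responses carry no usage object, the new streamCompletion method above estimates token counts as UTF-8 byte length divided by four. A quick illustration of how that heuristic behaves (the results are rough estimates, not real tokenizer counts):

// Same heuristic as approximateTokenCount inside streamCompletion: ceil(UTF-8 bytes / 4).
const approximateTokenCount = (text: string): number =>
    Math.ceil(new TextEncoder().encode(text).length / 4)

approximateTokenCount("Hello, world!") // 13 bytes -> 4
approximateTokenCount("naïve") // 6 UTF-8 bytes -> 2
approximateTokenCount("") // 0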

View File

@@ -457,8 +457,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
if (apiKey) {
apiProvider = "anthropic"
} else {
// New users should default to anthropic (openrouter doesn't perform well with large files)
apiProvider = "anthropic"
// New users should default to openrouter (better rate limits and wider model selection)
apiProvider = "openrouter"
}
}

View File

@@ -66,9 +66,9 @@ const ApiOptions: React.FC<ApiOptionsProps> = ({ showModelOptions, apiConfigurat
<span style={{ fontWeight: 500 }}>API Provider</span>
</label>
<VSCodeDropdown id="api-provider" value={selectedProvider} onChange={handleInputChange("apiProvider")}>
<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
<VSCodeOption value="anthropic">Anthropic</VSCodeOption>
<VSCodeOption value="bedrock">AWS Bedrock</VSCodeOption>
<VSCodeOption value="openrouter">OpenRouter</VSCodeOption>
</VSCodeDropdown>
</div>
@@ -113,11 +113,7 @@ const ApiOptions: React.FC<ApiOptionsProps> = ({ showModelOptions, apiConfigurat
This key is stored locally and only used to make API requests from this extension.
<VSCodeLink href="https://openrouter.ai/" style={{ display: "inline" }}>
You can get an OpenRouter API key by signing up here.
</VSCodeLink>{" "}
<span style={{ color: "var(--vscode-errorForeground)" }}>
(<b>Note:</b> OpenRouter support is experimental and may not work well with tool use or
large outputs.)
</span>
</VSCodeLink>
</p>
</div>
)}