Add streaming fallback to mitigate failed OpenRouter requests

Saoud Rizwan
2024-08-12 23:18:45 -04:00
parent a3d345d1af
commit b2ced9b484
3 changed files with 131 additions and 28 deletions


@@ -46,13 +46,21 @@ export class OpenRouterHandler implements ApiHandler {
},
}))
const completion = await this.client.chat.completions.create({
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
})
}
let completion: OpenAI.Chat.Completions.ChatCompletion
try {
completion = await this.client.chat.completions.create(createParams)
} catch (error) {
console.error("Error creating message from normal request. Using streaming fallback...", error)
completion = await this.streamCompletion(createParams)
}
const errorMessage = (completion as any).error?.message // openrouter returns an error object instead of the openai sdk throwing an error
if (errorMessage) {
@@ -72,7 +80,19 @@ export class OpenRouterHandler implements ApiHandler {
},
],
model: completion.model,
stop_reason: this.mapFinishReason(completion.choices[0].finish_reason),
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
@@ -102,23 +122,6 @@ export class OpenRouterHandler implements ApiHandler {
return anthropicMessage
}
private mapFinishReason(
finishReason: OpenAI.Chat.ChatCompletion.Choice["finish_reason"]
): Anthropic.Messages.Message["stop_reason"] {
switch (finishReason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter":
return null // Anthropic doesn't have an exact equivalent
default:
return null
}
}
convertToOpenAiMessages(
anthropicMessages: Anthropic.Messages.MessageParam[]
): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -261,6 +264,110 @@ export class OpenRouterHandler implements ApiHandler {
return openAiMessages
}
/*
Streaming the completion is a fallback behavior for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input"). This usually happens when the model makes tool calls with large arguments. Per the OpenRouter folks, streaming mitigates this issue for now until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer")
*/
async streamCompletion(
createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
): Promise<OpenAI.Chat.Completions.ChatCompletion> {
const stream = await this.client.chat.completions.create({
...createParams,
stream: true,
})
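// accumulate the streamed response locally: text deltas are appended to textContent, and tool call deltas are stitched together by index into complete tool calls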
let textContent: string = ""
let toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []
try {
let currentToolCall: (OpenAI.Chat.ChatCompletionMessageToolCall & { index?: number }) | null = null
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta
if (delta?.content) {
textContent += delta.content
}
if (delta?.tool_calls) {
for (const toolCallDelta of delta.tool_calls) {
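// each delta identifies the tool call it extends by index; deltas without an index can't be attributed, so skip them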
if (toolCallDelta.index === undefined) {
continue
}
if (!currentToolCall || currentToolCall.index !== toolCallDelta.index) {
// new index means new tool call, so add the previous one to the list
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
currentToolCall = {
index: toolCallDelta.index,
id: toolCallDelta.id || "",
type: "function",
function: { name: "", arguments: "" },
}
}
if (toolCallDelta.id) {
currentToolCall.id = toolCallDelta.id
}
if (toolCallDelta.type) {
currentToolCall.type = toolCallDelta.type
}
if (toolCallDelta.function) {
if (toolCallDelta.function.name) {
currentToolCall.function.name = toolCallDelta.function.name
}
if (toolCallDelta.function.arguments) {
currentToolCall.function.arguments =
(currentToolCall.function.arguments || "") + toolCallDelta.function.arguments
}
}
}
}
}
if (currentToolCall) {
toolCalls.push(currentToolCall)
}
} catch (error) {
console.error("Error streaming completion:", error)
throw error
}
// Usage information is not available in streaming responses, so we need to estimate token counts
function approximateTokenCount(text: string): number {
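// rough heuristic: ~4 UTF-8 bytes per token, in line with the common "1 token ≈ 4 characters" rule of thumb for English text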
return Math.ceil(new TextEncoder().encode(text).length / 4)
}
const promptTokens = approximateTokenCount(
createParams.messages
.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
.join(" ")
)
const completionTokens = approximateTokenCount(
textContent + toolCalls.map((toolCall) => toolCall.function.arguments || "").join(" ")
)
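// assemble a synthetic non-streaming ChatCompletion so the caller can handle the streamed fallback exactly like a normal response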
const completion: OpenAI.Chat.Completions.ChatCompletion = {
created: Math.floor(Date.now() / 1000), // ChatCompletion.created is a Unix timestamp in seconds
object: "chat.completion",
id: `openrouter-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // this ID won't be traceable back to OpenRouter's systems if you need to debug issues
choices: [
{
message: {
role: "assistant",
content: textContent,
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
},
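// the stream's own finish_reason isn't tracked above, so infer one from whether any tool calls were assembled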
finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
index: 0,
logprobs: null,
},
],
model: this.getModel().id,
usage: {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: promptTokens + completionTokens,
},
}
return completion
}
createUserReadableRequest(
userContent: Array<
| Anthropic.TextBlockParam