From 141fa69c05c59d8104b237fa0738b7f70f249969 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Sat, 24 Aug 2024 22:46:29 -0400
Subject: [PATCH] Refactor openai messages conversion

---
 package-lock.json               |  14 +++
 package.json                    |   1 +
 src/api/openrouter.ts           | 145 +-------------------------------
 src/utils/context-management.ts | 144 +++++++++++++++++++++++++++++++
 4 files changed, 161 insertions(+), 143 deletions(-)
 create mode 100644 src/utils/context-management.ts

diff --git a/package-lock.json b/package-lock.json
index fafef5e..3c1126f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -19,6 +19,7 @@
         "diff": "^5.2.0",
         "execa": "^9.3.0",
         "globby": "^14.0.2",
+        "gpt-tokenizer": "^2.2.1",
         "openai": "^4.54.0",
         "os-name": "^6.0.0",
         "p-wait-for": "^5.0.2",
@@ -6624,6 +6625,14 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/gpt-tokenizer": {
+      "version": "2.2.1",
+      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.2.1.tgz",
+      "integrity": "sha512-JYvLWTpPtFGz7eS7uixHslv3L96zka0n18MlQeH5YVl5F6mNhssxzSBTeqwNfW8A0AQIMYEaOfbSr+MaoCUvpg==",
+      "dependencies": {
+        "rfc4648": "^1.5.2"
+      }
+    },
     "node_modules/graceful-fs": {
       "version": "4.2.11",
       "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -8802,6 +8811,11 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/rfc4648": {
+      "version": "1.5.3",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.3.tgz",
+      "integrity": "sha512-MjOWxM065+WswwnmNONOT+bD1nXzY9Km6u3kzvnx8F8/HXGZdz3T6e6vZJ8Q/RIMUSp/nxqjH3GwvJDy8ijeQQ=="
+    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
diff --git a/package.json b/package.json
index 865c933..bf51466 100644
--- a/package.json
+++ b/package.json
@@ -142,6 +142,7 @@
 		"diff": "^5.2.0",
 		"execa": "^9.3.0",
 		"globby": "^14.0.2",
+		"gpt-tokenizer": "^2.2.1",
 		"openai": "^4.54.0",
 		"os-name": "^6.0.0",
 		"p-wait-for": "^5.0.2",
diff --git a/src/api/openrouter.ts b/src/api/openrouter.ts
index b7940fa..be6d5d1 100644
--- a/src/api/openrouter.ts
+++ b/src/api/openrouter.ts
@@ -8,6 +8,7 @@ import {
 	OpenRouterModelId,
 	openRouterModels,
 } from "../shared/api"
+import { convertToOpenAiMessages } from "../utils/context-management"
 
 export class OpenRouterHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -33,7 +34,7 @@ export class OpenRouterHandler implements ApiHandler {
 		// Convert Anthropic messages to OpenAI format
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...this.convertToOpenAiMessages(messages),
+			...convertToOpenAiMessages(messages),
 		]
 
 		// Convert Anthropic tools to OpenAI tools
@@ -122,148 +123,6 @@ export class OpenRouterHandler implements ApiHandler {
 		return anthropicMessage
 	}
 
-	convertToOpenAiMessages(
-		anthropicMessages: Anthropic.Messages.MessageParam[]
-	): OpenAI.Chat.ChatCompletionMessageParam[] {
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = []
-
-		for (const anthropicMessage of anthropicMessages) {
-			if (typeof anthropicMessage.content === "string") {
-				openAiMessages.push({ role: anthropicMessage.role, content: anthropicMessage.content })
-			} else {
-				// image_url.url is base64 encoded image data
-				// ensure it contains the content-type of the image: data:image/png;base64,
-				/*
-				{ role: "user", content: "" | { type: "text", text: string } | { type: "image_url", image_url: { url: string } } },
-				// content required unless tool_calls is present
-				{ role: "assistant", content?: "" | null, tool_calls?: [{ id: "", function: { name: "", arguments: "" }, type: "function" }] },
-				{ role: "tool", tool_call_id: "", content: ""}
-				*/
-				if (anthropicMessage.role === "user") {
-					const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
-						nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
-						toolMessages: Anthropic.ToolResultBlockParam[]
-					}>(
-						(acc, part) => {
-							if (part.type === "tool_result") {
-								acc.toolMessages.push(part)
-							} else if (part.type === "text" || part.type === "image") {
-								acc.nonToolMessages.push(part)
-							} // user cannot send tool_use messages
-							return acc
-						},
-						{ nonToolMessages: [], toolMessages: [] }
-					)
-
-					// Process non-tool messages
-					if (nonToolMessages.length > 0) {
-						openAiMessages.push({
-							role: "user",
-							content: nonToolMessages.map((part) => {
-								if (part.type === "image") {
-									return {
-										type: "image_url",
-										image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
-									}
-								}
-								return { type: "text", text: part.text }
-							}),
-						})
-					}
-
-					// Process tool result messages
-					let toolResultImages: Anthropic.Messages.ImageBlockParam[] = []
-					toolMessages.forEach((toolMessage) => {
-						// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
-						let content: string
-
-						if (typeof toolMessage.content === "string") {
-							content = toolMessage.content
-						} else {
-							content =
-								toolMessage.content
-									?.map((part) => {
-										if (part.type === "image") {
-											toolResultImages.push(part)
-											return "(see following user message for image)"
-										}
-										return part.text
-									})
-									.join("\n") ?? ""
-						}
-						openAiMessages.push({
-							role: "tool",
-							tool_call_id: toolMessage.tool_use_id,
-							content: content,
-						})
-					})
-
-					// If tool results contain images, send as a separate user message
-					// I ran into an issue where if I gave feedback for one of many tool uses, the request would fail.
-					// "Messages following `tool_use` blocks must begin with a matching number of `tool_result` blocks."
-					// Therefore we need to send these images after the tool result messages
-					if (toolResultImages.length > 0) {
-						openAiMessages.push({
-							role: "user",
-							content: toolResultImages.map((part) => ({
-								type: "image_url",
-								image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
-							})),
-						})
-					}
-				} else if (anthropicMessage.role === "assistant") {
-					const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
-						nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
-						toolMessages: Anthropic.ToolUseBlockParam[]
-					}>(
-						(acc, part) => {
-							if (part.type === "tool_use") {
-								acc.toolMessages.push(part)
-							} else if (part.type === "text" || part.type === "image") {
-								acc.nonToolMessages.push(part)
-							} // assistant cannot send tool_result messages
-							return acc
-						},
-						{ nonToolMessages: [], toolMessages: [] }
-					)
-
-					// Process non-tool messages
-					let content: string | undefined
-					if (nonToolMessages.length > 0) {
-						content = nonToolMessages
-							.map((part) => {
-								if (part.type === "image") {
-									return "" // impossible as the assistant cannot send images
-								}
-								return part.text
-							})
-							.join("\n")
-					}
-
-					// Process tool use messages
-					let tool_calls: OpenAI.Chat.ChatCompletionMessageToolCall[] = toolMessages.map((toolMessage) => ({
-						id: toolMessage.id,
-						type: "function",
-						function: {
-							name: toolMessage.name,
-							// json string
-							arguments: JSON.stringify(toolMessage.input),
-						},
-					}))
-
-					openAiMessages.push({
-						role: "assistant",
-						content,
-						// Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty
-						tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
-					})
-				}
-			}
-		}
-
-		return openAiMessages
-	}
-
 	/*
 	Streaming the completion is a fallback behavior for when a normal request responds with an invalid JSON object ("Unexpected end of JSON input"). This would usually happen in cases where the model makes tool calls with large arguments. After talking with OpenRouter folks, streaming mitigates this issue for now until they fix the underlying problem ("some weird data from anthropic got decoded wrongly and crashed the buffer")
 	*/
diff --git a/src/utils/context-management.ts b/src/utils/context-management.ts
new file mode 100644
index 0000000..41ea418
--- /dev/null
+++ b/src/utils/context-management.ts
@@ -0,0 +1,144 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+export function convertToOpenAiMessages(
+	anthropicMessages: Anthropic.Messages.MessageParam[]
+): OpenAI.Chat.ChatCompletionMessageParam[] {
+	const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = []
+
+	for (const anthropicMessage of anthropicMessages) {
+		if (typeof anthropicMessage.content === "string") {
+			openAiMessages.push({ role: anthropicMessage.role, content: anthropicMessage.content })
+		} else {
+			// image_url.url is base64 encoded image data
+			// ensure it contains the content-type of the image: data:image/png;base64,
+			/*
+			{ role: "user", content: "" | { type: "text", text: string } | { type: "image_url", image_url: { url: string } } },
+			// content required unless tool_calls is present
+			{ role: "assistant", content?: "" | null, tool_calls?: [{ id: "", function: { name: "", arguments: "" }, type: "function" }] },
+			{ role: "tool", tool_call_id: "", content: ""}
+			*/
+			if (anthropicMessage.role === "user") {
+				const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
+					nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
+					toolMessages: Anthropic.ToolResultBlockParam[]
+				}>(
+					(acc, part) => {
+						if (part.type === "tool_result") {
+							acc.toolMessages.push(part)
+						} else if (part.type === "text" || part.type === "image") {
+							acc.nonToolMessages.push(part)
+						} // user cannot send tool_use messages
+						return acc
+					},
+					{ nonToolMessages: [], toolMessages: [] }
+				)
+
+				// Process non-tool messages
+				if (nonToolMessages.length > 0) {
+					openAiMessages.push({
+						role: "user",
+						content: nonToolMessages.map((part) => {
+							if (part.type === "image") {
+								return {
+									type: "image_url",
+									image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+								}
+							}
+							return { type: "text", text: part.text }
+						}),
+					})
+				}
+
+				// Process tool result messages
+				let toolResultImages: Anthropic.Messages.ImageBlockParam[] = []
+				toolMessages.forEach((toolMessage) => {
+					// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
+					let content: string
+
+					if (typeof toolMessage.content === "string") {
+						content = toolMessage.content
+					} else {
+						content =
+							toolMessage.content
+								?.map((part) => {
+									if (part.type === "image") {
+										toolResultImages.push(part)
+										return "(see following user message for image)"
+									}
+									return part.text
+								})
+								.join("\n") ?? ""
+					}
+					openAiMessages.push({
+						role: "tool",
+						tool_call_id: toolMessage.tool_use_id,
+						content: content,
+					})
+				})
+
+				// If tool results contain images, send as a separate user message
+				// I ran into an issue where if I gave feedback for one of many tool uses, the request would fail.
+				// "Messages following `tool_use` blocks must begin with a matching number of `tool_result` blocks."
+				// Therefore we need to send these images after the tool result messages
+				if (toolResultImages.length > 0) {
+					openAiMessages.push({
+						role: "user",
+						content: toolResultImages.map((part) => ({
+							type: "image_url",
+							image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+						})),
+					})
+				}
+			} else if (anthropicMessage.role === "assistant") {
+				const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
+					nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
+					toolMessages: Anthropic.ToolUseBlockParam[]
+				}>(
+					(acc, part) => {
+						if (part.type === "tool_use") {
+							acc.toolMessages.push(part)
+						} else if (part.type === "text" || part.type === "image") {
+							acc.nonToolMessages.push(part)
+						} // assistant cannot send tool_result messages
+						return acc
+					},
+					{ nonToolMessages: [], toolMessages: [] }
+				)
+
+				// Process non-tool messages
+				let content: string | undefined
+				if (nonToolMessages.length > 0) {
+					content = nonToolMessages
+						.map((part) => {
+							if (part.type === "image") {
+								return "" // impossible as the assistant cannot send images
+							}
+							return part.text
+						})
+						.join("\n")
+				}
+
+				// Process tool use messages
+				let tool_calls: OpenAI.Chat.ChatCompletionMessageToolCall[] = toolMessages.map((toolMessage) => ({
+					id: toolMessage.id,
+					type: "function",
+					function: {
+						name: toolMessage.name,
+						// json string
+						arguments: JSON.stringify(toolMessage.input),
+					},
+				}))
+
+				openAiMessages.push({
+					role: "assistant",
+					content,
+					// Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty
+					tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
+				})
+			}
+		}
+	}
+
+	return openAiMessages
+}
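For review, a minimal usage sketch of the extracted helper. The sample conversation, tool name, and relative import path below are illustrative only; convertToOpenAiMessages itself is the function this patch adds.

import { Anthropic } from "@anthropic-ai/sdk"
import { convertToOpenAiMessages } from "./utils/context-management"

// An illustrative conversation: text, a tool_use block, and a tool_result.
const anthropicMessages: Anthropic.Messages.MessageParam[] = [
	{ role: "user", content: "Read package.json and summarize it." },
	{
		role: "assistant",
		content: [
			{ type: "text", text: "I'll read the file." },
			{ type: "tool_use", id: "toolu_01", name: "read_file", input: { path: "package.json" } },
		],
	},
	{
		role: "user",
		content: [{ type: "tool_result", tool_use_id: "toolu_01", content: "{ ... }" }],
	},
]

// Yields a user string message, an assistant message whose tool_use block
// becomes a tool_calls entry with JSON-stringified arguments, and a matching
// role: "tool" message keyed by tool_call_id.
console.log(convertToOpenAiMessages(anthropicMessages))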
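The comment retained in openrouter.ts describes streaming as a fallback for invalid-JSON responses on large tool calls. Roughly, that fallback could take the shape below; completeWithStreamingFallback, client, and createParams are hypothetical names, and this is a sketch of the idea, not the handler's actual implementation.

import OpenAI from "openai"

async function completeWithStreamingFallback(
	client: OpenAI,
	createParams: OpenAI.Chat.ChatCompletionCreateParamsNonStreaming
): Promise<string> {
	try {
		// Try the normal, non-streaming request first.
		const completion = await client.chat.completions.create(createParams)
		return completion.choices[0]?.message.content ?? ""
	} catch (error) {
		// Fall back to streaming and reassemble the text from deltas, which
		// sidesteps the truncated-JSON responses the comment above describes.
		const stream = await client.chat.completions.create({ ...createParams, stream: true as const })
		let text = ""
		for await (const chunk of stream) {
			text += chunk.choices[0]?.delta?.content ?? ""
		}
		return text
	}
}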
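This patch also adds gpt-tokenizer as a dependency without referencing it yet. One plausible use a context-management module could make of it is a token-budget trim like the sketch below; it assumes the library's documented encode export, and maxTokens and the oldest-first trimming strategy are assumptions, not anything this patch implements.

import { encode } from "gpt-tokenizer"
import OpenAI from "openai"

// Rough per-conversation token count: tokenize each serialized message.
// Chat-format overhead per message is ignored for simplicity.
function countMessageTokens(messages: OpenAI.Chat.ChatCompletionMessageParam[]): number {
	return messages.reduce((total, message) => total + encode(JSON.stringify(message)).length, 0)
}

// Drop the oldest turns (keeping the system prompt at index 0) until the
// conversation fits the assumed maxTokens budget.
function truncateToBudget(
	messages: OpenAI.Chat.ChatCompletionMessageParam[],
	maxTokens: number
): OpenAI.Chat.ChatCompletionMessageParam[] {
	const trimmed = [...messages]
	while (trimmed.length > 2 && countMessageTokens(trimmed) > maxTokens) {
		trimmed.splice(1, 1)
	}
	return trimmed
}

A production version would likely also need to drop an assistant message's tool_calls together with the matching role: "tool" replies, for the same pairing reason the quoted tool_use/tool_result error above hints at.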