Add ability to attach images to messages

This commit is contained in:
Saoud Rizwan
2024-08-08 02:44:51 -04:00
parent 9acae31fbb
commit 911dd159cd
16 changed files with 1129 additions and 179 deletions

View File

@@ -1,5 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
import { ApiHandlerOptions } from "../shared/api"
export class AnthropicHandler implements ApiHandler {
@@ -44,7 +44,7 @@ export class AnthropicHandler implements ApiHandler {
model: "claude-3-5-sonnet-20240620",
max_tokens: 8192,
system: "(see SYSTEM_PROMPT in src/ClaudeDev.ts)",
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: { type: "auto" },
}

View File

@@ -1,7 +1,7 @@
import AnthropicBedrock from "@anthropic-ai/bedrock-sdk"
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandlerOptions } from "../shared/api"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
// https://docs.anthropic.com/en/api/claude-on-amazon-bedrock
export class AwsBedrockHandler implements ApiHandler {
@@ -49,7 +49,7 @@ export class AwsBedrockHandler implements ApiHandler {
model: "anthropic.claude-3-5-sonnet-20240620-v1:0",
max_tokens: 4096,
system: "(see SYSTEM_PROMPT in src/ClaudeDev.ts)",
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: { type: "auto" },
}

View File

@@ -34,3 +34,31 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
return new AnthropicHandler(options)
}
}
/**
 * Builds a copy of `userContent` in which the `source.data` of every image block is
 * replaced by the placeholder string "...".
 *
 * Two places are covered: image blocks that sit directly in `userContent`, and image
 * blocks nested inside a `tool_result` block whose `content` is not a string.
 * Every other block (text, tool_use, tool_result with string content) is passed through
 * as the same object reference. The input array and its blocks are never mutated.
 *
 * @param userContent - content blocks of a user message
 * @returns a new array of the same length with image data replaced by "..."
 */
export function withoutImageData(
	userContent: Array<
		| Anthropic.TextBlockParam
		| Anthropic.ImageBlockParam
		| Anthropic.ToolUseBlockParam
		| Anthropic.ToolResultBlockParam
	>
): Array<
	Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam
> {
	// Shallow-copies the image block and its source so the original object keeps its data.
	const stripData = (image: Anthropic.ImageBlockParam): Anthropic.ImageBlockParam => ({
		...image,
		source: { ...image.source, data: "..." },
	})

	return userContent.map((block) => {
		switch (block.type) {
			case "image":
				return stripData(block)
			case "tool_result": {
				// String content cannot hold images, so the block is returned untouched.
				if (typeof block.content === "string") {
					return block
				}
				// Array content is rebuilt; when content is absent the optional chain yields undefined.
				return {
					...block,
					content: block.content?.map((inner) => (inner.type === "image" ? stripData(inner) : inner)),
				}
			}
			default:
				return block
		}
	})
}

View File

@@ -1,6 +1,6 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
import { ApiHandlerOptions } from "../shared/api"
export class OpenRouterHandler implements ApiHandler {
@@ -118,6 +118,7 @@ export class OpenRouterHandler implements ApiHandler {
openAiMessages.push({ role: anthropicMessage.role, content: anthropicMessage.content })
} else {
// image_url.url is base64 encoded image data
// ensure it contains the content-type of the image: data:image/png;base64,
/*
{ role: "user", content: "" | { type: "text", text: string } | { type: "image_url", image_url: { url: string } } },
// content required unless tool_calls is present
@@ -146,7 +147,10 @@ export class OpenRouterHandler implements ApiHandler {
role: "user",
content: nonToolMessages.map((part) => {
if (part.type === "image") {
return { type: "image_url", image_url: { url: part.source.data } }
return {
type: "image_url",
image_url: { url: "data:image/webp;base64," + part.source.data },
}
}
return { type: "text", text: part.text }
}),
@@ -157,6 +161,7 @@ export class OpenRouterHandler implements ApiHandler {
toolMessages.forEach((toolMessage) => {
// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
let content: string
let images: string[] = []
if (typeof toolMessage.content === "string") {
content = toolMessage.content
} else {
@@ -164,7 +169,8 @@ export class OpenRouterHandler implements ApiHandler {
toolMessage.content
?.map((part) => {
if (part.type === "image") {
return `{ type: "image_url", image_url: { url: ${part.source.data} } }`
images.push(part.source.data)
return "(see following user message for image)"
}
return part.text
})
@@ -175,6 +181,16 @@ export class OpenRouterHandler implements ApiHandler {
tool_call_id: toolMessage.tool_use_id,
content: content,
})
// If tool results contain images, send as a separate user message
if (images.length > 0) {
openAiMessages.push({
role: "user",
content: images.map((image) => ({
type: "image_url",
image_url: { url: "data:image/webp;base64," + image },
})),
})
}
})
} else if (anthropicMessage.role === "assistant") {
const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
@@ -198,7 +214,7 @@ export class OpenRouterHandler implements ApiHandler {
content = nonToolMessages
.map((part) => {
if (part.type === "image") {
return `{ type: "image_url", image_url: { url: ${part.source.data} } }`
return "" // impossible as the assistant cannot send images
}
return part.text
})
@@ -239,7 +255,7 @@ export class OpenRouterHandler implements ApiHandler {
return {
model: "anthropic/claude-3.5-sonnet:beta",
max_tokens: 4096,
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: "auto",
}