Fix gemini message conversion

Saoud Rizwan
2024-09-12 11:49:32 -04:00
parent fbb7620fa1
commit 3b004aed37
5 changed files with 121 additions and 49 deletions

View File

@@ -70,6 +70,27 @@ export class OpenRouterHandler implements ApiHandler {
const anthropicMessage = convertToAnthropicMessage(completion)
// Check if the model is Gemini Flash and remove extra escapes in tool result args
// switch (this.getModel().id) {
// case "google/gemini-pro-1.5":
// case "google/gemini-flash-1.5":
// const content = anthropicMessage.content
// for (const block of content) {
// if (
// block.type === "tool_use" &&
// typeof block.input === "object" &&
// block.input !== null &&
// "content" in block.input &&
// typeof block.input.content === "string"
// ) {
// block.input.content = unescapeGeminiContent(block.input.content)
// }
// }
// break
// default:
// break
// }
return { message: anthropicMessage }
}

View File

@@ -201,17 +201,22 @@ export const openRouterModels = {
// outputPrice: 0.06,
// },
// OpenRouter needs to fix its mapping of Gemini 1.5 responses for tool calls: it returns content with incorrectly formatted line breaks (too many escapes) and throws out-of-order errors for messages that are actually in order. It also cannot handle feedback given to a request with multiple tool uses; giving feedback to a single tool use request works fine. ("Please ensure that function response turn comes immediately after a function call turn. And the number of function response parts should be equal to number of function call parts of the function call turn.")
// UPDATE: I keep getting "400: Please ensure that function call turn comes immediately after a user turn or after a function response turn.", which goes away as soon as I switch to openrouter/claude, so it's clearly an error on OpenRouter's end when transforming the message structure. This is likely the culprit behind the tool order error people have seen with GPT-4o.
// "google/gemini-pro-1.5": {
// maxTokens: 8192,
// supportsImages: false, // "Function Calling is not supported with non-text input"
// contextWindow: 2_097_152,
// supportsImages: true, // "Function Calling is not supported with non-text input"
// supportsPromptCache: false,
// inputPrice: 2.5,
// outputPrice: 7.5,
// },
// "google/gemini-flash-1.5": {
// maxTokens: 8192,
// supportsImages: false, // "Function Calling is not supported with non-text input"
// inputPrice: 0.25,
// outputPrice: 0.75,
// contextWindow: 1_048_576,
// supportsImages: true, // "Function Calling is not supported with non-text input"
// supportsPromptCache: false,
// inputPrice: 0.0375,
// outputPrice: 0.15,
// },
// "google/gemini-pro": {
// maxTokens: 8192,
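
For context on the quoted turn-ordering error: a minimal sketch (illustrative values, using the Content type from @google/generative-ai) of the ordering Gemini expects. A functionResponse turn must come immediately after the functionCall turn it answers, with one response part per call part; following the role mapping in convertAnthropicMessageToGemini below, tool results are sent back in a "user" turn.

// Illustrative only, not part of this commit
const turns: Content[] = [
	{ role: "user", parts: [{ text: "Read both files" }] },
	{
		role: "model",
		parts: [
			{ functionCall: { name: "read_file", args: { path: "a.ts" } } },
			{ functionCall: { name: "read_file", args: { path: "b.ts" } } },
		],
	},
	{
		// must immediately follow the functionCall turn, with one functionResponse part per functionCall part
		role: "user",
		parts: [
			{ functionResponse: { name: "read_file", response: { name: "read_file", content: "contents of a.ts" } } },
			{ functionResponse: { name: "read_file", response: { name: "read_file", content: "contents of b.ts" } } },
		],
	},
]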

View File

@@ -1,5 +1,15 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { Content, EnhancedGenerateContentResponse, FunctionDeclaration, Part, SchemaType } from "@google/generative-ai"
import {
Content,
EnhancedGenerateContentResponse,
FunctionCallPart,
FunctionDeclaration,
FunctionResponsePart,
InlineDataPart,
Part,
SchemaType,
TextPart,
} from "@google/generative-ai"
export function convertAnthropicContentToGemini(
content:
@@ -12,12 +22,12 @@ export function convertAnthropicContentToGemini(
>
): Part[] {
if (typeof content === "string") {
return [{ text: content }]
return [{ text: content } as TextPart]
}
return content.map((block) => {
return content.flatMap((block) => {
switch (block.type) {
case "text":
return { text: block.text }
return { text: block.text } as TextPart
case "image":
if (block.source.type !== "base64") {
throw new Error("Unsupported image source type")
@@ -27,22 +37,55 @@ export function convertAnthropicContentToGemini(
data: block.source.data,
mimeType: block.source.media_type,
},
}
} as InlineDataPart
case "tool_use":
return {
functionCall: {
name: block.name,
args: block.input,
},
} as Part
} as FunctionCallPart
case "tool_result":
// tool_use ids are generated as `${name}-${index}-${timestamp}` in convertGeminiResponseToAnthropic below, so the first segment is the original tool name
const name = block.tool_use_id.split("-")[0]
if (!block.content) {
return []
}
if (typeof block.content === "string") {
return {
functionResponse: {
name: block.tool_use_id,
name,
response: {
name,
content: block.content,
},
},
} as FunctionResponsePart
} else {
// The only case where tool_result content is an array is when the tool failed and we're providing user feedback, potentially with images
const textParts = block.content.filter((part) => part.type === "text")
const imageParts = block.content.filter((part) => part.type === "image")
const text = textParts.length > 0 ? textParts.map((part) => part.text).join("\n\n") : ""
const imageText = imageParts.length > 0 ? "\n\n(See next part for image)" : ""
return [
{
functionResponse: {
name,
response: {
name,
content: text + imageText,
},
},
} as FunctionResponsePart,
...imageParts.map(
(part) =>
({
inlineData: {
data: part.source.data,
mimeType: part.source.media_type,
},
} as InlineDataPart)
),
]
}
default:
throw new Error(`Unsupported content block type: ${(block as any).type}`)
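
A hedged sketch of what the array branch above produces (the id and strings are illustrative; the id follows the `${name}-${index}-${timestamp}` format generated in convertGeminiResponseToAnthropic further down): a text-plus-image tool_result becomes one FunctionResponsePart followed by an InlineDataPart.

// Illustrative only, not part of this commit
const block: Anthropic.Messages.ToolResultBlockParam = {
	type: "tool_result",
	tool_use_id: "write_to_file-0-1726156172000",
	content: [
		{ type: "text", text: "The user denied this operation and provided feedback." },
		{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0KGgo..." } },
	],
}
// convertAnthropicContentToGemini([block]) would yield roughly:
// [
//   { functionResponse: { name: "write_to_file", response: { name: "write_to_file",
//       content: "The user denied this operation and provided feedback.\n\n(See next part for image)" } } },
//   { inlineData: { data: "iVBORw0KGgo...", mimeType: "image/png" } },
// ]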
@@ -52,7 +95,7 @@ export function convertAnthropicContentToGemini(
export function convertAnthropicMessageToGemini(message: Anthropic.Messages.MessageParam): Content {
return {
role: message.role === "assistant" ? "model" : message.role,
role: message.role === "assistant" ? "model" : "user",
parts: convertAnthropicContentToGemini(message.content),
}
}
@@ -77,6 +120,13 @@ export function convertAnthropicToolToGemini(tool: Anthropic.Messages.Tool): Fun
}
}
/*
It looks like Gemini likes to double-escape certain characters when writing file contents: https://discuss.ai.google.dev/t/function-call-string-property-is-double-escaped/37867
*/
export function unescapeGeminiContent(content: string) {
return content.replace(/\\n/g, "\n").replace(/\\'/g, "'").replace(/\\"/g, '"')
}
export function convertGeminiResponseToAnthropic(
response: EnhancedGenerateContentResponse
): Anthropic.Messages.Message {
@@ -92,9 +142,12 @@ export function convertGeminiResponseToAnthropic(
const functionCalls = response.functionCalls()
if (functionCalls) {
functionCalls.forEach((call, index) => {
if ("content" in call.args && typeof call.args.content === "string") {
call.args.content = unescapeGeminiContent(call.args.content)
}
content.push({
type: "tool_use",
id: `tool_${index}`,
id: `${call.name}-${index}-${Date.now()}`,
name: call.name,
input: call.args,
})
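
Two small illustrations of this hunk's behavior (strings are illustrative): unescapeGeminiContent collapses the doubled escapes Gemini emits in function call args, and each tool_use block now gets an id of the form `${name}-${index}-${Date.now()}`, which is what tool_use_id.split("-")[0] above relies on to recover the tool name.

// Illustrative only, not part of this commit
const doubled = 'const greeting = \\"hello\\"\\nconsole.log(greeting)'
unescapeGeminiContent(doubled)
// => 'const greeting = "hello"\nconsole.log(greeting)'
// An id produced for the first function call in a response might look like:
// "write_to_file-0-1726157372000"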

View File

@@ -220,7 +220,6 @@ const ChatRowContent = ({ message, isExpanded, onToggleExpand, lastModifiedMessa
onClick={() => {
vscode.postMessage({ type: "openFile", text: tool.content })
}}>
<div style={{ display: "flex", alignItems: "center" }}>
<span
style={{
whiteSpace: "nowrap",
@@ -232,7 +231,6 @@ const ChatRowContent = ({ message, isExpanded, onToggleExpand, lastModifiedMessa
}}>
{removeLeadingNonAlphanumeric(tool.path ?? "") + "\u200E"}
</span>
</div>
<span
className={`codicon codicon-link-external`}
style={{ fontSize: 13.5, margin: "1px 0" }}></span>

View File

@@ -49,24 +49,19 @@ const CodeAccordian = ({ code, diff, language, path, isFeedback, isExpanded, onT
msUserSelect: "none",
}}
onClick={onToggleExpand}>
<div style={{ display: "flex", alignItems: "center" }}>
{isFeedback && (
<span className="codicon codicon-feedback" style={{ marginRight: "6px" }}></span>
)}
{isFeedback && <span className="codicon codicon-feedback" style={{ marginRight: "6px" }}></span>}
<span
style={{
whiteSpace: "nowrap",
overflow: "hidden",
textOverflow: "ellipsis",
marginRight: "8px",
// fontSize: "11px",
// trick to get ellipsis at beginning of string
direction: "rtl",
textAlign: "left",
}}>
{isFeedback ? "User Edits" : removeLeadingNonAlphanumeric(path ?? "") + "\u200E"}
</span>
</div>
<span className={`codicon codicon-chevron-${isExpanded ? "up" : "down"}`}></span>
</div>
)}