Refactor API

Saoud Rizwan
2024-09-24 10:43:31 -04:00
parent f774e62c13
commit a009c84597
12 changed files with 25 additions and 19 deletions

View File

@@ -1,13 +1,13 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiConfiguration, ModelInfo } from "../shared/api"
-import { AnthropicHandler } from "./anthropic"
-import { AwsBedrockHandler } from "./bedrock"
-import { OpenRouterHandler } from "./openrouter"
-import { VertexHandler } from "./vertex"
-import { OpenAiHandler } from "./openai"
-import { OllamaHandler } from "./ollama"
-import { GeminiHandler } from "./gemini"
-import { OpenAiNativeHandler } from "./openai-native"
+import { AnthropicHandler } from "./providers/anthropic"
+import { AwsBedrockHandler } from "./providers/bedrock"
+import { OpenRouterHandler } from "./providers/openrouter"
+import { VertexHandler } from "./providers/vertex"
+import { OpenAiHandler } from "./providers/openai"
+import { OllamaHandler } from "./providers/ollama"
+import { GeminiHandler } from "./providers/gemini"
+import { OpenAiNativeHandler } from "./providers/openai-native"
export interface ApiHandlerMessageResponse {
message: Anthropic.Messages.Message

View File

@@ -1,6 +1,12 @@
import { Anthropic } from "@anthropic-ai/sdk"
-import { ApiHandler, ApiHandlerMessageResponse } from "."
-import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, ApiHandlerOptions, ModelInfo } from "../shared/api"
+import { ApiHandler, ApiHandlerMessageResponse } from "../index"
+import {
+	anthropicDefaultModelId,
+	AnthropicModelId,
+	anthropicModels,
+	ApiHandlerOptions,
+	ModelInfo,
+} from "../../shared/api"
export class AnthropicHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -6,7 +6,7 @@ import {
convertAnthropicMessageToGemini,
convertAnthropicToolToGemini,
convertGeminiResponseToAnthropic,
} from "../utils/gemini-format"
} from "./transform/gemini-format"
export class GeminiHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { ApiHandler, ApiHandlerMessageResponse } from "."
import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import { convertToAnthropicMessage, convertToOpenAiMessages } from "./transform/openai-format"
export class OllamaHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -8,8 +8,8 @@ import {
OpenAiNativeModelId,
openAiNativeModels,
} from "../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
-import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../utils/o1-format"
+import { convertToAnthropicMessage, convertToOpenAiMessages } from "./transform/openai-format"
+import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "./transform/o1-format"
export class OpenAiNativeHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -1,8 +1,8 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI, { AzureOpenAI } from "openai"
-import { ApiHandler, ApiHandlerMessageResponse } from "."
-import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import { ApiHandler, ApiHandlerMessageResponse } from "../index"
+import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
+import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
export class OpenAiHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -8,9 +8,9 @@ import {
OpenRouterModelId,
openRouterModels,
} from "../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
+import { convertToAnthropicMessage, convertToOpenAiMessages } from "./transform/openai-format"
import axios from "axios"
-import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../utils/o1-format"
+import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "./transform/o1-format"
export class OpenRouterHandler implements ApiHandler {
private options: ApiHandlerOptions

View File

@@ -0,0 +1,195 @@
import { Anthropic } from "@anthropic-ai/sdk"
import {
Content,
EnhancedGenerateContentResponse,
FunctionCallPart,
FunctionDeclaration,
FunctionResponsePart,
InlineDataPart,
Part,
SchemaType,
TextPart,
} from "@google/generative-ai"
export function convertAnthropicContentToGemini(
content:
| string
| Array<
| Anthropic.Messages.TextBlockParam
| Anthropic.Messages.ImageBlockParam
| Anthropic.Messages.ToolUseBlockParam
| Anthropic.Messages.ToolResultBlockParam
>
): Part[] {
if (typeof content === "string") {
return [{ text: content } as TextPart]
}
return content.flatMap((block) => {
switch (block.type) {
case "text":
return { text: block.text } as TextPart
case "image":
if (block.source.type !== "base64") {
throw new Error("Unsupported image source type")
}
return {
inlineData: {
data: block.source.data,
mimeType: block.source.media_type,
},
} as InlineDataPart
case "tool_use":
return {
functionCall: {
name: block.name,
args: block.input,
},
} as FunctionCallPart
case "tool_result":
const name = block.tool_use_id.split("-")[0]
if (!block.content) {
return []
}
if (typeof block.content === "string") {
return {
functionResponse: {
name,
response: {
name,
content: block.content,
},
},
} as FunctionResponsePart
} else {
// The only case where tool_result content can be an array is when the tool failed and we're providing user feedback, potentially with images
const textParts = block.content.filter((part) => part.type === "text")
const imageParts = block.content.filter((part) => part.type === "image")
const text = textParts.length > 0 ? textParts.map((part) => part.text).join("\n\n") : ""
const imageText = imageParts.length > 0 ? "\n\n(See next part for image)" : ""
return [
{
functionResponse: {
name,
response: {
name,
content: text + imageText,
},
},
} as FunctionResponsePart,
...imageParts.map(
(part) =>
({
inlineData: {
data: part.source.data,
mimeType: part.source.media_type,
},
} as InlineDataPart)
),
]
}
default:
throw new Error(`Unsupported content block type: ${(block as any).type}`)
}
})
}
export function convertAnthropicMessageToGemini(message: Anthropic.Messages.MessageParam): Content {
return {
role: message.role === "assistant" ? "model" : "user",
parts: convertAnthropicContentToGemini(message.content),
}
}
export function convertAnthropicToolToGemini(tool: Anthropic.Messages.Tool): FunctionDeclaration {
return {
name: tool.name,
description: tool.description || "",
parameters: {
type: SchemaType.OBJECT,
properties: Object.fromEntries(
Object.entries(tool.input_schema.properties || {}).map(([key, value]) => [
key,
{
type: (value as any).type.toUpperCase(),
description: (value as any).description || "",
},
])
),
required: (tool.input_schema.required as string[]) || [],
},
}
}
/*
It looks like Gemini likes to double-escape certain characters when writing file contents: https://discuss.ai.google.dev/t/function-call-string-property-is-double-escaped/37867
*/
export function unescapeGeminiContent(content: string) {
return content
.replace(/\\n/g, "\n")
.replace(/\\'/g, "'")
.replace(/\\"/g, '"')
.replace(/\\r/g, "\r")
.replace(/\\t/g, "\t")
}
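// Illustrative example (not part of this commit): if Gemini writes the two-character
// sequence backslash + n inside file content, unescapeGeminiContent converts it into a real
// newline; escaped quotes (\" and \') likewise become plain quotes.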
export function convertGeminiResponseToAnthropic(
response: EnhancedGenerateContentResponse
): Anthropic.Messages.Message {
const content: Anthropic.Messages.ContentBlock[] = []
// Add the main text response
const text = response.text()
if (text) {
content.push({ type: "text", text })
}
// Add function calls as tool_use blocks
const functionCalls = response.functionCalls()
if (functionCalls) {
functionCalls.forEach((call, index) => {
if ("content" in call.args && typeof call.args.content === "string") {
call.args.content = unescapeGeminiContent(call.args.content)
}
content.push({
type: "tool_use",
id: `${call.name}-${index}-${Date.now()}`,
name: call.name,
input: call.args,
})
})
}
// Determine stop reason
let stop_reason: Anthropic.Messages.Message["stop_reason"] = null
const finishReason = response.candidates?.[0]?.finishReason
if (finishReason) {
switch (finishReason) {
case "STOP":
stop_reason = "end_turn"
break
case "MAX_TOKENS":
stop_reason = "max_tokens"
break
case "SAFETY":
case "RECITATION":
case "OTHER":
stop_reason = "stop_sequence"
break
// Add more cases if needed
}
}
return {
id: `msg_${Date.now()}`, // Generate a unique ID
type: "message",
role: "assistant",
content,
model: "",
stop_reason,
stop_sequence: null, // Gemini doesn't provide this information
usage: {
input_tokens: response.usageMetadata?.promptTokenCount ?? 0,
output_tokens: response.usageMetadata?.candidatesTokenCount ?? 0,
},
}
}
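For orientation, a minimal sketch (not part of this commit) of how these converters could be wired to the @google/generative-ai SDK; the function name, model id, and import path below are illustrative placeholders:

// Illustrative sketch only — not part of this commit.
import { GoogleGenerativeAI } from "@google/generative-ai"
import { Anthropic } from "@anthropic-ai/sdk"
import {
	convertAnthropicMessageToGemini,
	convertAnthropicToolToGemini,
	convertGeminiResponseToAnthropic,
} from "./transform/gemini-format"

async function geminiExample(
	apiKey: string,
	messages: Anthropic.Messages.MessageParam[],
	tools: Anthropic.Messages.Tool[]
): Promise<Anthropic.Messages.Message> {
	const client = new GoogleGenerativeAI(apiKey)
	const model = client.getGenerativeModel({
		model: "gemini-1.5-flash", // placeholder model id
		tools: [{ functionDeclarations: tools.map(convertAnthropicToolToGemini) }],
	})
	// Convert the Anthropic-shaped conversation into Gemini Content objects,
	// call the model, then map the response back into an Anthropic Message.
	const result = await model.generateContent({
		contents: messages.map(convertAnthropicMessageToGemini),
	})
	return convertGeminiResponseToAnthropic(result.response)
}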

View File

@@ -0,0 +1,429 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
const o1SystemPrompt = (systemPrompt: string) => `
# System Prompt
${systemPrompt}
# Instructions for Formulating Your Response
You must respond to the user's request by using at least one tool call. When formulating your response, follow these guidelines:
1. Begin your response with normal text, explaining your thoughts, analysis, or plan of action.
2. If you need to use any tools, place ALL tool calls at the END of your message, after your normal text explanation.
3. You can use multiple tool calls if needed, but they should all be grouped together at the end of your message.
4. After placing the tool calls, do not add any additional normal text. The tool calls should be the final content in your message.
Here's the general structure your responses should follow:
\`\`\`
[Your normal text response explaining your thoughts and actions]
[Tool Call 1]
[Tool Call 2 if needed]
[Tool Call 3 if needed]
...
\`\`\`
Remember:
- Choose the most appropriate tool(s) based on the task and the tool descriptions provided.
- Formulate your tool calls using the XML format specified for each tool.
- Provide clear explanations in your normal text about what actions you're taking and why you're using particular tools.
- Act as if the tool calls will be executed immediately after your message, and your next response will have access to their results.
# Tool Descriptions and XML Formats
1. execute_command:
<execute_command>
<command>Your command here</command>
</execute_command>
Description: Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory.
2. list_files:
<list_files>
<path>Directory path here</path>
<recursive>true or false (optional)</recursive>
</list_files>
Description: List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.
3. list_code_definition_names:
<list_code_definition_names>
<path>Directory path here</path>
</list_code_definition_names>
Description: Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.
4. search_files:
<search_files>
<path>Directory path here</path>
<regex>Your regex pattern here</regex>
<filePattern>Optional file pattern here</filePattern>
</search_files>
Description: Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.
5. read_file:
<read_file>
<path>File path here</path>
</read_file>
Description: Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
6. write_to_file:
<write_to_file>
<path>File path here</path>
<content>
Your file content here
</content>
</write_to_file>
Description: Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.
7. ask_followup_question:
<ask_followup_question>
<question>Your question here</question>
</ask_followup_question>
Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.
8. attempt_completion:
<attempt_completion>
<command>Optional command to demonstrate result</command>
<result>
Your final result description here
</result>
</attempt_completion>
Description: Once you've completed the task, use this tool to present the result to the user. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
# Examples
Here are some examples of how to structure your responses with tool calls:
Example 1: Using a single tool
Let's run the test suite for our project. This will help us ensure that all our components are functioning correctly.
<execute_command>
<command>npm test</command>
</execute_command>
Example 2: Using multiple tools
Let's create two new configuration files for the web application: one for the frontend and one for the backend.
<write_to_file>
<path>./frontend-config.json</path>
<content>
{
"apiEndpoint": "https://api.example.com",
"theme": {
"primaryColor": "#007bff",
"secondaryColor": "#6c757d",
"fontFamily": "Arial, sans-serif"
},
"features": {
"darkMode": true,
"notifications": true,
"analytics": false
},
"version": "1.0.0"
}
</content>
</write_to_file>
<write_to_file>
<path>./backend-config.yaml</path>
<content>
database:
host: localhost
port: 5432
name: myapp_db
user: admin
server:
port: 3000
environment: development
logLevel: debug
security:
jwtSecret: your-secret-key-here
passwordSaltRounds: 10
caching:
enabled: true
provider: redis
ttl: 3600
externalServices:
emailProvider: sendgrid
storageProvider: aws-s3
</content>
</write_to_file>
Example 3: Asking a follow-up question
I've analyzed the project structure, but I need more information to proceed. Let me ask the user for clarification.
<ask_followup_question>
<question>Which specific feature would you like me to implement in the example.py file?</question>
</ask_followup_question>
`
export function convertToO1Messages(
openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
systemPrompt: string
): OpenAI.Chat.ChatCompletionMessageParam[] {
const toolsReplaced = openAiMessages.reduce((acc, message) => {
if (message.role === "tool") {
// Convert tool messages to user messages
acc.push({
role: "user",
content: message.content || "",
})
} else if (message.role === "assistant" && message.tool_calls) {
// Convert tool calls to content and remove tool_calls
let content = message.content || ""
message.tool_calls.forEach((toolCall) => {
if (toolCall.type === "function") {
content += `\nTool Call: ${toolCall.function.name}\nArguments: ${toolCall.function.arguments}`
}
})
acc.push({
role: "assistant",
content: content,
tool_calls: undefined,
})
} else {
// Keep other messages as they are
acc.push(message)
}
return acc
}, [] as OpenAI.Chat.ChatCompletionMessageParam[])
// Find the index of the last assistant message
// const lastAssistantIndex = findLastIndex(toolsReplaced, (message) => message.role === "assistant")
// Create a new array to hold the modified messages
const messagesWithSystemPrompt = [
{
role: "user",
content: o1SystemPrompt(systemPrompt),
} as OpenAI.Chat.ChatCompletionUserMessageParam,
...toolsReplaced,
]
// If there's an assistant message, insert the system prompt after it
// if (lastAssistantIndex !== -1) {
// const insertIndex = lastAssistantIndex + 1
// if (insertIndex < messagesWithSystemPrompt.length && messagesWithSystemPrompt[insertIndex].role === "user") {
// messagesWithSystemPrompt.splice(insertIndex, 0, {
// role: "user",
// content: o1SystemPrompt(systemPrompt),
// })
// }
// } else {
// // If there were no assistant messages, prepend the system prompt
// messagesWithSystemPrompt.unshift({
// role: "user",
// content: o1SystemPrompt(systemPrompt),
// })
// }
return messagesWithSystemPrompt
}
interface ToolCall {
tool: string
tool_input: Record<string, string>
}
const toolNames = [
"execute_command",
"list_files",
"list_code_definition_names",
"search_files",
"read_file",
"write_to_file",
"ask_followup_question",
"attempt_completion",
]
function parseAIResponse(response: string): { normalText: string; toolCalls: ToolCall[] } {
// Create a regex pattern to match any tool call opening tag
const toolCallPattern = new RegExp(`<(${toolNames.join("|")})`, "i")
const match = response.match(toolCallPattern)
if (!match) {
// No tool calls found
return { normalText: response.trim(), toolCalls: [] }
}
const toolCallStart = match.index!
const normalText = response.slice(0, toolCallStart).trim()
const toolCallsText = response.slice(toolCallStart)
const toolCalls = parseToolCalls(toolCallsText)
return { normalText, toolCalls }
}
function parseToolCalls(toolCallsText: string): ToolCall[] {
const toolCalls: ToolCall[] = []
let remainingText = toolCallsText
while (remainingText.length > 0) {
const toolMatch = toolNames.find((tool) => new RegExp(`<${tool}`, "i").test(remainingText))
if (!toolMatch) {
break // No more tool calls found
}
const startTag = `<${toolMatch}`
const endTag = `</${toolMatch}>`
const startIndex = remainingText.indexOf(startTag)
const endIndex = remainingText.indexOf(endTag, startIndex)
if (endIndex === -1) {
break // Malformed XML, no closing tag found
}
const toolCallContent = remainingText.slice(startIndex, endIndex + endTag.length)
remainingText = remainingText.slice(endIndex + endTag.length).trim()
const toolCall = parseToolCall(toolMatch, toolCallContent)
if (toolCall) {
toolCalls.push(toolCall)
}
}
return toolCalls
}
function parseToolCall(toolName: string, content: string): ToolCall | null {
const tool_input: Record<string, string> = {}
// Remove the outer tool tags
const innerContent = content.replace(new RegExp(`^<${toolName}>|</${toolName}>$`, "g"), "").trim()
// Parse nested XML elements
const paramRegex = /<(\w+)>([\s\S]*?)<\/\1>/gs
let match
while ((match = paramRegex.exec(innerContent)) !== null) {
const [, paramName, paramValue] = match
// Preserve newlines and trim only leading/trailing whitespace
tool_input[paramName] = paramValue.replace(/^\s+|\s+$/g, "")
}
// Validate required parameters
if (!validateToolInput(toolName, tool_input)) {
console.error(`Invalid tool call for ${toolName}:`, content)
return null
}
return { tool: toolName, tool_input }
}
function validateToolInput(toolName: string, tool_input: Record<string, string>): boolean {
switch (toolName) {
case "execute_command":
return "command" in tool_input
case "read_file":
case "list_code_definition_names":
case "list_files":
return "path" in tool_input
case "search_files":
return "path" in tool_input && "regex" in tool_input
case "write_to_file":
return "path" in tool_input && "content" in tool_input
case "ask_followup_question":
return "question" in tool_input
case "attempt_completion":
return "result" in tool_input
default:
return false
}
}
// Example usage:
// const aiResponse = `Here's my analysis of the situation...
// <execute_command>
// <command>ls -la</command>
// </execute_command>
// <write_to_file>
// <path>./example.txt</path>
// <content>Hello, World!</content>
// </write_to_file>`;
//
// const { normalText, toolCalls } = parseAIResponse(aiResponse);
// console.log(normalText);
// console.log(toolCalls);
// Convert OpenAI response to Anthropic format
export function convertO1ResponseToAnthropicMessage(
completion: OpenAI.Chat.Completions.ChatCompletion
): Anthropic.Messages.Message {
const openAiMessage = completion.choices[0].message
const { normalText, toolCalls } = parseAIResponse(openAiMessage.content || "")
const anthropicMessage: Anthropic.Messages.Message = {
id: completion.id,
type: "message",
role: openAiMessage.role, // always "assistant"
content: [
{
type: "text",
text: normalText,
},
],
model: completion.model,
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
output_tokens: completion.usage?.completion_tokens || 0,
},
}
if (toolCalls.length > 0) {
anthropicMessage.content.push(
...toolCalls.map((toolCall: ToolCall, index: number): Anthropic.ToolUseBlock => {
return {
type: "tool_use",
id: `call_${index}_${Date.now()}`, // Generate a unique ID for each tool call
name: toolCall.tool,
input: toolCall.tool_input,
}
})
)
}
return anthropicMessage
}
// Example usage:
// const openAICompletion = {
// id: "cmpl-123",
// choices: [{
// message: {
// role: "assistant",
// content: "Here's my analysis...\n\n<execute_command>\n <command>ls -la</command>\n</execute_command>"
// },
// finish_reason: "stop"
// }],
// model: "gpt-3.5-turbo",
// usage: { prompt_tokens: 50, completion_tokens: 100 }
// };
// const anthropicMessage = convertO1ResponseToAnthropicMessage(openAICompletion);
// console.log(anthropicMessage);
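As a rough sketch (not part of this commit), the two o1 helpers are meant to wrap an ordinary chat completion call; the function name and model id below are placeholders:

// Illustrative sketch only — not part of this commit.
import OpenAI from "openai"
import { Anthropic } from "@anthropic-ai/sdk"
import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "./transform/o1-format"
import { convertToOpenAiMessages } from "./transform/openai-format"

async function o1Example(
	client: OpenAI,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[]
): Promise<Anthropic.Messages.Message> {
	// o1 models don't take a system role or native tool calls here, so the system prompt
	// (with its XML tool instructions) is injected as a user message and tool messages are flattened.
	const o1Messages = convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt)
	const completion = await client.chat.completions.create({
		model: "o1-preview", // placeholder model id
		messages: o1Messages,
	})
	// Parse the XML tool calls out of the text response back into Anthropic tool_use blocks.
	return convertO1ResponseToAnthropicMessage(completion)
}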

View File

@@ -0,0 +1,202 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
export function convertToOpenAiMessages(
anthropicMessages: Anthropic.Messages.MessageParam[]
): OpenAI.Chat.ChatCompletionMessageParam[] {
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = []
for (const anthropicMessage of anthropicMessages) {
if (typeof anthropicMessage.content === "string") {
openAiMessages.push({ role: anthropicMessage.role, content: anthropicMessage.content })
} else {
// image_url.url is base64 encoded image data
// ensure it contains the content-type of the image: data:image/png;base64,
/*
{ role: "user", content: "" | { type: "text", text: string } | { type: "image_url", image_url: { url: string } } },
// content required unless tool_calls is present
{ role: "assistant", content?: "" | null, tool_calls?: [{ id: "", function: { name: "", arguments: "" }, type: "function" }] },
{ role: "tool", tool_call_id: "", content: ""}
*/
if (anthropicMessage.role === "user") {
const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
toolMessages: Anthropic.ToolResultBlockParam[]
}>(
(acc, part) => {
if (part.type === "tool_result") {
acc.toolMessages.push(part)
} else if (part.type === "text" || part.type === "image") {
acc.nonToolMessages.push(part)
} // user cannot send tool_use messages
return acc
},
{ nonToolMessages: [], toolMessages: [] }
)
// Process tool result messages FIRST since they must follow the tool use messages
let toolResultImages: Anthropic.Messages.ImageBlockParam[] = []
toolMessages.forEach((toolMessage) => {
// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
let content: string
if (typeof toolMessage.content === "string") {
content = toolMessage.content
} else {
content =
toolMessage.content
?.map((part) => {
if (part.type === "image") {
toolResultImages.push(part)
return "(see following user message for image)"
}
return part.text
})
.join("\n") ?? ""
}
openAiMessages.push({
role: "tool",
tool_call_id: toolMessage.tool_use_id,
content: content,
})
})
// If tool results contain images, send as a separate user message
// I ran into an issue where if I gave feedback for one of many tool uses, the request would fail.
// "Messages following `tool_use` blocks must begin with a matching number of `tool_result` blocks."
// Therefore we need to send these images after the tool result messages
// NOTE: it's actually okay to have multiple user messages in a row; the model treats them as a continuation of the same input. This works better than combining them into one message, since the tool result specifically says "(see following user message for image)".
if (toolResultImages.length > 0) {
openAiMessages.push({
role: "user",
content: toolResultImages.map((part) => ({
type: "image_url",
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
})),
})
}
// Process non-tool messages
if (nonToolMessages.length > 0) {
openAiMessages.push({
role: "user",
content: nonToolMessages.map((part) => {
if (part.type === "image") {
return {
type: "image_url",
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
}
}
return { type: "text", text: part.text }
}),
})
}
} else if (anthropicMessage.role === "assistant") {
const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
toolMessages: Anthropic.ToolUseBlockParam[]
}>(
(acc, part) => {
if (part.type === "tool_use") {
acc.toolMessages.push(part)
} else if (part.type === "text" || part.type === "image") {
acc.nonToolMessages.push(part)
} // assistant cannot send tool_result messages
return acc
},
{ nonToolMessages: [], toolMessages: [] }
)
// Process non-tool messages
let content: string | undefined
if (nonToolMessages.length > 0) {
content = nonToolMessages
.map((part) => {
if (part.type === "image") {
return "" // impossible as the assistant cannot send images
}
return part.text
})
.join("\n")
}
// Process tool use messages
let tool_calls: OpenAI.Chat.ChatCompletionMessageToolCall[] = toolMessages.map((toolMessage) => ({
id: toolMessage.id,
type: "function",
function: {
name: toolMessage.name,
// json string
arguments: JSON.stringify(toolMessage.input),
},
}))
openAiMessages.push({
role: "assistant",
content,
// Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty
tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
})
}
}
}
return openAiMessages
}
// Convert OpenAI response to Anthropic format
export function convertToAnthropicMessage(
completion: OpenAI.Chat.Completions.ChatCompletion
): Anthropic.Messages.Message {
const openAiMessage = completion.choices[0].message
const anthropicMessage: Anthropic.Messages.Message = {
id: completion.id,
type: "message",
role: openAiMessage.role, // always "assistant"
content: [
{
type: "text",
text: openAiMessage.content || "",
},
],
model: completion.model,
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
output_tokens: completion.usage?.completion_tokens || 0,
},
}
if (openAiMessage.tool_calls && openAiMessage.tool_calls.length > 0) {
anthropicMessage.content.push(
...openAiMessage.tool_calls.map((toolCall): Anthropic.ToolUseBlock => {
let parsedInput = {}
try {
parsedInput = JSON.parse(toolCall.function.arguments || "{}")
} catch (error) {
console.error("Failed to parse tool arguments:", error)
}
return {
type: "tool_use",
id: toolCall.id,
name: toolCall.function.name,
input: parsedInput,
}
})
)
}
return anthropicMessage
}
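For orientation, a minimal sketch (not part of this commit) of the round trip a handler typically performs with these two converters; the function name and model id are placeholders:

// Illustrative sketch only — not part of this commit.
import OpenAI from "openai"
import { Anthropic } from "@anthropic-ai/sdk"
import { convertToAnthropicMessage, convertToOpenAiMessages } from "./transform/openai-format"

async function openAiExample(
	client: OpenAI,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	tools: OpenAI.Chat.ChatCompletionTool[]
): Promise<Anthropic.Messages.Message> {
	// Anthropic-shaped history -> OpenAI messages (tool_use/tool_result become tool_calls and
	// tool-role messages), then the completion is mapped back into an Anthropic Message.
	const requestMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
		{ role: "system", content: systemPrompt },
		...convertToOpenAiMessages(messages),
	]
	const completion = await client.chat.completions.create({
		model: "gpt-4o", // placeholder model id
		messages: requestMessages,
		tools,
	})
	return convertToAnthropicMessage(completion)
}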