Remove sharp processing and use original images in requests

This commit is contained in:
Saoud Rizwan
2024-08-08 07:15:14 -04:00
parent 77cbbbfe49
commit b6a8c03317
9 changed files with 49 additions and 606 deletions

View File

@@ -311,10 +311,15 @@ export class ClaudeDev {
/**
 * Converts data-URL image strings into Anthropic image blocks.
 *
 * Each entry is expected to be a data URL of the form
 * "data:image/png;base64,<base64string>"; the MIME type is parsed out of the
 * header so the original image format is preserved (no webp re-encoding).
 *
 * @param images - optional list of base64 data URLs; undefined yields [].
 * @returns one ImageBlockParam per input image.
 */
private formatImagesIntoBlocks(images?: string[]): Anthropic.ImageBlockParam[] {
	return images
		? images.map((dataUrl) => {
				// dataUrl format: "data:image/png;base64,<base64string>"
				const [header, base64] = dataUrl.split(",")
				// "data:image/png;base64" -> "image/png"
				const mimeType = header.split(":")[1].split(";")[0]
				return {
					type: "image",
					source: { type: "base64", media_type: mimeType, data: base64 },
				} as Anthropic.ImageBlockParam
		  })
		: []
}

View File

@@ -149,7 +149,7 @@ export class OpenRouterHandler implements ApiHandler {
if (part.type === "image") {
return {
type: "image_url",
image_url: { url: "data:image/webp;base64," + part.source.data },
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
}
}
return { type: "text", text: part.text }
@@ -161,7 +161,7 @@ export class OpenRouterHandler implements ApiHandler {
toolMessages.forEach((toolMessage) => {
// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
let content: string
let images: string[] = []
let images: Anthropic.Messages.ImageBlockParam[] = []
if (typeof toolMessage.content === "string") {
content = toolMessage.content
} else {
@@ -169,7 +169,7 @@ export class OpenRouterHandler implements ApiHandler {
toolMessage.content
?.map((part) => {
if (part.type === "image") {
images.push(part.source.data)
images.push(part)
return "(see following user message for image)"
}
return part.text
@@ -185,9 +185,9 @@ export class OpenRouterHandler implements ApiHandler {
if (images.length > 0) {
openAiMessages.push({
role: "user",
content: images.map((image) => ({
content: images.map((part) => ({
type: "image_url",
image_url: { url: "data:image/webp;base64," + image },
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
})),
})
}

View File

@@ -1,14 +1,11 @@
import { Anthropic } from "@anthropic-ai/sdk"
import os from "os"
import * as path from "path"
import * as vscode from "vscode"
import { Uri, Webview } from "vscode"
import { ClaudeDev } from "../ClaudeDev"
import { ApiProvider } from "../shared/api"
import { ExtensionMessage } from "../shared/ExtensionMessage"
import { WebviewMessage } from "../shared/WebviewMessage"
import { processPastedImages, selectAndProcessImages } from "../utils/process-images"
import { downloadTask } from "../utils/export-markdown"
import { selectImages } from "../utils/process-images"
/*
https://github.com/microsoft/vscode-webview-ui-toolkit-samples/blob/main/default/weather-webview/src/providers/WeatherViewProvider.ts
@@ -301,16 +298,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
downloadTask(this.claudeDev?.apiConversationHistory ?? [])
break
case "selectImages":
const images = await selectAndProcessImages()
const images = await selectImages()
await this.postMessageToWebview({ type: "selectedImages", images })
break
case "processPastedImages":
const pastedImages = message.images ?? []
if (pastedImages.length > 0) {
const processedImages = await processPastedImages(pastedImages)
await this.postMessageToWebview({ type: "selectedImages", images: processedImages })
}
break
// Add more switch case statements here as more webview message commands
// are created within the webview context (i.e. inside media/main.js)

View File

@@ -11,7 +11,6 @@ export interface WebviewMessage {
| "didShowAnnouncement"
| "downloadTask"
| "selectImages"
| "processPastedImages"
text?: string
askResponse?: ClaudeAskResponse
apiConfiguration?: ApiConfiguration

View File

@@ -1,13 +1,13 @@
import * as vscode from "vscode"
import fs from "fs/promises"
import sharp from "sharp"
import * as path from "path"
export async function selectAndProcessImages(): Promise<string[]> {
export async function selectImages(): Promise<string[]> {
const options: vscode.OpenDialogOptions = {
canSelectMany: true,
openLabel: "Select",
filters: {
Images: ["png", "jpg", "jpeg", "gif", "webp", "tiff", "avif", "svg"], // sharp can convert these to webp which both anthropic and openrouter support
Images: ["png", "jpg", "jpeg", "webp"], // supported by anthropic and openrouter
},
}
@@ -20,45 +20,26 @@ export async function selectAndProcessImages(): Promise<string[]> {
return await Promise.all(
fileUris.map(async (uri) => {
const imagePath = uri.fsPath
const originalBuffer = await fs.readFile(imagePath)
return convertToWebpBase64(originalBuffer)
const buffer = await fs.readFile(imagePath)
const base64 = buffer.toString("base64")
const mimeType = getMimeType(imagePath)
const dataUrl = `data:${mimeType};base64,${base64}`
return dataUrl
})
)
}
/**
 * Re-encodes a batch of pasted images to webp.
 *
 * @param base64Strings - raw base64 payloads (no data-URL prefix).
 * @returns webp-encoded base64 strings, in the same order as the input.
 */
export async function processPastedImages(base64Strings: string[]): Promise<string[]> {
	// Kick off every conversion up front so they run concurrently.
	const conversions = base64Strings.map((raw) => convertToWebpBase64(Buffer.from(raw, "base64")))
	return Promise.all(conversions)
}
async function convertToWebpBase64(buffer: Buffer): Promise<string> {
const processedBuffer = await sharp(buffer)
/*
Anthropic docs recommendations:
- To improve time-to-first-token resize images to no more than 1.15 megapixels (and within 1568 pixels in both dimensions)
- WebP is a newer image format that's more efficient than PNG and JPEG, so ideal for keeping token usage low. (ive seen the following compression decrease size by 10x)
*/
.resize(1568, 1568, {
fit: "inside", // maintain aspect ratio
withoutEnlargement: true, // don't enlarge smaller images
})
.webp({
// NOTE: consider increasing effort from 4 to 6 (max), this may increase processing time by up to ~500ms
quality: 80,
})
.toBuffer()
const base64 = processedBuffer.toString("base64")
// console.log({
// originalSize: buffer.length,
// processedSize: processedBuffer.length,
// base64,
// })
return base64
/**
 * Resolves an image file's MIME type from its extension.
 *
 * @param filePath - path whose extension determines the type (case-insensitive).
 * @returns the MIME type string, e.g. "image/png".
 * @throws Error when the extension is not a supported image format.
 */
function getMimeType(filePath: string): string {
	// Supported extensions mapped to their MIME types.
	const mimeTypes: Record<string, string> = {
		".png": "image/png",
		".jpeg": "image/jpeg",
		".jpg": "image/jpeg",
		".webp": "image/webp",
	}
	const ext = path.extname(filePath).toLowerCase()
	const mimeType = mimeTypes[ext]
	if (mimeType === undefined) {
		throw new Error(`Unsupported file type: ${ext}`)
	}
	return mimeType
}