mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-21 21:01:06 -05:00
Remove sharp processing and use original images in requests
This commit is contained in:
@@ -311,10 +311,15 @@ export class ClaudeDev {
|
||||
|
||||
/**
 * Turns the optional list of image strings into Anthropic image blocks.
 * Returns an empty array when `images` is undefined.
 *
 * NOTE(review): this span comes from a rendered diff and appears to contain
 * both sides of the hunk: the first `images.map` hard-codes `image/webp` and
 * treats each entry as a bare base64 payload, the second parses each entry as
 * a data URL and reuses its MIME type. Presumably only the second is current —
 * confirm against the real file.
 */
private formatImagesIntoBlocks(images?: string[]): Anthropic.ImageBlockParam[] {
|
||||
return images
|
||||
// Variant 1: every entry is sent as base64 with a fixed image/webp media type.
? images.map((base64) => ({
|
||||
type: "image",
|
||||
source: { type: "base64", media_type: "image/webp", data: base64 },
|
||||
}))
|
||||
// Variant 2: every entry is a data URL whose own MIME type is forwarded.
? images.map((dataUrl) => {
|
||||
// Expected form: data:<mimeType>;base64,<payload>
|
||||
// Split at the comma: `rest` is the "data:<mimeType>;base64" prefix, `base64` the payload.
const [rest, base64] = dataUrl.split(",")
|
||||
// Take what follows ":" and precedes ";" in the prefix, e.g. "image/png".
// NOTE(review): throws a TypeError if the string contains no ":" — inputs are not validated here.
const mimeType = rest.split(":")[1].split(";")[0]
|
||||
return {
|
||||
type: "image",
|
||||
source: { type: "base64", media_type: mimeType, data: base64 },
|
||||
// The cast is used because `mimeType` is a plain string at this point.
} as Anthropic.ImageBlockParam
|
||||
})
|
||||
: []
|
||||
}
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ export class OpenRouterHandler implements ApiHandler {
|
||||
if (part.type === "image") {
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: { url: "data:image/webp;base64," + part.source.data },
|
||||
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
|
||||
}
|
||||
}
|
||||
return { type: "text", text: part.text }
|
||||
@@ -161,7 +161,7 @@ export class OpenRouterHandler implements ApiHandler {
|
||||
toolMessages.forEach((toolMessage) => {
|
||||
// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
|
||||
let content: string
|
||||
let images: string[] = []
|
||||
let images: Anthropic.Messages.ImageBlockParam[] = []
|
||||
if (typeof toolMessage.content === "string") {
|
||||
content = toolMessage.content
|
||||
} else {
|
||||
@@ -169,7 +169,7 @@ export class OpenRouterHandler implements ApiHandler {
|
||||
toolMessage.content
|
||||
?.map((part) => {
|
||||
if (part.type === "image") {
|
||||
images.push(part.source.data)
|
||||
images.push(part)
|
||||
return "(see following user message for image)"
|
||||
}
|
||||
return part.text
|
||||
@@ -185,9 +185,9 @@ export class OpenRouterHandler implements ApiHandler {
|
||||
if (images.length > 0) {
|
||||
openAiMessages.push({
|
||||
role: "user",
|
||||
content: images.map((image) => ({
|
||||
content: images.map((part) => ({
|
||||
type: "image_url",
|
||||
image_url: { url: "data:image/webp;base64," + image },
|
||||
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
import { Anthropic } from "@anthropic-ai/sdk"
|
||||
import os from "os"
|
||||
import * as path from "path"
|
||||
import * as vscode from "vscode"
|
||||
import { Uri, Webview } from "vscode"
|
||||
import { ClaudeDev } from "../ClaudeDev"
|
||||
import { ApiProvider } from "../shared/api"
|
||||
import { ExtensionMessage } from "../shared/ExtensionMessage"
|
||||
import { WebviewMessage } from "../shared/WebviewMessage"
|
||||
import { processPastedImages, selectAndProcessImages } from "../utils/process-images"
|
||||
import { downloadTask } from "../utils/export-markdown"
|
||||
import { selectImages } from "../utils/process-images"
|
||||
|
||||
/*
|
||||
https://github.com/microsoft/vscode-webview-ui-toolkit-samples/blob/main/default/weather-webview/src/providers/WeatherViewProvider.ts
|
||||
@@ -301,16 +298,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
|
||||
downloadTask(this.claudeDev?.apiConversationHistory ?? [])
|
||||
break
|
||||
case "selectImages":
|
||||
const images = await selectAndProcessImages()
|
||||
const images = await selectImages()
|
||||
await this.postMessageToWebview({ type: "selectedImages", images })
|
||||
break
|
||||
case "processPastedImages":
|
||||
const pastedImages = message.images ?? []
|
||||
if (pastedImages.length > 0) {
|
||||
const processedImages = await processPastedImages(pastedImages)
|
||||
await this.postMessageToWebview({ type: "selectedImages", images: processedImages })
|
||||
}
|
||||
|
||||
break
|
||||
// Add more switch case statements here as more webview message commands
|
||||
// are created within the webview context (i.e. inside media/main.js)
|
||||
|
||||
@@ -11,7 +11,6 @@ export interface WebviewMessage {
|
||||
| "didShowAnnouncement"
|
||||
| "downloadTask"
|
||||
| "selectImages"
|
||||
| "processPastedImages"
|
||||
text?: string
|
||||
askResponse?: ClaudeAskResponse
|
||||
apiConfiguration?: ApiConfiguration
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import * as vscode from "vscode"
|
||||
import fs from "fs/promises"
|
||||
import sharp from "sharp"
|
||||
import * as path from "path"
|
||||
|
||||
export async function selectAndProcessImages(): Promise<string[]> {
|
||||
export async function selectImages(): Promise<string[]> {
|
||||
const options: vscode.OpenDialogOptions = {
|
||||
canSelectMany: true,
|
||||
openLabel: "Select",
|
||||
filters: {
|
||||
Images: ["png", "jpg", "jpeg", "gif", "webp", "tiff", "avif", "svg"], // sharp can convert these to webp which both anthropic and openrouter support
|
||||
Images: ["png", "jpg", "jpeg", "webp"], // supported by anthropic and openrouter
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,45 +20,26 @@ export async function selectAndProcessImages(): Promise<string[]> {
|
||||
return await Promise.all(
|
||||
fileUris.map(async (uri) => {
|
||||
const imagePath = uri.fsPath
|
||||
const originalBuffer = await fs.readFile(imagePath)
|
||||
return convertToWebpBase64(originalBuffer)
|
||||
const buffer = await fs.readFile(imagePath)
|
||||
const base64 = buffer.toString("base64")
|
||||
const mimeType = getMimeType(imagePath)
|
||||
const dataUrl = `data:${mimeType};base64,${base64}`
|
||||
return dataUrl
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
export async function processPastedImages(base64Strings: string[]): Promise<string[]> {
|
||||
return await Promise.all(
|
||||
base64Strings.map(async (base64) => {
|
||||
const buffer = Buffer.from(base64, "base64")
|
||||
return convertToWebpBase64(buffer)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
async function convertToWebpBase64(buffer: Buffer): Promise<string> {
|
||||
const processedBuffer = await sharp(buffer)
|
||||
/*
|
||||
Anthropic docs recommendations:
|
||||
- To improve time-to-first-token resize images to no more than 1.15 megapixels (and within 1568 pixels in both dimensions)
|
||||
- WebP is a newer image format that's more efficient than PNG and JPEG, so ideal for keeping token usage low. (ive seen the following compression decrease size by 10x)
|
||||
*/
|
||||
.resize(1568, 1568, {
|
||||
fit: "inside", // maintain aspect ratio
|
||||
withoutEnlargement: true, // don't enlarge smaller images
|
||||
})
|
||||
.webp({
|
||||
// NOTE: consider increasing effort from 4 to 6 (max), this may increase processing time by up to ~500ms
|
||||
quality: 80,
|
||||
})
|
||||
.toBuffer()
|
||||
|
||||
const base64 = processedBuffer.toString("base64")
|
||||
|
||||
// console.log({
|
||||
// originalSize: buffer.length,
|
||||
// processedSize: processedBuffer.length,
|
||||
// base64,
|
||||
// })
|
||||
|
||||
return base64
|
||||
/**
 * Maps a file path to an image MIME type based on its extension.
 *
 * @param filePath - Path whose extension decides the result; matching is case-insensitive.
 * @returns "image/png", "image/jpeg" or "image/webp".
 * @throws Error when the extension is anything other than .png, .jpeg, .jpg or .webp.
 */
function getMimeType(filePath: string): string {
|
||||
// Lower-cased so that e.g. ".PNG" and ".png" are treated alike.
const ext = path.extname(filePath).toLowerCase()
|
||||
switch (ext) {
|
||||
case ".png":
|
||||
return "image/png"
|
||||
// .jpeg falls through to .jpg: both map to image/jpeg.
case ".jpeg":
|
||||
case ".jpg":
|
||||
return "image/jpeg"
|
||||
case ".webp":
|
||||
return "image/webp"
|
||||
default:
|
||||
// Any other extension (including none) is rejected rather than guessed.
throw new Error(`Unsupported file type: ${ext}`)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user