Add ability to attach images to messages

Saoud Rizwan
2024-08-08 02:44:51 -04:00
parent 9acae31fbb
commit 911dd159cd
16 changed files with 1129 additions and 179 deletions

View File

@@ -56,6 +56,7 @@ RULES
- NEVER end attempt_completion with a question or request to engage in further conversation! Formulate the end of your result in a way that is final and does not require further input from the user.
- NEVER start your responses with affirmations like "Certainly", "Okay", "Sure", "Great", etc. You should NOT be conversational in your responses, but rather direct and to the point.
- Feel free to use markdown as much as you'd like in your responses. When using code blocks, always include a language specifier.
- When presented with images, utilize your vision capabilities to thoroughly examine them and extract meaningful information. Incorporate these insights into your thought process as you accomplish the user's task.
====
@@ -229,6 +230,8 @@ const tools: Tool[] = [
},
]
type ToolResponse = string | Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam>
export class ClaudeDev {
private api: ApiHandler
private maxRequestsPerTask: number
@@ -237,6 +240,7 @@ export class ClaudeDev {
claudeMessages: ClaudeMessage[] = []
private askResponse?: ClaudeAskResponse
private askResponseText?: string
private askResponseImages?: string[]
private lastMessageTs?: number
private providerRef: WeakRef<ClaudeDevProvider>
abort: boolean = false
@@ -245,13 +249,14 @@ export class ClaudeDev {
provider: ClaudeDevProvider,
task: string,
apiConfiguration: ApiConfiguration,
maxRequestsPerTask?: number
maxRequestsPerTask?: number,
images?: string[]
) {
this.providerRef = new WeakRef(provider)
this.api = buildApiHandler(apiConfiguration)
this.maxRequestsPerTask = maxRequestsPerTask ?? DEFAULT_MAX_REQUESTS_PER_TASK
this.startTask(task)
this.startTask(task, images)
}
updateApi(apiConfiguration: ApiConfiguration) {
@@ -262,18 +267,23 @@ export class ClaudeDev {
this.maxRequestsPerTask = maxRequestsPerTask ?? DEFAULT_MAX_REQUESTS_PER_TASK
}
async handleWebviewAskResponse(askResponse: ClaudeAskResponse, text?: string) {
async handleWebviewAskResponse(askResponse: ClaudeAskResponse, text?: string, images?: string[]) {
this.askResponse = askResponse
this.askResponseText = text
this.askResponseImages = images
}
async ask(type: ClaudeAsk, question: string): Promise<{ response: ClaudeAskResponse; text?: string }> {
async ask(
type: ClaudeAsk,
question: string
): Promise<{ response: ClaudeAskResponse; text?: string; images?: string[] }> {
// If this ClaudeDev instance was aborted by the provider, then the only thing keeping us alive is a promise still running in the background, in which case we don't want to send its result to the webview as it is attached to a new instance of ClaudeDev now. So we can safely ignore the result of any active promises, and this class will be deallocated. (Although we set claudeDev = undefined in provider, that simply removes the reference to this instance, but the instance is still alive until this promise resolves or rejects.)
if (this.abort) {
throw new Error("ClaudeDev instance aborted")
}
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
const askTs = Date.now()
this.lastMessageTs = askTs
this.claudeMessages.push({ ts: askTs, type: "ask", ask: type, text: question })
@@ -282,23 +292,44 @@ export class ClaudeDev {
if (this.lastMessageTs !== askTs) {
throw new Error("Current ask promise was ignored") // could happen if we send multiple asks in a row i.e. with command_output. It's important that when we know an ask could fail, it is handled gracefully
}
const result = { response: this.askResponse!, text: this.askResponseText }
const result = { response: this.askResponse!, text: this.askResponseText, images: this.askResponseImages }
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
return result
}
async say(type: ClaudeSay, text?: string): Promise<undefined> {
async say(type: ClaudeSay, text?: string, images?: string[]): Promise<undefined> {
if (this.abort) {
throw new Error("ClaudeDev instance aborted")
}
const sayTs = Date.now()
this.lastMessageTs = sayTs
this.claudeMessages.push({ ts: sayTs, type: "say", say: type, text: text })
this.claudeMessages.push({ ts: sayTs, type: "say", say: type, text: text, images })
await this.providerRef.deref()?.postStateToWebview()
}
private async startTask(task: string): Promise<void> {
private formatImagesIntoBlocks(images?: string[]): Anthropic.ImageBlockParam[] {
return images
? images.map((base64) => ({
type: "image",
source: { type: "base64", media_type: "image/webp", data: base64 },
}))
: []
}
private formatIntoToolResponse(text?: string, images?: string[]): ToolResponse {
if (images && images.length > 0) {
const textBlock: Anthropic.TextBlockParam = { type: "text", text: text ?? "" }
const imageBlocks: Anthropic.ImageBlockParam[] = this.formatImagesIntoBlocks(images)
// "Just as with document-query placement, Claude works best when images come before text. Images placed after text or interpolated with text will still perform well, but if your use case allows it, we recommend an image-then-text structure."
return [...imageBlocks, textBlock]
} else {
return text ?? ""
}
}
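
A minimal usage sketch, not part of this commit's diff (the feedback text and base64 payload are made up), showing what formatIntoToolResponse returns when a denied operation carries user feedback plus one attached image: image blocks first, then the text block, per the Anthropic guidance quoted in the code above.

import { Anthropic } from "@anthropic-ai/sdk"

type ToolResponse = string | Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam>

// hypothetical inputs
const feedback = 'The user denied this operation and provided the following feedback:\n"Use tabs, not spaces."'
const images = ["UklGRl4AAABXRUJQ..."] // base64 webp data, no data: URL prefix

// equivalent to this.formatIntoToolResponse(feedback, images)
const toolResponse: ToolResponse = [
    ...images.map((data): Anthropic.ImageBlockParam => ({
        type: "image",
        source: { type: "base64", media_type: "image/webp", data },
    })),
    { type: "text", text: feedback },
]
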
private async startTask(task: string, images?: string[]): Promise<void> {
// conversationHistory (for API) and claudeMessages (for webview) need to be in sync
// if the extension process were killed, then on restart the claudeMessages might not be empty, so we need to set it to [] when we create a new ClaudeDev client (otherwise webview would show stale messages from previous session)
this.claudeMessages = []
@@ -306,19 +337,22 @@ export class ClaudeDev {
await this.providerRef.deref()?.postStateToWebview()
// This first message kicks off a task, it is not included in every subsequent message.
let userPrompt = `Task: \"${task}\"`
let textBlock: Anthropic.TextBlockParam = { type: "text", text: `Task: \"${task}\"` }
let imageBlocks: Anthropic.ImageBlockParam[] = this.formatImagesIntoBlocks(images)
// TODO: create tools that let Claude interact with VSCode (e.g. open a file, list open files, etc.)
//const openFiles = vscode.window.visibleTextEditors?.map((editor) => editor.document.uri.fsPath).join("\n")
await this.say("text", task)
await this.say("text", task, images)
let totalInputTokens = 0
let totalOutputTokens = 0
while (this.requestCount < this.maxRequestsPerTask) {
const { didEndLoop, inputTokens, outputTokens } = await this.recursivelyMakeClaudeRequests([
{ type: "text", text: userPrompt },
...imageBlocks,
textBlock,
])
totalInputTokens += inputTokens
totalOutputTokens += outputTokens
@@ -328,6 +362,7 @@ export class ClaudeDev {
//const totalCost = this.calculateApiCost(totalInputTokens, totalOutputTokens)
if (didEndLoop) {
// for now this never happens
//this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`)
break
} else {
@@ -335,13 +370,16 @@ export class ClaudeDev {
// "tool",
// "Claude responded with only text blocks but has not called attempt_completion yet. Forcing him to continue with task..."
// )
userPrompt =
"Ask yourself if you have completed the user's task. If you have, use the attempt_completion tool, otherwise proceed to the next step. (This is an automated message, so do not respond to it conversationally. Just proceed with the task.)"
textBlock = {
type: "text",
text: "Ask yourself if you have completed the user's task. If you have, use the attempt_completion tool, otherwise proceed to the next step. (This is an automated message, so do not respond to it conversationally. Just proceed with the task.)",
}
imageBlocks = []
}
}
}
async executeTool(toolName: ToolName, toolInput: any, isLastWriteToFile: boolean = false): Promise<string> {
async executeTool(toolName: ToolName, toolInput: any, isLastWriteToFile: boolean = false): Promise<ToolResponse> {
switch (toolName) {
case "write_to_file":
return this.writeToFile(toolInput.path, toolInput.content, isLastWriteToFile)
@@ -374,7 +412,7 @@ export class ClaudeDev {
return totalCost
}
async writeToFile(relPath: string, newContent: string, isLast: boolean): Promise<string> {
async writeToFile(relPath: string, newContent: string, isLast: boolean): Promise<ToolResponse> {
try {
const absolutePath = path.resolve(cwd, relPath)
const fileExists = await fs
@@ -414,7 +452,7 @@ export class ClaudeDev {
`${fileName}: Original ↔ Suggested Changes`
)
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({
tool: "editedExistingFile",
@@ -426,9 +464,12 @@ export class ClaudeDev {
if (isLast) {
await this.closeDiffViews()
}
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -451,7 +492,7 @@ export class ClaudeDev {
}),
`${fileName}: New File`
)
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({
tool: "newFileCreated",
@@ -463,9 +504,12 @@ export class ClaudeDev {
if (isLast) {
await this.closeDiffViews()
}
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -497,18 +541,21 @@ export class ClaudeDev {
}
}
async readFile(relPath: string): Promise<string> {
async readFile(relPath: string): Promise<ToolResponse> {
try {
const absolutePath = path.resolve(cwd, relPath)
const content = await fs.readFile(absolutePath, "utf-8")
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({ tool: "readFile", path: this.getReadablePath(relPath), content } as ClaudeSayTool)
)
if (response !== "yesButtonTapped") {
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -520,12 +567,12 @@ export class ClaudeDev {
}
}
async listFilesTopLevel(relDirPath: string): Promise<string> {
async listFilesTopLevel(relDirPath: string): Promise<ToolResponse> {
try {
const absolutePath = path.resolve(cwd, relDirPath)
const files = await listFiles(absolutePath, false)
const result = this.formatFilesList(absolutePath, files)
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({
tool: "listFilesTopLevel",
@@ -534,9 +581,12 @@ export class ClaudeDev {
} as ClaudeSayTool)
)
if (response !== "yesButtonTapped") {
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -553,12 +603,12 @@ export class ClaudeDev {
}
}
async listFilesRecursive(relDirPath: string): Promise<string> {
async listFilesRecursive(relDirPath: string): Promise<ToolResponse> {
try {
const absolutePath = path.resolve(cwd, relDirPath)
const files = await listFiles(absolutePath, true)
const result = this.formatFilesList(absolutePath, files)
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({
tool: "listFilesRecursive",
@@ -567,9 +617,12 @@ export class ClaudeDev {
} as ClaudeSayTool)
)
if (response !== "yesButtonTapped") {
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -633,11 +686,11 @@ export class ClaudeDev {
}
}
async viewSourceCodeDefinitionsTopLevel(relDirPath: string): Promise<string> {
async viewSourceCodeDefinitionsTopLevel(relDirPath: string): Promise<ToolResponse> {
try {
const absolutePath = path.resolve(cwd, relDirPath)
const result = await parseSourceCodeForDefinitionsTopLevel(absolutePath)
const { response, text } = await this.ask(
const { response, text, images } = await this.ask(
"tool",
JSON.stringify({
tool: "viewSourceCodeDefinitionsTopLevel",
@@ -646,9 +699,12 @@ export class ClaudeDev {
} as ClaudeSayTool)
)
if (response !== "yesButtonTapped") {
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -665,12 +721,15 @@ export class ClaudeDev {
}
}
async executeCommand(command: string, returnEmptyStringOnSuccess: boolean = false): Promise<string> {
const { response, text } = await this.ask("command", command)
async executeCommand(command: string, returnEmptyStringOnSuccess: boolean = false): Promise<ToolResponse> {
const { response, text, images } = await this.ask("command", command)
if (response !== "yesButtonTapped") {
if (response === "textResponse" && text) {
await this.say("user_feedback", text)
return `The user denied this operation and provided the following feedback:\n\"${text}\"`
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
return this.formatIntoToolResponse(
`The user denied this operation and provided the following feedback:\n\"${text}\"`,
images
)
}
return "The user denied this operation."
}
@@ -756,13 +815,13 @@ export class ClaudeDev {
}
}
async askFollowupQuestion(question: string): Promise<string> {
const { text } = await this.ask("followup", question)
await this.say("user_feedback", text ?? "")
return `User's response:\n\"${text}\"`
async askFollowupQuestion(question: string): Promise<ToolResponse> {
const { text, images } = await this.ask("followup", question)
await this.say("user_feedback", text ?? "", images)
return this.formatIntoToolResponse(`User's response:\n\"${text}\"`, images)
}
async attemptCompletion(result: string, command?: string): Promise<string> {
async attemptCompletion(result: string, command?: string): Promise<ToolResponse> {
let resultToSend = result
if (command) {
await this.say("completion_result", resultToSend)
@@ -774,12 +833,15 @@ export class ClaudeDev {
}
resultToSend = ""
}
const { response, text } = await this.ask("completion_result", resultToSend) // this prompts webview to show 'new task' button, and enable text input (which would be the 'text' here)
const { response, text, images } = await this.ask("completion_result", resultToSend) // this prompts webview to show 'new task' button, and enable text input (which would be the 'text' here)
if (response === "yesButtonTapped") {
return ""
return "" // signals to recursive loop to stop (for now this never happens since yesButtonTapped will trigger a new task)
}
await this.say("user_feedback", text ?? "")
return `The user is not pleased with the results. Use the feedback they provided to successfully complete the task, and then attempt completion again.\nUser's feedback:\n\"${text}\"`
await this.say("user_feedback", text ?? "", images)
return this.formatIntoToolResponse(
`The user is not pleased with the results. Use the feedback they provided to successfully complete the task, and then attempt completion again.\nUser's feedback:\n\"${text}\"`,
images
)
}
async attemptApiRequest(): Promise<Anthropic.Messages.Message> {

View File

@@ -1,5 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
import { ApiHandlerOptions } from "../shared/api"
export class AnthropicHandler implements ApiHandler {
@@ -44,7 +44,7 @@ export class AnthropicHandler implements ApiHandler {
model: "claude-3-5-sonnet-20240620",
max_tokens: 8192,
system: "(see SYSTEM_PROMPT in src/ClaudeDev.ts)",
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: { type: "auto" },
}

View File

@@ -1,7 +1,7 @@
import AnthropicBedrock from "@anthropic-ai/bedrock-sdk"
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandlerOptions } from "../shared/api"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
// https://docs.anthropic.com/en/api/claude-on-amazon-bedrock
export class AwsBedrockHandler implements ApiHandler {
@@ -49,7 +49,7 @@ export class AwsBedrockHandler implements ApiHandler {
model: "anthropic.claude-3-5-sonnet-20240620-v1:0",
max_tokens: 4096,
system: "(see SYSTEM_PROMPT in src/ClaudeDev.ts)",
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: { type: "auto" },
}

View File

@@ -34,3 +34,31 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
return new AnthropicHandler(options)
}
}
export function withoutImageData(
userContent: Array<
| Anthropic.TextBlockParam
| Anthropic.ImageBlockParam
| Anthropic.ToolUseBlockParam
| Anthropic.ToolResultBlockParam
>
): Array<
Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam
> {
return userContent.map((part) => {
if (part.type === "image") {
return { ...part, source: { ...part.source, data: "..." } }
} else if (part.type === "tool_result" && typeof part.content !== "string") {
return {
...part,
content: part.content?.map((contentPart) => {
if (contentPart.type === "image") {
return { ...contentPart, source: { ...contentPart.source, data: "..." } }
}
return contentPart
}),
}
}
return part
})
}
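
A small usage sketch with assumed input values (not from the commit): withoutImageData backs the user-readable request previews shown in the handlers above, swapping base64 payloads for "..." so logged requests stay compact while text blocks pass through unchanged. The Anthropic block types come from @anthropic-ai/sdk.

import { Anthropic } from "@anthropic-ai/sdk"

const userContent: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [
    { type: "image", source: { type: "base64", media_type: "image/webp", data: "UklGRl4AAABXRUJQ..." } }, // hypothetical payload
    { type: "text", text: 'Task: "fix the login form"' },
]

const preview = withoutImageData(userContent)
// preview[0].source.data is now "..."; preview[1] is returned as-is
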

View File

@@ -1,6 +1,6 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { ApiHandler } from "."
import { ApiHandler, withoutImageData } from "."
import { ApiHandlerOptions } from "../shared/api"
export class OpenRouterHandler implements ApiHandler {
@@ -118,6 +118,7 @@ export class OpenRouterHandler implements ApiHandler {
openAiMessages.push({ role: anthropicMessage.role, content: anthropicMessage.content })
} else {
// image_url.url is base64 encoded image data
// ensure it contains the content-type of the image: data:image/png;base64,
/*
{ role: "user", content: "" | { type: "text", text: string } | { type: "image_url", image_url: { url: string } } },
// content required unless tool_calls is present
@@ -146,7 +147,10 @@ export class OpenRouterHandler implements ApiHandler {
role: "user",
content: nonToolMessages.map((part) => {
if (part.type === "image") {
return { type: "image_url", image_url: { url: part.source.data } }
return {
type: "image_url",
image_url: { url: "data:image/webp;base64," + part.source.data },
}
}
return { type: "text", text: part.text }
}),
@@ -157,6 +161,7 @@ export class OpenRouterHandler implements ApiHandler {
toolMessages.forEach((toolMessage) => {
// The Anthropic SDK allows tool results to be a string or an array of text and image blocks, enabling rich and structured content. In contrast, the OpenAI SDK only supports tool results as a single string, so we map the Anthropic tool result parts into one concatenated string to maintain compatibility.
let content: string
let images: string[] = []
if (typeof toolMessage.content === "string") {
content = toolMessage.content
} else {
@@ -164,7 +169,8 @@ export class OpenRouterHandler implements ApiHandler {
toolMessage.content
?.map((part) => {
if (part.type === "image") {
return `{ type: "image_url", image_url: { url: ${part.source.data} } }`
images.push(part.source.data)
return "(see following user message for image)"
}
return part.text
})
@@ -175,6 +181,16 @@ export class OpenRouterHandler implements ApiHandler {
tool_call_id: toolMessage.tool_use_id,
content: content,
})
// If tool results contain images, send as a separate user message
if (images.length > 0) {
openAiMessages.push({
role: "user",
content: images.map((image) => ({
type: "image_url",
image_url: { url: "data:image/webp;base64," + image },
})),
})
}
})
} else if (anthropicMessage.role === "assistant") {
const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
@@ -198,7 +214,7 @@ export class OpenRouterHandler implements ApiHandler {
content = nonToolMessages
.map((part) => {
if (part.type === "image") {
return `{ type: "image_url", image_url: { url: ${part.source.data} } }`
return "" // impossible as the assistant cannot send images
}
return part.text
})
@@ -239,7 +255,7 @@ export class OpenRouterHandler implements ApiHandler {
return {
model: "anthropic/claude-3.5-sonnet:beta",
max_tokens: 4096,
messages: [{ conversation_history: "..." }, { role: "user", content: userContent }],
messages: [{ conversation_history: "..." }, { role: "user", content: withoutImageData(userContent) }],
tools: "(see tools in src/ClaudeDev.ts)",
tool_choice: "auto",
}
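
For reference, a minimal sketch of the mapping implemented above (payload made up): an Anthropic base64 image block becomes an OpenAI-style image_url part whose URL is a data URL carrying the webp content type, which is the form forwarded to OpenRouter.

import { Anthropic } from "@anthropic-ai/sdk"

const anthropicImage: Anthropic.ImageBlockParam = {
    type: "image",
    source: { type: "base64", media_type: "image/webp", data: "UklGRl4AAABXRUJQ..." }, // hypothetical payload
}

// OpenAI-style content part; the data: URL prefix supplies the content type the API needs
const openAiImagePart = {
    type: "image_url" as const,
    image_url: { url: "data:image/webp;base64," + anthropicImage.source.data },
}
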

View File

@@ -1,12 +1,14 @@
import { Uri, Webview } from "vscode"
import { Anthropic } from "@anthropic-ai/sdk"
import os from "os"
import * as path from "path"
import * as vscode from "vscode"
import { Uri, Webview } from "vscode"
import { ClaudeDev } from "../ClaudeDev"
import { ApiProvider } from "../shared/api"
import { ExtensionMessage } from "../shared/ExtensionMessage"
import { WebviewMessage } from "../shared/WebviewMessage"
import { processPastedImages, selectAndProcessImages } from "../utils/process-images"
import { downloadTask } from "../utils/export-markdown"
/*
https://github.com/microsoft/vscode-webview-ui-toolkit-samples/blob/main/default/weather-webview/src/providers/WeatherViewProvider.ts
@@ -134,7 +136,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
this.outputChannel.appendLine("Webview view resolved")
}
async initClaudeDevWithTask(task: string) {
async initClaudeDevWithTask(task: string, images?: string[]) {
await this.clearTask() // ensures any existing task is cleared before starting a new one, although this shouldn't be possible since the user must clear the task before starting a new one
const { apiProvider, apiKey, openRouterApiKey, awsAccessKey, awsSecretKey, awsRegion, maxRequestsPerTask } =
await this.getState()
@@ -142,7 +144,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
this,
task,
{ apiProvider, apiKey, openRouterApiKey, awsAccessKey, awsSecretKey, awsRegion },
maxRequestsPerTask
maxRequestsPerTask,
images
)
}
@@ -203,7 +206,8 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
create a content security policy meta tag so that only loading scripts with a nonce is allowed
As your extension grows you will likely want to add custom styles, fonts, and/or images to your webview. If you do, you will need to update the content security policy meta tag to explicitly allow for these resources. E.g.
<meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src ${webview.cspSource}; font-src ${webview.cspSource}; img-src ${webview.cspSource} https:; script-src 'nonce-${nonce}';">
- 'unsafe-inline' is required for styles due to vscode-webview-toolkit's dynamic style injection
- since we pass base64 images to the webview, we need to specify img-src ${webview.cspSource} data:;
in meta tag we add nonce attribute: A cryptographic nonce (only used once) to allow scripts. The server must generate a unique nonce value each time it transmits a policy. It is critical to provide a nonce that cannot be guessed as bypassing a resource's policy is otherwise trivial.
*/
@@ -217,7 +221,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no">
<meta name="theme-color" content="#000000">
<meta http-equiv="Content-Security-Policy" content="default-src 'none'; font-src ${webview.cspSource}; style-src ${webview.cspSource}; script-src 'nonce-${nonce}';">
<meta http-equiv="Content-Security-Policy" content="default-src 'none'; font-src ${webview.cspSource}; style-src ${webview.cspSource} 'unsafe-inline'; img-src ${webview.cspSource} data:; script-src 'nonce-${nonce}';">
<link rel="stylesheet" type="text/css" href="${stylesUri}">
<link href="${codiconsUri}" rel="stylesheet" />
<title>Claude Dev</title>
@@ -253,7 +257,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
// Could also do this in extension .ts
//this.postMessageToWebview({ type: "text", text: `Extension: ${Date.now()}` })
// initializing new instance of ClaudeDev will make sure that any agentically running promises in old instance don't affect our new task. this essentially creates a fresh slate for the new task
await this.initClaudeDevWithTask(message.text!)
await this.initClaudeDevWithTask(message.text!, message.images)
break
case "apiConfiguration":
if (message.apiConfiguration) {
@@ -282,7 +286,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
await this.postStateToWebview()
break
case "askResponse":
this.claudeDev?.handleWebviewAskResponse(message.askResponse!, message.text)
this.claudeDev?.handleWebviewAskResponse(message.askResponse!, message.text, message.images)
break
case "clearTask":
// newTask will start a new task with a given task text, while clear task resets the current session and allows for a new task to be started
@@ -294,7 +298,19 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
await this.postStateToWebview()
break
case "downloadTask":
this.downloadTask()
downloadTask(this.claudeDev?.apiConversationHistory ?? [])
break
case "selectImages":
const images = await selectAndProcessImages()
await this.postMessageToWebview({ type: "selectedImages", images })
break
case "processPastedImages":
const pastedImages = message.images ?? []
if (pastedImages.length > 0) {
const processedImages = await processPastedImages(pastedImages)
await this.postMessageToWebview({ type: "selectedImages", images: processedImages })
}
break
// Add more switch case statements here as more webview message commands
// are created within the webview context (i.e. inside media/main.js)
@@ -305,82 +321,6 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
)
}
async downloadTask() {
// File name
const date = new Date()
const month = date.toLocaleString("en-US", { month: "short" }).toLowerCase()
const day = date.getDate()
const year = date.getFullYear()
let hours = date.getHours()
const minutes = date.getMinutes().toString().padStart(2, "0")
const ampm = hours >= 12 ? "pm" : "am"
hours = hours % 12
hours = hours ? hours : 12 // the hour '0' should be '12'
const fileName = `claude_dev_task_${month}-${day}-${year}_${hours}-${minutes}-${ampm}.md`
// Generate markdown
const conversationHistory = this.claudeDev?.apiConversationHistory || []
const markdownContent = conversationHistory
.map((message) => {
const role = message.role === "user" ? "**User:**" : "**Assistant:**"
const content = Array.isArray(message.content)
? message.content.map(this.formatContentBlockToMarkdown).join("\n")
: message.content
return `${role}\n\n${content}\n\n`
})
.join("---\n\n")
// Prompt user for save location
const saveUri = await vscode.window.showSaveDialog({
filters: { Markdown: ["md"] },
defaultUri: vscode.Uri.file(path.join(os.homedir(), "Downloads", fileName)),
})
if (saveUri) {
// Write content to the selected location
await vscode.workspace.fs.writeFile(saveUri, Buffer.from(markdownContent))
vscode.window.showTextDocument(saveUri, { preview: true })
}
}
private formatContentBlockToMarkdown(
block:
| Anthropic.TextBlockParam
| Anthropic.ImageBlockParam
| Anthropic.ToolUseBlockParam
| Anthropic.ToolResultBlockParam
): string {
switch (block.type) {
case "text":
return block.text
case "image":
return `[Image: ${block.source.media_type}]`
case "tool_use":
let input: string
if (typeof block.input === "object" && block.input !== null) {
input = Object.entries(block.input)
.map(([key, value]) => `${key.charAt(0).toUpperCase() + key.slice(1)}: ${value}`)
.join("\n")
} else {
input = String(block.input)
}
return `[Tool Use: ${block.name}]\n${input}`
case "tool_result":
if (typeof block.content === "string") {
return `[Tool Result${block.is_error ? " (Error)" : ""}]\n${block.content}`
} else if (Array.isArray(block.content)) {
return `[Tool Result${block.is_error ? " (Error)" : ""}]\n${block.content
.map(this.formatContentBlockToMarkdown)
.join("\n")}`
} else {
return `[Tool Result${block.is_error ? " (Error)" : ""}]`
}
default:
return "[Unexpected content type]"
}
}
async postStateToWebview() {
const {
apiProvider,

View File

@@ -4,10 +4,11 @@ import { ApiConfiguration } from "./api"
// webview will hold state
export interface ExtensionMessage {
type: "action" | "state"
type: "action" | "state" | "selectedImages"
text?: string
action?: "plusButtonTapped" | "settingsButtonTapped" | "didBecomeVisible"
state?: ExtensionState
images?: string[]
}
export interface ExtensionState {
@@ -24,6 +25,7 @@ export interface ClaudeMessage {
ask?: ClaudeAsk
say?: ClaudeSay
text?: string
images?: string[]
}
export type ClaudeAsk =

View File

@@ -10,9 +10,12 @@ export interface WebviewMessage {
| "clearTask"
| "didShowAnnouncement"
| "downloadTask"
| "selectImages"
| "processPastedImages"
text?: string
askResponse?: ClaudeAskResponse
apiConfiguration?: ApiConfiguration
images?: string[]
}
export type ClaudeAskResponse = "yesButtonTapped" | "noButtonTapped" | "textResponse"
export type ClaudeAskResponse = "yesButtonTapped" | "noButtonTapped" | "messageResponse"
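
A hypothetical webview-side sketch (the webview React changes are part of this commit but not shown in this excerpt) of how these types are meant to round-trip: the webview posts a selectImages WebviewMessage, and the extension answers with a selectedImages ExtensionMessage carrying the processed base64 strings.

// assumed wiring, not taken from the diff; acquireVsCodeApi is the global VS Code injects into webviews
declare function acquireVsCodeApi(): { postMessage(message: unknown): void }
const vscode = acquireVsCodeApi()

// ask the extension to open the native image picker
vscode.postMessage({ type: "selectImages" } as WebviewMessage)

// receive the processed images back
window.addEventListener("message", (event: MessageEvent) => {
    const message = event.data as ExtensionMessage
    if (message.type === "selectedImages" && message.images) {
        // keep message.images around and attach them to the next "newTask" or "askResponse" message
    }
})
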

View File

@@ -0,0 +1,79 @@
import { Anthropic } from "@anthropic-ai/sdk"
import os from "os"
import * as path from "path"
import * as vscode from "vscode"
export async function downloadTask(conversationHistory: Anthropic.MessageParam[]) {
// File name
const date = new Date()
const month = date.toLocaleString("en-US", { month: "short" }).toLowerCase()
const day = date.getDate()
const year = date.getFullYear()
let hours = date.getHours()
const minutes = date.getMinutes().toString().padStart(2, "0")
const ampm = hours >= 12 ? "pm" : "am"
hours = hours % 12
hours = hours ? hours : 12 // the hour '0' should be '12'
const fileName = `claude_dev_task_${month}-${day}-${year}_${hours}-${minutes}-${ampm}.md`
// Generate markdown
const markdownContent = conversationHistory
.map((message) => {
const role = message.role === "user" ? "**User:**" : "**Assistant:**"
const content = Array.isArray(message.content)
? message.content.map(formatContentBlockToMarkdown).join("\n")
: message.content
return `${role}\n\n${content}\n\n`
})
.join("---\n\n")
// Prompt user for save location
const saveUri = await vscode.window.showSaveDialog({
filters: { Markdown: ["md"] },
defaultUri: vscode.Uri.file(path.join(os.homedir(), "Downloads", fileName)),
})
if (saveUri) {
// Write content to the selected location
await vscode.workspace.fs.writeFile(saveUri, Buffer.from(markdownContent))
vscode.window.showTextDocument(saveUri, { preview: true })
}
}
function formatContentBlockToMarkdown(
block:
| Anthropic.TextBlockParam
| Anthropic.ImageBlockParam
| Anthropic.ToolUseBlockParam
| Anthropic.ToolResultBlockParam
): string {
switch (block.type) {
case "text":
return block.text
case "image":
return `[Image]`
case "tool_use":
let input: string
if (typeof block.input === "object" && block.input !== null) {
input = Object.entries(block.input)
.map(([key, value]) => `${key.charAt(0).toUpperCase() + key.slice(1)}: ${value}`)
.join("\n")
} else {
input = String(block.input)
}
return `[Tool Use: ${block.name}]\n${input}`
case "tool_result":
if (typeof block.content === "string") {
return `[Tool Result${block.is_error ? " (Error)" : ""}]\n${block.content}`
} else if (Array.isArray(block.content)) {
return `[Tool Result${block.is_error ? " (Error)" : ""}]\n${block.content
.map(formatContentBlockToMarkdown)
.join("\n")}`
} else {
return `[Tool Result${block.is_error ? " (Error)" : ""}]`
}
default:
return "[Unexpected content type]"
}
}

View File

@@ -0,0 +1,64 @@
import * as vscode from "vscode"
import fs from "fs/promises"
import sharp from "sharp"
export async function selectAndProcessImages(): Promise<string[]> {
const options: vscode.OpenDialogOptions = {
canSelectMany: true,
openLabel: "Select",
filters: {
Images: ["png", "jpg", "jpeg", "gif", "webp", "tiff", "avif", "svg"], // sharp can convert these to webp which both anthropic and openrouter support
},
}
const fileUris = await vscode.window.showOpenDialog(options)
if (!fileUris || fileUris.length === 0) {
return []
}
return await Promise.all(
fileUris.map(async (uri) => {
const imagePath = uri.fsPath
const originalBuffer = await fs.readFile(imagePath)
return convertToWebpBase64(originalBuffer)
})
)
}
export async function processPastedImages(base64Strings: string[]): Promise<string[]> {
return await Promise.all(
base64Strings.map(async (base64) => {
const buffer = Buffer.from(base64, "base64")
return convertToWebpBase64(buffer)
})
)
}
async function convertToWebpBase64(buffer: Buffer): Promise<string> {
const processedBuffer = await sharp(buffer)
/*
Anthropic docs recommendations:
- To improve time-to-first-token resize images to no more than 1.15 megapixels (and within 1568 pixels in both dimensions)
- WebP is a newer image format that's more efficient than PNG and JPEG, so ideal for keeping token usage low. (I've seen the following compression decrease size by 10x)
*/
.resize(1568, 1568, {
fit: "inside", // maintain aspect ratio
withoutEnlargement: true, // don't enlarge smaller images
})
.webp({
// NOTE: consider increasing effort from 4 to 6 (max), this may increase processing time by up to ~500ms
quality: 80,
})
.toBuffer()
const base64 = processedBuffer.toString("base64")
// console.log({
// originalSize: buffer.length,
// processedSize: processedBuffer.length,
// base64,
// })
return base64
}
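
A short glue sketch (assumed, not from the diff; the import path is illustrative) tying this utility back to ClaudeDev.formatImagesIntoBlocks: every selected or pasted image is re-encoded to webp here, which is why the Anthropic image blocks can hard-code media_type "image/webp".

import { Anthropic } from "@anthropic-ai/sdk"
import { selectAndProcessImages } from "./process-images" // path assumed

async function buildImageBlocks(): Promise<Anthropic.ImageBlockParam[]> {
    const images = await selectAndProcessImages() // base64 webp strings from the picker above
    return images.map((data): Anthropic.ImageBlockParam => ({
        type: "image",
        source: { type: "base64", media_type: "image/webp", data },
    }))
}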