Initial streaming refactor

This commit is contained in:
Saoud Rizwan
2024-09-26 22:40:18 -04:00
parent e5e890d2eb
commit 1cc3546b7e
11 changed files with 805 additions and 80 deletions

11
package-lock.json generated
View File

@@ -1,18 +1,19 @@
{
"name": "claude-dev",
"version": "1.9.3",
"version": "1.9.7",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "claude-dev",
"version": "1.9.3",
"version": "1.9.7",
"license": "MIT",
"dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.10.2",
"@anthropic-ai/sdk": "^0.26.0",
"@anthropic-ai/vertex-sdk": "^0.4.1",
"@google/generative-ai": "^0.18.0",
"@streamparser/json": "^0.0.21",
"@types/clone-deep": "^4.0.4",
"@types/pdf-parse": "^1.1.4",
"@types/turndown": "^5.0.5",
@@ -4492,6 +4493,12 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz",
"integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ=="
},
"node_modules/@streamparser/json": {
"version": "0.0.21",
"resolved": "https://registry.npmjs.org/@streamparser/json/-/json-0.0.21.tgz",
"integrity": "sha512-v+49JBiG1kmc/9Ug79Lz9wyKaRocBgCnpRaLpdy7p0d3ICKtOAfc/H/Epa1j3F6YdnzjnZKKrnJ8xnh/v1P8Aw==",
"license": "MIT"
},
"node_modules/@tootallnate/quickjs-emscripten": {
"version": "0.23.0",
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",

View File

@@ -149,6 +149,7 @@
"@anthropic-ai/sdk": "^0.26.0",
"@anthropic-ai/vertex-sdk": "^0.4.1",
"@google/generative-ai": "^0.18.0",
"@streamparser/json": "^0.0.21",
"@types/clone-deep": "^4.0.4",
"@types/pdf-parse": "^1.1.4",
"@types/turndown": "^5.0.5",

View File

@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { Stream } from "@anthropic-ai/sdk/streaming"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -14,12 +15,14 @@ export interface ApiHandlerMessageResponse {
userCredits?: number
}
export type AnthropicStream = Stream<Anthropic.Beta.PromptCaching.Messages.RawPromptCachingBetaMessageStreamEvent>
export interface ApiHandler {
createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
tools: Anthropic.Messages.Tool[]
): Promise<ApiHandlerMessageResponse>
): Promise<AnthropicStream>
getModel(): { id: string; info: ModelInfo }
}

View File

@@ -1,5 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandler, ApiHandlerMessageResponse } from "../index"
import { AnthropicStream, ApiHandler, ApiHandlerMessageResponse } from "../index"
import {
anthropicDefaultModelId,
AnthropicModelId,
@@ -24,7 +24,7 @@ export class AnthropicHandler implements ApiHandler {
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
tools: Anthropic.Messages.Tool[]
): Promise<ApiHandlerMessageResponse> {
): Promise<AnthropicStream> {
const modelId = this.getModel().id
switch (modelId) {
case "claude-3-5-sonnet-20240620":
@@ -39,7 +39,7 @@ export class AnthropicHandler implements ApiHandler {
)
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
const message = await this.client.beta.promptCaching.messages.create(
const stream = this.client.beta.promptCaching.messages.create(
{
model: modelId,
max_tokens: this.getModel().info.maxTokens,
@@ -69,6 +69,7 @@ export class AnthropicHandler implements ApiHandler {
}),
tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
tool_choice: { type: "auto" },
stream: true,
},
(() => {
// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
@@ -90,10 +91,14 @@ export class AnthropicHandler implements ApiHandler {
}
})()
)
return { message }
return stream
// throw new Error("Not implemented")
// return { message }
}
default: {
const message = await this.client.messages.create({
const stream = await this.client.messages.create({
model: modelId,
max_tokens: this.getModel().info.maxTokens,
temperature: 0.2,
@@ -101,8 +106,9 @@ export class AnthropicHandler implements ApiHandler {
messages,
tools,
tool_choice: { type: "auto" },
stream: true,
})
return { message }
return stream as AnthropicStream
}
}
}

View File

@@ -1,14 +1,15 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { PromptCachingBetaMessageStream } from "@anthropic-ai/sdk/lib/PromptCachingBetaMessageStream.mjs"
import { JSONParser, TokenType } from "@streamparser/json"
import delay from "delay"
import * as diff from "diff"
import fs from "fs/promises"
import os from "os"
import { SYSTEM_PROMPT } from "./prompts/system"
import pWaitFor from "p-wait-for"
import * as path from "path"
import { serializeError } from "serialize-error"
import * as vscode from "vscode"
import { ApiHandler, buildApiHandler } from "../api"
import { AnthropicStream, ApiHandler, buildApiHandler } from "../api"
import { diagnosticsToProblemsString, getNewDiagnostics } from "../integrations/diagnostics"
import { formatContentBlockToMarkdown } from "../integrations/misc/export-markdown"
import { extractTextFromFile } from "../integrations/misc/extract-text"
@@ -26,12 +27,14 @@ import { getApiMetrics } from "../shared/getApiMetrics"
import { HistoryItem } from "../shared/HistoryItem"
import { ToolName } from "../shared/Tool"
import { ClaudeAskResponse } from "../shared/WebviewMessage"
import { findLast, findLastIndex } from "../utils/array"
import { findLast, findLastIndex } from "../shared/array"
import { arePathsEqual } from "../utils/path"
import { parseMentions } from "./mentions"
import { SYSTEM_PROMPT } from "./prompts/system"
import { TOOLS } from "./prompts/tools"
import { truncateHalfConversation } from "./sliding-window"
import { ClaudeDevProvider } from "./webview/ClaudeDevProvider"
import cloneDeep from "clone-deep"
const cwd =
vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution
@@ -41,6 +44,10 @@ type UserContent = Array<
Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam
>
type AnthropicPartialContentBlock = Anthropic.Messages.ContentBlock & {
partial?: boolean
}
export class ClaudeDev {
readonly taskId: string
private api: ApiHandler
@@ -171,6 +178,7 @@ export class ClaudeDev {
}
private async saveClaudeMessages() {
console.log("Saving claude messages...")
try {
const filePath = path.join(await this.ensureTaskDirectoryExists(), "claude_messages.json")
await fs.writeFile(filePath, JSON.stringify(this.claudeMessages))
@@ -199,21 +207,107 @@ export class ClaudeDev {
}
}
// partial has three valid states true (partial message), false (completion of partial message), undefined (individual complete message)
async ask(
type: ClaudeAsk,
question?: string
text?: string,
partial?: boolean
): Promise<{ response: ClaudeAskResponse; text?: string; images?: string[] }> {
// If this ClaudeDev instance was aborted by the provider, then the only thing keeping us alive is a promise still running in the background, in which case we don't want to send its result to the webview as it is attached to a new instance of ClaudeDev now. So we can safely ignore the result of any active promises, and this class will be deallocated. (Although we set claudeDev = undefined in provider, that simply removes the reference to this instance, but the instance is still alive until this promise resolves or rejects.)
if (this.abort) {
throw new Error("ClaudeDev instance aborted")
}
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
const askTs = Date.now()
this.lastMessageTs = askTs
await this.addToClaudeMessages({ ts: askTs, type: "ask", ask: type, text: question })
await this.providerRef.deref()?.postStateToWebview()
//
let askTs: number
if (partial !== undefined) {
console.log(13)
const lastMessage = this.claudeMessages.at(-1)
const isUpdatingPreviousPartial =
lastMessage && lastMessage.partial && lastMessage.type === "ask" && lastMessage.ask === type
if (partial) {
console.log(14)
if (isUpdatingPreviousPartial) {
// existing partial message, so update it
lastMessage.text = text
lastMessage.partial = partial
// todo be more efficient about saving and posting only new data or one whole message at a time so ignore partial for saves, and only post parts of partial message instead of whole array in new listener
// await this.saveClaudeMessages()
// await this.providerRef.deref()?.postStateToWebview()
await this.providerRef
.deref()
?.postMessageToWebview({ type: "partialMessage", partialMessage: lastMessage })
throw new Error("Current ask promise was ignored")
} else {
console.log(15)
// this is a new partial message, so add it with partial state
// this.askResponse = undefined
// this.askResponseText = undefined
// this.askResponseImages = undefined
// askTs = Date.now()
// this.lastMessageTs = askTs
await this.addToClaudeMessages({ ts: Date.now(), type: "ask", ask: type, text, partial })
await this.providerRef.deref()?.postStateToWebview()
throw new Error("Current ask promise was ignored")
}
} else {
console.log(16)
// partial=false means its a complete version of a previously partial message
if (isUpdatingPreviousPartial) {
console.log(17)
// this is the complete version of a previously partial message, so replace the partial with the complete version
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
askTs = Date.now()
this.lastMessageTs = askTs
lastMessage.ts = askTs
lastMessage.text = text
lastMessage.partial = false
await this.saveClaudeMessages()
await this.providerRef.deref()?.postStateToWebview()
} else {
console.log(18)
// this is a new partial=false message, so add it like normal
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
askTs = Date.now()
this.lastMessageTs = askTs
await this.addToClaudeMessages({ ts: askTs, type: "ask", ask: type, text })
await this.providerRef.deref()?.postStateToWebview()
}
}
} else {
// this is a new non-partial message, so add it like normal
console.log(19)
// const lastMessage = this.claudeMessages.at(-1)
this.askResponse = undefined
this.askResponseText = undefined
this.askResponseImages = undefined
askTs = Date.now()
this.lastMessageTs = askTs
await this.addToClaudeMessages({ ts: askTs, type: "ask", ask: type, text })
await this.providerRef.deref()?.postStateToWebview()
}
//
// if (partial) {
// const lastMessage = this.claudeMessages.at(-1)
// if (lastMessage && lastMessage.type === "ask" && lastMessage.ask === type) {
// lastMessage.text = text
// lastMessage.partial = partial
// // todo be more efficient about saving and posting only new data or one whoe message at atime so ignore partial for saves, and only post parts of partial message instead of whole array in new listener
// await this.saveClaudeMessages()
// await this.providerRef.deref()?.postStateToWebview()
// throw new Error("Current ask promise was ignored")
// }
// }
await pWaitFor(() => this.askResponse !== undefined || this.lastMessageTs !== askTs, { interval: 100 })
if (this.lastMessageTs !== askTs) {
throw new Error("Current ask promise was ignored") // could happen if we send multiple asks in a row i.e. with command_output. It's important that when we know an ask could fail, it is handled gracefully
@@ -225,14 +319,61 @@ export class ClaudeDev {
return result
}
async say(type: ClaudeSay, text?: string, images?: string[]): Promise<undefined> {
async say(type: ClaudeSay, text?: string, images?: string[], partial?: boolean): Promise<undefined> {
if (this.abort) {
throw new Error("ClaudeDev instance aborted")
}
const sayTs = Date.now()
this.lastMessageTs = sayTs
await this.addToClaudeMessages({ ts: sayTs, type: "say", say: type, text: text, images })
await this.providerRef.deref()?.postStateToWebview()
if (partial !== undefined) {
const lastMessage = this.claudeMessages.at(-1)
const isUpdatingPreviousPartial =
lastMessage && lastMessage.partial && lastMessage.type === "say" && lastMessage.say === type
if (partial) {
if (isUpdatingPreviousPartial) {
// existing partial message, so update it
lastMessage.text = text
lastMessage.images = images
lastMessage.partial = partial
// await this.saveClaudeMessages()
// await this.providerRef.deref()?.postStateToWebview()
await this.providerRef
.deref()
?.postMessageToWebview({ type: "partialMessage", partialMessage: lastMessage })
} else {
// this is a new partial message, so add it with partial state
await this.addToClaudeMessages({ ts: Date.now(), type: "say", say: type, text, images, partial })
await this.providerRef.deref()?.postStateToWebview()
}
} else {
// partial=false means its a complete version of a previously partial message
if (isUpdatingPreviousPartial) {
// this is the complete version of a previously partial message, so replace the partial with the complete version
const sayTs = Date.now()
this.lastMessageTs = sayTs
lastMessage.ts = sayTs
lastMessage.text = text
lastMessage.images = images
lastMessage.partial = false
// instead of streaming partialMessage events, we do a save and post like normal to persist to disk
await this.saveClaudeMessages()
await this.providerRef.deref()?.postStateToWebview()
} else {
// this is a new partial=false message, so add it like normal
const sayTs = Date.now()
this.lastMessageTs = sayTs
await this.addToClaudeMessages({ ts: sayTs, type: "say", say: type, text, images })
await this.providerRef.deref()?.postStateToWebview()
}
}
} else {
// this is a new non-partial message, so add it like normal
const sayTs = Date.now()
this.lastMessageTs = sayTs
await this.addToClaudeMessages({ ts: sayTs, type: "say", say: type, text, images })
await this.providerRef.deref()?.postStateToWebview()
}
}
private async startTask(task?: string, images?: string[]): Promise<void> {
@@ -1432,7 +1573,7 @@ export class ClaudeDev {
]
}
async attemptApiRequest(): Promise<Anthropic.Messages.Message> {
async attemptApiRequest(): Promise<AnthropicStream> {
try {
let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
if (this.customInstructions && this.customInstructions.trim()) {
@@ -1467,16 +1608,12 @@ ${this.customInstructions.trim()}
await this.overwriteApiConversationHistory(truncatedMessages)
}
}
const { message, userCredits } = await this.api.createMessage(
const stream = await this.api.createMessage(
systemPrompt,
this.apiConversationHistory,
TOOLS(cwd, this.api.getModel().info.supportsImages)
)
if (userCredits !== undefined) {
console.log("Updating credits", userCredits)
// TODO: update credits
}
return message
return stream
} catch (error) {
const { response } = await this.ask(
"api_req_failed",
@@ -1491,6 +1628,385 @@ ${this.customInstructions.trim()}
}
}
private currentStreamingContentBlockIndex = 0
private assistantContentBlocks: AnthropicPartialContentBlock[] = []
private toolResults: Anthropic.ToolResultBlockParam[] = []
private toolResultsReady = false
private didRejectTool = false
// lock so it doesnt get spammed ie pwatifor?
private isLocked = false
async presentAssistantContent() {
if (this.isLocked) {
console.log("isLocked")
return
}
this.isLocked = true
// when current index finished, then increment and call stream claude content again if contentblocks length has one more.
// otherwise check isStreamingComplete, and set toolResultReady for function to continue
// if length is more than currentstreamingindex, then ignore it since when currentstreaming is finished it will call this func again
// if (this.currentStreamingContentBlockIndex !== this.assistantContentBlocks.length - 1) {
// console.log(10)
// console.log("currentStreamingContentBlockIndex", this.currentStreamingContentBlockIndex)
// console.log("assistantContentBlocks.length", this.assistantContentBlocks.length)
// // new content past the current streaming index, ignore for now
// // this function will be called one last time for a completed block
// return
// }
const block = cloneDeep(this.assistantContentBlocks[this.currentStreamingContentBlockIndex]) // need to create copy bc while stream is updating the array, it could be updating the reference block properties too
switch (block.type) {
case "text":
await this.say("text", block.text, undefined, block.partial)
break
case "tool_use":
const toolName = block.name as ToolName
const toolInput = block.input as any
const toolUseId = block.id
if (this.didRejectTool) {
// ignore any tool content after user has rejected tool once
// we'll fill it in with a rejection message when the message is complete
if (!block.partial) {
this.toolResults.push({
type: "tool_result",
tool_use_id: toolUseId,
content: "Skipping tool execution due to previous tool user rejection.",
})
}
break
}
const askApproval = async (type: ClaudeAsk, partialMessage?: string) => {
const { response, text, images } = await this.ask(type, partialMessage, false)
if (response !== "yesButtonTapped") {
if (response === "messageResponse") {
await this.say("user_feedback", text, images)
// this.toolResults.push()
// const [didUserReject, result] = await this.executeTool(toolName, toolInput)
this.toolResults.push({
type: "tool_result",
tool_use_id: toolUseId,
content: this.formatToolResponseWithImages(
await this.formatToolDeniedFeedback(text),
images
),
})
this.didRejectTool = true
return false
}
this.toolResults.push({
type: "tool_result",
tool_use_id: toolUseId,
content: await this.formatToolDenied(),
})
this.didRejectTool = true
return false
}
return true
}
const handleError = async (action: string, error: Error) => {
const errorString = `Error ${action}: ${JSON.stringify(serializeError(error))}`
await this.say(
"error",
`Error ${action}:\n${error.message ?? JSON.stringify(serializeError(error), null, 2)}`
)
this.toolResults.push({
type: "tool_result",
tool_use_id: toolUseId,
content: await this.formatToolError(errorString),
})
}
const pushToolResult = (
content: string | Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam>
) => {
this.toolResults.push({
type: "tool_result",
tool_use_id: toolUseId,
content,
})
}
switch (toolName) {
case "read_file": {
const relPath: string | undefined = toolInput.path
const sharedMessageProps: ClaudeSayTool = {
tool: "readFile",
path: relPath || "", //this.getReadablePath(relPath || ""),
}
try {
if (block.partial) {
const partialMessage = JSON.stringify({
...sharedMessageProps,
content: undefined,
} satisfies ClaudeSayTool)
if (this.alwaysAllowReadOnly) {
await this.say("tool", partialMessage, undefined, block.partial)
} else {
await this.ask("tool", partialMessage, block.partial).catch(() => {})
}
break
} else {
if (!relPath) {
this.consecutiveMistakeCount++
pushToolResult(await this.sayAndCreateMissingParamError("read_file", "path"))
break
}
this.consecutiveMistakeCount = 0
const absolutePath = path.resolve(cwd, relPath)
const completeMessage = JSON.stringify({
...sharedMessageProps,
content: absolutePath,
} satisfies ClaudeSayTool)
if (this.alwaysAllowReadOnly) {
await this.say("tool", completeMessage, undefined, false) // need to be sending partialValue bool, since undefined has its own purpose in that the message is treated neither as a partial or completion of a partial, but as a single complete message
} else {
const didApprove = await askApproval("tool", completeMessage)
if (!didApprove) {
break
}
}
// now execute the tool like normal
const content = await extractTextFromFile(absolutePath)
pushToolResult(content)
break
}
} catch (error) {
await handleError("reading file", error)
break
}
}
case "list_files": {
const relDirPath: string | undefined = toolInput.path
const recursiveRaw: string | undefined = toolInput.path
const recursive = recursiveRaw?.toLowerCase() === "true"
const sharedMessageProps: ClaudeSayTool = {
tool: !recursive ? "listFilesTopLevel" : "listFilesRecursive",
path: relDirPath || "",
}
try {
if (block.partial) {
const partialMessage = JSON.stringify({
...sharedMessageProps,
content: "",
} satisfies ClaudeSayTool)
if (this.alwaysAllowReadOnly) {
await this.say("tool", partialMessage, undefined, block.partial)
} else {
await this.ask("tool", partialMessage, block.partial).catch(() => {})
}
break
} else {
if (!relDirPath) {
this.consecutiveMistakeCount++
pushToolResult(await this.sayAndCreateMissingParamError("list_files", "path"))
break
}
this.consecutiveMistakeCount = 0
const absolutePath = path.resolve(cwd, relDirPath)
const [files, didHitLimit] = await listFiles(absolutePath, recursive, 200)
const result = this.formatFilesList(absolutePath, files, didHitLimit)
const completeMessage = JSON.stringify({
...sharedMessageProps,
content: result,
} satisfies ClaudeSayTool)
if (this.alwaysAllowReadOnly) {
await this.say("tool", completeMessage, undefined, false)
} else {
const didApprove = await askApproval("tool", completeMessage)
if (!didApprove) {
break
}
}
pushToolResult(await this.formatToolResult(result))
break
}
} catch (error) {
await handleError("reading file", error)
break
}
}
// case "write_to_file":
// return this.writeToFile(toolInput.path, toolInput.content)
// case "list_files":
// return this.listFiles(toolInput.path, toolInput.recursive)
// case "list_code_definition_names":
// return this.listCodeDefinitionNames(toolInput.path)
// case "search_files":
// return this.searchFiles(toolInput.path, toolInput.regex, toolInput.filePattern)
// case "execute_command":
// return this.executeCommand(toolInput.command)
// case "inspect_site":
// return this.inspectSite(toolInput.url)
// case "ask_followup_question":
// return this.askFollowupQuestion(toolInput.question)
// case "attempt_completion":
// return this.attemptCompletion(toolInput.result, toolInput.command)
// default:
// return [false, `Unknown tool: ${toolName}`]
}
break
}
console.log("unlocking")
this.isLocked = false
console.log(4)
if (!block.partial) {
console.log(5)
// content is complete, call next block if it exists (if not then read stream will call it when its ready)
// even if this.didRejectTool, we still need to fill in the tool results with rejection messages
this.currentStreamingContentBlockIndex++ // need to increment regardless, so when read stream calls this functio again it will be streaming the next block
if (this.currentStreamingContentBlockIndex < this.assistantContentBlocks.length) {
console.log(6)
// there are already more content blocks to stream, so we'll call this function ourselves
// await this.presentAssistantContent()
this.presentAssistantContent()
}
}
}
private partialJsonParser: JSONParser | undefined
// object being built incrementally
private partialObject: Record<string, string> = {}
private currentKey = ""
private currentValue = ""
private parsingKey: boolean = false
private parsingValue: boolean = false
updateAssistantContentWithPartialJson(chunkIndex: number, partialJson: string): Promise<void> {
return new Promise((resolve, reject) => {
const timeoutId = setTimeout(() => {
// may happen if json parsing class does call onToken, which *shouldnt* happen if passing in non-empty string
reject(new Error("Parsing JSON took too long (> 5 seconds)"))
}, 5_000)
const cleanupAndResolve = () => {
clearTimeout(timeoutId)
resolve()
}
const cleanupAndReject = (error: Error) => {
clearTimeout(timeoutId)
reject(error)
}
if (!this.partialJsonParser) {
this.partialJsonParser = new JSONParser({ emitPartialTokens: true, emitPartialValues: true })
// this package enforces setting up an onValue listener ("Can't emit data before the "onValue" callback has been set up."), even though we don't need it.
this.partialJsonParser.onValue = () => {
console.log("onValue")
}
this.partialJsonParser.onError = (error) => {
console.error("Error parsing input_json_delta", error)
}
this.partialJsonParser.onEnd = () => {
console.log("onEnd")
}
}
// our json will only ever be string to string maps
// { "key": "value", "key2": "value2" }
// so left brace, string, colon, comma, right brace
// Handle each token emitted by the parser
// need to recreate this listener each time to update the resolve ref
this.partialJsonParser.onToken = async ({ token, value, offset, partial }) => {
console.log("onToken")
try {
switch (token) {
case TokenType.LEFT_BRACE:
// Start of a new JSON object
this.partialObject = {}
this.currentKey = ""
this.parsingKey = false
this.parsingValue = false
break
case TokenType.RIGHT_BRACE:
// End of the current JSON object
this.currentKey = ""
this.currentValue = ""
this.parsingKey = false
this.parsingValue = false
// Finalize the object once parsing is complete
// ;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = this.partialObject
// this.assistantContentBlocks[chunkIndex]!.partial = false
// await this.presentAssistantContent() // NOTE: only set partial = false and call this once, since doing it several times will create duplicate messages.
console.log("Final parsed object:", this.partialObject)
break
case TokenType.STRING:
if (!this.parsingValue && !this.parsingKey) {
// Starting to parse a key
this.currentKey = value as string
this.parsingKey = !!partial // if not partial, we are done parsing key
} else if (this.parsingKey) {
// Continuing to parse a key
this.currentKey = value as string
this.parsingKey = !!partial
} else if (this.parsingValue) {
// Parsing a value
// Accumulate partial value and update the object
this.currentValue = value as string
if (this.currentKey) {
this.partialObject[this.currentKey] = this.currentValue
}
this.parsingValue = !!partial // if not partial, complete value
}
break
case TokenType.COLON:
// After a key and colon, expect a value
if (this.currentKey !== null) {
this.parsingValue = true
}
break
case TokenType.COMMA:
// Reset for the next key-value pair
this.currentKey = ""
this.currentValue = ""
this.parsingKey = false
this.parsingValue = false
break
default:
console.error("Unexpected token:", token)
}
// Debugging logs to trace the parsing process
console.log("Partial object:", this.partialObject)
console.log("Offset:", offset, "isPartialToken:", partial)
// Update the contentBlock with the current state of the partial object
// Use spread operator to ensure a new object reference
;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = {
...this.partialObject,
}
// right brace indicates the end of the json object
this.assistantContentBlocks[chunkIndex]!.partial = token !== TokenType.RIGHT_BRACE
cleanupAndResolve()
} catch (error) {
cleanupAndReject(error)
}
}
try {
this.partialJsonParser.write(partialJson)
} catch (error) {
console.error("Error parsing input_json_delta", error)
cleanupAndReject(error)
}
})
}
async recursivelyMakeClaudeRequests(
userContent: UserContent,
includeFileDetails: boolean = false
@@ -1595,23 +2111,182 @@ ${this.customInstructions.trim()}
await this.providerRef.deref()?.postStateToWebview()
try {
const response = await this.attemptApiRequest()
const stream = await this.attemptApiRequest()
let cacheCreationInputTokens = 0
let cacheReadInputTokens = 0
let inputTokens = 0
let outputTokens = 0
// todo add error listeners so we can return api error? or wil lfor await handle that below?
// let contentBlocks: AnthropicPartialContentBlock[] = []
this.assistantContentBlocks = []
this.currentStreamingContentBlockIndex = 0
for await (const chunk of stream) {
switch (chunk.type) {
case "message_start":
console.log("message_start", chunk.message.content, chunk.message.usage)
// tells us cache reads/writes/input/output
const usage = chunk.message.usage
cacheCreationInputTokens += usage.cache_creation_input_tokens || 0
cacheReadInputTokens += usage.cache_read_input_tokens || 0
inputTokens += usage.input_tokens || 0
outputTokens += usage.output_tokens || 0
break
case "message_delta":
// tells us stop_reason, stop_sequence, and output tokens along the way and at the end of the message
console.log("message_delta", chunk.delta, chunk.usage)
outputTokens += chunk.usage.output_tokens || 0
break
case "message_stop":
// no usage data, just an indicator that the message is done
console.log("message_stop", chunk.type)
break
case "content_block_start":
console.log("content_block_start", chunk.index)
// await delay(4_000)
switch (chunk.content_block.type) {
case "text":
console.log("text", chunk.content_block.text)
this.assistantContentBlocks.push(chunk.content_block)
this.assistantContentBlocks.at(-1)!.partial = true
this.presentAssistantContent()
break
case "tool_use":
console.log(
"tool_use",
chunk.index,
chunk.content_block.id,
chunk.content_block.name,
chunk.content_block.input // input is always object, which will be streamed as partial json in content_block_delta. (this initial 'input' will always be an empty object)
)
this.assistantContentBlocks.push(chunk.content_block)
this.assistantContentBlocks.at(-1)!.partial = true
this.presentAssistantContent()
// Initialize the JSON parser with partial tokens enabled
// partialJsonParser =
}
break
case "content_block_delta":
console.log("content_block_delta", chunk.index)
switch (chunk.delta.type) {
case "text_delta":
console.log("text_delta", chunk.delta.text)
;(this.assistantContentBlocks[chunk.index] as Anthropic.TextBlock).text +=
chunk.delta.text
this.presentAssistantContent()
break
case "input_json_delta":
console.log("input_json_delta", chunk.delta.partial_json)
// try {
// partialJsonParser?.write(chunk.delta.partial_json)
// } catch (error) {
// console.error("Error parsing input_json_delta", error)
// }
try {
// JSONParser will always give us a token unless we pass in an empty/undefined value (in which case the promise would never resolve)
if (chunk.delta.partial_json) {
// need to await this since we dont want to create multiple jsonparsers in case the read stream comes in faster than the jsonparser can parse
await this.updateAssistantContentWithPartialJson(
chunk.index,
chunk.delta.partial_json
)
}
} catch (error) {
// may be due to timeout, in which case we can safely ignore
console.error("Error parsing input_json_delta", error)
}
this.presentAssistantContent()
break
}
break
case "content_block_stop":
if (this.assistantContentBlocks[chunk.index]!.type === "text") {
// we only call this for text block since partialJsonParser handles calling this for tool_use blocks (we only eve want to set partial to false and presentAssistantContent once for each block)
console.log(11)
this.assistantContentBlocks[chunk.index]!.partial = false
this.presentAssistantContent()
}
console.log("content_block_stop", chunk.index)
// instead of calling .end ourselves, the parser will automatically call it when it sees the end of the json object. (Calling this here can result in "Tokenizer ended in the middle of a token (state: ENDED). Either not all the data was received or the data was invalid." since there is a delay between the last chunk.delta.partial_json and the end of the content_block_delta)
// partialJsonParser?.end()
break
}
}
console.log("contentBlocks", this.assistantContentBlocks)
let totalCost: string | undefined
await this.say(
"api_req_finished",
JSON.stringify({
tokensIn: inputTokens,
tokensOut: outputTokens,
cacheWrites: cacheCreationInputTokens,
cacheReads: cacheReadInputTokens,
cost:
totalCost ||
this.calculateApiCost(
inputTokens,
outputTokens,
cacheCreationInputTokens,
cacheReadInputTokens
),
})
)
// now add to apiconversationhistory
// need to save assistant responses to file before proceeding to tool use since user can exit at any moment and we wouldn't be able to save the assistant's response
let didEndLoop = false // do we need this
if (this.assistantContentBlocks.length > 0) {
await this.addToApiConversationHistory({ role: "assistant", content: this.assistantContentBlocks })
await pWaitFor(() => this.toolResultsReady)
const {
didEndLoop: recDidEndLoop,
inputTokens: recInputTokens,
outputTokens: recOutputTokens,
} = await this.recursivelyMakeClaudeRequests(this.toolResults)
didEndLoop = recDidEndLoop
inputTokens += recInputTokens
outputTokens += recOutputTokens
} else {
// this should never happen! it there's no assistant_responses, that means we got no text or tool_use content blocks from API which we should assume is an error
await this.say(
"error",
"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output."
)
await this.addToApiConversationHistory({
role: "assistant",
content: [{ type: "text", text: "Failure: I did not provide a response." }],
})
}
// return { didEndLoop: false, inputTokens: 0, outputTokens: 0 } // fix
throw new Error("ClaudeDev fail")
if (this.abort) {
throw new Error("ClaudeDev instance aborted")
}
let assistantResponses: Anthropic.Messages.ContentBlock[] = []
let inputTokens = response.usage.input_tokens
let outputTokens = response.usage.output_tokens
let cacheCreationInputTokens =
(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
.cache_creation_input_tokens || undefined
let cacheReadInputTokens =
(response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
.cache_read_input_tokens || undefined
// let inputTokens = response.usage.input_tokens
// let outputTokens = response.usage.output_tokens
// let cacheCreationInputTokens =
// (response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
// .cache_creation_input_tokens || undefined
// let cacheReadInputTokens =
// (response as Anthropic.Beta.PromptCaching.Messages.PromptCachingBetaMessage).usage
// .cache_read_input_tokens || undefined
// @ts-ignore-next-line
let totalCost = response.usage.total_cost
// let totalCost = response.usage.total_cost
await this.say(
"api_req_finished",
@@ -1688,7 +2363,7 @@ ${this.customInstructions.trim()}
}
}
let didEndLoop = false
// let didEndLoop = false
// attempt_completion is always done last, since there might have been other tools that needed to be called first before the job is finished
// it's important to note that claude will order the tools logically in most cases, so we don't have to think about which tools make sense calling before others

View File

@@ -4,7 +4,7 @@ import { ClaudeDev } from "../ClaudeDev"
import { ApiProvider } from "../../shared/api"
import { ExtensionMessage } from "../../shared/ExtensionMessage"
import { WebviewMessage } from "../../shared/WebviewMessage"
import { findLast } from "../../utils/array"
import { findLast } from "../../shared/array"
import { getNonce } from "./getNonce"
import { getUri } from "./getUri"
import { selectImages } from "../../integrations/misc/process-images"

View File

@@ -5,7 +5,15 @@ import { HistoryItem } from "./HistoryItem"
// webview will hold state
export interface ExtensionMessage {
type: "action" | "state" | "selectedImages" | "ollamaModels" | "theme" | "workspaceUpdated" | "invoke"
type:
| "action"
| "state"
| "selectedImages"
| "ollamaModels"
| "theme"
| "workspaceUpdated"
| "invoke"
| "partialMessage"
text?: string
action?: "chatButtonTapped" | "settingsButtonTapped" | "historyButtonTapped" | "didBecomeVisible"
invoke?: "sendMessage" | "primaryButtonClick" | "secondaryButtonClick"
@@ -13,6 +21,7 @@ export interface ExtensionMessage {
images?: string[]
models?: string[]
filePaths?: string[]
partialMessage?: ClaudeMessage
}
export interface ExtensionState {
@@ -33,6 +42,7 @@ export interface ClaudeMessage {
say?: ClaudeSay
text?: string
images?: string[]
partial?: boolean
}
export type ClaudeAsk =

View File

@@ -741,10 +741,11 @@ const ProgressIndicator = () => (
const Markdown = memo(({ markdown }: { markdown?: string }) => {
// react-markdown lets us customize elements, so here we're using their example of replacing code blocks with SyntaxHighlighter. However when there are no language matches (` or ``` without a language specifier) then we default to a normal code element for inline code. Code blocks without a language specifier shouldn't be a common occurrence as we prompt Claude to always use a language specifier.
// when claude wraps text in thinking tags, he doesnt use line breaks so we need to insert those ourselves to render markdown correctly
const parsed = markdown?.replace(/<thinking>([\s\S]*?)<\/thinking>/g, (match, content) => {
return content
// return `_<thinking>_\n\n${content}\n\n_</thinking>_`
})
// const parsed = markdown?.replace(/<thinking>([\s\S]*?)<\/thinking>/g, (match, content) => {
// return content
// // return `_<thinking>_\n\n${content}\n\n_</thinking>_`
// })
const parsed = markdown
return (
<div style={{ wordBreak: "break-word", overflowWrap: "anywhere", marginBottom: -10, marginTop: -10 }}>
<ReactMarkdown

View File

@@ -59,6 +59,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
if (lastMessage) {
switch (lastMessage.type) {
case "ask":
const isPartial = lastMessage.partial === true
switch (lastMessage.ask) {
case "api_req_failed":
setTextAreaDisabled(true)
@@ -75,16 +76,16 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
setSecondaryButtonText("Start New Task")
break
case "followup":
setTextAreaDisabled(false)
setTextAreaDisabled(isPartial)
setClaudeAsk("followup")
setEnableButtons(false)
setEnableButtons(isPartial)
// setPrimaryButtonText(undefined)
// setSecondaryButtonText(undefined)
break
case "tool":
setTextAreaDisabled(false)
setTextAreaDisabled(isPartial)
setClaudeAsk("tool")
setEnableButtons(true)
setEnableButtons(!isPartial)
const tool = JSON.parse(lastMessage.text || "{}") as ClaudeSayTool
switch (tool.tool) {
case "editedExistingFile":
@@ -99,9 +100,9 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
}
break
case "command":
setTextAreaDisabled(false)
setTextAreaDisabled(isPartial)
setClaudeAsk("command")
setEnableButtons(true)
setEnableButtons(!isPartial)
setPrimaryButtonText("Run Command")
setSecondaryButtonText("Reject")
break
@@ -114,9 +115,9 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
break
case "completion_result":
// extension waiting for feedback. but we can just present a new task button
setTextAreaDisabled(false)
setTextAreaDisabled(isPartial)
setClaudeAsk("completion_result")
setEnableButtons(true)
setEnableButtons(!isPartial)
setPrimaryButtonText("Start New Task")
setSecondaryButtonText(undefined)
break

View File

@@ -4,6 +4,7 @@ import { ExtensionMessage, ExtensionState } from "../../../src/shared/ExtensionM
import { ApiConfiguration } from "../../../src/shared/api"
import { vscode } from "../utils/vscode"
import { convertTextMateToHljs } from "../utils/textMateToHljs"
import { findLastIndex } from "../../../src/shared/array"
interface ExtensionStateContextType extends ExtensionState {
didHydrateState: boolean
@@ -32,29 +33,49 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
const handleMessage = useCallback((event: MessageEvent) => {
const message: ExtensionMessage = event.data
if (message.type === "state" && message.state) {
setState(message.state)
const config = message.state?.apiConfiguration
const hasKey = config
? [
config.apiKey,
config.openRouterApiKey,
config.awsRegion,
config.vertexProjectId,
config.openAiApiKey,
config.ollamaModelId,
config.geminiApiKey,
config.openAiNativeApiKey,
].some((key) => key !== undefined)
: false
setShowWelcome(!hasKey)
setDidHydrateState(true)
}
if (message.type === "theme" && message.text) {
setTheme(convertTextMateToHljs(JSON.parse(message.text)))
}
if (message.type === "workspaceUpdated" && message.filePaths) {
setFilePaths(message.filePaths)
switch (message.type) {
case "state": {
setState(message.state!)
const config = message.state?.apiConfiguration
const hasKey = config
? [
config.apiKey,
config.openRouterApiKey,
config.awsRegion,
config.vertexProjectId,
config.openAiApiKey,
config.ollamaModelId,
config.geminiApiKey,
config.openAiNativeApiKey,
].some((key) => key !== undefined)
: false
setShowWelcome(!hasKey)
setDidHydrateState(true)
break
}
case "theme": {
if (message.text) {
setTheme(convertTextMateToHljs(JSON.parse(message.text)))
}
break
}
case "workspaceUpdated": {
setFilePaths(message.filePaths ?? [])
break
}
case "partialMessage": {
const partialMessage = message.partialMessage!
setState((prevState) => {
// worth noting it will never be possible for a more up-to-date message to be sent here or in normal messages post since the presentAssistantContent function uses lock
const lastIndex = findLastIndex(prevState.claudeMessages, (msg) => msg.ts === partialMessage.ts)
if (lastIndex !== -1) {
const newClaudeMessages = [...prevState.claudeMessages]
newClaudeMessages[lastIndex] = partialMessage
return { ...prevState, claudeMessages: newClaudeMessages }
}
return prevState
})
}
}
}, [])