Use custom tool calling prompts to get streaming working

2026-02-05 03:55:23 -05:00 · 2024-09-27 15:56:36 -04:00
parent 3b7af5749d
commit b1dcff8f64
4 changed files with 737 additions and 250 deletions
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -43,7 +43,7 @@ export class AnthropicHandler implements ApiHandler {
 					{
 						model: modelId,
 						max_tokens: this.getModel().info.maxTokens,
-						temperature: 0.2,
+						temperature: 0,
 						system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
 						messages: messages.map((message, index) => {
 							if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
@@ -67,8 +67,8 @@ export class AnthropicHandler implements ApiHandler {
 							}
 							return message
 						}),
-						tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
-						tool_choice: { type: "auto" },
+						// tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
+						// tool_choice: { type: "auto" },
 						stream: true,
 					},
 					(() => {
@@ -101,11 +101,11 @@ export class AnthropicHandler implements ApiHandler {
 				const stream = await this.client.messages.create({
 					model: modelId,
 					max_tokens: this.getModel().info.maxTokens,
-					temperature: 0.2,
+					temperature: 0,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,
-					tools,
-					tool_choice: { type: "auto" },
+					// tools,
+					// tool_choice: { type: "auto" },
 					stream: true,
 				})
 				return stream as AnthropicStream
--- a/src/core/AssistantMessage.ts
+++ b/src/core/AssistantMessage.ts
@@ -0,0 +1,95 @@
+// export interface AssistantMessage {
+// 	textContent: TextContent
+// 	toolCalls: ToolCall[]
+// }
+
+export type AssistantMessageContent = TextContent | ToolCall
+
+export interface TextContent {
+	type: "text"
+	content: string
+	partial: boolean
+}
+
+export const toolCallNames = [
+	"execute_command",
+	"read_file",
+	"write_to_file",
+	"search_files",
+	"list_files",
+	"list_code_definition_names",
+	"inspect_site",
+	"ask_followup_question",
+	"attempt_completion",
+] as const
+
+// Converts array of tool call names into a union type ("execute_command" | "read_file" | ...)
+export type ToolCallName = (typeof toolCallNames)[number]
+
+export const toolParamNames = [
+	"command",
+	"path",
+	"content",
+	"regex",
+	"file_pattern",
+	"recursive",
+	"url",
+	"question",
+	"result",
+] as const
+
+export type ToolParamName = (typeof toolParamNames)[number]
+
+export interface ToolCall {
+	type: "tool_call"
+	name: ToolCallName
+	// params is a partial record, allowing only some or none of the possible parameters to be used
+	params: Partial<Record<ToolParamName, string>>
+	partial: boolean
+}
+
+interface ExecuteCommandToolCall extends ToolCall {
+	name: "execute_command"
+	// Pick<Record<ToolParamName, string>, "command"> restricts params to just the "command" key, and Partial<> then makes that key optional
+	params: Partial<Pick<Record<ToolParamName, string>, "command">>
+}
+
+interface ReadFileToolCall extends ToolCall {
+	name: "read_file"
+	params: Partial<Pick<Record<ToolParamName, string>, "path">>
+}
+
+interface WriteToFileToolCall extends ToolCall {
+	name: "write_to_file"
+	params: Partial<Pick<Record<ToolParamName, string>, "path" | "content">>
+}
+
+interface SearchFilesToolCall extends ToolCall {
+	name: "search_files"
+	params: Partial<Pick<Record<ToolParamName, string>, "path" | "regex" | "file_pattern">>
+}
+
+interface ListFilesToolCall extends ToolCall {
+	name: "list_files"
+	params: Partial<Pick<Record<ToolParamName, string>, "path" | "recursive">>
+}
+
+interface ListCodeDefinitionNamesToolCall extends ToolCall {
+	name: "list_code_definition_names"
+	params: Partial<Pick<Record<ToolParamName, string>, "path">>
+}
+
+interface InspectSiteToolCall extends ToolCall {
+	name: "inspect_site"
+	params: Partial<Pick<Record<ToolParamName, string>, "url">>
+}
+
+interface AskFollowupQuestionToolCall extends ToolCall {
+	name: "ask_followup_question"
+	params: Partial<Pick<Record<ToolParamName, string>, "question">>
+}
+
+interface AttemptCompletionToolCall extends ToolCall {
+	name: "attempt_completion"
+	params: Partial<Pick<Record<ToolParamName, string>, "result" | "command">>
+}
--- a/src/core/ClaudeDev.ts
+++ b/src/core/ClaudeDev.ts
@@ -34,6 +34,15 @@ import { TOOLS } from "./prompts/tools"
 import { truncateHalfConversation } from "./sliding-window"
 import { ClaudeDevProvider } from "./webview/ClaudeDevProvider"
 import cloneDeep from "clone-deep"
+import {
+	AssistantMessageContent,
+	TextContent,
+	ToolCall,
+	ToolCallName,
+	toolCallNames,
+	ToolParamName,
+	toolParamNames,
+} from "./AssistantMessage"

 const cwd =
 	vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution
@@ -43,9 +52,9 @@ type UserContent = Array<
 	Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam
 >

-type AnthropicPartialContentBlock = Anthropic.Messages.ContentBlock & {
-	partial?: boolean
-}
+// type AnthropicPartialContentBlock = Anthropic.Messages.ContentBlock & {
+// 	partial?: boolean
+// }

 export class ClaudeDev {
 	readonly taskId: string
@@ -65,29 +74,6 @@ export class ClaudeDev {
 	private providerRef: WeakRef<ClaudeDevProvider>
 	private abort: boolean = false

-	// streaming
-	private currentStreamingContentBlockIndex = 0
-	private didCompleteReadingStream = false
-	private assistantContentBlocks: AnthropicPartialContentBlock[] = []
-	private toolResults: Anthropic.ToolResultBlockParam[] = []
-	private toolResultsReady = false
-	private didRejectTool = false
-	private presentAssistantContentLocked = false
-	private partialJsonParser: JSONParser | undefined
-	private partialJsonParserState: {
-		partialObject: Record<string, string>
-		currentKey: string
-		currentValue: string
-		parsingKey: boolean
-		parsingValue: boolean
-	} = {
-		partialObject: {},
-		currentKey: "",
-		currentValue: "",
-		parsingKey: false,
-		parsingValue: false,
-	}
-
 	constructor(
 		provider: ClaudeDevProvider,
 		apiConfiguration: ApiConfiguration,
@@ -1725,63 +1711,105 @@ ${this.customInstructions.trim()}
 		}
 	}

-	private presentAssistantContentHasPendingUpdates = false
-	async presentAssistantContent() {
-		if (this.presentAssistantContentLocked) {
-			this.presentAssistantContentHasPendingUpdates = true
+	async presentAssistantMessage() {
+		if (this.presentAssistantMessageLocked) {
+			this.presentAssistantMessageHasPendingUpdates = true
 			return
 		}
-		this.presentAssistantContentLocked = true
-		this.presentAssistantContentHasPendingUpdates = false
+		this.presentAssistantMessageLocked = true
+		this.presentAssistantMessageHasPendingUpdates = false

-		if (this.currentStreamingContentBlockIndex >= this.assistantContentBlocks.length) {
+		if (this.currentStreamingContentIndex >= this.assistantMessageContent.length) {
 			throw new Error("No more content blocks to stream! This shouldn't happen...") // remove and just return after testing
 		}

-		const block = cloneDeep(this.assistantContentBlocks[this.currentStreamingContentBlockIndex]) // need to create copy bc while stream is updating the array, it could be updating the reference block properties too
+		const block = cloneDeep(this.assistantMessageContent[this.currentStreamingContentIndex]) // need to create copy bc while stream is updating the array, it could be updating the reference block properties too
 		switch (block.type) {
 			case "text":
-				await this.say("text", block.text, undefined, block.partial)
+				await this.say("text", block.content, undefined, block.partial)
 				break
-			case "tool_use":
-				const toolName = block.name as ToolName
-				const toolInput = block.input as any
-				const toolUseId = block.id
+			case "tool_call":
+				const toolDescription = () => {
+					switch (block.name) {
+						case "execute_command":
+							return `[${block.name} for '${block.params.command}']`
+						case "read_file":
+							return `[${block.name} for '${block.params.path}']`
+						case "write_to_file":
+							return `[${block.name} for '${block.params.path}']`
+						case "search_files":
+							return `[${block.name} for '${block.params.regex}'${
+								block.params.file_pattern ? ` in '${block.params.file_pattern}'` : ""
+							}]`
+						case "list_files":
+							return `[${block.name} for '${block.params.path}']`
+						case "list_code_definition_names":
+							return `[${block.name} for '${block.params.path}']`
+						case "inspect_site":
+							return `[${block.name} for '${block.params.url}']`
+						case "ask_followup_question":
+							return `[${block.name} for '${block.params.question}']`
+						case "attempt_completion":
+							return `[${block.name}]`
+					}
+				}

 				if (this.didRejectTool) {
 					// ignore any tool content after user has rejected tool once
 					// we'll fill it in with a rejection message when the message is complete
 					if (!block.partial) {
-						this.toolResults.push({
-							type: "tool_result",
-							tool_use_id: toolUseId,
-							content: "Skipping tool execution due to previous tool user rejection.",
+						this.userMessageContent.push({
+							type: "text",
+							text: `Skipping tool ${toolDescription()} due to user rejecting a previous tool.`,
 						})
 					}
 					break
 				}

+				const pushToolResult = (content: ToolResponse) => {
+					this.userMessageContent.push({
+						type: "text",
+						text: `${toolDescription()} Result:`,
+					})
+					if (typeof content === "string") {
+						this.userMessageContent.push({
+							type: "text",
+							text: content,
+						})
+					} else {
+						this.userMessageContent.push(...content)
+					}
+				}
+
 				const askApproval = async (type: ClaudeAsk, partialMessage?: string) => {
 					const { response, text, images } = await this.ask(type, partialMessage, false)
 					if (response !== "yesButtonTapped") {
 						if (response === "messageResponse") {
 							await this.say("user_feedback", text, images)
-							this.toolResults.push({
-								type: "tool_result",
-								tool_use_id: toolUseId,
-								content: this.formatToolResponseWithImages(
-									await this.formatToolDeniedFeedback(text),
-									images
-								),
-							})
+							pushToolResult(
+								this.formatToolResponseWithImages(await this.formatToolDeniedFeedback(text), images)
+							)
+							// this.userMessageContent.push({
+							// 	type: "text",
+							// 	text: `${toolDescription()}`,
+							// })
+							// this.toolResults.push({
+							// 	type: "tool_result",
+							// 	tool_use_id: toolUseId,
+							// 	content: this.formatToolResponseWithImages(
+							// 		await this.formatToolDeniedFeedback(text),
+							// 		images
+							// 	),
+							// })
 							this.didRejectTool = true
 							return false
 						}
-						this.toolResults.push({
-							type: "tool_result",
-							tool_use_id: toolUseId,
-							content: await this.formatToolDenied(),
-						})
+						pushToolResult(await this.formatToolDenied())
+						// this.toolResults.push({
+						// 	type: "tool_result",
+						// 	tool_use_id: toolUseId,
+						// 	content: await this.formatToolDenied(),
+						// })
 						this.didRejectTool = true
 						return false
 					}
@@ -1794,24 +1822,17 @@ ${this.customInstructions.trim()}
 						"error",
 						`Error ${action}:\n${error.message ?? JSON.stringify(serializeError(error), null, 2)}`
 					)
-					this.toolResults.push({
-						type: "tool_result",
-						tool_use_id: toolUseId,
-						content: await this.formatToolError(errorString),
-					})
+					// this.toolResults.push({
+					// 	type: "tool_result",
+					// 	tool_use_id: toolUseId,
+					// 	content: await this.formatToolError(errorString),
+					// })
+					pushToolResult(await this.formatToolError(errorString))
 				}

-				const pushToolResult = (content: ToolResponse) => {
-					this.toolResults.push({
-						type: "tool_result",
-						tool_use_id: toolUseId,
-						content,
-					})
-				}
-
-				switch (toolName) {
+				switch (block.name) {
 					case "read_file": {
-						const relPath: string | undefined = toolInput.path
+						const relPath: string | undefined = block.params.path
 						const sharedMessageProps: ClaudeSayTool = {
 							tool: "readFile",
 							path: relPath || "", //this.getReadablePath(relPath || ""),
@@ -1859,8 +1880,8 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "list_files": {
-						const relDirPath: string | undefined = toolInput.path
-						const recursiveRaw: string | undefined = toolInput.path
+						const relDirPath: string | undefined = block.params.path
+						const recursiveRaw: string | undefined = block.params.path
 						const recursive = recursiveRaw?.toLowerCase() === "true"
 						const sharedMessageProps: ClaudeSayTool = {
 							tool: !recursive ? "listFilesTopLevel" : "listFilesRecursive",
@@ -1909,7 +1930,7 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "list_code_definition_names": {
-						const relDirPath: string | undefined = toolInput.path
+						const relDirPath: string | undefined = block.params.path
 						const sharedMessageProps: ClaudeSayTool = {
 							tool: "listCodeDefinitionNames",
 							path: relDirPath || "",
@@ -1958,9 +1979,9 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "search_files": {
-						const relDirPath: string | undefined = toolInput.path
-						const regex: string | undefined = toolInput.regex
-						const filePattern: string | undefined = toolInput.filePattern
+						const relDirPath: string | undefined = block.params.path
+						const regex: string | undefined = block.params.regex
+						const filePattern: string | undefined = block.params.file_pattern
 						const sharedMessageProps: ClaudeSayTool = {
 							tool: "searchFiles",
 							path: relDirPath || "",
@@ -2014,7 +2035,7 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "inspect_site": {
-						const url: string | undefined = toolInput.url
+						const url: string | undefined = block.params.url
 						const sharedMessageProps: ClaudeSayTool = {
 							tool: "inspectSite",
 							path: url || "",
@@ -2076,7 +2097,7 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "execute_command": {
-						const command: string | undefined = toolInput.command
+						const command: string | undefined = block.params.command
 						try {
 							if (block.partial) {
 								await this.ask("command", command || "", block.partial).catch(() => {})
@@ -2108,7 +2129,7 @@ ${this.customInstructions.trim()}
 					}

 					case "ask_followup_question": {
-						const question: string | undefined = toolInput.question
+						const question: string | undefined = block.params.question
 						try {
 							if (block.partial) {
 								await this.ask("followup", question || "", block.partial).catch(() => {})
@@ -2135,8 +2156,8 @@ ${this.customInstructions.trim()}
 						}
 					}
 					case "attempt_completion": {
-						const result: string | undefined = toolInput.result
-						const command: string | undefined = toolInput.command
+						const result: string | undefined = block.params.result
+						const command: string | undefined = block.params.command
 						try {
 							const lastMessage = this.claudeMessages.at(-1)
 							if (block.partial) {
@@ -2214,135 +2235,268 @@ ${this.customInstructions.trim()}
 				break
 		}

-		this.presentAssistantContentLocked = false
+		this.presentAssistantMessageLocked = false
 		if (!block.partial) {
 			// block is finished streaming and executing
 			if (
-				this.currentStreamingContentBlockIndex === this.assistantContentBlocks.length - 1 &&
+				this.currentStreamingContentIndex === this.assistantMessageContent.length - 1 &&
 				this.didCompleteReadingStream
 			) {
 				// last block is complete and it is finished executing
-				this.toolResultsReady = true // will allow pwaitfor to continue
+				this.userMessageContentReady = true // will allow pwaitfor to continue
 			} else {
 				// call next block if it exists (if not then read stream will call it when its ready)
-				this.currentStreamingContentBlockIndex++ // need to increment regardless, so when read stream calls this function again it will be streaming the next block
-				if (this.currentStreamingContentBlockIndex < this.assistantContentBlocks.length) {
+				this.currentStreamingContentIndex++ // need to increment regardless, so when read stream calls this function again it will be streaming the next block
+				if (this.currentStreamingContentIndex < this.assistantMessageContent.length) {
 					// there are already more content blocks to stream, so we'll call this function ourselves
 					// await this.presentAssistantContent()
-					this.presentAssistantContent()
+					this.presentAssistantMessage()
 					return
 				}
 			}
 		}
 		// block is partial, but the read stream may have finished
-		if (this.presentAssistantContentHasPendingUpdates) {
-			this.presentAssistantContent()
+		if (this.presentAssistantMessageHasPendingUpdates) {
+			this.presentAssistantMessage()
 		}
 	}

-	//
+	// //
+	// private partialJsonParser: JSONParser | undefined
+	// private partialJsonParserState: {
+	// 	partialObject: Record<string, string>
+	// 	currentKey: string
+	// 	currentValue: string
+	// 	parsingKey: boolean
+	// 	parsingValue: boolean
+	// } = {
+	// 	partialObject: {},
+	// 	currentKey: "",
+	// 	currentValue: "",
+	// 	parsingKey: false,
+	// 	parsingValue: false,
+	// }
+	// private chunkIndexToJsonParser = new Map<number, JSONParser>()
+	// getJsonParserForChunk(chunkIndex: number): JSONParser {
+	// 	if (!this.chunkIndexToJsonParser.has(chunkIndex)) {
+	// 		const parser = new JSONParser({ emitPartialTokens: true, emitPartialValues: true })
+	// 		// this package enforces setting up an onValue listener ("Can't emit data before the "onValue" callback has been set up."), even though we don't need it.
+	// 		parser.onValue = () => console.log(`onValue for chunk ${chunkIndex}`)
+	// 		// parser.onError = (error) => console.error(`Error parsing JSON for chunk ${chunkIndex}:`, error);
+	// 		// parser.onEnd = () => console.log(`JSON parsing ended for chunk ${chunkIndex}`);

-	private chunkIndexToJsonParser = new Map<number, JSONParser>()
-	getJsonParserForChunk(chunkIndex: number): JSONParser {
-		if (!this.chunkIndexToJsonParser.has(chunkIndex)) {
-			const parser = new JSONParser({ emitPartialTokens: true, emitPartialValues: true })
-			// this package enforces setting up an onValue listener ("Can't emit data before the "onValue" callback has been set up."), even though we don't need it.
-			parser.onValue = () => console.log(`onValue for chunk ${chunkIndex}`)
-			// parser.onError = (error) => console.error(`Error parsing JSON for chunk ${chunkIndex}:`, error);
-			// parser.onEnd = () => console.log(`JSON parsing ended for chunk ${chunkIndex}`);
+	// 		let partialObject: Record<string, string> = {}
+	// 		let currentKey: string = ""
+	// 		let currentValue: string = ""
+	// 		let parsingKey: boolean = false
+	// 		let parsingValue: boolean = false

-			let partialObject: Record<string, string> = {}
-			let currentKey: string = ""
-			let currentValue: string = ""
-			let parsingKey: boolean = false
-			let parsingValue: boolean = false
+	// 		// our json will only ever be string to string maps
+	// 		// { "key": "value", "key2": "value2" }
+	// 		// so left brace, string, colon, comma, right brace
+	// 		// (need to recreate this listener each time to update the resolve ref)
+	// 		parser.onToken = ({ token, value, offset, partial }) => {
+	// 			console.log("onToken")

-			// our json will only ever be string to string maps
-			// { "key": "value", "key2": "value2" }
-			// so left brace, string, colon, comma, right brace
-			// (need to recreate this listener each time to update the resolve ref)
-			parser.onToken = ({ token, value, offset, partial }) => {
-				console.log("onToken")
+	// 			try {
+	// 				switch (token) {
+	// 					case TokenType.LEFT_BRACE:
+	// 						// Start of a new JSON object
+	// 						partialObject = {}
+	// 						currentKey = ""
+	// 						parsingKey = false
+	// 						parsingValue = false
+	// 						break
+	// 					case TokenType.RIGHT_BRACE:
+	// 						// End of the current JSON object
+	// 						currentKey = ""
+	// 						currentValue = ""
+	// 						parsingKey = false
+	// 						parsingValue = false

-				try {
-					switch (token) {
-						case TokenType.LEFT_BRACE:
-							// Start of a new JSON object
-							partialObject = {}
-							currentKey = ""
-							parsingKey = false
-							parsingValue = false
-							break
-						case TokenType.RIGHT_BRACE:
-							// End of the current JSON object
-							currentKey = ""
-							currentValue = ""
-							parsingKey = false
-							parsingValue = false
+	// 						// Finalize the object once parsing is complete
+	// 						// ;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = this.partialObject
+	// 						// this.assistantContentBlocks[chunkIndex]!.partial = false
+	// 						// await this.presentAssistantContent() // NOTE: only set partial = false and call this once, since doing it several times will create duplicate messages.
+	// 						console.log("Final parsed object:", partialObject)
+	// 						break
+	// 					case TokenType.STRING:
+	// 						if (!parsingValue && !parsingKey) {
+	// 							// Starting to parse a key
+	// 							currentKey = value as string
+	// 							parsingKey = !!partial // if not partial, we are done parsing key
+	// 						} else if (parsingKey) {
+	// 							// Continuing to parse a key
+	// 							currentKey = value as string
+	// 							parsingKey = !!partial
+	// 						} else if (parsingValue) {
+	// 							// Parsing a value
+	// 							// Accumulate partial value and update the object
+	// 							currentValue = value as string
+	// 							if (currentKey) {
+	// 								partialObject[currentKey] = currentValue
+	// 							}
+	// 							parsingValue = !!partial // if not partial, complete value
+	// 						}
+	// 						break
+	// 					case TokenType.COLON:
+	// 						// After a key and colon, expect a value
+	// 						if (currentKey !== null) {
+	// 							parsingValue = true
+	// 						}
+	// 						break
+	// 					case TokenType.COMMA:
+	// 						// Reset for the next key-value pair
+	// 						currentKey = ""
+	// 						currentValue = ""
+	// 						parsingKey = false
+	// 						parsingValue = false
+	// 						break
+	// 					default:
+	// 						console.error("Unexpected token:", token)
+	// 				}

-							// Finalize the object once parsing is complete
-							// ;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = this.partialObject
-							// this.assistantContentBlocks[chunkIndex]!.partial = false
-							// await this.presentAssistantContent() // NOTE: only set partial = false and call this once, since doing it several times will create duplicate messages.
-							console.log("Final parsed object:", partialObject)
-							break
-						case TokenType.STRING:
-							if (!parsingValue && !parsingKey) {
-								// Starting to parse a key
-								currentKey = value as string
-								parsingKey = !!partial // if not partial, we are done parsing key
-							} else if (parsingKey) {
-								// Continuing to parse a key
-								currentKey = value as string
-								parsingKey = !!partial
-							} else if (parsingValue) {
-								// Parsing a value
-								// Accumulate partial value and update the object
-								currentValue = value as string
-								if (currentKey) {
-									partialObject[currentKey] = currentValue
-								}
-								parsingValue = !!partial // if not partial, complete value
-							}
-							break
-						case TokenType.COLON:
-							// After a key and colon, expect a value
-							if (currentKey !== null) {
-								parsingValue = true
-							}
-							break
-						case TokenType.COMMA:
-							// Reset for the next key-value pair
-							currentKey = ""
-							currentValue = ""
-							parsingKey = false
-							parsingValue = false
-							break
-						default:
-							console.error("Unexpected token:", token)
-					}
+	// 				// Debugging logs to trace the parsing process
+	// 				console.log("Partial object:", partialObject)
+	// 				console.log("Offset:", offset, "isPartialToken:", partial)

-					// Debugging logs to trace the parsing process
-					console.log("Partial object:", partialObject)
-					console.log("Offset:", offset, "isPartialToken:", partial)
+	// 				// Update the contentBlock with the current state of the partial object
+	// 				// Use spread operator to ensure a new object reference
+	// 				;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = {
+	// 					...partialObject,
+	// 				}
+	// 				// right brace indicates the end of the json object
+	// 				this.assistantContentBlocks[chunkIndex]!.partial = token !== TokenType.RIGHT_BRACE

-					// Update the contentBlock with the current state of the partial object
-					// Use spread operator to ensure a new object reference
-					;(this.assistantContentBlocks[chunkIndex] as Anthropic.ToolUseBlock).input = {
-						...partialObject,
-					}
-					// right brace indicates the end of the json object
-					this.assistantContentBlocks[chunkIndex]!.partial = token !== TokenType.RIGHT_BRACE
+	// 				this.presentAssistantContent()
+	// 			} catch (error) {
+	// 				console.error("Error parsing input_json_delta", error)
+	// 			}
+	// 		}

-					this.presentAssistantContent()
-				} catch (error) {
-					console.error("Error parsing input_json_delta", error)
+	// 		this.chunkIndexToJsonParser.set(chunkIndex, parser)
+	// 	}
+	// 	return this.chunkIndexToJsonParser.get(chunkIndex)!
+	// }
+
+	// streaming
+	private currentStreamingContentIndex = 0
+	private assistantMessageContent: AssistantMessageContent[] = []
+	private didCompleteReadingStream = false
+	// private assistantMessage?: AssistantMessage
+	private userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = []
+	private userMessageContentReady = false
+	private didRejectTool = false
+	private presentAssistantMessageLocked = false
+	private presentAssistantMessageHasPendingUpdates = false
+
+	private parseTextStreamAccumulator = ""
+
+	parseTextStream(chunk: string) {
+		this.parseTextStreamAccumulator += chunk
+
+		// let text = ""
+		let textContent: TextContent = {
+			type: "text",
+			content: "",
+			partial: true,
+		}
+		let toolCalls: ToolCall[] = []
+
+		let currentToolCall: ToolCall | undefined = undefined
+		let currentParamName: ToolParamName | undefined = undefined
+		let currentParamValueLines: string[] = []
+		let textContentLines: string[] = []
+
+		const rawLines = this.parseTextStreamAccumulator.split("\n")
+
+		if (rawLines.length === 1) {
+			const firstLine = rawLines[0].trim()
+			if (!firstLine.startsWith("<t") && firstLine.startsWith("<")) {
+				// (we ignore tags that start with <t since it's most likely a <thinking> tag, and none of our tags start with t)
+				// content is just starting, if it starts with < we can assume it's a tool call, so we'll wait for the next line
+				console.log("skipping reason 1")
+				return
+			}
+		}
+
+		if (
+			this.assistantMessageContent.length === 1 &&
+			this.assistantMessageContent[0].partial // first element is always TextContent
+		) {
+			// we're updating text content, so if we have a partial xml tag on the last line we can ignore it until we get the full line.
+			const lastLine = rawLines.at(-1)?.trim()
+			if (lastLine && !lastLine.startsWith("<t") && lastLine.startsWith("<") && !lastLine.endsWith(">")) {
+				console.log("skipping reason 2")
+				return
+			}
+		}
+
+		for (const line of rawLines) {
+			const trimmed = line.trim()
+			// if we're inside a tool call (currentToolCall) or a parameter (currentParamName), look for its closing tag first; this is more efficient and safer
+			if (currentToolCall && currentParamName && trimmed === `</${currentParamName}>`) {
+				// End of a tool parameter
+				currentToolCall.params[currentParamName] = currentParamValueLines.join("\n")
+				currentParamName = undefined
+				currentParamValueLines = []
+				// currentParamValue = undefined
+				continue
+			} else if (currentToolCall && !currentParamName && trimmed === `</${currentToolCall.name}>`) {
+				// End of a tool call
+				currentToolCall.partial = false
+				toolCalls.push(currentToolCall)
+				currentToolCall = undefined
+				continue
+			}
+			if (!currentParamName && trimmed.startsWith("<") && trimmed.endsWith(">")) {
+				const tag = trimmed.slice(1, -1)
+				if (toolCallNames.includes(tag as ToolCallName)) {
+					// Start of a new tool call
+					currentToolCall = { type: "tool_call", name: tag as ToolCallName, params: {}, partial: true }
+					// This also indicates the end of the text content
+					textContent.partial = false
+					continue
+				} else if (currentToolCall && toolParamNames.includes(tag as ToolParamName)) {
+					// Start of a parameter
+					currentParamName = tag as ToolParamName
+					// currentToolCall.params[currentParamName] = ""
+					continue
 				}
 			}

-			this.chunkIndexToJsonParser.set(chunkIndex, parser)
+			if (currentToolCall && !currentParamName) {
+				// current tool doesn't have a param match yet, it's likely partial so ignore
+				continue
+			}
+
+			if (currentToolCall && currentParamName) {
+				// add line to current param value
+				currentParamValueLines.push(line)
+				continue
+			}
+
+			// only add text content if we haven't started a tool yet
+			if (textContent.partial) {
+				textContentLines.push(line)
+			}
 		}
-		return this.chunkIndexToJsonParser.get(chunkIndex)!
+
+		if (currentToolCall) {
+			// stream did not complete tool call, add it as partial
+			if (currentParamName) {
+				// tool call has a parameter that was not completed
+				currentToolCall.params[currentParamName] = currentParamValueLines.join("\n")
+			}
+			toolCalls.push(currentToolCall)
+		}
+
+		textContent.content = textContentLines.join("\n")
+
+		this.assistantMessageContent = [textContent, ...toolCalls]
+
+		// Present the updated content
+		this.presentAssistantMessage()
 	}

 	async recursivelyMakeClaudeRequests(
@@ -2412,11 +2566,18 @@ ${this.customInstructions.trim()}

 			// todo add error listeners so we can return api error? or wil lfor await handle that below?

-			// let contentBlocks: AnthropicPartialContentBlock[] = []
-			this.assistantContentBlocks = []
+			let apiContentBlocks: Anthropic.ContentBlock[] = []
+			this.currentStreamingContentIndex = 0
+			this.assistantMessageContent = []
 			this.didCompleteReadingStream = false
-			this.currentStreamingContentBlockIndex = 0
-			this.chunkIndexToJsonParser.clear()
+			this.userMessageContent = []
+			this.userMessageContentReady = false
+			this.didRejectTool = false
+			this.presentAssistantMessageLocked = false
+			this.presentAssistantMessageHasPendingUpdates = false
+			this.parseTextStreamAccumulator = ""
+
+			// this.chunkIndexToJsonParser.clear()
 			for await (const chunk of stream) {
 				switch (chunk.type) {
 					case "message_start":
@@ -2443,24 +2604,37 @@ ${this.customInstructions.trim()}
 						switch (chunk.content_block.type) {
 							case "text":
 								console.log("text", chunk.content_block.text)
-								this.assistantContentBlocks.push(chunk.content_block)
-								this.assistantContentBlocks.at(-1)!.partial = true
-								this.presentAssistantContent()
+								// this.assistantContentBlocks.push({
+								// 	text: chunk.content_block.text,
+								// 	toolCalls: [],
+								// 	partial: true,
+								// })
+								apiContentBlocks.push(chunk.content_block)
+
+								// we may receive multiple text blocks, in which case just insert a line break between them
+								if (chunk.index > 0) {
+									this.parseTextStream("\n")
+								}
+
+								this.parseTextStream(chunk.content_block.text)
+								// this.assistantContentBlocks.at(-1)!.partial = true
+								this.presentAssistantMessage()
 								break
-							case "tool_use":
-								console.log(
-									"tool_use",
-									chunk.index,
-									chunk.content_block.id,
-									chunk.content_block.name,
-									chunk.content_block.input // input is always object, which will be streamed as partial json in content_block_delta. (this initial 'input' will always be an empty object)
-								)
-								this.assistantContentBlocks.push(chunk.content_block)
-								this.assistantContentBlocks.at(-1)!.partial = true
-								this.presentAssistantContent()
-								// Initialize the JSON parser with partial tokens enabled
-								// partialJsonParser =
-								this.getJsonParserForChunk(chunk.index)
+							// case "tool_use":
+							// 	console.log(
+							// 		"tool_use",
+							// 		chunk.index,
+							// 		chunk.content_block.id,
+							// 		chunk.content_block.name,
+							// 		chunk.content_block.input // input is always object, which will be streamed as partial json in content_block_delta. (this initial 'input' will always be an empty object)
+							// 	)
+							// 	apiContentBlocks.push(chunk.content_block)
+							// 	this.assistantContentBlocks.push(chunk.content_block)
+							// 	this.assistantContentBlocks.at(-1)!.partial = true
+							// 	this.presentAssistantContent()
+							// // Initialize the JSON parser with partial tokens enabled
+							// // partialJsonParser =
+							// this.getJsonParserForChunk(chunk.index)
 						}
 						break
 					case "content_block_delta":
@@ -2468,42 +2642,42 @@ ${this.customInstructions.trim()}
 						switch (chunk.delta.type) {
 							case "text_delta":
 								console.log("text_delta", chunk.delta.text)
-								;(this.assistantContentBlocks[chunk.index] as Anthropic.TextBlock).text +=
-									chunk.delta.text
-								this.presentAssistantContent()
+								;(apiContentBlocks[chunk.index] as Anthropic.TextBlock).text += chunk.delta.text
+								this.parseTextStream(chunk.delta.text)
+								this.presentAssistantMessage()
 								break
-							case "input_json_delta":
-								console.log("input_json_delta", chunk.delta.partial_json)
-								try {
-									this.getJsonParserForChunk(chunk.index).write(chunk.delta.partial_json)
-								} catch (error) {
-									console.error("Error parsing input_json_delta", error)
-								}
+							// case "input_json_delta":
+							// 	console.log("input_json_delta", chunk.delta.partial_json)
+							// 	try {
+							// 		// this.getJsonParserForChunk(chunk.index).write(chunk.delta.partial_json)
+							// 	} catch (error) {
+							// 		console.error("Error parsing input_json_delta", error)
+							// 	}

-								// try {
-								// 	// JSONParser will always give us a token unless we pass in an empty/undefined value (in which case the promise would never resolve)
-								// 	if (chunk.delta.partial_json) {
-								// 		// need to await this since we dont want to create multiple jsonparsers in case the read stream comes in faster than the jsonparser can parse
-								// 		await this.updateAssistantContentWithPartialJson(
-								// 			chunk.index,
-								// 			chunk.delta.partial_json
-								// 		)
-								// 	}
-								// } catch (error) {
-								// 	// may be due to timeout, in which case we can safely ignore
-								// 	console.error("Error parsing input_json_delta", error)
-								// }
-								// this.presentAssistantContent()
-								break
+							// 	// try {
+							// 	// 	// JSONParser will always give us a token unless we pass in an empty/undefined value (in which case the promise would never resolve)
+							// 	// 	if (chunk.delta.partial_json) {
+							// 	// 		// need to await this since we dont want to create multiple jsonparsers in case the read stream comes in faster than the jsonparser can parse
+							// 	// 		await this.updateAssistantContentWithPartialJson(
+							// 	// 			chunk.index,
+							// 	// 			chunk.delta.partial_json
+							// 	// 		)
+							// 	// 	}
+							// 	// } catch (error) {
+							// 	// 	// may be due to timeout, in which case we can safely ignore
+							// 	// 	console.error("Error parsing input_json_delta", error)
+							// 	// }
+							// 	// this.presentAssistantContent()
+							// 	break
 						}
 						break
 					case "content_block_stop":
-						if (this.assistantContentBlocks[chunk.index]!.type === "text") {
-							// we only call this for text block since partialJsonParser handles calling this for tool_use blocks (we only eve want to set partial to false and presentAssistantContent once for each block)
-							console.log(11)
-							this.assistantContentBlocks[chunk.index]!.partial = false
-							this.presentAssistantContent()
-						}
+						// if (apiContentBlocks[chunk.index]!.type === "text") {
+						// 	// we only call this for text block since partialJsonParser handles calling this for tool_use blocks (we only ever want to set partial to false and presentAssistantContent once for each block)
+						// 	console.log(11)
+						// 	this.assistantContentBlocks[chunk.index]!.partial = false
+						// 	this.presentAssistantContent()
+						// }

 						console.log("content_block_stop", chunk.index)

@@ -2514,7 +2688,7 @@ ${this.customInstructions.trim()}
 			}
 			this.didCompleteReadingStream = true

-			console.log("contentBlocks", this.assistantContentBlocks)
+			console.log("contentBlocks", apiContentBlocks)

 			let totalCost: string | undefined

@@ -2539,19 +2713,19 @@ ${this.customInstructions.trim()}
 			// now add to apiconversationhistory
 			// need to save assistant responses to file before proceeding to tool use since user can exit at any moment and we wouldn't be able to save the assistant's response
 			let didEndLoop = false
-			if (this.assistantContentBlocks.length > 0) {
+			if (apiContentBlocks.length > 0) {
 				// Remove 'partial' prop from assistantContentBlocks
-				const blocksWithoutPartial: Anthropic.Messages.ContentBlock[] = this.assistantContentBlocks.map(
-					(block) => {
-						const { partial, ...rest } = block
-						return rest
-					}
-				)
-				await this.addToApiConversationHistory({ role: "assistant", content: blocksWithoutPartial })
+				// const blocksWithoutPartial: Anthropic.Messages.ContentBlock[] = this.assistantContentBlocks.map(
+				// 	(block) => {
+				// 		const { partial, ...rest } = block
+				// 		return rest
+				// 	}
+				// )
+				await this.addToApiConversationHistory({ role: "assistant", content: apiContentBlocks })

-				await pWaitFor(() => this.toolResultsReady)
+				await pWaitFor(() => this.userMessageContentReady)

-				const recDidEndLoop = await this.recursivelyMakeClaudeRequests(this.toolResults)
+				const recDidEndLoop = await this.recursivelyMakeClaudeRequests(this.userMessageContent)
 				didEndLoop = recDidEndLoop
 			} else {
 				// this should never happen! it there's no assistant_responses, that means we got no text or tool_use content blocks from API which we should assume is an error
--- a/src/core/prompts/system.ts
+++ b/src/core/prompts/system.ts
@@ -70,4 +70,222 @@ Operating System: ${osName()}
 Default Shell: ${defaultShell}
 Home Directory: ${os.homedir().toPosix()}
 Current Working Directory: ${cwd.toPosix()}
+
+====
+
+INSTRUCTIONS FOR FORMULATING YOUR RESPONSE
+
+You must respond to the user's request by using at least one tool call. When formulating your response, follow these guidelines:
+
+1. Begin your response with normal text, explaining your thoughts, analysis, or plan of action.
+2. If you need to use any tools, place ALL tool calls at the END of your message, after your normal text explanation.
+3. You can use multiple tool calls if needed, but they should all be grouped together at the end of your message.
+4. After placing the tool calls, do not add any additional normal text. The tool calls should be the final content in your message.
+
+Here's the general structure your responses should follow:
+
+\`\`\`
+[Your normal text response explaining your thoughts and actions]
+
+[Tool Call 1]
+[Tool Call 2 if needed]
+[Tool Call 3 if needed]
+...
+\`\`\`
+
+Remember:
+- Choose the most appropriate tool(s) based on the task and the tool descriptions provided.
+- Formulate your tool calls using the XML format specified for each tool.
+- Provide clear explanations in your normal text about what actions you're taking and why you're using particular tools.
+- Act as if the tool calls will be executed immediately after your message, and your next response will have access to their results.
+
+# Tool Descriptions and XML Formats
+
+## execute_command
+<execute_command>
+<command>
+Your command here
+</command>
+</execute_command>
+Description: Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: ${cwd.toPosix()}
+Parameters:
+- command: (required) The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
+
+## read_file
+<read_file>
+<path>
+File path here
+</path>
+</read_file>
+Description: Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
+Parameters:
+- path: (required) The path of the file to read (relative to the current working directory ${cwd.toPosix()})
+
+## write_to_file
+<write_to_file>
+<path>
+File path here
+</path>
+<content>
+Your file content here
+</content>
+</write_to_file>
+Description: Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.
+Parameters:
+- path: (required) The path of the file to write to (relative to the current working directory ${cwd.toPosix()})
+- content: (required) The full content to write to the file.
+
+## search_files
+<search_files>
+<path>
+Directory path here
+</path>
+<regex>
+Your regex pattern here
+</regex>
+<file_pattern>
+Optional file pattern here
+</file_pattern>
+</search_files>
+Description: Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.
+Parameters:
+- path: (required) The path of the directory to search in (relative to the current working directory ${cwd.toPosix()}). This directory will be recursively searched.
+- regex: (required) The regular expression pattern to search for. Uses Rust regex syntax.
+- file_pattern: (optional) Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).
+
+## list_files
+<list_files>
+<path>
+Directory path here
+</path>
+<recursive>
+true or false (optional)
+</recursive>
+</list_files>
+Description: List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.
+Parameters:
+- path: (required) The path of the directory to list contents for (relative to the current working directory ${cwd.toPosix()})
+- recursive: (optional) Whether to list files recursively. Use true for recursive listing, false or omit for top-level only.
+
+## list_code_definition_names
+<list_code_definition_names>
+<path>
+Directory path here
+</path>
+</list_code_definition_names>
+Description: Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.
+Parameters:
+- path: (required) The path of the directory (relative to the current working directory ${cwd.toPosix()}) to list top level source code definitions for.${
+	supportsImages
+		? `
+
+## inspect_site
+
+<inspect_site>
+<url>
+URL of the site to inspect
+</url>
+</inspect_site>
+Description: Captures a screenshot and console logs of the initial state of a website. This tool navigates to the specified URL, takes a screenshot of the entire page as it appears immediately after loading, and collects any console logs or errors that occur during page load. It does not interact with the page or capture any state changes after the initial load.
+Parameters:
+- url: (required) The URL of the site to inspect. This should be a valid URL including the protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.)`
+		: ""
+}
+
+## ask_followup_question
+<ask_followup_question>
+<question>
+Your question here
+</question>
+</ask_followup_question>
+Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.
+Parameters:
+- question: (required) The question to ask the user. This should be a clear, specific question that addresses the information you need.
+
+## attempt_completion
+<attempt_completion>
+<result>
+Your final result description here
+</result>
+<command>
+Optional command to demonstrate result
+</command>
+</attempt_completion>
+Description: Once you've completed the task, use this tool to present the result to the user. Optionally you may provide a CLI command to showcase the result of your work, but avoid using commands like 'echo' or 'cat' that merely print text. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
+Parameters:
+- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.
+- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use 'open index.html' to display a created website. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
+
+
+# Examples
+
+Here are some examples of how to structure your responses with tool calls:
+
+## Example 1: Using a single tool
+
+Let's run the test suite for our project. This will help us ensure that all our components are functioning correctly.
+
+<execute_command>
+<command>
+npm test
+</command>
+</execute_command>
+
+## Example 2: Using multiple tools
+
+Let's create two new configuration files for the web application, one for the frontend and one for the backend.
+
+<write_to_file>
+<path>
+./frontend-config.json
+</path>
+<content>
+{
+  "apiEndpoint": "https://api.example.com",
+  "theme": {
+    "primaryColor": "#007bff",
+    "secondaryColor": "#6c757d",
+    "fontFamily": "Arial, sans-serif"
+  },
+  "features": {
+    "darkMode": true,
+    "notifications": true,
+    "analytics": false
+  },
+  "version": "1.0.0"
+}
+</content>
+</write_to_file>
+
+<write_to_file>
+<path>
+./backend-config.yaml
+</path>
+<content>
+database:
+  host: localhost
+  port: 5432
+  name: myapp_db
+  user: admin
+
+server:
+  port: 3000
+  environment: development
+  logLevel: debug
+
+externalServices:
+  emailProvider: sendgrid
+  storageProvider: aws-s3
+</content>
+</write_to_file>
+
+## Example 3: Asking a follow-up question
+
+I've analyzed the project structure, but I need more information to proceed. Let me ask the user for clarification.
+
+<ask_followup_question>
+<question>
+Which specific feature would you like me to implement in the example.py file?
+</question>
+</ask_followup_question>
 `