diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index eba6f3e..ffaf99e 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -69,6 +69,7 @@ export class AnthropicHandler implements ApiHandler {
}),
// tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
// tool_choice: { type: "auto" },
+ // tools: tools,
stream: true,
},
(() => {
diff --git a/src/core/ClaudeDev.ts b/src/core/ClaudeDev.ts
index dca0b1e..bdc60b5 100644
--- a/src/core/ClaudeDev.ts
+++ b/src/core/ClaudeDev.ts
@@ -1285,6 +1285,7 @@ ${this.customInstructions.trim()}
this.userMessageContentReady = true
}
// console.log("no more content blocks to stream! this shouldn't happen?")
+ this.presentAssistantMessageLocked = false
return
//throw new Error("No more content blocks to stream! This shouldn't happen...") // remove and just return after testing
}
@@ -1403,6 +1404,7 @@ ${this.customInstructions.trim()}
if (!relPath || !newContent) {
// checking for newContent ensure relPath is complete
// wait so we can determine if it's a new file or editing an existing file
+ console.log("no relpath or content")
break
}
// Check if file exists using cached map or fs.access
@@ -2171,31 +2173,36 @@ ${this.customInstructions.trim()}
break
}
- this.presentAssistantMessageLocked = false
+ /*
+ seeing out of bounds is fine, it means that the next too call is being built up and ready to add to assistantMessageContent to present.
+ When you see the UI inactive during this, it means that a tool is breaking without presenting any UI. For example the write_to_file tool was breaking when relpath was undefined, and for invalid relpath it never presented UI.
+
+ */
+
if (!block.partial) {
// block is finished streaming and executing
- if (
- this.currentStreamingContentIndex === this.assistantMessageContent.length - 1 &&
- this.didCompleteReadingStream
- ) {
+ if (this.currentStreamingContentIndex === this.assistantMessageContent.length - 1) {
// its okay that we increment if !didCompleteReadingStream, it'll just return bc out of bounds and as streaming continues it will call presentAssitantMessage if a new block is ready. if streaming is finished then we set userMessageContentReady to true when out of bounds. This gracefully allows the stream to continue on and all potential content blocks be presented.
// last block is complete and it is finished executing
this.userMessageContentReady = true // will allow pwaitfor to continue
- } else {
- // call next block if it exists (if not then read stream will call it when its ready)
- this.currentStreamingContentIndex++ // need to increment regardless, so when read stream calls this function again it will be streaming the next block
- if (this.currentStreamingContentIndex < this.assistantMessageContent.length) {
- // there are already more content blocks to stream, so we'll call this function ourselves
- // await this.presentAssistantContent()
- this.presentAssistantMessage()
- return
- }
+ }
+
+ // call next block if it exists (if not then read stream will call it when its ready)
+ this.currentStreamingContentIndex++ // need to increment regardless, so when read stream calls this function again it will be streaming the next block
+
+ if (this.currentStreamingContentIndex < this.assistantMessageContent.length) {
+ // there are already more content blocks to stream, so we'll call this function ourselves
+ // await this.presentAssistantContent()
+ this.presentAssistantMessageLocked = false
+ this.presentAssistantMessage()
+ return
}
}
// block is partial, but the read stream may have finished
if (this.presentAssistantMessageHasPendingUpdates) {
this.presentAssistantMessage()
}
+ this.presentAssistantMessageLocked = false
}
// streaming
@@ -2316,7 +2323,13 @@ ${this.customInstructions.trim()}
textContent.content = textContentLines.join("\n")
+ const prevLength = this.assistantMessageContent.length
+
this.assistantMessageContent = [textContent, ...toolCalls]
+
+ if (this.assistantMessageContent.length > prevLength) {
+ this.userMessageContentReady = false // new content we need to present, reset to false in case previous content set this to true
+ }
}
async recursivelyMakeClaudeRequests(
@@ -2501,6 +2514,27 @@ ${this.customInstructions.trim()}
// }
// )
await this.addToApiConversationHistory({ role: "assistant", content: apiContentBlocks })
+
+ // incase the content blocks finished
+
+ // it may be the api stream finished after the last parsed content block was executed, so we are able to detect out of bounds and set userMessageContentReady to true (not you should not call presentAssistantMessage since if the last block is completed it will be presented again)
+
+ const completeBlocks = this.assistantMessageContent.filter((block) => !block.partial) // if there are any partial blocks after the stream ended we can consider them invalid
+
+ // console.log(
+ // "checking userMessageContentReady",
+ // this.userMessageContentReady,
+ // this.currentStreamingContentIndex,
+ // this.assistantMessageContent.length,
+ // completeBlocks.length
+ // )
+
+ if (this.currentStreamingContentIndex >= completeBlocks.length) {
+ console.log("setting userMessageContentReady to true")
+ this.userMessageContentReady = true
+ //throw new Error("No more content blocks to stream! This shouldn't happen...") // remove and just return after testing
+ }
+
await pWaitFor(() => this.userMessageContentReady)
const recDidEndLoop = await this.recursivelyMakeClaudeRequests(this.userMessageContent)
diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts
index fb738ea..8361b48 100644
--- a/src/core/prompts/system.ts
+++ b/src/core/prompts/system.ts
@@ -36,7 +36,6 @@ RULES
- Before using the execute_command tool, you must first think about the SYSTEM INFORMATION context provided to understand the user's environment and tailor your commands to ensure they are compatible with their system. You must also consider if the command you need to run should be executed in a specific directory outside of the current working directory '${cwd.toPosix()}', and if so prepend with \`cd\`'ing into that directory && then executing the command (as one command since you are stuck operating from '${cwd.toPosix()}'). For example, if you needed to run \`npm install\` in a project outside of '${cwd.toPosix()}', you would need to prepend with a \`cd\` i.e. pseudocode for this would be \`cd (path to project) && (command, in this case npm install)\`.
- When using the search_files tool, craft your regex patterns carefully to balance specificity and flexibility. Based on the user's task you may use it to find code patterns, TODO comments, function definitions, or any text-based information across the project. The results include context, so analyze the surrounding code to better understand the matches. Leverage the search_files tool in combination with other tools for more comprehensive analysis. For example, use it to find specific code patterns, then use read_file to examine the full context of interesting matches before using write_to_file to make informed changes.
- When creating a new project (such as an app, website, or any software project), organize all new files within a dedicated project directory unless the user specifies otherwise. Use appropriate file paths when writing files, as the write_to_file tool will automatically create any necessary directories. Structure the project logically, adhering to best practices for the specific type of project being created. Unless otherwise specified, new projects should be easily run without additional setup, for example most projects can be built in HTML, CSS, and JavaScript - which you can open in a browser.
-- You must try to use multiple tools in one request when possible. For example if you were to create a website, you would use the write_to_file tool to create the necessary files with their appropriate contents all at once. Or if you wanted to analyze a project, you could use the read_file tool multiple times to look at several key files. This will help you accomplish the user's task more efficiently.
- Be sure to consider the type of project (e.g. Python, JavaScript, web application) when determining the appropriate structure and files to include. Also consider what files may be most relevant to accomplishing the task, for example looking at a project's manifest file would help you understand the project's dependencies, which you could incorporate into any code you write.
- When making changes to code, always consider the context in which the code is being used. Ensure that your changes are compatible with the existing codebase and that they follow the project's coding standards and best practices.
- Do not ask for more information than necessary. Use the tools provided to accomplish the user's request efficiently and effectively. When you've completed your task, you must use the attempt_completion tool to present the result to the user. The user may provide feedback, which you can use to make improvements and try again.
@@ -52,18 +51,6 @@ RULES
====
-OBJECTIVE
-
-You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically.
-
-1. Analyze the user's task and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order.
-2. Work through these goals sequentially, utilizing available tools as necessary. Each goal should correspond to a distinct step in your problem-solving process. It is okay for certain steps to take multiple iterations, i.e. if you need to create many files, it's okay to create a few files at a time as each subsequent iteration will keep you informed on the work completed and what's remaining.
-3. Remember, you have extensive capabilities with access to a wide range of tools that can be used in powerful and clever ways as necessary to accomplish each goal. Before calling a tool, do some analysis within tags. First, analyze the file structure provided in environment_details to gain context and insights for proceeding effectively. Then, think about which of the provided tools is the most relevant tool to accomplish the user's task. Next, go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, close the thinking tag and proceed with the tool call. BUT, if one of the values for a required parameter is missing, DO NOT invoke the function (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided.
-4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built.
-5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance.
-
-====
-
SYSTEM INFORMATION
Operating System: ${osName()}
@@ -73,34 +60,42 @@ Current Working Directory: ${cwd.toPosix()}
====
-INSTRUCTIONS FOR FORMULATING RESPONSE
+OBJECTIVE
-You must respond to the user's message with at least one tool call. When formulating your response, follow these guidelines:
+You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically.
-1. You might begin your response explaining your thoughts, analysis, plan of action, etc.
-2. Place ALL tool calls at the END of your message.
-3. You can use multiple tool calls if needed, but they should all be grouped together at the end of your message.
+1. Analyze the user's task and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order.
+2. Work through these goals sequentially, utilizing available tools as necessary. Each goal should correspond to a distinct step in your problem-solving process. It is okay for certain steps to take multiple iterations, i.e. if you need to create many files, it's okay to create a few files at a time as each subsequent iteration will keep you informed on the work completed and what's remaining.
+3. Remember, you have extensive capabilities with access to a wide range of tools that can be used in powerful and clever ways as necessary to accomplish each goal. Before calling a tool, do some analysis within tags. First, analyze the file structure provided in environment_details to gain context and insights for proceeding effectively. Then, think about which of the provided tools is the most relevant tool to accomplish the user's task. Next, go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, close the thinking tag and proceed with the tool call. BUT, if one of the values for a required parameter is missing, DO NOT invoke the tool (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided.
+4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built.
+5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance.
-Here's the general structure your responses should follow:
+====
+
+TOOL USE
+
+# Formulating Your Response
+
+You must respond to the user's message with at least one tool use. When formulating your response, place tool calls at the end of your message. Here is the general structure your responses should follow:
\`\`\`
...Your thoughts...
-[Tool Call 1]
-[Tool Call 2 if needed]
-[Tool Call 3 if needed]
+[Tool Use 1]
+[Tool Use 2 if needed]
+[Tool Use 3 if needed]
...
\`\`\`
Remember:
- Choose the most appropriate tool(s) based on the task and the tool descriptions provided.
-- Formulate your tool calls using the XML format specified for each tool.
+- Formulate your tool uses using the XML format specified for each tool.
- Provide clear explanations about what actions you're taking and why you're using particular tools.
-- After making tool calls, you will receive the results of these calls in the user's next response. These results will provide you with the necessary information to continue your task or make further decisions.
+- After making tool uses, you will receive the results in the user's next response. These results will provide you with the necessary information to continue your task or make further decisions.
-# Tool Calls Formatting
+# Tool Use Formatting
-Tool calls are formatted with the name of the tool enclosed in XML tags on their own line.
+Tool uses are formatted with the name of the tool enclosed in XML tags on their own line.
Each parameter is defined within its own set of XML tags, also each on their own line.
Example:
@@ -111,31 +106,38 @@ value1
value2
-Ensure that each tool call follows this structure for consistent parsing and execution.
+Ensure that each tool use follows this structure for consistent parsing and execution.
# Tool Descriptions
## execute_command
+Description: Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: ${cwd.toPosix()}
+Parameters:
+- command: (required) The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
+Usage:
Your command here
-Description: Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: ${cwd.toPosix()}
-Parameters:
-- command: (required) The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
## read_file
+Description: Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
+Parameters:
+- path: (required) The path of the file to read (relative to the current working directory ${cwd.toPosix()})
+Usage:
File path here
-Description: Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
-Parameters:
-- path: (required) The path of the file to read (relative to the current working directory ${cwd.toPosix()})
## write_to_file
+Description: Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.
+Parameters:
+- path: (required) The path of the file to write to (relative to the current working directory ${cwd.toPosix()})
+- content: (required) The full content to write to the file.
+Usage:
File path here
@@ -144,12 +146,14 @@ File path here
Your file content here
-Description: Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.
-Parameters:
-- path: (required) The path of the file to write to (relative to the current working directory ${cwd.toPosix()})
-- content: (required) The full content to write to the file.
## search_files
+Description: Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.
+Parameters:
+- path: (required) The path of the directory to search in (relative to the current working directory ${cwd.toPosix()}). This directory will be recursively searched.
+- regex: (required) The regular expression pattern to search for. Uses Rust regex syntax.
+- file_pattern: (optional) Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).
+Usage:
Directory path here
@@ -161,13 +165,13 @@ Your regex pattern here
Optional file pattern here
-Description: Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.
-Parameters:
-- path: (required) The path of the directory to search in (relative to the current working directory ${cwd.toPosix()}). This directory will be recursively searched.
-- regex: (required) The regular expression pattern to search for. Uses Rust regex syntax.
-- file_pattern: (optional) Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).
## list_files
+Description: List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.
+Parameters:
+- path: (required) The path of the directory to list contents for (relative to the current working directory ${cwd.toPosix()})
+- recursive: (optional) Whether to list files recursively. Use true for recursive listing, false or omit for top-level only.
+Usage:
Directory path here
@@ -176,47 +180,50 @@ Directory path here
true or false (optional)
-Description: List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.
-Parameters:
-- path: (required) The path of the directory to list contents for (relative to the current working directory ${cwd.toPosix()})
-- recursive: (optional) Whether to list files recursively. Use true for recursive listing, false or omit for top-level only.
## list_code_definition_names
+Description: Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.
+Parameters:
+- path: (required) The path of the directory (relative to the current working directory ${cwd.toPosix()}) to list top level source code definitions for.
+Usage:
Directory path here
-
-Description: Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.
-Parameters:
-- path: (required) The path of the directory (relative to the current working directory ${cwd.toPosix()}) to list top level source code definitions for.${
+${
supportsImages
? `
## inspect_site
-
+Description: Captures a screenshot and console logs of the initial state of a website. This tool navigates to the specified URL, takes a screenshot of the entire page as it appears immediately after loading, and collects any console logs or errors that occur during page load. It does not interact with the page or capture any state changes after the initial load.
+Parameters:
+- url: (required) The URL of the site to inspect. This should be a valid URL including the protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.)
+Usage:
URL of the site to inspect
-
-Description: Captures a screenshot and console logs of the initial state of a website. This tool navigates to the specified URL, takes a screenshot of the entire page as it appears immediately after loading, and collects any console logs or errors that occur during page load. It does not interact with the page or capture any state changes after the initial load.
-Parameters:
-- url: (required) The URL of the site to inspect. This should be a valid URL including the protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.)`
+`
: ""
}
## ask_followup_question
+Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.
+Parameters:
+- question: (required) The question to ask the user. This should be a clear, specific question that addresses the information you need.
+Usage:
Your question here
-Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.
-Parameters:
-- question: (required) The question to ask the user. This should be a clear, specific question that addresses the information you need.
## attempt_completion
+Description: Once you've completed the task, use this tool to present the result to the user. Optionally you may provide a CLI command to showcase the result of your work, but avoid using commands like 'echo' or 'cat' that merely print text. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
+Parameters:
+- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.
+- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use 'open index.html' to display a created website. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
+Usage:
Your final result description here
@@ -225,11 +232,6 @@ Your final result description here
Optional command to demonstrate result
-Description: Once you've completed the task, use this tool to present the result to the user. Optionally you may provide a CLI command to showcase the result of your work, but avoid using commands like 'echo' or 'cat' that merely print text. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
-Parameters:
-- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.
-- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use 'open index.html' to display a created website. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.
-
# Tool Calls Examples
@@ -295,3 +297,65 @@ Which specific feature would you like me to implement in the example.py file?
`
+
+const adf = `
+
+# Effective Tool Use
+
+When using tools to accomplish tasks, follow these guidelines for effective and informed decision-making:
+
+- Prioritize an iterative approach, especially for tasks that involve sequential steps or depend on the outcomes of previous actions. Make tool uses in a logical sequence, waiting for results in the user's message when necessary before proceeding.
+- While you can make multiple tool uses in a single response when it's efficient and logical to do so, always ensure each use is well-informed and necessary. Avoid making premature or speculative tool uses.
+- After receiving the result of a tool use, analyze it within tags. Use this analysis to inform your next steps and subsequent tool uses.
+- For tasks that require multiple steps, make decisions sequentially. Each tool use should be informed by the results of previous uses and your analysis of those results.
+- Be prepared to adjust your approach based on the results of each tool use. If a tool use reveals unexpected information, reassess your plan and adapt accordingly.
+
+## Example of Effective Tool Calling
+
+1. Analyze task and formulate initial plan
+
+The task requires improving a website's appearance. First, I need to see the current state of the site and check its CSS.
+
+
+
+
+http://localhost:3000
+
+
+
+
+
+src/styles/main.css
+
+
+
+2. After receiving screenshot and CSS content, analyze and plan changes
+
+The screenshot shows [analysis of the site's current appearance]. The current CSS reveals [analysis of CSS structure and styles]. To improve it, I should modify the following aspects: [list of improvements]. I'll now update the CSS file.
+
+
+
+
+src/styles/main.css
+
+
+/* Updated CSS content */
+
+
+
+3. Present the result to the user
+
+Now that I've updated the CSS, I should present the result to the user and provide a command to open the site for them to see the changes.
+
+
+
+
+I have successfully updated the CSS to improve the website's appearance. The changes have been applied to the src/styles/main.css file. You can now view the updated website to see the improvements.
+
+
+open http://localhost:3000
+
+
+
+By following this approach, you make informed decisions at each step, using multiple tool calls when efficient, but always ensuring each action is based on current information and logical progression of the task.
+`