From 9d62a7bb7738137f748a571dd5196f1e84fa2346 Mon Sep 17 00:00:00 2001 From: RaySinner <118297374+RaySinner@users.noreply.github.com> Date: Tue, 7 Jan 2025 01:23:22 +0300 Subject: [PATCH] feat(vscode-lm): implement VS Code Language Models provider --- docs/vscode_lm_api_docs.md | 1319 +++++++++++++++++ package.json | 19 +- src/api/index.ts | 60 +- src/api/providers/vscode-lm.ts | 569 +++++++ src/api/transform/vscode-lm-format.ts | 209 +++ src/core/webview/ClineProvider.ts | 52 +- src/extension.ts | 2 +- src/shared/ExtensionMessage.ts | 58 +- src/shared/WebviewMessage.ts | 93 +- src/shared/api.ts | 68 +- src/shared/vsCodeSelectorUtils.ts | 14 + src/types/vscode.d.ts | 86 ++ .../src/components/settings/ApiOptions.tsx | 69 +- webview-ui/src/types/vscode.d.ts | 8 + 14 files changed, 2473 insertions(+), 153 deletions(-) create mode 100644 docs/vscode_lm_api_docs.md create mode 100644 src/api/providers/vscode-lm.ts create mode 100644 src/api/transform/vscode-lm-format.ts create mode 100644 src/shared/vsCodeSelectorUtils.ts create mode 100644 src/types/vscode.d.ts create mode 100644 webview-ui/src/types/vscode.d.ts diff --git a/docs/vscode_lm_api_docs.md b/docs/vscode_lm_api_docs.md new file mode 100644 index 0000000..471873f --- /dev/null +++ b/docs/vscode_lm_api_docs.md @@ -0,0 +1,1319 @@ +--- +# DO NOT TOUCH — Managed by doc writer +ContentId: 9bdc3d4e-e6ba-43d3-bd09-2e127cb63ce7 +DateApproved: 12/11/2024 + +# Summarize the whole topic in less than 300 characters for SEO purpose +MetaDescription: A guide to adding AI-powered features to a VS Code extension by using language models and natural language understanding. +--- + +# Language Model API + +The Language Model API enables you to [use the Language Model](/api/references/vscode-api#lm) and integrate AI-powered features and natural language processing in your Visual Studio Code extension. + +You can use the Language Model API in different types of extensions. A typical use for this API is in [chat extensions](/api/extension-guides/chat), where you use a language model to interpret the user's request and help provide an answer. However, the use of the Language Model API is not limited to this scenario. You might use a language model in a [language](/api/language-extensions/overview) or [debugger](/api/extension-guides/debugger-extension) extension, or as part of a [command](/api/extension-guides/command) or [task](/api/extension-guides/task-provider) in a custom extension. For example, the Rust extension might use the Language Model to offer default names to improve its rename experience. + +The process for using the Language Model API consists of the following steps: + +1. Build the language model prompt +1. Send the language model request +1. Interpret the response + +The following sections provide more details on how to implement these steps in your extension. + +## Links + +- [Chat extension sample](https://github.com/microsoft/vscode-extension-samples/tree/main/chat-sample) +- [LanguageModels API](/api/references/vscode-api#lm) +- [@vscode/prompt-tsx npm package](https://www.npmjs.com/package/@vscode/prompt-tsx) + +## Build the language model prompt + +To interact with a language model, extensions should first craft their prompt, and then send a request to the language model. You can use prompts to provide instructions to the language model on the broad task that you're using the model for. Prompts can also define the context in which user messages are interpreted. 
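+
+For example, a minimal sketch like the following pairs broad task instructions with context gathered from the active editor (the `buildReviewPrompt` helper and its wording are illustrative, and it uses the `LanguageModelChatMessage` class described later in this section):
+
+```typescript
+import * as vscode from 'vscode';
+
+// Minimal sketch: combine task instructions, editor context, and the user's request.
+// Assumes an active text editor; falls back to an empty context string otherwise.
+function buildReviewPrompt(): vscode.LanguageModelChatMessage[] {
+  const editor = vscode.window.activeTextEditor;
+  const fileContents = editor ? editor.document.getText() : '';
+
+  return [
+    // Broad instructions that frame the task for the model
+    vscode.LanguageModelChatMessage.User('You are a careful reviewer. Point out potential bugs and suggest small, concrete improvements.'),
+    // Context that shapes how the user's request is interpreted
+    vscode.LanguageModelChatMessage.User(`Here is the file that is currently open in the editor:\n${fileContents}`),
+    // The user's actual request
+    vscode.LanguageModelChatMessage.User('Review this code.')
+  ];
+}
+```
+
+Keeping the instructions, the context, and the request in separate messages makes it easier to reuse the same instructions across different user requests.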
+ +The Language Model API supports two types of messages when building the language model prompt: + +- **User** - used for providing instructions and the user's request +- **Assistant** - used for adding the history of previous language model responses as context to the prompt + +> **Note**: Currently, the Language Model API doesn't support the use of system messages. + +You can use two approaches for building the language model prompt: + +- `LanguageModelChatMessage` - create the prompt by providing one or more messages as strings. You might use this approach if you're just getting started with the Language Model API. +- [`@vscode/prompt-tsx`](https://www.npmjs.com/package/@vscode/prompt-tsx) - declare the prompt by using the TSX syntax. + +You can use the `prompt-tsx` library if you want more control over how the language model prompt is composed. For example, the library can help with dynamically adapting the length of the prompt to each language model's context window size. Learn more about [`@vscode/prompt-tsx`](https://www.npmjs.com/package/@vscode/prompt-tsx) or explore the [chat extension sample](https://github.com/microsoft/vscode-extension-samples/tree/main/chat-sample) to get started. + +To learn more about the concepts of prompt engineering, we suggest reading OpenAI's excellent [Prompt engineering guidelines](https://platform.openai.com/docs/guides/prompt-engineering). + +>**Tip:** take advantage of the rich VS Code extension API to get the most relevant context and include it in your prompt. For example, to include the contents of the active file in the editor. + +### Use the `LanguageModelChatMessage` class + +The Language Model API provides the `LanguageModelChatMessage` class to represent and create chat messages. You can use the `LanguageModelChatMessage.User` or `LanguageModelChatMessage.Assistant` methods to create user or assistant messages respectively. + +In the following example, the first message provides context for the prompt: + +- The persona used by the model in its replies (in this case, a cat) +- The rules the model should follow when generating responses (in this case, explaining computer science concepts in a funny manner by using cat metaphors) + +The second message then provides the specific request or instruction coming from the user. It determines the specific task to be accomplished, given the context provided by the first message. + +```typescript +const craftedPrompt = [ + vscode.LanguageModelChatMessage.User('You are a cat! Think carefully and step by step like a cat would. Your job is to explain computer science concepts in the funny manner of a cat, using cat metaphors. Always start your response by stating what concept you are explaining. Always include code samples.'), + vscode.LanguageModelChatMessage.User('I want to understand recursion') +]; +``` + +## Send the language model request + +Once you've built the prompt for the language model, you first select the language model you want to use with the [`selectChatModels`](/api/references/vscode-api#lm.selectChatModels) method. This method returns an array of language models that match the specified criteria. If you are implementing a chat participant, we recommend that you instead use the model that is passed as part of the `request` object in your chat request handler. This ensures that your extension respects the model that the user chose in the chat model dropdown. 
Then, you send the request to the language model by using the [`sendRequest`](/api/references/vscode-api#LanguageModelChat) method. + +To select the language model, you can specify the following properties: `vendor`, `id`, `family`, or `version`. Use these properties to either broadly match all models of a given vendor or family, or select one specific model by its ID. Learn more about these properties in the [API reference](/api/references/vscode-api#LanguageModelChat). + +> **Note**: Currently, `gpt-4o`, `gpt-4o-mini`, `o1-preview`, `o1-mini`, `claude-3.5-sonnet`, `gemini-1.5-pro` are supported for the language model family. If you are unsure what model to use, we recommend `gpt-4o` for it's performance and quality. For interactions directly in the editor, we recommend `gpt-4o-mini` for it's performance. + +If there are no models that match the specified criteria, the `selectChatModels` method returns an empty array. Your extension must appropriately handle this case. + +The following example shows how to select all `Copilot` models, regardless of the family or version: + +```typescript +const models = await vscode.lm.selectChatModels({ + vendor: 'copilot' +}); + +// No models available +if (models.length === 0) { + // TODO: handle the case when no models are available +} +``` + +> **Important**: Copilot's language models require consent from the user before an extension can use them. Consent is implemented as an authentication dialog. Because of that, `selectChatModels` should be called as part of a user-initiated action, such as a command. + +After you select a model, you can send a request to the language model by invoking the [`sendRequest`](/api/references/vscode-api#LanguageModelChat) method on the model instance. You pass the [prompt](#build-the-language-model-prompt) you crafted earlier, along with any additional options, and a cancellation token. + +When you make a request to the Language Model API, the request might fail. For example, because the model doesn't exist, or the user didn't give consent to use the Language Model API, or because quota limits are exceeded. Use `LanguageModelError` to distinguish between different types of errors. + +The following code snippet shows how to make a language model request: + +```typescript +try { + const [model] = await vscode.lm.selectChatModels({ vendor: 'copilot', family: 'gpt-4o' }); + const request = model.sendRequest(craftedPrompt, {}, token); +} catch (err) { + // Making the chat request might fail because + // - model does not exist + // - user consent not given + // - quota limits were exceeded + if (err instanceof vscode.LanguageModelError) { + console.log(err.message, err.code, err.cause); + if (err.cause instanceof Error && err.cause.message.includes('off_topic')) { + stream.markdown(vscode.l10n.t('I\'m sorry, I can only explain computer science concepts.')); + } + } else { + // add other error handling logic + throw err; + } +} +``` + +## Interpret the response + +After you've sent the request, you have to process the response from the language model API. Depending on your usage scenario, you can pass the response directly on to the user, or you can interpret the response and perform extra logic. + +The response ([`LanguageModelChatResponse`](/api/references/vscode-api#LanguageModelChatResponse)) from the Language Model API is streaming-based, which enables you to provide a smooth user experience. 
For example, by reporting results and progress continuously when you use the API in combination with the [Chat API](/api/extension-guides/chat). + +Errors might occur while processing the streaming response, such as network connection issues. Make sure to add appropriate error handling in your code to handle these errors. + +The following code snippet shows how an extension can register a command, which uses the language model to change all variable names in the active editor with funny cat names. Notice that the extension streams the code back to the editor for a smooth user experience. + +```typescript + vscode.commands.registerTextEditorCommand('cat.namesInEditor', async (textEditor: vscode.TextEditor) => { + // Replace all variables in active editor with cat names and words + + const [model] = await vscode.lm.selectChatModels({ vendor: 'copilot', family: 'gpt-4o' }); + let chatResponse: vscode.LanguageModelChatResponse | undefined; + + const text = textEditor.document.getText(); + + const messages = [ + vscode.LanguageModelChatMessage.User(`You are a cat! Think carefully and step by step like a cat would. + Your job is to replace all variable names in the following code with funny cat variable names. Be creative. IMPORTANT respond just with code. Do not use markdown!`), + vscode.LanguageModelChatMessage.User(text) + ]; + + try { + chatResponse = await model.sendRequest(messages, {}, new vscode.CancellationTokenSource().token); + } catch (err) { + if (err instanceof vscode.LanguageModelError) { + console.log(err.message, err.code, err.cause) + } else { + throw err; + } + return; + } + + // Clear the editor content before inserting new content + await textEditor.edit(edit => { + const start = new vscode.Position(0, 0); + const end = new vscode.Position(textEditor.document.lineCount - 1, textEditor.document.lineAt(textEditor.document.lineCount - 1).text.length); + edit.delete(new vscode.Range(start, end)); + }); + + try { + // Stream the code into the editor as it is coming in from the Language Model + for await (const fragment of chatResponse.text) { + await textEditor.edit(edit => { + const lastLine = textEditor.document.lineAt(textEditor.document.lineCount - 1); + const position = new vscode.Position(lastLine.lineNumber, lastLine.text.length); + edit.insert(position, fragment); + }); + } + } catch (err) { + // async response stream may fail, e.g network interruption or server side error + await textEditor.edit(edit => { + const lastLine = textEditor.document.lineAt(textEditor.document.lineCount - 1); + const position = new vscode.Position(lastLine.lineNumber, lastLine.text.length); + edit.insert(position, (err).message); + }); + } +}); +``` + +## Considerations + +### Model availability + +We don't expect specific models to stay supported forever. When you reference a language model in your extension, make sure to take a "defensive" approach when sending requests to that language model. This means that you should gracefully handle cases where you don't have access to a particular model. + +### Choosing the appropriate model + +Extension authors can choose which model is the most appropriate for their extension. We recommend using `gpt-4o` for its performance and quality. To get a full list of available models, you can use this code snippet: +```typescript +const allModels = await vscode.lm.selectChatModels(MODEL_SELECTOR); +``` +> **Note**: The recommended GPT-4o model has a limit of `64K` tokens. 
The returned model object from the `selectChatModels` call has a `maxInputTokens` attribute that shows the token limit. These limits will be expanded as we learn more about how extensions are using the language models. + +### Rate limiting + +Extensions should responsibly use the language model and be aware of rate limiting. VS Code is transparent to the user regarding how extensions are using language models and how many requests each extension is sending and how that influences their respective quotas. + +Extensions should not use the Language Model API for integration tests due to rate-limitations. Internally, VS Code uses a dedicated non-production language model for simulation testing, and we are currently thinking how to provide a scalable language model testing solution for extensions. + +## Testing your extension + +The responses that the Language Model API provides are nondeterministic, which means that you might get a different response for an identical request. This behavior can be challenging for testing your extension. + +The part of the extension for building prompts and interpreting language model responses is deterministic, and can thus be unit tested without using an actual language model. However, interacting and getting responses from the language model itself, is nondeterministic and can’t be easily tested. Consider designing your extension code in a modular way to enable you to unit test the specific parts that can be tested. + +## Publishing your extension + +Once you have created your AI extension, you can publish your extension to the Visual Studio Marketplace: + +- Before publishing to the VS Marketplace we recommend that you read the [Microsoft AI tools and practices guidelines](https://www.microsoft.com/en-us/ai/tools-practices). These guidelines provide best practices for the responsible development and use of AI technologies. +- By publishing to the VS Marketplace, your extension is adhering to the [GitHub Copilot extensibility acceptable development and use policy](https://docs.github.com/en/early-access/copilot/github-copilot-extensibility-platform-partnership-plugin-acceptable-development-and-use-policy). +- If your extension already contributes functionality other than using the Language Model API, we recommend that you do not introduce an extension dependency on GitHub Copilot in the [extension manifest](/api/references/extension-manifest). This ensures that extension users that do not use GitHub Copilot can use the non language model functionality without having to install GitHub Copilot. Make sure to have appropriate error handling when accessing language models for this case. +- Upload to the Marketplace as described in [Publishing Extension](https://code.visualstudio.com/api/working-with-extensions/publishing-extension). + +## Related content + +- [Build a VS Code chat extension](/api/extension-guides/chat) +- [Learn more about @vscode/prompt-tsx](https://www.npmjs.com/package/@vscode/prompt-tsx) +- [Chat extension sample](https://github.com/microsoft/vscode-extension-samples/tree/main/chat-sample) +- [GitHub Copilot Trust Center](https://resources.github.com/copilot-trust-center/) + +--- +# DO NOT TOUCH — Managed by doc writer +ContentId: d9038699-4ffe-485b-b40a-b1260a9973ad +DateApproved: 12/11/2024 + +# Summarize the whole topic in less than 300 characters for SEO purpose +MetaDescription: Tutorial that walks you through creating a VS Code extension that uses the Language Model API to generate AI-powered code annotations. 
+--- + +# Tutorial: Generate AI-powered code annotations by using the Language Model API + +In this tutorial, You'll learn how to create a VS Code extension to build an AI-powered Code Tutor. You use the Language Model (LM) API to generate suggestions to improve your code and take advantage of the VS Code extension APIs to integrate it seamlessly in the editor as inline annotations that the user can hover over for more information. After you complete this tutorial, you will know how to implement custom AI features in VS Code. + +![VS Code displaying custom annotations from GitHub Copilot as annotations](./images/lm-api/code-tutor-annotations-gif.gif) + +## Prerequisites + +You'll need the following tools and accounts to complete this tutorial: + +- [Visual Studio Code](https://code.visualstudio.com/download) +- [GitHub Copilot](https://marketplace.visualstudio.com/items?itemName=GitHub.copilot-chat) +- [Node.js](https://nodejs.org/en/download/) + +## Scaffold out the extension + +First, use Yeoman and VS Code Extension Generator to scaffold a TypeScript or JavaScript project ready for development. + +```bash +npx --package yo --package generator-code -- yo code +``` + +Select the following options to complete the new extension wizard... + +```bash +# ? What type of extension do you want to create? New Extension (TypeScript) +# ? What's the name of your extension? Code Tutor + +### Press to choose default for all options below ### + +# ? What's the identifier of your extension? code-tutor +# ? What's the description of your extension? LEAVE BLANK +# ? Initialize a git repository? Yes +# ? Bundle the source code with webpack? No +# ? Which package manager to use? npm + +# ? Do you want to open the new folder with Visual Studio Code? Open with `code` +``` + +## Modify the package.json file to include the correct commands + +The scaffolded project includes a single "helloWorld" command in the `package.json` file. This command is what shows up in the Command Palette when your extension is installed. + +```json +"contributes": { + "commands": [ + { + "command": "code-tutor.helloWorld", + "title": "Hello World" + } + ] +} +``` + +Since we're building a Code Tutor extension that will be adding annotations to lines, we'll need a command to allow the user to toggle these annotations on and off. Update the `command` and `title` properties: + +```json +"contributes": { + "commands": [ + { + "command": "code-tutor.annotate", + "title": "Toggle Tutor Annotations" + } + ] +} +``` + +While the `package.json` defines the commands and UI elements for an extension, the `src/extension.ts` file is where you put the code that should be executed for those commands. + +Open the `src/extension.ts` file and change the `registerCommand` method so that it matches the `command` property in the `package.json` file. + +```ts +const disposable = vscode.commands.registerCommand('code-tutor.annotate', () => { +``` + +Run the extension by pressing `kbstyle(F5)`. This will open a new VS Code instance with the extension installed. Open the Command Palette by pressing `kb(workbench.action.showCommands)`, and search for "tutor". You should see the "Tutor Annotations" command. + +![The "Toggle Tutor Annotations" command in the VS Code Command Palette](./images/lm-api/tutor-command-command-palette.png) + +If you select the "Tutor Annotations" command, you'll see a "Hello World" notification message. 
+ +![The message 'Hello World from Code Tutor' displayed in a notification](./images/lm-api/code-tutor-hello-world.png) + +## Implement the "annotate" command + +To get our Code Tutor annotations working, we need to send it some code and ask it to provide annotations. We'll do this in three steps: + +1. Get the code with line numbers from the current tab the user has open. +2. Send that code to the Language Model API along with a custom prompt that instructs the model on how to provide annotations. +3. Parse the annotations and display them in the editor. + +### Step 1: Get the code with line numbers + +To get the code from the current tab, we need a reference to the tab that the user has open. We can get that by modifying the `registerCommand` method to be a `registerTextEditorCommand`. The difference between these two commands is that the latter gives us a reference to the tab that the user has open, called the `TextEditor`. + +```ts +const disposable = vscode.commands.registerTextEditorCommand('code-tutor.annotate', async (textEditor: vscode.TextEditor) => { +``` + +Now we can use the `textEditor` reference to get all of the code in the "viewable editor space". This is the code that can be seen on the screen - it does not include code that is either above or below what is in the viewable editor space. + +Add the following method directly above the `export function deactivate() { }` line at the bottom of the `extension.ts` file. + +```ts +function getVisibleCodeWithLineNumbers(textEditor: vscode.TextEditor) { + // get the position of the first and last visible lines + let currentLine = textEditor.visibleRanges[0].start.line; + const endLine = textEditor.visibleRanges[0].end.line; + + let code = ''; + + // get the text from the line at the current position. + // The line number is 0-based, so we add 1 to it to make it 1-based. + while (currentLine < endLine) { + code += `${currentLine + 1}: ${textEditor.document.lineAt(currentLine).text} \n`; + // move to the next line position + currentLine++; + } + return code; +} +``` + +This code uses the `visibleRanges` property of the TextEditor to get the position of the lines that are currently visible in the editor. It then starts with the first line position and moves to the last line position, adding each line of code to a string along with the line number. Finally, it returns the string that contains all the viewable code with line numbers. + +Now we can call this method from the `code-tutor.annotate` command. Modify the implementation of the command so that it looks like this: + +```ts +const disposable = vscode.commands.registerTextEditorCommand('code-tutor.annotate', async (textEditor: vscode.TextEditor) => { + + // Get the code with line numbers from the current editor + const codeWithLineNumbers = getVisibleCodeWithLineNumbers(textEditor); + +}); +``` + +### Step 2: Send code and prompt to language model API + +The next step is to call the GitHub Copilot language model and send it the user's code along with instructions to create the annotations. + +To do this, we first need to specify which chat model we want to use. We select 4o here because it is a fast and capable model for the kind of interaction we are building. 
+ +```ts +const disposable = vscode.commands.registerTextEditorCommand('code-tutor.annotate', async (textEditor: vscode.TextEditor) => { + + // Get the code with line numbers from the current editor + const codeWithLineNumbers = getVisibleCodeWithLineNumbers(textEditor); + + // select the 4o chat model + let [model] = await vscode.lm.selectChatModels({ + vendor: 'copilot', + family: 'gpt-4o', + }); +}); +``` + +We need instructions - or a "prompt" - that will tell the model to create the annotations and what format we want the response to be. Add the following code to the top of the file directly under the imports. + +```ts +const ANNOTATION_PROMPT = `You are a code tutor who helps students learn how to write better code. Your job is to evaluate a block of code that the user gives you and then annotate any lines that could be improved with a brief suggestion and the reason why you are making that suggestion. Only make suggestions when you feel the severity is enough that it will impact the readability and maintainability of the code. Be friendly with your suggestions and remember that these are students so they need gentle guidance. Format each suggestion as a single JSON object. It is not necessary to wrap your response in triple backticks. Here is an example of what your response should look like: + +{ "line": 1, "suggestion": "I think you should use a for loop instead of a while loop. A for loop is more concise and easier to read." }{ "line": 12, "suggestion": "I think you should use a for loop instead of a while loop. A for loop is more concise and easier to read." } +`; +``` + +This is a special prompt that instructs the language model on how to generate annotations. It also includes examples for how the model should format its response. These examples (also called, "multi-shot") are what enable us to define what the format the response will be so that we can parse it and display it as annotations. + +We pass messages to the model in an array. This array can contain as many messages as you like. In our case, it contains the prompt followed by the users code with line numbers. + +```ts +const disposable = vscode.commands.registerTextEditorCommand('code-tutor.annotate', async (textEditor: vscode.TextEditor) => { + + // Get the code with line numbers from the current editor + const codeWithLineNumbers = getVisibleCodeWithLineNumbers(textEditor); + + // select the 4o chat model + let [model] = await vscode.lm.selectChatModels({ + vendor: 'copilot', + family: 'gpt-4o', + }); + + // init the chat message + const messages = [ + vscode.LanguageModelChatMessage.User(ANNOTATION_PROMPT), + vscode.LanguageModelChatMessage.User(codeWithLineNumbers), + ]; +}); +``` + +To send the messages to the model, we need to first make sure the selected model is available. This handles cases where the extension is not ready or the user is not signed in to GitHub Copilot. Then we send the messages to the model. 
+ +```ts +const disposable = vscode.commands.registerTextEditorCommand('code-tutor.annotate', async (textEditor: vscode.TextEditor) => { + + // Get the code with line numbers from the current editor + const codeWithLineNumbers = getVisibleCodeWithLineNumbers(textEditor); + + // select the 4o chat model + let [model] = await vscode.lm.selectChatModels({ + vendor: 'copilot', + family: 'gpt-4o', + }); + + // init the chat message + const messages = [ + vscode.LanguageModelChatMessage.User(ANNOTATION_PROMPT), + vscode.LanguageModelChatMessage.User(codeWithLineNumbers), + ]; + + // make sure the model is available + if (model) { + + // send the messages array to the model and get the response + let chatResponse = await model.sendRequest(messages, {}, new vscode.CancellationTokenSource().token); + + // handle chat response + await parseChatResponse(chatResponse, textEditor); + } +}); +``` + +Chat responses come in as fragments. These fragments usually contain single words, but sometimes they contain just punctuation. In order to display annotations as the response streams in, we want to wait until we have a complete annotation before we display it. Because of the way we have instructed our model to return its response, we know that when we see a closing `}` we have a complete annotation. We can then parse the annotation and display it in the editor. + +Add the missing `parseChatResponse` function above the `getVisibleCodeWithLineNumbers` method in the `extension.ts` file. + +```ts +async function parseChatResponse(chatResponse: vscode.LanguageModelChatResponse, textEditor: vscode.TextEditor) { + let accumulatedResponse = ""; + + for await (const fragment of chatResponse.text) { + accumulatedResponse += fragment; + + // if the fragment is a }, we can try to parse the whole line + if (fragment.includes("}")) { + try { + const annotation = JSON.parse(accumulatedResponse); + applyDecoration(textEditor, annotation.line, annotation.suggestion); + // reset the accumulator for the next line + accumulatedResponse = ""; + } + catch (e) { + // do nothing + } + } + } +} +``` + +We need one last method to actually display the annotations. VS Code calls these "decorations". Add the following method above the `parseChatResponse` method in the `extension.ts` file. + +```ts +function applyDecoration(editor: vscode.TextEditor, line: number, suggestion: string) { + + const decorationType = vscode.window.createTextEditorDecorationType({ + after: { + contentText: ` ${suggestion.substring(0, 25) + "..."}`, + color: "grey", + }, + }); + + // get the end of the line with the specified line number + const lineLength = editor.document.lineAt(line - 1).text.length; + const range = new vscode.Range( + new vscode.Position(line - 1, lineLength), + new vscode.Position(line - 1, lineLength), + ); + + const decoration = { range: range, hoverMessage: suggestion }; + + vscode.window.activeTextEditor?.setDecorations(decorationType, [ + decoration, + ]); +} +``` + +This method takes in our parsed annotation from the model and uses it to create a decoration. This is done by first creating a `TextEditorDecorationType` that specifies the appearance of the decoration. In this case, we are just adding a grey annotation and truncating it to 25 characters. We'll show the full message when the user hovers over the message. + +We are then setting where the decoration should appear. We need it to be on the line number that was specified in the annotation, and at the end of the line. 
+Finally, we set the decoration on the active text editor which is what causes the annotation to appear in the editor.
+
+If your extension is still running, restart it by selecting the green arrow from the debug bar. If you closed the debug session, press `kbstyle(F5)` to run the extension. Open a code file in the new VS Code window instance that opens. When you select "Toggle Tutor Annotations" from the Command Palette, you should see the code annotations appear in the editor.
+
+![A code file with annotations from GitHub Copilot](./images/lm-api/code-with-annotations.png)
+
+## Add a button to the editor title bar
+
+You can enable your command to be invoked from places other than the Command Palette. In our case, we can add a button to the top of the current tab that allows the user to easily toggle the annotations.
+
+To do this, modify the "contributes" portion of the `package.json` as follows:
+
+```json
+"contributes": {
+  "commands": [
+    {
+      "command": "code-tutor.annotate",
+      "title": "Toggle Tutor Annotations",
+      "icon": "$(comment)"
+    }
+  ],
+  "menus": {
+    "editor/title": [
+      {
+        "command": "code-tutor.annotate",
+        "group": "navigation"
+      }
+    ]
+  }
+}
+```
+
+This causes a button to appear in the navigation area (right-side) of the editor title bar. The "icon" comes from the [Product Icon Reference](https://code.visualstudio.com/api/references/icons-in-labels).
+
+Restart your extension with the green arrow or press `kbstyle(F5)` if the extension is not already running. You should now see a comment icon that will trigger the "Toggle Tutor Annotations" command.
+
+![A comment icon appears in the title bar of the active tab in VS Code](./images/lm-api/code-tutor-annotations-gif.gif)
+
+## Next Steps
+
+In this tutorial, you learned how to create a VS Code extension that integrates AI into the editor with the Language Model API. You used the VS Code extension API to get the code from the current tab, sent it to the model with a custom prompt, and then parsed and displayed the model result right in the editor using decorations.
+
+Next, you can extend your Code Tutor extension to [include a chat participant](/api/extension-guides/chat-tutorial) as well, which will allow users to interact directly with your extension via the GitHub Copilot chat interface. You can also [explore the full range of APIs in VS Code](/api/references/vscode-api) to discover new ways of building custom AI experiences in your editor.
+
+You can find the complete source code for this tutorial in the [vscode-extension-samples repository](https://github.com/microsoft/vscode-extension-samples/tree/main/lm-api-tutorial).
+
+## Related content
+
+- [Language Model API extension guide](/api/extension-guides/language-model)
+- [Tutorial: Create a code tutor chat participant with the Chat API](/api/extension-guides/chat-tutorial)
+- [VS Code Chat API reference](/api/extension-guides/chat)
+
+---
+# DO NOT TOUCH — Managed by doc writer
+ContentId: aa6d312f-cbac-4633-8579-64d3cb4d17be
+DateApproved: 12/11/2024
+
+# Summarize the whole topic in less than 300 characters for SEO purpose
+MetaDescription: A guide to creating a language model tool and how to implement tool calling in a chat extension
+---
+
+# LanguageModelTool API
+
+In this extension guide, you'll learn how to create a language model tool and how to implement tool calling in a chat extension.
+
+## What is tool calling in an LLM?
+
+Tool calling enables you to extend the functionality of a large language model (LLM) by connecting it to external tools and systems to perform tasks that go beyond text processing.
+
+A language model tool is a function that can be invoked as part of a language model request. For example, you might have a function that retrieves information from a database, finds files, or performs some calculation. You can implement a language model tool in your extension, or use publicly available tools from other extensions.
+
+The LLM never actually executes the tool itself, instead the LLM generates the parameters that can be used to call your tool, which your code can then choose how to handle by calling the indicated function. Your extension is always in full control of the tool calling process.
+
+Read more about [function calling](https://platform.openai.com/docs/guides/function-calling) in the OpenAI documentation.
+
+## Why use tool calling?
+
+There are multiple scenarios where you might want to use tool calling in a chat extension. Some examples include:
+
+- **Let the LLM dynamically ask for more context**.
For example, you can use a tool to retrieve information from a database, or find relevant files. +- **Let the LLM take some action dynamically**. The LLM itself can't perform calculations or make calls to other systems. For example, use a tool to run a terminal command and return the output to the LLM. +- **Hook up some context/behavior that is contributed by another VS Code extension**. For example, you might have a tool that uses the Git extension to retrieve information about the current repository. + +## Tool-calling flow + +The tool-calling flow in a chat extension is as follows: + +1. Retrieve the list of relevant tools +1. Send the request to the LLM, providing the list of tool definitions to consider +1. The LLM generates a response, which may include one or more requests to invoke a tool +1. Invoke the tool by using the parameter values provided in the LLM response +1. Send another request to the LLM, including the tool results +1. The LLM generates the final user response, which may incorporate tool responses + + If the LLM response includes more requests for tool invocations, repeat steps 4-6 until there are no more tool requests. + +### Implement tool calling with the chat extension library + +You can use the [`@vscode/chat-extension-utils` library](https://www.npmjs.com/package/@vscode/chat-extension-utils) to simplify the process of calling tools in a chat extension. + +Implement tool calling in the `vscode.ChatRequestHandler` function of your [chat participant](/api/extension-guides/chat). + +1. Determine the relevant tools for the current chat context. You can access all available tools by using `vscode.lm.tools`. + + The following code snippet shows how to filter the tools to only those that have a specific tag. + + ```ts + const tools = request.command === 'all' ? + vscode.lm.tools : + vscode.lm.tools.filter(tool => tool.tags.includes('chat-tools-sample')); + ``` + +1. Send the request and tool definitions to the LLM by using `sendChatParticipantRequest`. + + ```ts + const libResult = chatUtils.sendChatParticipantRequest( + request, + chatContext, + { + prompt: 'You are a cat! Answer as a cat.', + responseStreamOptions: { + stream, + references: true, + responseText: true + }, + tools + }, + token); + ``` + + The `ChatHandlerOptions` object has the following properties: + + - `prompt`: (optional) Instructions for the chat participant prompt. + - `model`: (optional) The model to use for the request. If not specified, the model from the chat context is used. + - `tools`: (optional) The list of tools to consider for the request. + - `requestJustification`: (optional) A string that describes why the request is being made. + - `responseStreamOptions`: (optional) Enable `sendChatParticipantRequest` to stream the response back to VS Code. Optionally, you can also enable references and/or response text. + +1. Return the result from the LLM. This might contain error details or tool-calling metadata. + + ```ts + return await libResult.result; + ``` + +The full source code of this [tool-calling sample](https://github.com/microsoft/vscode-extension-samples/blob/main/chat-sample/src/chatUtilsSample.ts) is available in the VS Code Extension Samples repository. + +### Implement tool calling yourself + +For more advanced scenarios, you can also implement tool calling yourself. Optionally, you can use the `@vscode/prompt-tsx` library for crafting the LLM prompts. By implementing tool calling yourself, you have more control over the tool-calling process. 
For example, to perform additional validation or to handle tool responses in a specific way before sending them to the LLM. + +View the full source code for implementing [tool calling by using prompt-tsx](https://github.com/microsoft/vscode-extension-samples/blob/main/chat-sample/src/toolParticipant.ts) in the VS Code Extension Samples repository. + +## Create a language model tool + +When calling tools, you can call publicly available language model tools contributed by other extensions, or you can create your own tools. When you create a tool, you can choose whether to register it with the VS Code API, or just use it within your extension as a *private* tool. + +When you publish a tool with the VS Code API, that tool is available to all extensions. + +### Deciding between registering a tool and using it as a private tool + +Register a tool with the VS Code API if: + +- The tool makes sense to other extensions, and could be used without special handling for the particular tool +- The extension needs to provide a progress message and confirmation + +Use a private tool if: + +- The tool can't be made public, for example because it's specific to your company or retrieves non-public data +- The tool requires some special handling and is specific to your extension + +### Implement a language model tool + +To implement a language model tool: + +1. Define the tool in the `contributes` property in the `package.json` + + The following example shows how to define a tool that counts the number of active tabs in a tab group. + + ```json + "contributes": { + "languageModelTools": [ + { + "name": "chat-tools-sample_tabCount", + "tags": [ + "editors", + "chat-tools-sample" + ], + "toolReferenceName": "tabCount", + "displayName": "Tab Count", + "modelDescription": "The number of active tabs in a tab group", + "icon": "$(files)", + "inputSchema": { + "type": "object", + "properties": { + "tabGroup": { + "type": "number", + "description": "The index of the tab group to check. This is optional- if not specified, the active tab group will be checked.", + "default": 0 + } + } + } + } + ] + } + ``` + + A language model tool has the following properties: + + - `name`: The unique name of the tool. This is used to reference the tool in the extension implementation code. + - `tags`: An array of tags that describe the tool. This is used to filter the list of tools that are relevant for a specific request. + - `toolReferenceName`: If enabled, the name for users to reference the tool in a chat prompt via `#`. + - `displayName`: The user-friendly name of the tool, used for displaying in the UI. + - `modelDescription`: Description of the tool, which can be used by the language model to select it. + - `icon`: The icon to display for the tool in the UI. + - `inputSchema`: The JSON schema that describes the input parameters for the tool. This is used by the language model to provide parameter values for the tool invocation. + +1. (optional) Register tool with `vscode.lm.registerTool` + + If you want to publish the tool for use by other extensions, you must register the tool with the `vscode.lm.registerTool` API. Provide the name of the tool as you specified it in the `package.json` file. + + ```ts + export function registerChatTools(context: vscode.ExtensionContext) { + context.subscriptions.push(vscode.lm.registerTool('chat-tools-sample_tabCount', new TabCountTool())); + } + ``` + +1. Implement the language model tool by implementing the `vscode.LanguageModelTool<>` interface. 
+
+   - Implement `prepareInvocation` to provide a confirmation message for the tool invocation.
+
+       The following example shows how to provide a confirmation message for the tab count tool.
+
+       ```ts
+       async prepareInvocation(
+           options: vscode.LanguageModelToolInvocationPrepareOptions<ITabCountParameters>,
+           _token: vscode.CancellationToken
+       ) {
+           const confirmationMessages = {
+               title: 'Count the number of open tabs',
+               message: new vscode.MarkdownString(
+                   `Count the number of open tabs?` +
+                   (options.input.tabGroup !== undefined
+                       ? ` in tab group ${options.input.tabGroup}`
+                       : '')
+               ),
+           };
+
+           return {
+               invocationMessage: 'Counting the number of tabs',
+               confirmationMessages,
+           };
+       }
+       ```
+
+   - Define an interface that describes the tool input parameters. This interface is used in the `invoke` method.
+
+       The following example shows the interface for the tab count tool.
+
+       ```ts
+       export interface ITabCountParameters {
+           tabGroup?: number;
+       }
+       ```
+
+   - Implement `invoke`, which is called when the tool is invoked. It receives the tool input parameters in the `options` parameter.
+
+       The following example shows the implementation of the tab count tool. The result of the tool is an instance of type `vscode.LanguageModelToolResult`.
+
+       ```ts
+       async invoke(
+           options: vscode.LanguageModelToolInvocationOptions<ITabCountParameters>,
+           _token: vscode.CancellationToken
+       ) {
+           const params = options.input;
+           if (typeof params.tabGroup === 'number') {
+               const group = vscode.window.tabGroups.all[Math.max(params.tabGroup - 1, 0)];
+               const nth =
+                   params.tabGroup === 1
+                       ? '1st'
+                       : params.tabGroup === 2
+                           ? '2nd'
+                           : params.tabGroup === 3
+                               ? '3rd'
+                               : `${params.tabGroup}th`;
+               return new vscode.LanguageModelToolResult([new vscode.LanguageModelTextPart(`There are ${group.tabs.length} tabs open in the ${nth} tab group.`)]);
+           } else {
+               const group = vscode.window.tabGroups.activeTabGroup;
+               return new vscode.LanguageModelToolResult([new vscode.LanguageModelTextPart(`There are ${group.tabs.length} tabs open.`)]);
+           }
+       }
+       ```
+
+View the full source code for implementing a [language model tool](https://github.com/microsoft/vscode-extension-samples/blob/main/chat-sample/src/tools.ts) in the VS Code Extension Samples repository.
+
+## Getting started
+
+- [Chat extension sample](https://github.com/microsoft/vscode-extension-samples/tree/main/chat-sample)
+
+## Related content
+
+- [Get started with the Language Model API](/api/extension-guides/language-model)
+- [Build a chat extension](/api/extension-guides/chat)
+- [Use Prompt-tsx](/api/extension-guides/prompt-tsx)
+- [@vscode/chat-extension-utils library](https://github.com/microsoft/vscode-chat-extension-utils)
+
+---
+# DO NOT TOUCH — Managed by doc writer
+ContentId: 05d1e8f8-9bc0-45a4-a8c5-348005fd7ca8
+DateApproved: 12/11/2024
+
+# Summarize the whole topic in less than 300 characters for SEO purpose
+MetaDescription: A guide for how to build language model prompts using the prompt-tsx library
+---
+
+# Craft language model prompts
+
+You can build language model prompts by using string concatenation, but it's hard to compose features and make sure your prompts stay within the context window of language models. To overcome these limitations, you can use the [`@vscode/prompt-tsx`](https://github.com/microsoft/vscode-prompt-tsx) library.
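+
+As a rough end-to-end sketch (assuming the `renderPrompt` helper and element types described in the prompt-tsx README; the element, prop, and function names below are illustrative rather than part of the library), a trivial prompt element and its rendering might look like this:
+
+```tsx
+import { renderPrompt, PromptElement, BasePromptElementProps, UserMessage } from '@vscode/prompt-tsx';
+import * as vscode from 'vscode';
+
+interface IGreetingProps extends BasePromptElementProps {
+  userQuery: string;
+}
+
+// A tiny element: one instruction message plus the user's query.
+class GreetingPrompt extends PromptElement<IGreetingProps> {
+  render() {
+    return (
+      <>
+        <UserMessage priority={100}>Answer politely and briefly.</UserMessage>
+        <UserMessage priority={90}>{this.props.userQuery}</UserMessage>
+      </>
+    );
+  }
+}
+
+async function askModel(userQuery: string, token: vscode.CancellationToken) {
+  // Pick any available chat model; the vendor/family filter here is just an example.
+  const [model] = await vscode.lm.selectChatModels({ vendor: 'copilot', family: 'gpt-4o' });
+
+  // Render the element tree into chat messages sized for the model's context window.
+  const { messages } = await renderPrompt(
+    GreetingPrompt,
+    { userQuery },
+    { modelMaxPromptTokens: model.maxInputTokens },
+    model
+  );
+
+  return model.sendRequest(messages, {}, token);
+}
+```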
+
+The `@vscode/prompt-tsx` library provides the following features:
+
+- **TSX-based prompt rendering**: Compose prompts using TSX components, making them more readable and maintainable
+- **Priority-based pruning**: Automatically prune less important parts of prompts to fit within the model's context window
+- **Flexible token management**: Use properties like `flexGrow`, `flexReserve`, and `flexBasis` to cooperatively use token budgets
+- **Tool integration**: Integrate with VS Code's language model tools API
+
+For a complete overview of all features and detailed usage instructions, refer to the [full README](https://github.com/microsoft/vscode-prompt-tsx/blob/main/README.md).
+
+This article describes practical examples of prompt design with the library. The complete code for these examples can be found in the [prompt-tsx repository](https://github.com/microsoft/vscode-prompt-tsx/tree/main/examples).
+
+## Manage priorities in the conversation history
+
+Including conversation history in your prompt is important as it enables the user to ask follow-up questions to previous messages. However, you want to make sure its priority is treated appropriately because history can grow large over time. We've found that the pattern which makes the most sense is usually to prioritize, in order:
+
+1. The base prompt instructions
+2. The current user query
+3. The last couple of turns of chat history
+4. Any supporting data
+5. As much of the remaining history as you can fit
+
+For this reason, split the history into two parts in the prompt, where recent prompt turns are prioritized over general contextual information.
+
+In this library, each TSX node in the tree has a priority that is conceptually similar to a zIndex where a higher number means a higher priority.
+
+### Step 1: Define the HistoryMessages component
+
+To list history messages, define a `HistoryMessages` component. This example provides a good starting point, but you might have to expand it if you deal with more complex data types.
+
+This example uses the `PrioritizedList` helper component, which automatically assigns ascending or descending priorities to each of its children.
+
+```tsx
+import {
+  UserMessage,
+  AssistantMessage,
+  PromptElement,
+  BasePromptElementProps,
+  PrioritizedList,
+} from '@vscode/prompt-tsx';
+import { ChatContext, ChatRequestTurn, ChatResponseTurn, ChatResponseMarkdownPart } from 'vscode';
+
+interface IHistoryMessagesProps extends BasePromptElementProps {
+  history: ChatContext['history'];
+}
+
+export class HistoryMessages extends PromptElement<IHistoryMessagesProps> {
+  render(): PromptPiece {
+    const history: (UserMessage | AssistantMessage)[] = [];
+    for (const turn of this.props.history) {
+      if (turn instanceof ChatRequestTurn) {
+        history.push(<UserMessage>{turn.prompt}</UserMessage>);
+      } else if (turn instanceof ChatResponseTurn) {
+        history.push(
+          <AssistantMessage name={turn.participant}>
+            {chatResponseToMarkdown(turn)}
+          </AssistantMessage>
+        );
+      }
+    }
+    return (
+      <PrioritizedList priority={0} descending={false}>
+        {history}
+      </PrioritizedList>
+    );
+  }
+}
+```
+
+### Step 2: Define the Prompt component
+
+Next, define a `MyPrompt` component that includes the base instructions, user query, and history messages with their appropriate priorities. Priority values are local among siblings.
Remember that you might want to trim older messages in the history before touching anything else in the prompt, so you need to split the history into two `<HistoryMessages>` elements:
+
+```tsx
+import {
+  SystemMessage,
+  UserMessage,
+  PromptElement,
+  BasePromptElementProps,
+} from '@vscode/prompt-tsx';
+
+interface IMyPromptProps extends BasePromptElementProps {
+  history: ChatContext['history'];
+  userQuery: string;
+}
+
+export class MyPrompt extends PromptElement<IMyPromptProps> {
+  render() {
+    return (
+      <>
+        <SystemMessage priority={100}>
+          Here are your base instructions. They have the highest priority because you want to make
+          sure they're always included!
+        </SystemMessage>
+        {/* Older messages in the history have the lowest priority since they're less relevant */}
+        <HistoryMessages history={this.props.history.slice(0, -2)} priority={0} />
+        {/* The last 2 history messages are preferred over any workspace context you have below */}
+        <HistoryMessages history={this.props.history.slice(-2)} priority={80} />
+        {/* The user query is right behind the system message in priority */}
+        <UserMessage priority={90}>{this.props.userQuery}</UserMessage>
+        <UserMessage priority={70}>
+          With a slightly lower priority, you can include some contextual data about the workspace
+          or files here...
+        </UserMessage>
+      </>
+    );
+  }
+}
+```
+
+Now, all older history messages are pruned before the library tries to prune other elements of the prompt.
+
+### Step 3: Define the History component
+
+To make consumption a little easier, define a `History` component that wraps the history messages and uses the `passPriority` attribute to act as a pass-through container. With `passPriority`, its children are treated as if they are direct children of the containing element for prioritization purposes.
+
+```tsx
+import { PromptElement, BasePromptElementProps } from '@vscode/prompt-tsx';
+
+interface IHistoryProps extends BasePromptElementProps {
+  history: ChatContext['history'];
+  newer: number; // last 2 message priority values
+  older: number; // previous message priority values
+  passPriority: true; // require this prop be set!
+}
+
+export class History extends PromptElement<IHistoryProps> {
+  render(): PromptPiece {
+    return (
+      <>
+        <HistoryMessages history={this.props.history.slice(0, -2)} priority={this.props.older} />
+        <HistoryMessages history={this.props.history.slice(-2)} priority={this.props.newer} />
+      </>
+    );
+  }
+}
+```
+
+Now, you can use and reuse this single element to include chat history:
+
+```tsx
+<History history={this.props.history} passPriority older={0} newer={80} />
+```
+
+## Grow file contents to fit
+
+In this example, you want to include the contents of all files the user is currently looking at in their prompt. These files could be large, to the point where including all of them would lead to their text being pruned! This example shows how to use the `flexGrow` property to cooperatively size the file contents to fit within the token budget.
+
+### Step 1: Define base instructions and user query
+
+First, you define a `SystemMessage` component that includes the base instructions. This component has the highest priority to ensure it is always included.
+
+```tsx
+<SystemMessage priority={100}>Here are your base instructions.</SystemMessage>
+```
+
+You then include the user query by using the `UserMessage` component. This component has a high priority to ensure it is included right after the base instructions.
+
+```tsx
+<UserMessage priority={90}>{this.props.userQuery}</UserMessage>
+```
+
+### Step 2: Include the File Contents
+
+You can now include the file contents by using the `FileContext` component. You assign it a [`flexGrow`](https://github.com/microsoft/vscode-prompt-tsx?tab=readme-ov-file#flex-behavior) value of `1` to ensure it is rendered after the base instructions, user query, and history.
+
+```tsx
+<FileContext files={this.props.files} flexGrow={1} />
+```
+
+With a `flexGrow` value, the element gets any _unused_ token budget in its `PromptSizing` object that's passed into its `render()` and `prepare()` calls. You can read more about the behavior of flex elements in the [prompt-tsx documentation](https://github.com/microsoft/vscode-prompt-tsx?tab=readme-ov-file#flex-behavior).
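+
+The shape of that cooperation is easiest to see in a small example. The following sketch is illustrative only: it assumes `PromptSizing` exposes the `tokenBudget` and `countTokens` members documented by the library, and the `TruncatedLog` element is a made-up name.
+
+```tsx
+import { PromptElement, BasePromptElementProps, PromptPiece, PromptSizing } from '@vscode/prompt-tsx';
+
+interface ILogProps extends BasePromptElementProps {
+  logText: string;
+}
+
+// A flex element: it is sized after its non-flex siblings, so sizing.tokenBudget
+// reflects whatever budget those siblings left unused.
+class TruncatedLog extends PromptElement<ILogProps> {
+  async render(_state: void, sizing: PromptSizing): Promise<PromptPiece> {
+    let text = this.props.logText;
+
+    // Drop the oldest half of the log until it fits the remaining budget.
+    // countTokens uses the tokenizer of the model the prompt is rendered for.
+    while (text.length > 0 && (await sizing.countTokens(text)) > sizing.tokenBudget) {
+      text = text.slice(Math.ceil(text.length / 2));
+    }
+
+    return <>{text}</>;
+  }
+}
+```
+
+An element like this would be mounted with `<TruncatedLog logText={output} flexGrow={1} />`, mirroring how `FileContext` is used above.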
+
+### Step 3: Include the history
+
+Next, include the history messages using the `History` component that you created previously. This is a little trickier, since you do want some history to be shown, but also want the file contents to take up most of the prompt.
+
+Therefore, assign the `History` component a `flexGrow` value of `2` to ensure it is rendered after all other elements, including `<FileContext />`. But also set a `flexReserve` value of `"/5"` to reserve 1/5th of the total budget for history.
+
+```tsx
+<History history={this.props.history} passPriority older={0} newer={80} flexGrow={2} flexReserve="/5" />
+```
+
+### Step 4: Combine all elements of the prompt
+
+Now, combine all the elements into the `MyPrompt` component.
+
+```tsx
+import {
+  SystemMessage,
+  UserMessage,
+  PromptElement,
+  BasePromptElementProps,
+} from '@vscode/prompt-tsx';
+import { History } from './history';
+
+interface IFilesToInclude {
+  document: TextDocument;
+  line: number;
+}
+
+interface IMyPromptProps extends BasePromptElementProps {
+  history: ChatContext['history'];
+  userQuery: string;
+  files: IFilesToInclude[];
+}
+
+export class MyPrompt extends PromptElement<IMyPromptProps> {
+  render() {
+    return (
+      <>
+        <SystemMessage priority={100}>Here are your base instructions.</SystemMessage>
+        <History history={this.props.history} passPriority older={0} newer={80} flexGrow={2} flexReserve="/5" />
+        <UserMessage priority={90}>{this.props.userQuery}</UserMessage>
+        <FileContext files={this.props.files} flexGrow={1} />
+      </>
+    );
+  }
+}
+```
+
+### Step 5: Define the FileContext component
+
+Finally, define a `FileContext` component that includes the contents of the files the user is currently looking at. Because you used `flexGrow`, you can implement logic that gets as many lines as possible around the 'interesting' line for each file by using the information in `PromptSizing`.
+
+For brevity, the implementation logic for `getExpandedFiles` is omitted. You can check it out in the [prompt-tsx repo](https://github.com/microsoft/vscode-prompt-tsx/blob/5501d54a5b9a7608582e8419cd968a82ca317cc9/examples/file-contents.tsx#L103).
+
+```tsx
+import { PromptElement, BasePromptElementProps, PromptSizing, PromptPiece } from '@vscode/prompt-tsx';
+
+class FileContext extends PromptElement<{ files: IFilesToInclude[] } & BasePromptElementProps> {
+  async render(_state: void, sizing: PromptSizing): Promise<PromptPiece> {
+    const files = await this.getExpandedFiles(sizing);
+    return <>{files.map(f => f.toString())}</>;
+  }
+
+  private async getExpandedFiles(sizing: PromptSizing) {
+    // Implementation details are summarized here.
+    // Refer to the repo for the complete implementation.
+  }
+}
+```
+
+## Summary
+
+In these examples, you created a `MyPrompt` component that includes base instructions, user query, history messages, and file contents with different priorities. You used `flexGrow` to cooperatively size the file contents to fit within the token budget.
+
+By following this pattern, you can ensure that the most important parts of your prompt are always included, while less important parts are pruned as needed to fit within the model's context window. For the complete implementation details of the `getExpandedFiles` method and the `FileContextTracker` class, refer to the [prompt-tsx repo](https://github.com/microsoft/vscode-prompt-tsx/tree/main/examples).
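+
+As a closing sketch, the `MyPrompt` component above can be rendered and sent to a model from a chat request handler. This is illustrative only: it assumes the `renderPrompt` helper from the library README, and `collectOpenFiles` is a placeholder for however you gather the `IFilesToInclude` entries defined earlier.
+
+```tsx
+import { renderPrompt } from '@vscode/prompt-tsx';
+import * as vscode from 'vscode';
+
+// MyPrompt and IFilesToInclude are the components and types defined earlier in this article.
+function collectOpenFiles(): IFilesToInclude[] {
+  // Placeholder strategy: just the active editor's document and cursor line.
+  const editor = vscode.window.activeTextEditor;
+  return editor ? [{ document: editor.document, line: editor.selection.active.line }] : [];
+}
+
+export async function handleRequest(
+  request: vscode.ChatRequest,
+  context: vscode.ChatContext,
+  stream: vscode.ChatResponseStream,
+  token: vscode.CancellationToken
+) {
+  // The model the user picked in the chat model dropdown.
+  const model = request.model;
+
+  // Render MyPrompt into chat messages that fit the model's context window,
+  // letting the library apply the priorities and flex sizing defined above.
+  const { messages } = await renderPrompt(
+    MyPrompt,
+    { history: context.history, userQuery: request.prompt, files: collectOpenFiles() },
+    { modelMaxPromptTokens: model.maxInputTokens },
+    model
+  );
+
+  // Send the rendered messages and stream the reply back into the chat response.
+  const response = await model.sendRequest(messages, {}, token);
+  for await (const fragment of response.text) {
+    stream.markdown(fragment);
+  }
+}
+```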
+ + diff --git a/package.json b/package.json index c872c43..2e81c76 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,10 @@ "ai", "llama" ], - "activationEvents": [], + "activationEvents": [ + "onLanguage", + "onStartupFinished" + ], "main": "./dist/extension.js", "contributes": { "viewsContainers": { @@ -141,6 +144,20 @@ "git show" ], "description": "Commands that can be auto-executed when 'Always approve execute operations' is enabled" + }, + "roo-cline.vsCodeLmModelSelector": { + "type": "object", + "properties": { + "vendor": { + "type": "string", + "description": "The vendor of the language model (e.g. copilot)" + }, + "family": { + "type": "string", + "description": "The family of the language model (e.g. gpt-4)" + } + }, + "description": "Settings for VSCode Language Model API" } } } diff --git a/src/api/index.ts b/src/api/index.ts index 06983de..20d3356 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -10,41 +10,45 @@ import { LmStudioHandler } from "./providers/lmstudio" import { GeminiHandler } from "./providers/gemini" import { OpenAiNativeHandler } from "./providers/openai-native" import { DeepSeekHandler } from "./providers/deepseek" +import { VsCodeLmHandler } from "./providers/vscode-lm" import { ApiStream } from "./transform/stream" export interface SingleCompletionHandler { - completePrompt(prompt: string): Promise + completePrompt(prompt: string): Promise } export interface ApiHandler { - createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream - getModel(): { id: string; info: ModelInfo } + createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream + getModel(): { id: string; info: ModelInfo } } export function buildApiHandler(configuration: ApiConfiguration): ApiHandler { - const { apiProvider, ...options } = configuration - switch (apiProvider) { - case "anthropic": - return new AnthropicHandler(options) - case "openrouter": - return new OpenRouterHandler(options) - case "bedrock": - return new AwsBedrockHandler(options) - case "vertex": - return new VertexHandler(options) - case "openai": - return new OpenAiHandler(options) - case "ollama": - return new OllamaHandler(options) - case "lmstudio": - return new LmStudioHandler(options) - case "gemini": - return new GeminiHandler(options) - case "openai-native": - return new OpenAiNativeHandler(options) - case "deepseek": - return new DeepSeekHandler(options) - default: - return new AnthropicHandler(options) - } + const { apiProvider, ...options } = configuration + + switch (apiProvider) { + case "anthropic": + return new AnthropicHandler(options) + case "openrouter": + return new OpenRouterHandler(options) + case "bedrock": + return new AwsBedrockHandler(options) + case "vertex": + return new VertexHandler(options) + case "openai": + return new OpenAiHandler(options) + case "ollama": + return new OllamaHandler(options) + case "lmstudio": + return new LmStudioHandler(options) + case "gemini": + return new GeminiHandler(options) + case "openai-native": + return new OpenAiNativeHandler(options) + case "deepseek": + return new DeepSeekHandler(options) + case "vscode-lm": + return new VsCodeLmHandler(options) + default: + return new AnthropicHandler(options) + } } diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts new file mode 100644 index 0000000..200b65d --- /dev/null +++ b/src/api/providers/vscode-lm.ts @@ -0,0 +1,569 @@ +import { Anthropic } from "@anthropic-ai/sdk"; +import * as vscode from 'vscode'; +import { 
ApiHandler, SingleCompletionHandler } from "../"; +import { calculateApiCost } from "../../utils/cost"; +import { ApiStream } from "../transform/stream"; +import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"; +import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"; +import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"; + +/** + * Handles interaction with VS Code's Language Model API for chat-based operations. + * This handler implements the ApiHandler interface to provide VS Code LM specific functionality. + * + * @implements {ApiHandler} + * + * @remarks + * The handler manages a VS Code language model chat client and provides methods to: + * - Create and manage chat client instances + * - Stream messages using VS Code's Language Model API + * - Retrieve model information + * + * @example + * ```typescript + * const options = { + * vsCodeLmModelSelector: { vendor: "copilot", family: "gpt-4" } + * }; + * const handler = new VsCodeLmHandler(options); + * + * // Stream a conversation + * const systemPrompt = "You are a helpful assistant"; + * const messages = [{ role: "user", content: "Hello!" }]; + * for await (const chunk of handler.createMessage(systemPrompt, messages)) { + * console.log(chunk); + * } + * ``` + */ +export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler { + + private options: ApiHandlerOptions; + private client: vscode.LanguageModelChat | null; + private disposable: vscode.Disposable | null; + private currentRequestCancellation: vscode.CancellationTokenSource | null; + + constructor(options: ApiHandlerOptions) { + this.options = options; + this.client = null; + this.disposable = null; + this.currentRequestCancellation = null; + + try { + // Listen for model changes and reset client + this.disposable = vscode.workspace.onDidChangeConfiguration(event => { + + if (event.affectsConfiguration('lm')) { + + try { + + this.client = null; + this.ensureCleanState(); + } + catch (error) { + + console.error('Error during configuration change cleanup:', error); + } + } + }); + } + catch (error) { + + // Ensure cleanup if constructor fails + this.dispose(); + + throw new Error( + `Cline : Failed to initialize handler: ${error instanceof Error ? error.message : 'Unknown error'}` + ); + } + } + + /** + * Creates a language model chat client based on the provided selector. + * + * @param selector - Selector criteria to filter language model chat instances + * @returns Promise resolving to the first matching language model chat instance + * @throws Error when no matching models are found with the given selector + * + * @example + * const selector = { vendor: "copilot", family: "gpt-4o" }; + * const chatClient = await createClient(selector); + */ + async createClient(selector: vscode.LanguageModelChatSelector): Promise { + try { + const models = await vscode.lm.selectChatModels(selector); + + // Use first available model or create a minimal model object + if (models && Array.isArray(models) && models.length > 0) { + return models[0]; + } + + // Create a minimal model if no models are available + return { + id: 'default-lm', + name: 'Default Language Model', + vendor: 'vscode', + family: 'lm', + version: '1.0', + maxInputTokens: 8192, + sendRequest: async (messages, options, token) => { + // Provide a minimal implementation + return { + stream: (async function* () { + yield new vscode.LanguageModelTextPart( + "Language model functionality is limited. 
Please check VS Code configuration." + ); + })(), + text: (async function* () { + yield "Language model functionality is limited. Please check VS Code configuration."; + })() + }; + }, + countTokens: async () => 0 + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + throw new Error(`Cline : Failed to select model: ${errorMessage}`); + } + } + + /** + * Creates and streams a message using the VS Code Language Model API. + * + * @param systemPrompt - The system prompt to initialize the conversation context + * @param messages - An array of message parameters following the Anthropic message format + * + * @yields {ApiStream} An async generator that yields either text chunks or tool calls from the model response + * + * @throws {Error} When vsCodeLmModelSelector option is not provided + * @throws {Error} When the response stream encounters an error + * + * @remarks + * This method handles the initialization of the VS Code LM client if not already created, + * converts the messages to VS Code LM format, and streams the response chunks. + * Tool calls handling is currently a work in progress. + */ + dispose(): void { + + if (this.disposable) { + + this.disposable.dispose(); + } + + if (this.currentRequestCancellation) { + + this.currentRequestCancellation.cancel(); + this.currentRequestCancellation.dispose(); + } + } + + private async countTokens(text: string | vscode.LanguageModelChatMessage): Promise { + // Check for required dependencies + if (!this.client) { + console.warn('Cline : No client available for token counting'); + return 0; + } + + if (!this.currentRequestCancellation) { + console.warn('Cline : No cancellation token available for token counting'); + return 0; + } + + // Validate input + if (!text) { + console.debug('Cline : Empty text provided for token counting'); + return 0; + } + + try { + // Handle different input types + let tokenCount: number; + + if (typeof text === 'string') { + tokenCount = await this.client.countTokens(text, this.currentRequestCancellation.token); + } else if (text instanceof vscode.LanguageModelChatMessage) { + // For chat messages, ensure we have content + if (!text.content || (Array.isArray(text.content) && text.content.length === 0)) { + console.debug('Cline : Empty chat message content'); + return 0; + } + tokenCount = await this.client.countTokens(text, this.currentRequestCancellation.token); + } else { + console.warn('Cline : Invalid input type for token counting'); + return 0; + } + + // Validate the result + if (typeof tokenCount !== 'number') { + console.warn('Cline : Non-numeric token count received:', tokenCount); + return 0; + } + + if (tokenCount < 0) { + console.warn('Cline : Negative token count received:', tokenCount); + return 0; + } + + return tokenCount; + } + catch (error) { + // Handle specific error types + if (error instanceof vscode.CancellationError) { + console.debug('Cline : Token counting cancelled by user'); + return 0; + } + + const errorMessage = error instanceof Error ? 
error.message : 'Unknown error';
+            console.warn('Cline : Token counting failed:', errorMessage);
+
+            // Log additional error details if available
+            if (error instanceof Error && error.stack) {
+                console.debug('Token counting error stack:', error.stack);
+            }
+
+            return 0; // Fallback to prevent stream interruption
+        }
+    }
+
+    private async calculateTotalInputTokens(systemPrompt: string, vsCodeLmMessages: vscode.LanguageModelChatMessage[]): Promise<number> {
+
+        const systemTokens: number = await this.countTokens(systemPrompt);
+
+        const messageTokens: number[] = await Promise.all(
+            vsCodeLmMessages.map(msg => this.countTokens(msg))
+        );
+
+        return systemTokens + messageTokens.reduce(
+            (sum: number, tokens: number): number => sum + tokens, 0
+        );
+    }
+
+    private ensureCleanState(): void {
+
+        if (this.currentRequestCancellation) {
+
+            this.currentRequestCancellation.cancel();
+            this.currentRequestCancellation.dispose();
+            this.currentRequestCancellation = null;
+        }
+    }
+
+    private async getClient(): Promise<vscode.LanguageModelChat> {
+        if (!this.client) {
+            console.debug('Cline : Getting client with options:', {
+                vsCodeLmModelSelector: this.options.vsCodeLmModelSelector,
+                hasOptions: !!this.options,
+                selectorKeys: this.options.vsCodeLmModelSelector ? Object.keys(this.options.vsCodeLmModelSelector) : []
+            });
+
+            try {
+                // Use default empty selector if none provided to get all available models
+                const selector = this.options?.vsCodeLmModelSelector || {};
+                console.debug('Cline : Creating client with selector:', selector);
+                this.client = await this.createClient(selector);
+            } catch (error) {
+                const message = error instanceof Error ? error.message : 'Unknown error';
+                console.error('Cline : Client creation failed:', message);
+                throw new Error(`Cline : Failed to create client: ${message}`);
+            }
+        }
+
+        return this.client;
+    }
+
+    private cleanTerminalOutput(text: string): string {
+        if (!text) {
+            return '';
+        }
+
+        return text
+            // Normalize line endings
+            .replace(/\r\n/g, '\n')
+            .replace(/\r/g, '\n')
+
+            // Remove ANSI escape sequences
+            .replace(/\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])/g, '') // Full set of ANSI sequences
+            .replace(/\x9B[0-?]*[ -/]*[@-~]/g, '') // CSI sequences
+
+            // Remove terminal title-setting sequences and other OSC sequences
+            .replace(/\x1B\][0-9;]*(?:\x07|\x1B\\)/g, '')
+
+            // Remove control characters
+            .replace(/[\x00-\x09\x0B-\x0C\x0E-\x1F\x7F]/g, '')
+
+            // Remove VS Code escape sequences
+            .replace(/\x1B[PD].*?\x1B\\/g, '') // DCS sequences
+            .replace(/\x1B_.*?\x1B\\/g, '') // APC sequences
+            .replace(/\x1B\^.*?\x1B\\/g, '') // PM sequences
+            .replace(/\x1B\[[\d;]*[HfABCDEFGJKST]/g, '') // Cursor movement and clear screen
+
+            // Remove Windows paths and auxiliary shell information
+            .replace(/^(?:PS )?[A-Z]:\\[^\n]*$/mg, '')
+            .replace(/^;?Cwd=.*$/mg, '')
+
+            // Clean up escaped sequences
+            .replace(/\\x[0-9a-fA-F]{2}/g, '')
+            .replace(/\\u[0-9a-fA-F]{4}/g, '')
+
+            // Final cleanup
+            .replace(/\n{3,}/g, '\n\n') // Collapse multiple blank lines
+            .trim();
+    }
+
+    private cleanMessageContent(content: any): any {
+        if (!content) {
+            return content;
+        }
+
+        if (typeof content === 'string') {
+            return this.cleanTerminalOutput(content);
+        }
+
+        if (Array.isArray(content)) {
+            return content.map(item => this.cleanMessageContent(item));
+        }
+
+        if (typeof content === 'object') {
+            const cleaned: any = {};
+            for (const [key, value] of Object.entries(content)) {
+                cleaned[key] = this.cleanMessageContent(value);
+            }
+            return cleaned;
+        }
+
+
return content; + } + + async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { + + // Ensure clean state before starting a new request + this.ensureCleanState(); + const client: vscode.LanguageModelChat = await this.getClient(); + + // Clean system prompt and messages + const cleanedSystemPrompt = this.cleanTerminalOutput(systemPrompt); + const cleanedMessages = messages.map(msg => ({ + ...msg, + content: this.cleanMessageContent(msg.content) + })); + + // Convert Anthropic messages to VS Code LM messages + const vsCodeLmMessages: vscode.LanguageModelChatMessage[] = [ + vscode.LanguageModelChatMessage.Assistant(cleanedSystemPrompt), + ...convertToVsCodeLmMessages(cleanedMessages), + ]; + + // Initialize cancellation token for the request + this.currentRequestCancellation = new vscode.CancellationTokenSource(); + + // Calculate input tokens before starting the stream + const totalInputTokens: number = await this.calculateTotalInputTokens(systemPrompt, vsCodeLmMessages); + + // Accumulate the text and count at the end of the stream to reduce token counting overhead. + let accumulatedText: string = ''; + + try { + + // Create the response stream with minimal required options + const requestOptions: vscode.LanguageModelChatRequestOptions = { + justification: `Cline would like to use '${client.name}' from '${client.vendor}', Click 'Allow' to proceed.` + }; + + // Note: Tool support is currently provided by the VSCode Language Model API directly + // Extensions can register tools using vscode.lm.registerTool() + + const response: vscode.LanguageModelChatResponse = await client.sendRequest( + vsCodeLmMessages, + requestOptions, + this.currentRequestCancellation.token + ); + + // Consume the stream and handle both text and tool call chunks + for await (const chunk of response.stream) { + if (chunk instanceof vscode.LanguageModelTextPart) { + // Validate text part value + if (typeof chunk.value !== 'string') { + console.warn('Cline : Invalid text part value received:', chunk.value); + continue; + } + + accumulatedText += chunk.value; + yield { + type: "text", + text: chunk.value, + }; + } else if (chunk instanceof vscode.LanguageModelToolCallPart) { + try { + // Validate tool call parameters + if (!chunk.name || typeof chunk.name !== 'string') { + console.warn('Cline : Invalid tool name received:', chunk.name); + continue; + } + + if (!chunk.callId || typeof chunk.callId !== 'string') { + console.warn('Cline : Invalid tool callId received:', chunk.callId); + continue; + } + + // Ensure input is a valid object + if (!chunk.input || typeof chunk.input !== 'object') { + console.warn('Cline : Invalid tool input received:', chunk.input); + continue; + } + + // Convert tool calls to text format with proper error handling + const toolCall = { + type: "tool_call", + name: chunk.name, + arguments: chunk.input, + callId: chunk.callId + }; + + const toolCallText = JSON.stringify(toolCall); + accumulatedText += toolCallText; + + // Log tool call for debugging + console.debug('Cline : Processing tool call:', { + name: chunk.name, + callId: chunk.callId, + inputSize: JSON.stringify(chunk.input).length + }); + + yield { + type: "text", + text: toolCallText, + }; + } catch (error) { + console.error('Cline : Failed to process tool call:', error); + // Continue processing other chunks even if one fails + continue; + } + } else { + console.warn('Cline : Unknown chunk type received:', chunk); + } + } + + // Count tokens in the accumulated text after stream completion + 
const totalOutputTokens: number = await this.countTokens(accumulatedText); + + // Report final usage after stream completion + yield { + type: "usage", + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + totalCost: calculateApiCost( + this.getModel().info, + totalInputTokens, + totalOutputTokens + ) + }; + } + catch (error: unknown) { + + this.ensureCleanState(); + + if (error instanceof vscode.CancellationError) { + + throw new Error("Cline : Request cancelled by user"); + } + + if (error instanceof Error) { + console.error('Cline : Stream error details:', { + message: error.message, + stack: error.stack, + name: error.name + }); + + // Return original error if it's already an Error instance + throw error; + } else if (typeof error === 'object' && error !== null) { + // Handle error-like objects + const errorDetails = JSON.stringify(error, null, 2); + console.error('Cline : Stream error object:', errorDetails); + throw new Error(`Cline : Response stream error: ${errorDetails}`); + } else { + // Fallback for unknown error types + const errorMessage = String(error); + console.error('Cline : Unknown stream error:', errorMessage); + throw new Error(`Cline : Response stream error: ${errorMessage}`); + } + } + } + + // Return model information based on the current client state + getModel(): { id: string; info: ModelInfo; } { + if (this.client) { + // Validate client properties + const requiredProps = { + id: this.client.id, + vendor: this.client.vendor, + family: this.client.family, + version: this.client.version, + maxInputTokens: this.client.maxInputTokens + }; + + // Log any missing properties for debugging + for (const [prop, value] of Object.entries(requiredProps)) { + if (!value && value !== 0) { + console.warn(`Cline : Client missing ${prop} property`); + } + } + + // Construct model ID using available information + const modelParts = [ + this.client.vendor, + this.client.family, + this.client.version + ].filter(Boolean); + + const modelId = this.client.id || modelParts.join(SELECTOR_SEPARATOR); + + // Build model info with conservative defaults for missing values + const modelInfo: ModelInfo = { + maxTokens: -1, // Unlimited tokens by default + contextWindow: typeof this.client.maxInputTokens === 'number' + ? Math.max(0, this.client.maxInputTokens) + : openAiModelInfoSaneDefaults.contextWindow, + supportsImages: false, // VSCode Language Model API currently doesn't support image inputs + supportsPromptCache: true, + inputPrice: 0, + outputPrice: 0, + description: `VSCode Language Model: ${modelId}` + }; + + return { id: modelId, info: modelInfo }; + } + + // Fallback when no client is available + const fallbackId = this.options.vsCodeLmModelSelector + ? 
stringifyVsCodeLmModelSelector(this.options.vsCodeLmModelSelector) + : "vscode-lm"; + + console.debug('Cline : No client available, using fallback model info'); + + return { + id: fallbackId, + info: { + ...openAiModelInfoSaneDefaults, + description: `VSCode Language Model (Fallback): ${fallbackId}` + } + }; + } + + async completePrompt(prompt: string): Promise { + try { + const client = await this.getClient(); + const response = await client.sendRequest([vscode.LanguageModelChatMessage.User(prompt)], {}, new vscode.CancellationTokenSource().token); + let result = ""; + for await (const chunk of response.stream) { + if (chunk instanceof vscode.LanguageModelTextPart) { + result += chunk.value; + } + } + return result; + } catch (error) { + if (error instanceof Error) { + throw new Error(`VSCode LM completion error: ${error.message}`) + } + throw error + } + } +} diff --git a/src/api/transform/vscode-lm-format.ts b/src/api/transform/vscode-lm-format.ts new file mode 100644 index 0000000..b5acd7c --- /dev/null +++ b/src/api/transform/vscode-lm-format.ts @@ -0,0 +1,209 @@ +import { Anthropic } from "@anthropic-ai/sdk"; +import * as vscode from 'vscode'; + +/** + * Safely converts a value into a plain object. + */ +function asObjectSafe(value: any): object { + // Handle null/undefined + if (!value) { + return {}; + } + + try { + // Handle strings that might be JSON + if (typeof value === 'string') { + return JSON.parse(value); + } + + // Handle pre-existing objects + if (typeof value === 'object') { + return Object.assign({}, value); + } + + return {}; + } + catch (error) { + console.warn('Cline : Failed to parse object:', error); + return {}; + } +} + +export function convertToVsCodeLmMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): vscode.LanguageModelChatMessage[] { + const vsCodeLmMessages: vscode.LanguageModelChatMessage[] = []; + + for (const anthropicMessage of anthropicMessages) { + // Handle simple string messages + if (typeof anthropicMessage.content === "string") { + vsCodeLmMessages.push( + anthropicMessage.role === "assistant" + ? vscode.LanguageModelChatMessage.Assistant(anthropicMessage.content) + : vscode.LanguageModelChatMessage.User(anthropicMessage.content) + ); + continue; + } + + // Handle complex message structures + switch (anthropicMessage.role) { + case "user": { + const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{ + nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]; + toolMessages: Anthropic.ToolResultBlockParam[]; + }>( + (acc, part) => { + if (part.type === "tool_result") { + acc.toolMessages.push(part); + } + else if (part.type === "text" || part.type === "image") { + acc.nonToolMessages.push(part); + } + return acc; + }, + { nonToolMessages: [], toolMessages: [] }, + ); + + // Process tool messages first then non-tool messages + const contentParts = [ + // Convert tool messages to ToolResultParts + ...toolMessages.map((toolMessage) => { + // Process tool result content into TextParts + const toolContentParts: vscode.LanguageModelTextPart[] = ( + typeof toolMessage.content === "string" + ? [new vscode.LanguageModelTextPart(toolMessage.content)] + : ( + toolMessage.content?.map((part) => { + if (part.type === "image") { + return new vscode.LanguageModelTextPart( + `[Image (${part.source?.type || 'Unknown source-type'}): ${part.source?.media_type || 'unknown media-type'} not supported by VSCode LM API]` + ); + } + return new vscode.LanguageModelTextPart(part.text); + }) + ?? 
[new vscode.LanguageModelTextPart("")] + ) + ); + + return new vscode.LanguageModelToolResultPart( + toolMessage.tool_use_id, + toolContentParts + ); + }), + + // Convert non-tool messages to TextParts after tool messages + ...nonToolMessages.map((part) => { + if (part.type === "image") { + return new vscode.LanguageModelTextPart( + `[Image (${part.source?.type || 'Unknown source-type'}): ${part.source?.media_type || 'unknown media-type'} not supported by VSCode LM API]` + ); + } + return new vscode.LanguageModelTextPart(part.text); + }) + ]; + + // Add single user message with all content parts + vsCodeLmMessages.push(vscode.LanguageModelChatMessage.User(contentParts)); + break; + } + + case "assistant": { + const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{ + nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]; + toolMessages: Anthropic.ToolUseBlockParam[]; + }>( + (acc, part) => { + if (part.type === "tool_use") { + acc.toolMessages.push(part); + } + else if (part.type === "text" || part.type === "image") { + acc.nonToolMessages.push(part); + } + return acc; + }, + { nonToolMessages: [], toolMessages: [] }, + ); + + // Process tool messages first then non-tool messages + const contentParts = [ + // Convert tool messages to ToolCallParts first + ...toolMessages.map((toolMessage) => + new vscode.LanguageModelToolCallPart( + toolMessage.id, + toolMessage.name, + asObjectSafe(toolMessage.input) + ) + ), + + // Convert non-tool messages to TextParts after tool messages + ...nonToolMessages.map((part) => { + if (part.type === "image") { + return new vscode.LanguageModelTextPart("[Image generation not supported by VSCode LM API]"); + } + return new vscode.LanguageModelTextPart(part.text); + }) + ]; + + // Add the assistant message to the list of messages + vsCodeLmMessages.push(vscode.LanguageModelChatMessage.Assistant(contentParts)); + break; + } + } + } + + return vsCodeLmMessages; +} + +export function convertToAnthropicRole(vsCodeLmMessageRole: vscode.LanguageModelChatMessageRole): string | null { + switch (vsCodeLmMessageRole) { + case vscode.LanguageModelChatMessageRole.Assistant: + return "assistant"; + case vscode.LanguageModelChatMessageRole.User: + return "user"; + default: + return null; + } +} + +export async function convertToAnthropicMessage(vsCodeLmMessage: vscode.LanguageModelChatMessage): Promise { + const anthropicRole: string | null = convertToAnthropicRole(vsCodeLmMessage.role); + if (anthropicRole !== "assistant") { + throw new Error("Cline : Only assistant messages are supported."); + } + + return { + id: crypto.randomUUID(), + type: "message", + model: "vscode-lm", + role: anthropicRole, + content: ( + vsCodeLmMessage.content + .map((part): Anthropic.ContentBlock | null => { + if (part instanceof vscode.LanguageModelTextPart) { + return { + type: "text", + text: part.value + }; + } + + if (part instanceof vscode.LanguageModelToolCallPart) { + return { + type: "tool_use", + id: part.callId || crypto.randomUUID(), + name: part.name, + input: asObjectSafe(part.input) + }; + } + + return null; + }) + .filter( + (part): part is Anthropic.ContentBlock => part !== null + ) + ), + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 0, + output_tokens: 0, + } + }; +} diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 45f9d06..b8757ab 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -41,6 +41,7 @@ type SecretKey = | "geminiApiKey" | 
"openAiNativeApiKey" | "deepSeekApiKey" + type GlobalStateKey = | "apiProvider" | "apiModelId" @@ -79,6 +80,8 @@ type GlobalStateKey = | "writeDelayMs" | "terminalOutputLineLimit" | "mcpEnabled" + | "vsCodeLmModelSelector" + export const GlobalFileNames = { apiConversationHistory: "api_conversation_history.json", uiMessages: "ui_messages.json", @@ -228,7 +231,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { diffEnabled, fuzzyMatchThreshold } = await this.getState() - + this.cline = new Cline( this, apiConfiguration, @@ -248,7 +251,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { diffEnabled, fuzzyMatchThreshold } = await this.getState() - + this.cline = new Cline( this, apiConfiguration, @@ -314,15 +317,15 @@ export class ClineProvider implements vscode.WebviewViewProvider { // Use a nonce to only allow a specific script to be run. /* - content security policy of your webview to only allow scripts that have a specific nonce - create a content security policy meta tag so that only loading scripts with a nonce is allowed - As your extension grows you will likely want to add custom styles, fonts, and/or images to your webview. If you do, you will need to update the content security policy meta tag to explicity allow for these resources. E.g. - + content security policy of your webview to only allow scripts that have a specific nonce + create a content security policy meta tag so that only loading scripts with a nonce is allowed + As your extension grows you will likely want to add custom styles, fonts, and/or images to your webview. If you do, you will need to update the content security policy meta tag to explicity allow for these resources. E.g. + - 'unsafe-inline' is required for styles due to vscode-webview-toolkit's dynamic style injection - since we pass base64 images to the webview, we need to specify img-src ${webview.cspSource} data:; - in meta tag we add nonce attribute: A cryptographic nonce (only used once) to allow scripts. The server must generate a unique nonce value each time it transmits a policy. It is critical to provide a nonce that cannot be guessed as bypassing a resource's policy is otherwise trivial. - */ + in meta tag we add nonce attribute: A cryptographic nonce (only used once) to allow scripts. The server must generate a unique nonce value each time it transmits a policy. It is critical to provide a nonce that cannot be guessed as bypassing a resource's policy is otherwise trivial. 
+ */ const nonce = getNonce() // Tip: Install the es6-string-html VS Code extension to enable code highlighting below @@ -426,6 +429,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { openRouterModelId, openRouterModelInfo, openRouterUseMiddleOutTransform, + vsCodeLmModelSelector, } = message.apiConfiguration await this.updateGlobalState("apiProvider", apiProvider) await this.updateGlobalState("apiModelId", apiModelId) @@ -454,6 +458,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { await this.updateGlobalState("openRouterModelId", openRouterModelId) await this.updateGlobalState("openRouterModelInfo", openRouterModelInfo) await this.updateGlobalState("openRouterUseMiddleOutTransform", openRouterUseMiddleOutTransform) + await this.updateGlobalState("vsCodeLmModelSelector", vsCodeLmModelSelector) if (this.cline) { this.cline.api = buildApiHandler(message.apiConfiguration) } @@ -525,6 +530,10 @@ export class ClineProvider implements vscode.WebviewViewProvider { const lmStudioModels = await this.getLmStudioModels(message.text) this.postMessageToWebview({ type: "lmStudioModels", lmStudioModels }) break + case "requestVsCodeLmModels": + const vsCodeLmModels = await this.getVsCodeLmModels() + this.postMessageToWebview({ type: "vsCodeLmModels", vsCodeLmModels }) + break case "refreshOpenRouterModels": await this.refreshOpenRouterModels() break @@ -532,7 +541,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { if (message?.values?.baseUrl && message?.values?.apiKey) { const openAiModels = await this.getOpenAiModels(message?.values?.baseUrl, message?.values?.apiKey) this.postMessageToWebview({ type: "openAiModels", openAiModels }) - } + } break case "openImage": openImage(message.text!) @@ -664,7 +673,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { ) if (answer === "Yes" && this.cline && typeof message.value === 'number' && message.value) { const timeCutoff = message.value - 1000; // 1 second buffer before the message to delete - const messageIndex = this.cline.clineMessages.findIndex(msg => msg.ts && msg.ts >= timeCutoff) + const messageIndex = this.cline.clineMessages.findIndex(msg => msg.ts && msg.ts >= timeCutoff) const apiConversationHistoryIndex = this.cline.apiConversationHistory.findIndex(msg => msg.ts && msg.ts >= timeCutoff) if (messageIndex !== -1) { const { historyItem } = await this.getTaskWithId(this.cline.taskId) @@ -773,6 +782,17 @@ export class ClineProvider implements vscode.WebviewViewProvider { } } + // VSCode LM API + private async getVsCodeLmModels() { + try { + const models = await vscode.lm.selectChatModels({}); + return models || []; + } catch (error) { + console.error('Error fetching VS Code LM models:', error); + return []; + } + } + // OpenAi async getOpenAiModels(baseUrl?: string, apiKey?: string) { @@ -1042,9 +1062,9 @@ export class ClineProvider implements vscode.WebviewViewProvider { } async getStateToPostToWebview() { - const { - apiConfiguration, - lastShownAnnouncementId, + const { + apiConfiguration, + lastShownAnnouncementId, customInstructions, alwaysAllowReadOnly, alwaysAllowWrite, @@ -1063,7 +1083,8 @@ export class ClineProvider implements vscode.WebviewViewProvider { fuzzyMatchThreshold, mcpEnabled, } = await this.getState() - + + const allowedCommands = vscode.workspace .getConfiguration('roo-cline') .get('allowedCommands') || [] @@ -1196,6 +1217,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { screenshotQuality, terminalOutputLineLimit, mcpEnabled, 
+ vsCodeLmModelSelector, ] = await Promise.all([ this.getGlobalState("apiProvider") as Promise, this.getGlobalState("apiModelId") as Promise, @@ -1243,6 +1265,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { this.getGlobalState("screenshotQuality") as Promise, this.getGlobalState("terminalOutputLineLimit") as Promise, this.getGlobalState("mcpEnabled") as Promise, + this.getGlobalState("vsCodeLmModelSelector") as Promise, ]) let apiProvider: ApiProvider @@ -1288,6 +1311,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { openRouterModelId, openRouterModelInfo, openRouterUseMiddleOutTransform, + vsCodeLmModelSelector, }, lastShownAnnouncementId, customInstructions, diff --git a/src/extension.ts b/src/extension.ts index 5b94fad..24c0eba 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -36,7 +36,7 @@ export function activate(context: vscode.ExtensionContext) { context.globalState.update('allowedCommands', defaultCommands); } - const sidebarProvider = new ClineProvider(context, outputChannel) + const sidebarProvider = new ClineProvider(context, outputChannel); context.subscriptions.push( vscode.window.registerWebviewViewProvider(ClineProvider.sideBarId, sidebarProvider, { diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index c00fa6c..e855fe7 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -6,32 +6,36 @@ import { McpServer } from "./mcp" // webview will hold state export interface ExtensionMessage { - type: - | "action" - | "state" - | "selectedImages" - | "ollamaModels" - | "lmStudioModels" - | "theme" - | "workspaceUpdated" - | "invoke" - | "partialMessage" - | "openRouterModels" - | "openAiModels" - | "mcpServers" - | "enhancedPrompt" +type: +| "action" +| "state" +| "selectedImages" +| "ollamaModels" +| "lmStudioModels" +| "vsCodeLmModels" +| "vsCodeLmApiAvailable" +| "requestVsCodeLmModels" +| "theme" +| "workspaceUpdated" +| "invoke" +| "partialMessage" +| "openRouterModels" +| "openAiModels" +| "mcpServers" +| "enhancedPrompt" text?: string action?: - | "chatButtonClicked" - | "mcpButtonClicked" - | "settingsButtonClicked" - | "historyButtonClicked" - | "didBecomeVisible" + | "chatButtonClicked" + | "mcpButtonClicked" + | "settingsButtonClicked" + | "historyButtonClicked" + | "didBecomeVisible" invoke?: "sendMessage" | "primaryButtonClick" | "secondaryButtonClick" state?: ExtensionState images?: string[] ollamaModels?: string[] lmStudioModels?: string[] + vsCodeLmModels?: { vendor?: string; family?: string; version?: string; id?: string }[] filePaths?: string[] partialMessage?: ClineMessage openRouterModels?: Record @@ -109,14 +113,14 @@ export type ClineSay = export interface ClineSayTool { tool: - | "editedExistingFile" - | "appliedDiff" - | "newFileCreated" - | "readFile" - | "listFilesTopLevel" - | "listFilesRecursive" - | "listCodeDefinitionNames" - | "searchFiles" + | "editedExistingFile" + | "appliedDiff" + | "newFileCreated" + | "readFile" + | "listFilesTopLevel" + | "listFilesRecursive" + | "listCodeDefinitionNames" + | "searchFiles" path?: string diff?: string content?: string diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index ee602ed..99512c4 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -4,52 +4,53 @@ export type AudioType = "notification" | "celebration" | "progress_loop" export interface WebviewMessage { type: - | "apiConfiguration" - | "customInstructions" - | "allowedCommands" - | 
"alwaysAllowReadOnly" - | "alwaysAllowWrite" - | "alwaysAllowExecute" - | "webviewDidLaunch" - | "newTask" - | "askResponse" - | "clearTask" - | "didShowAnnouncement" - | "selectImages" - | "exportCurrentTask" - | "showTaskWithId" - | "deleteTaskWithId" - | "exportTaskWithId" - | "resetState" - | "requestOllamaModels" - | "requestLmStudioModels" - | "openImage" - | "openFile" - | "openMention" - | "cancelTask" - | "refreshOpenRouterModels" - | "refreshOpenAiModels" - | "alwaysAllowBrowser" - | "alwaysAllowMcp" - | "playSound" - | "soundEnabled" - | "soundVolume" - | "diffEnabled" - | "browserViewportSize" - | "screenshotQuality" - | "openMcpSettings" - | "restartMcpServer" - | "toggleToolAlwaysAllow" - | "toggleMcpServer" - | "fuzzyMatchThreshold" - | "preferredLanguage" - | "writeDelayMs" - | "enhancePrompt" - | "enhancedPrompt" - | "draggedImages" - | "deleteMessage" - | "terminalOutputLineLimit" - | "mcpEnabled" + | "apiConfiguration" + | "customInstructions" + | "allowedCommands" + | "alwaysAllowReadOnly" + | "alwaysAllowWrite" + | "alwaysAllowExecute" + | "webviewDidLaunch" + | "newTask" + | "askResponse" + | "clearTask" + | "didShowAnnouncement" + | "selectImages" + | "exportCurrentTask" + | "showTaskWithId" + | "deleteTaskWithId" + | "exportTaskWithId" + | "resetState" + | "requestOllamaModels" + | "requestLmStudioModels" + | "requestVsCodeLmModels" + | "openImage" + | "openFile" + | "openMention" + | "cancelTask" + | "refreshOpenRouterModels" + | "refreshOpenAiModels" + | "alwaysAllowBrowser" + | "alwaysAllowMcp" + | "playSound" + | "soundEnabled" + | "soundVolume" + | "diffEnabled" + | "browserViewportSize" + | "screenshotQuality" + | "openMcpSettings" + | "restartMcpServer" + | "toggleToolAlwaysAllow" + | "toggleMcpServer" + | "fuzzyMatchThreshold" + | "preferredLanguage" + | "writeDelayMs" + | "enhancePrompt" + | "enhancedPrompt" + | "draggedImages" + | "deleteMessage" + | "terminalOutputLineLimit" + | "mcpEnabled" text?: string disabled?: boolean askResponse?: ClineAskResponse diff --git a/src/shared/api.ts b/src/shared/api.ts index 2759a26..f73a7c5 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -1,3 +1,5 @@ +import * as vscode from 'vscode'; + export type ApiProvider = | "anthropic" | "openrouter" @@ -9,11 +11,13 @@ export type ApiProvider = | "gemini" | "openai-native" | "deepseek" + | "vscode-lm" export interface ApiHandlerOptions { apiModelId?: string apiKey?: string // anthropic anthropicBaseUrl?: string + vsCodeLmModelSelector?: vscode.LanguageModelChatSelector openRouterApiKey?: string openRouterModelId?: string openRouterModelInfo?: ModelInfo @@ -47,16 +51,17 @@ export interface ApiHandlerOptions { export type ApiConfiguration = ApiHandlerOptions & { apiProvider?: ApiProvider + vsCodeLmModelSelector?: vscode.LanguageModelChatSelector; } // Models export interface ModelInfo { maxTokens?: number - contextWindow?: number + contextWindow: number supportsImages?: boolean supportsComputerUse?: boolean - supportsPromptCache: boolean // this value is hardcoded for now + supportsPromptCache: boolean inputPrice?: number outputPrice?: number cacheWritesPrice?: number @@ -115,24 +120,24 @@ export const anthropicModels = { // AWS Bedrock // https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html export interface MessageContent { - type: 'text' | 'image' | 'video' | 'tool_use' | 'tool_result'; - text?: string; - source?: { - type: 'base64'; - data: string | Uint8Array; // string for Anthropic, Uint8Array for Bedrock - media_type: 'image/jpeg' | 
'image/png' | 'image/gif' | 'image/webp'; - }; - // Video specific fields - format?: string; - s3Location?: { - uri: string; - bucketOwner?: string; - }; - // Tool use and result fields - toolUseId?: string; - name?: string; - input?: any; - output?: any; // Used for tool_result type + type: 'text' | 'image' | 'video' | 'tool_use' | 'tool_result'; + text?: string; + source?: { + type: 'base64'; + data: string | Uint8Array; // string for Anthropic, Uint8Array for Bedrock + media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; + }; + // Video specific fields + format?: string; + s3Location?: { + uri: string; + bucketOwner?: string; + }; + // Tool use and result fields + toolUseId?: string; + name?: string; + input?: any; + output?: any; // Used for tool_result type } export type BedrockModelId = keyof typeof bedrockModels @@ -226,7 +231,7 @@ export const bedrockModels = { inputPrice: 0.25, outputPrice: 1.25, }, - "meta.llama3-2-90b-instruct-v1:0" : { + "meta.llama3-2-90b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: true, @@ -235,7 +240,7 @@ export const bedrockModels = { inputPrice: 0.72, outputPrice: 0.72, }, - "meta.llama3-2-11b-instruct-v1:0" : { + "meta.llama3-2-11b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: true, @@ -244,7 +249,7 @@ export const bedrockModels = { inputPrice: 0.16, outputPrice: 0.16, }, - "meta.llama3-2-3b-instruct-v1:0" : { + "meta.llama3-2-3b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: false, @@ -253,7 +258,7 @@ export const bedrockModels = { inputPrice: 0.15, outputPrice: 0.15, }, - "meta.llama3-2-1b-instruct-v1:0" : { + "meta.llama3-2-1b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: false, @@ -262,7 +267,7 @@ export const bedrockModels = { inputPrice: 0.1, outputPrice: 0.1, }, - "meta.llama3-1-405b-instruct-v1:0" : { + "meta.llama3-1-405b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: false, @@ -271,7 +276,7 @@ export const bedrockModels = { inputPrice: 2.4, outputPrice: 2.4, }, - "meta.llama3-1-70b-instruct-v1:0" : { + "meta.llama3-1-70b-instruct-v1:0": { maxTokens: 8192, contextWindow: 128_000, supportsImages: false, @@ -280,7 +285,7 @@ export const bedrockModels = { inputPrice: 0.72, outputPrice: 0.72, }, - "meta.llama3-1-8b-instruct-v1:0" : { + "meta.llama3-1-8b-instruct-v1:0": { maxTokens: 8192, contextWindow: 8_000, supportsImages: false, @@ -289,8 +294,8 @@ export const bedrockModels = { inputPrice: 0.22, outputPrice: 0.22, }, - "meta.llama3-70b-instruct-v1:0" : { - maxTokens: 2048 , + "meta.llama3-70b-instruct-v1:0": { + maxTokens: 2048, contextWindow: 8_000, supportsImages: false, supportsComputerUse: false, @@ -298,8 +303,8 @@ export const bedrockModels = { inputPrice: 2.65, outputPrice: 3.5, }, - "meta.llama3-8b-instruct-v1:0" : { - maxTokens: 2048 , + "meta.llama3-8b-instruct-v1:0": { + maxTokens: 2048, contextWindow: 4_000, supportsImages: false, supportsComputerUse: false, @@ -514,4 +519,3 @@ export const deepSeekModels = { // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" - diff --git a/src/shared/vsCodeSelectorUtils.ts b/src/shared/vsCodeSelectorUtils.ts new file mode 100644 index 0000000..eabbced --- /dev/null +++ b/src/shared/vsCodeSelectorUtils.ts @@ -0,0 +1,14 @@ +import { LanguageModelChatSelector } from 
'vscode'; + +export const SELECTOR_SEPARATOR = '/'; + +export function stringifyVsCodeLmModelSelector(selector: LanguageModelChatSelector): string { + return [ + selector.vendor, + selector.family, + selector.version, + selector.id + ] + .filter(Boolean) + .join(SELECTOR_SEPARATOR); +} diff --git a/src/types/vscode.d.ts b/src/types/vscode.d.ts new file mode 100644 index 0000000..1b4c402 --- /dev/null +++ b/src/types/vscode.d.ts @@ -0,0 +1,86 @@ +declare namespace vscode { + enum LanguageModelChatMessageRole { + User = 1, + Assistant = 2 + } + + enum LanguageModelChatToolMode { + Auto = 1, + Required = 2 + } + + interface LanguageModelChatSelector { + vendor?: string; + family?: string; + version?: string; + id?: string; + } + + interface LanguageModelChatTool { + name: string; + description: string; + inputSchema?: object; + } + + interface LanguageModelChatRequestOptions { + justification?: string; + modelOptions?: { [name: string]: any; }; + tools?: LanguageModelChatTool[]; + toolMode?: LanguageModelChatToolMode; + } + + class LanguageModelTextPart { + value: string; + constructor(value: string); + } + + class LanguageModelToolCallPart { + callId: string; + name: string; + input: object; + constructor(callId: string, name: string, input: object); + } + + interface LanguageModelChatResponse { + stream: AsyncIterable; + text: AsyncIterable; + } + + interface LanguageModelChat { + readonly name: string; + readonly id: string; + readonly vendor: string; + readonly family: string; + readonly version: string; + readonly maxInputTokens: number; + + sendRequest(messages: LanguageModelChatMessage[], options?: LanguageModelChatRequestOptions, token?: CancellationToken): Thenable; + countTokens(text: string | LanguageModelChatMessage, token?: CancellationToken): Thenable; + } + + class LanguageModelPromptTsxPart { + value: unknown; + constructor(value: unknown); + } + + class LanguageModelToolResultPart { + callId: string; + content: Array; + constructor(callId: string, content: Array); + } + + class LanguageModelChatMessage { + static User(content: string | Array, name?: string): LanguageModelChatMessage; + static Assistant(content: string | Array, name?: string): LanguageModelChatMessage; + + role: LanguageModelChatMessageRole; + content: Array; + name: string | undefined; + + constructor(role: LanguageModelChatMessageRole, content: string | Array, name?: string); + } + + namespace lm { + function selectChatModels(selector?: LanguageModelChatSelector): Thenable; + } +} diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index c72342e..8b1edd7 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -49,6 +49,7 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage }: const { apiConfiguration, setApiConfiguration, uriScheme } = useExtensionState() const [ollamaModels, setOllamaModels] = useState([]) const [lmStudioModels, setLmStudioModels] = useState([]) + const [vsCodeLmModels, setVsCodeLmModels] = useState([]) const [anthropicBaseUrlSelected, setAnthropicBaseUrlSelected] = useState(!!apiConfiguration?.anthropicBaseUrl) const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion) const [isDescriptionExpanded, setIsDescriptionExpanded] = useState(false) @@ -67,21 +68,24 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage }: vscode.postMessage({ type: "requestOllamaModels", 
text: apiConfiguration?.ollamaBaseUrl }) } else if (selectedProvider === "lmstudio") { vscode.postMessage({ type: "requestLmStudioModels", text: apiConfiguration?.lmStudioBaseUrl }) + } else if (selectedProvider === "vscode-lm") { + vscode.postMessage({ type: "requestVsCodeLmModels" }) } }, [selectedProvider, apiConfiguration?.ollamaBaseUrl, apiConfiguration?.lmStudioBaseUrl]) useEffect(() => { - if (selectedProvider === "ollama" || selectedProvider === "lmstudio") { + if (selectedProvider === "ollama" || selectedProvider === "lmstudio" || selectedProvider === "vscode-lm") { requestLocalModels() } }, [selectedProvider, requestLocalModels]) - useInterval(requestLocalModels, selectedProvider === "ollama" || selectedProvider === "lmstudio" ? 2000 : null) - + useInterval(requestLocalModels, selectedProvider === "ollama" || selectedProvider === "lmstudio" || selectedProvider === "vscode-lm" ? 2000 : null) const handleMessage = useCallback((event: MessageEvent) => { const message: ExtensionMessage = event.data if (message.type === "ollamaModels" && message.ollamaModels) { setOllamaModels(message.ollamaModels) } else if (message.type === "lmStudioModels" && message.lmStudioModels) { setLmStudioModels(message.lmStudioModels) + } else if (message.type === "vsCodeLmModels" && message.vsCodeLmModels) { + setVsCodeLmModels(message.vsCodeLmModels) } }, []) useEvent("message", handleMessage) @@ -139,6 +143,7 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage }: AWS Bedrock LM Studio Ollama + VS Code LM API @@ -261,7 +266,7 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage }: }}> Compress prompts and message chains to the context size (OpenRouter Transforms) -
+
)} @@ -591,6 +596,50 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage }: )} + {selectedProvider === "vscode-lm" && ( +
+
+ + {vsCodeLmModels.length > 0 ? ( + { + const value = (e.target as HTMLInputElement).value; + const [vendor, family] = value.split('/'); + setApiConfiguration({ + ...apiConfiguration, + vsCodeLmModelSelector: value ? { vendor, family } : undefined + }); + }} + style={{ width: "100%" }}> + Select a model... + {vsCodeLmModels.map((model) => ( + + {model.vendor} - {model.family} + + ))} + + ) : ( +

+ No language models available.
+ You can use any VS Code extension that provides language model capabilities. +

+ )} +
+
+ )} + {selectedProvider === "ollama" && (