diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index 168d9d5..506e879 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -41,7 +41,7 @@ export class AnthropicHandler implements ApiHandler {
 				stream = await this.client.beta.promptCaching.messages.create(
 					{
 						model: modelId,
-						max_tokens: this.getModel().info.maxTokens,
+						max_tokens: this.getModel().info.maxTokens || 8192,
 						temperature: 0,
 						system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
 						messages: messages.map((message, index) => {
@@ -96,7 +96,7 @@ export class AnthropicHandler implements ApiHandler {
 			default: {
 				stream = (await this.client.messages.create({
 					model: modelId,
-					max_tokens: this.getModel().info.maxTokens,
+					max_tokens: this.getModel().info.maxTokens || 8192,
 					temperature: 0,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,
diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index f098426..ad6e8df 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -27,7 +27,7 @@ export class AwsBedrockHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,
diff --git a/src/api/providers/vertex.ts b/src/api/providers/vertex.ts
index 6e02b80..60e6967 100644
--- a/src/api/providers/vertex.ts
+++ b/src/api/providers/vertex.ts
@@ -21,7 +21,7 @@ export class VertexHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,
diff --git a/src/core/ClaudeDev.ts b/src/core/ClaudeDev.ts
index 92823e6..6ad2aa6 100644
--- a/src/core/ClaudeDev.ts
+++ b/src/core/ClaudeDev.ts
@@ -709,7 +709,7 @@ export class ClaudeDev {
 
	async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
 		try {
-			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
+			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
 			if (this.customInstructions && this.customInstructions.trim()) {
 				// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
 				systemPrompt += addCustomInstructions(this.customInstructions)
@@ -723,7 +723,7 @@ export class ClaudeDev {
 					previousRequest.text
 				)
 				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow
+				const contextWindow = this.api.getModel().info.contextWindow || 128_000
 				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
 				if (totalTokens >= maxAllowedSize) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
diff --git a/src/core/webview/ClaudeDevProvider.ts b/src/core/webview/ClaudeDevProvider.ts
index 2ad96ab..b5d1489 100644
--- a/src/core/webview/ClaudeDevProvider.ts
+++ b/src/core/webview/ClaudeDevProvider.ts
@@ -537,9 +537,17 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 		// await this.postMessageToWebview({ type: "action", action: "settingsButtonTapped" }) // bad ux if user is on welcome
 	}
 
-	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
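+	// fs.mkdir with recursive: true is a no-op when the directory already exists, so this is safe to await before every cache read/write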
+	private async ensureCacheDirectoryExists(): Promise<string> {
 		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		await fs.mkdir(cacheDir, { recursive: true })
+		return cacheDir
+	}
+
+	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 		const fileExists = await fileExistsAtPath(openRouterModelsFilePath)
 		if (fileExists) {
 			const fileContents = await fs.readFile(openRouterModelsFilePath, "utf8")
@@ -549,8 +557,10 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 	}
 
 	async refreshOpenRouterModels() {
-		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 
 		let models: Record<string, ModelInfo> = {}
 		try {
@@ -581,16 +591,22 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 					"per_request_limits": null
 				},
 			*/
-			if (response.data) {
-				const rawModels = response.data
+			if (response.data?.data) {
+				const rawModels = response.data.data
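+				// OpenRouter reports prices as strings in USD per token; scale to USD per million tokens to match ModelInfo's pricing fields, leaving missing prices undefined instead of defaulting them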
+				const parsePrice = (price: any) => {
+					if (price) {
+						return parseFloat(price) * 1_000_000
+					}
+					return undefined
+				}
 				for (const rawModel of rawModels) {
 					const modelInfo: ModelInfo = {
-						maxTokens: rawModel.top_provider?.max_completion_tokens || 2048,
-						contextWindow: rawModel.context_length || 128_000,
-						supportsImages: rawModel.architecture?.modality?.includes("image") ?? false,
+						maxTokens: rawModel.top_provider?.max_completion_tokens,
+						contextWindow: rawModel.context_length,
+						supportsImages: rawModel.architecture?.modality?.includes("image"),
 						supportsPromptCache: false,
-						inputPrice: parseFloat(rawModel.pricing?.prompt || 0) * 1_000_000,
-						outputPrice: parseFloat(rawModel.pricing?.completion || 0) * 1_000_000,
+						inputPrice: parsePrice(rawModel.pricing?.prompt),
+						outputPrice: parsePrice(rawModel.pricing?.completion),
 						description: rawModel.description,
 					}
 
@@ -621,6 +637,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 				console.error("Invalid response from OpenRouter API")
 			}
 			await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models))
+			console.log("OpenRouter models fetched and saved", models)
 		} catch (error) {
 			console.error("Error fetching OpenRouter models:", error)
 		}
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 4bac61b..12e6573 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -38,12 +38,12 @@ export type ApiConfiguration = ApiHandlerOptions & {
 // Models
 
 export interface ModelInfo {
-	maxTokens: number
-	contextWindow: number
-	supportsImages: boolean
-	supportsPromptCache: boolean
-	inputPrice: number
-	outputPrice: number
+	maxTokens?: number
+	contextWindow?: number
+	supportsImages?: boolean
+	supportsPromptCache: boolean // this value is hardcoded for now
+	inputPrice?: number
+	outputPrice?: number
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
@@ -130,6 +130,8 @@ export const openRouterDefaultModelInfo: ModelInfo = {
 	outputPrice: 15.0,
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
+	description:
+		"Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
 }
 const openRouterModels = {
 	"anthropic/claude-3.5-sonnet:beta": {
diff --git a/src/utils/cost.ts b/src/utils/cost.ts
index 309da6e..6a777d8 100644
--- a/src/utils/cost.ts
+++ b/src/utils/cost.ts
@@ -17,8 +17,8 @@ export function calculateApiCost(
 	if (cacheReadInputTokens && modelCacheReadsPrice) {
 		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
 	}
-	const baseInputCost = (modelInfo.inputPrice / 1_000_000) * inputTokens
-	const outputCost = (modelInfo.outputPrice / 1_000_000) * outputTokens
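+	// input/output prices may now be undefined for dynamically fetched models; treat a missing price as 0 so the total never becomes NaN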
+	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
+	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index cbf12c5..dba15ee 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -7,7 +7,7 @@ import {
 	VSCodeRadioGroup,
 	VSCodeTextField,
 } from "@vscode/webview-ui-toolkit/react"
-import { memo, useCallback, useEffect, useMemo, useState } from "react"
+import { Fragment, memo, useCallback, useEffect, useMemo, useState } from "react"
 import { useEvent, useInterval } from "react-use"
 import {
 	ApiConfiguration,
@@ -31,7 +31,7 @@ import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
 import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
 import VSCodeButtonLink from "../common/VSCodeButtonLink"
-import OpenRouterModelPicker from "./OpenRouterModelPicker"
+import OpenRouterModelPicker, { ModelDescriptionMarkdown } from "./OpenRouterModelPicker"
 
 interface ApiOptionsProps {
 	showModelOptions: boolean
@@ -440,7 +440,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							marginTop: 3,
 							color: "var(--vscode-descriptionForeground)",
 						}}>
-						You can use any OpenAI compatible API with models that support tool use.{" "}
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(Note: Claude Dev uses complex prompts and works best with Claude models. Less capable
 							models may not work as expected.)
@@ -504,12 +503,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							href="https://github.com/ollama/ollama/blob/main/README.md"
 							style={{ display: "inline", fontSize: "inherit" }}>
 							quickstart guide.
-						</VSCodeLink>{" "}
-						You can use any model that supports{" "}
-						<VSCodeLink
-							href="https://ollama.com/search?c=tools"
-							style={{ display: "inline", fontSize: "inherit" }}>
-							tool use.
 						</VSCodeLink>
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(Note: Claude Dev uses complex prompts and works
@@ -570,80 +563,70 @@ export const formatPrice = (price: number) => {
 
 export const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
 	const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-	const isO1 = selectedModelId && selectedModelId.includes("o1")
+
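+	// gather the visible facts in a filtered array first, so separators can be rendered only between items that actually appear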
+	const infoItems = [
+		modelInfo.description && <ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />,
+		<ModelInfoSupportsItem
+			key="supportsImages"
+			isSupported={modelInfo.supportsImages ?? false}
+			supportsLabel="Supports images"
+			doesNotSupportLabel="Does not support images"
+		/>,
+		!isGemini && (
+			<ModelInfoSupportsItem
+				key="supportsPromptCache"
+				isSupported={modelInfo.supportsPromptCache}
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>
+		),
+		modelInfo.maxTokens !== undefined && modelInfo.maxTokens > 0 && (
+			<span key="maxTokens">
+				<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens?.toLocaleString()} tokens
+			</span>
+		),
+		modelInfo.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+			<span key="inputPrice">
+				<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && (
+			<span key="cacheWritesPrice">
+				<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+				{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheReadsPrice && (
+			<span key="cacheReadsPrice">
+				<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+				{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+			<span key="outputPrice">
+				<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
+				tokens
+			</span>
+		),
+		isGemini && (
+			<span key="geminiInfo" style={{ fontStyle: "italic" }}>
+				* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per minute.
+				After that, billing depends on prompt size.{" "}
+				<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
+					For more info, see pricing details.
+				</VSCodeLink>
+			</span>
+		),
+	].filter(Boolean)
+
 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsImages}
-				supportsLabel="Supports images"
-				doesNotSupportLabel="Does not support images"
-			/>
-			<br />
-			{!isGemini && (
-				<>
-					<ModelInfoSupportsItem
-						isSupported={modelInfo.supportsPromptCache}
-						supportsLabel="Supports prompt caching"
-						doesNotSupportLabel="Does not support prompt caching"
-					/>
-					<br />
-				</>
-			)}
-			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo?.maxTokens?.toLocaleString()} tokens
-			{modelInfo.inputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million
-					tokens
-				</>
-			)}
-			{modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
-					{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
-					{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
-				</>
-			)}
-			{modelInfo.outputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
-					tokens
-				</>
-			)}
-			{isGemini && (
-				<>
-					<br />
-					<span style={{ fontStyle: "italic" }}>
-						* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per
-						minute. After that, billing depends on prompt size.{" "}
-						<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
-							For more info, see pricing details.
-						</VSCodeLink>
-					</span>
-				</>
-			)}
-			{isO1 && (
-				<>
-					<br />
-					<span style={{ fontStyle: "italic" }}>
-						* This model does not support tool use or system prompts, so Claude Dev uses structured output
-						prompting to achieve similar results. Your mileage may vary.
-					</span>
-				</>
-			)}
+			{infoItems.map((item, index) => (
+				<Fragment key={index}>
+					{item}
+					{index < infoItems.length - 1 && <br />}
+				</Fragment>
+			))}
 		</p>
 	)
 }
diff --git a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
index 13fd6f9..c94d953 100644
--- a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
+++ b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
@@ -1,9 +1,12 @@
 import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
 import React, { useMemo } from "react"
-import { useExtensionState } from "../../context/ExtensionStateContext"
-import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
 import { useMount } from "react-use"
+import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
+import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
+import { memo, useEffect } from "react"
+import { useRemark } from "react-remark"
+import styled from "styled-components"
 
 interface OpenRouterModelPickerProps {}
 
@@ -27,8 +30,12 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 		vscode.postMessage({ type: "refreshOpenRouterModels" })
 	})
 
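+	// alphabetize once per model-list update so the dropdown order stays stable across re-renders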
+	const modelIds = useMemo(() => {
+		return Object.keys(openRouterModels).sort((a, b) => a.localeCompare(b))
+	}, [openRouterModels])
+
 	return (
-		<div style={{ width: "100%" }}>
+		<>
 			<div className="dropdown-container">
 				<VSCodeDropdown
 					id="model-id"
@@ -57,13 +64,46 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 			</VSCodeDropdown>
-			{selectedModelInfo.description && (
-				<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-					{selectedModelInfo.description}
-				</p>
-			)}
-
 			<ModelInfoView selectedModelId={selectedModelId} modelInfo={selectedModelInfo} />
-		</div>
+		</>
 	)
 }
 
 export default OpenRouterModelPicker
+
+const StyledMarkdown = styled.div`
+	font-family: var(--vscode-font-family), system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen,
+		Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+	font-size: 12px;
+	color: var(--vscode-descriptionForeground);
+
+	p,
+	li,
+	ol,
+	ul {
+		line-height: 1.25;
+		margin: 0;
+	}
+
+	ol,
+	ul {
+		padding-left: 1.5em;
+		margin-left: 0;
+	}
+
+	p {
+		white-space: pre-wrap;
+	}
+`
+
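+// memoized so an unchanged description string doesn't re-render or re-parse on every parent render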
+export const ModelDescriptionMarkdown = memo(({ markdown, key }: { markdown?: string; key: string }) => {
+	const [reactContent, setMarkdown] = useRemark()
+
+	useEffect(() => {
+		setMarkdown(markdown || "")
+	}, [markdown, setMarkdown])
+
+	return (
+		<StyledMarkdown key={key}>
+			{reactContent}
+		</StyledMarkdown>
+	)
+})