Add markdown rendering for model description; fixes

Saoud Rizwan
2024-10-03 21:23:49 -04:00
parent 9ffe01888a
commit f6a14fdfb9
9 changed files with 159 additions and 117 deletions

View File

@@ -41,7 +41,7 @@ export class AnthropicHandler implements ApiHandler {
 				stream = await this.client.beta.promptCaching.messages.create(
 					{
 						model: modelId,
-						max_tokens: this.getModel().info.maxTokens,
+						max_tokens: this.getModel().info.maxTokens || 8192,
 						temperature: 0,
 						system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
 						messages: messages.map((message, index) => {
@@ -96,7 +96,7 @@ export class AnthropicHandler implements ApiHandler {
 			default: {
 				stream = (await this.client.messages.create({
 					model: modelId,
-					max_tokens: this.getModel().info.maxTokens,
+					max_tokens: this.getModel().info.maxTokens || 8192,
 					temperature: 0,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,

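The same guard is applied in the Bedrock and Vertex handlers below. Since `ModelInfo.maxTokens` becomes optional in this commit (see the `ModelInfo` interface change further down), every call site now needs a concrete default. A minimal sketch of the pattern, with a locally declared type and illustrative `info` object:

```ts
// Minimal sketch of the fallback; the type and values here are illustrative.
interface ModelInfo {
	maxTokens?: number // optional: dynamically fetched models may not report a limit
}

const info: ModelInfo = {} // e.g. an OpenRouter model without max_completion_tokens

// `||` falls back when maxTokens is undefined (or 0), so the request always
// carries a valid max_tokens value.
const maxTokens = info.maxTokens || 8192
console.log(maxTokens) // 8192
```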
View File

@@ -27,7 +27,7 @@ export class AwsBedrockHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,

View File

@@ -21,7 +21,7 @@ export class VertexHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,

View File

@@ -709,7 +709,7 @@ export class ClaudeDev {
 	async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
 		try {
-			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
+			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
 			if (this.customInstructions && this.customInstructions.trim()) {
 				// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
 				systemPrompt += addCustomInstructions(this.customInstructions)
@@ -723,7 +723,7 @@ export class ClaudeDev {
 					previousRequest.text
 				)
 				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow
+				const contextWindow = this.api.getModel().info.contextWindow || 128_000
 				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
 				if (totalTokens >= maxAllowedSize) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)

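The 128_000 fallback feeds directly into the truncation threshold on the next line. A small sketch of the arithmetic, assuming only the two window sizes shown:

```ts
// Sketch: the threshold keeps 40k tokens of headroom, but never dips below 80% of the window.
const maxAllowedSize = (contextWindow: number) => Math.max(contextWindow - 40_000, contextWindow * 0.8)

console.log(maxAllowedSize(200_000)) // 160000 (both terms agree at 160k)
console.log(maxAllowedSize(128_000)) // 102400 (the 80% floor wins over 128000 - 40000 = 88000)
```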
View File

@@ -537,9 +537,17 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 		// await this.postMessageToWebview({ type: "action", action: "settingsButtonTapped" }) // bad ux if user is on welcome
 	}

-	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+	private async ensureCacheDirectoryExists(): Promise<string> {
 		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		await fs.mkdir(cacheDir, { recursive: true })
+		return cacheDir
+	}
+
+	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 		const fileExists = await fileExistsAtPath(openRouterModelsFilePath)
 		if (fileExists) {
 			const fileContents = await fs.readFile(openRouterModelsFilePath, "utf8")
@@ -549,8 +557,10 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 	}

 	async refreshOpenRouterModels() {
-		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)

 		let models: Record<string, ModelInfo> = {}
 		try {
@@ -581,16 +591,22 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
"per_request_limits": null "per_request_limits": null
}, },
*/ */
if (response.data) { if (response.data?.data) {
const rawModels = response.data const rawModels = response.data.data
const parsePrice = (price: any) => {
if (price) {
return parseFloat(price) * 1_000_000
}
return undefined
}
for (const rawModel of rawModels) { for (const rawModel of rawModels) {
const modelInfo: ModelInfo = { const modelInfo: ModelInfo = {
maxTokens: rawModel.top_provider?.max_completion_tokens || 2048, maxTokens: rawModel.top_provider?.max_completion_tokens,
contextWindow: rawModel.context_length || 128_000, contextWindow: rawModel.context_length,
supportsImages: rawModel.architecture?.modality?.includes("image") ?? false, supportsImages: rawModel.architecture?.modality?.includes("image"),
supportsPromptCache: false, supportsPromptCache: false,
inputPrice: parseFloat(rawModel.pricing?.prompt || 0) * 1_000_000, inputPrice: parsePrice(rawModel.pricing?.prompt),
outputPrice: parseFloat(rawModel.pricing?.completion || 0) * 1_000_000, outputPrice: parsePrice(rawModel.pricing?.completion),
description: rawModel.description, description: rawModel.description,
} }
@@ -621,6 +637,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
console.error("Invalid response from OpenRouter API") console.error("Invalid response from OpenRouter API")
} }
await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models)) await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models))
console.log("OpenRouter models fetched and saved", models)
} catch (error) { } catch (error) {
console.error("Error fetching OpenRouter models:", error) console.error("Error fetching OpenRouter models:", error)
} }

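Two small patterns worth noting in this file: `fs.mkdir` with `recursive: true` is a no-op when the cache directory already exists, so both `readOpenRouterModels` and `refreshOpenRouterModels` can call `ensureCacheDirectoryExists` unconditionally. And `parsePrice` converts OpenRouter's per-token price strings into the per-million-token numbers `ModelInfo` expects, returning `undefined` for missing prices instead of coercing them to 0. A standalone sketch with illustrative values:

```ts
// OpenRouter reports prices as USD-per-token strings; ModelInfo stores USD per million tokens.
const parsePrice = (price: any) => {
	if (price) {
		return parseFloat(price) * 1_000_000
	}
	return undefined
}

console.log(parsePrice("0.000003")) // ≈ 3, i.e. $3 per million input tokens
console.log(parsePrice("0.000015")) // ≈ 15, i.e. $15 per million output tokens
console.log(parsePrice(undefined)) // undefined: unknown price, not a fake 0
```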
View File

@@ -38,12 +38,12 @@ export type ApiConfiguration = ApiHandlerOptions & {
 // Models

 export interface ModelInfo {
-	maxTokens: number
-	contextWindow: number
-	supportsImages: boolean
-	supportsPromptCache: boolean
-	inputPrice: number
-	outputPrice: number
+	maxTokens?: number
+	contextWindow?: number
+	supportsImages?: boolean
+	supportsPromptCache: boolean // this value is hardcoded for now
+	inputPrice?: number
+	outputPrice?: number
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
@@ -130,6 +130,8 @@ export const openRouterDefaultModelInfo: ModelInfo = {
 	outputPrice: 15.0,
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
+	description:
+		"Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
 }

 const openRouterModels = {
 	"anthropic/claude-3.5-sonnet:beta": {

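With most fields optional, a dynamically fetched model that omits limits or prices can now be represented without invented defaults. A hypothetical entry using the `ModelInfo` interface defined above:

```ts
// Hypothetical OpenRouter entry: only what the listing actually reported is filled in.
const sparseModel: ModelInfo = {
	supportsPromptCache: false, // still required, hardcoded for now
	contextWindow: 32_768,
	inputPrice: 0.5, // $0.50 per million input tokens
	// maxTokens, supportsImages, outputPrice, and description stay undefined
}
```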
View File

@@ -17,8 +17,8 @@ export function calculateApiCost(
 	if (cacheReadInputTokens && modelCacheReadsPrice) {
 		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
 	}
-	const baseInputCost = (modelInfo.inputPrice / 1_000_000) * inputTokens
-	const outputCost = (modelInfo.outputPrice / 1_000_000) * outputTokens
+	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
+	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }

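A quick worked example of the guarded arithmetic, using Claude 3.5 Sonnet's $3/$15 per-million prices and made-up token counts:

```ts
// Sketch: an undefined price would make the division produce NaN; `|| 0` keeps the sum finite.
const inputPrice: number | undefined = 3.0 // $/million tokens
const outputPrice: number | undefined = 15.0

const baseInputCost = ((inputPrice || 0) / 1_000_000) * 10_000 // ≈ $0.03 for 10k input tokens
const outputCost = ((outputPrice || 0) / 1_000_000) * 2_000 // ≈ $0.03 for 2k output tokens
console.log(baseInputCost + outputCost) // ≈ 0.06
```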
View File

@@ -7,7 +7,7 @@ import {
 	VSCodeRadioGroup,
 	VSCodeTextField,
 } from "@vscode/webview-ui-toolkit/react"
-import { memo, useCallback, useEffect, useMemo, useState } from "react"
+import { Fragment, memo, useCallback, useEffect, useMemo, useState } from "react"
 import { useEvent, useInterval } from "react-use"
 import {
 	ApiConfiguration,
@@ -31,7 +31,7 @@ import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
 import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
 import VSCodeButtonLink from "../common/VSCodeButtonLink"
-import OpenRouterModelPicker from "./OpenRouterModelPicker"
+import OpenRouterModelPicker, { ModelDescriptionMarkdown } from "./OpenRouterModelPicker"

 interface ApiOptionsProps {
 	showModelOptions: boolean
@@ -440,7 +440,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							marginTop: 3,
 							color: "var(--vscode-descriptionForeground)",
 						}}>
-						You can use any OpenAI compatible API with models that support tool use.{" "}
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
 							best with Claude models. Less capable models may not work as expected.)
@@ -504,12 +503,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
href="https://github.com/ollama/ollama/blob/main/README.md" href="https://github.com/ollama/ollama/blob/main/README.md"
style={{ display: "inline", fontSize: "inherit" }}> style={{ display: "inline", fontSize: "inherit" }}>
quickstart guide. quickstart guide.
</VSCodeLink>{" "}
You can use any model that supports{" "}
<VSCodeLink
href="https://ollama.com/search?c=tools"
style={{ display: "inline", fontSize: "inherit" }}>
tool use.
</VSCodeLink> </VSCodeLink>
<span style={{ color: "var(--vscode-errorForeground)" }}> <span style={{ color: "var(--vscode-errorForeground)" }}>
(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works (<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
@@ -570,80 +563,70 @@ export const formatPrice = (price: number) => {
 export const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
 	const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-	const isO1 = selectedModelId && selectedModelId.includes("o1")
+
+	const infoItems = [
+		modelInfo.description && <ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />,
+		<ModelInfoSupportsItem
+			key="supportsImages"
+			isSupported={modelInfo.supportsImages ?? false}
+			supportsLabel="Supports images"
+			doesNotSupportLabel="Does not support images"
+		/>,
+		!isGemini && (
+			<ModelInfoSupportsItem
+				key="supportsPromptCache"
+				isSupported={modelInfo.supportsPromptCache}
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>
+		),
+		modelInfo.maxTokens !== undefined && modelInfo.maxTokens > 0 && (
+			<span key="maxTokens">
+				<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens?.toLocaleString()} tokens
+			</span>
+		),
+		modelInfo.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+			<span key="inputPrice">
+				<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && (
+			<span key="cacheWritesPrice">
+				<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+				{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheReadsPrice && (
+			<span key="cacheReadsPrice">
+				<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+				{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+			<span key="outputPrice">
+				<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
+				tokens
+			</span>
+		),
+		isGemini && (
+			<span key="geminiInfo" style={{ fontStyle: "italic" }}>
+				* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per minute.
+				After that, billing depends on prompt size.{" "}
+				<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
+					For more info, see pricing details.
+				</VSCodeLink>
+			</span>
+		),
+	].filter(Boolean)

 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsImages}
-				supportsLabel="Supports images"
-				doesNotSupportLabel="Does not support images"
-			/>
-			<br />
-			{!isGemini && (
-				<>
-					<ModelInfoSupportsItem
-						isSupported={modelInfo.supportsPromptCache}
-						supportsLabel="Supports prompt caching"
-						doesNotSupportLabel="Does not support prompt caching"
-					/>
-					<br />
-				</>
-			)}
-			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo?.maxTokens?.toLocaleString()} tokens
-			{modelInfo.inputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million
-					tokens
-				</>
-			)}
-			{modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
-					{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
-					{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
-				</>
-			)}
-			{modelInfo.outputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
-					tokens
-				</>
-			)}
-			{isGemini && (
-				<>
-					<br />
-					<span
-						style={{
-							fontStyle: "italic",
-						}}>
-						* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per
-						minute. After that, billing depends on prompt size.{" "}
-						<VSCodeLink
-							href="https://ai.google.dev/pricing"
-							style={{ display: "inline", fontSize: "inherit" }}>
-							For more info, see pricing details.
-						</VSCodeLink>
-					</span>
-				</>
-			)}
-			{isO1 && (
-				<>
-					<br />
-					<span
-						style={{
-							fontStyle: "italic",
-							color: "var(--vscode-errorForeground)",
-						}}>
-						* This model does not support tool use or system prompts, so Claude Dev uses structured output
-						prompting to achieve similar results. Your mileage may vary.
-					</span>
-				</>
-			)}
+			{infoItems.map((item, index) => (
+				<Fragment key={index}>
+					{item}
+					{index < infoItems.length - 1 && <br />}
+				</Fragment>
+			))}
 		</p>
 	)
 }

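The rewrite collects each row into an `infoItems` array, drops falsy entries with `filter(Boolean)`, and joins the survivors with `<br />` separators, replacing the hand-placed `<br />` bookkeeping of the old JSX. A stripped-down sketch of the same pattern with two hypothetical rows:

```tsx
import { Fragment } from "react"

// Hypothetical two-row version of the conditional-row pattern used above.
const Rows = ({ description, price }: { description?: string; price?: number }) => {
	const items = [
		description && <span key="description">{description}</span>,
		price !== undefined && price > 0 && <span key="price">{price}/million tokens</span>,
	].filter(Boolean)

	return (
		<p>
			{items.map((item, index) => (
				<Fragment key={index}>
					{item}
					{index < items.length - 1 && <br />}
				</Fragment>
			))}
		</p>
	)
}
```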
View File

@@ -1,9 +1,12 @@
 import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
 import React, { useMemo } from "react"
-import { useExtensionState } from "../../context/ExtensionStateContext"
-import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
 import { useMount } from "react-use"
+import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
+import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
+import { memo, useEffect } from "react"
+import { useRemark } from "react-remark"
+import styled from "styled-components"

 interface OpenRouterModelPickerProps {}
@@ -27,8 +30,12 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
vscode.postMessage({ type: "refreshOpenRouterModels" }) vscode.postMessage({ type: "refreshOpenRouterModels" })
}) })
const modelIds = useMemo(() => {
return Object.keys(openRouterModels).sort((a, b) => a.localeCompare(b))
}, [openRouterModels])
return ( return (
<div style={{ display: "flex", flexDirection: "column", gap: 5 }}> <>
<div className="dropdown-container"> <div className="dropdown-container">
<label htmlFor="model-id"> <label htmlFor="model-id">
<span style={{ fontWeight: 500 }}>Model</span> <span style={{ fontWeight: 500 }}>Model</span>
@@ -39,7 +46,7 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 					onChange={handleModelChange}
 					style={{ width: "100%" }}>
 					<VSCodeOption value="">Select a model...</VSCodeOption>
-					{Object.keys(openRouterModels).map((modelId) => (
+					{modelIds.map((modelId) => (
 						<VSCodeOption
 							key={modelId}
 							value={modelId}
@@ -54,15 +61,48 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 				</VSCodeDropdown>
 			</div>

-			{selectedModelInfo.description && (
-				<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-					{selectedModelInfo.description}
-				</p>
-			)}
 			<ModelInfoView selectedModelId={selectedModelId} modelInfo={selectedModelInfo} />
-		</div>
+		</>
 	)
 }

 export default OpenRouterModelPicker

+const StyledMarkdown = styled.div`
+	font-family: var(--vscode-font-family), system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen,
+		Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+	font-size: 12px;
+	color: var(--vscode-descriptionForeground);
+
+	p,
+	li,
+	ol,
+	ul {
+		line-height: 1.25;
+		margin: 0;
+	}
+
+	ol,
+	ul {
+		padding-left: 1.5em;
+		margin-left: 0;
+	}
+
+	p {
+		white-space: pre-wrap;
+	}
+`
+
+export const ModelDescriptionMarkdown = memo(({ markdown, key }: { markdown?: string; key: string }) => {
+	const [reactContent, setMarkdown] = useRemark()
+
+	useEffect(() => {
+		setMarkdown(markdown || "")
+	}, [markdown, setMarkdown])
+
+	return (
+		<StyledMarkdown key={key} style={{ display: "inline-block", marginBottom: 5 }}>
+			{reactContent}
+		</StyledMarkdown>
+	)
+})
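Consuming the new component is a single prop pass-through: `useRemark` (from react-remark) returns the rendered React tree and a setter, and the effect re-parses whenever the markdown string changes, so the description stays in sync with the selected model. A minimal hypothetical consumer, assuming the exports above; the sample description text is illustrative:

```tsx
// Hypothetical consumer of ModelDescriptionMarkdown.
const description = "Supports **tool use** and `JSON` mode.\n\n- fast\n- cheap"

export const DescriptionPreview = () => (
	<div>{description && <ModelDescriptionMarkdown key="description" markdown={description} />}</div>
)
```

Parsing happens entirely inside the webview, so the extension host only ever ships the raw description string.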