Add markdown rendering for model description; fixes

Saoud Rizwan
2024-10-03 21:23:49 -04:00
parent 9ffe01888a
commit f6a14fdfb9
9 changed files with 159 additions and 117 deletions

View File

@@ -41,7 +41,7 @@ export class AnthropicHandler implements ApiHandler {
 				stream = await this.client.beta.promptCaching.messages.create(
 					{
 						model: modelId,
-						max_tokens: this.getModel().info.maxTokens,
+						max_tokens: this.getModel().info.maxTokens || 8192,
 						temperature: 0,
 						system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
 						messages: messages.map((message, index) => {
@@ -96,7 +96,7 @@ export class AnthropicHandler implements ApiHandler {
 			default: {
 				stream = (await this.client.messages.create({
 					model: modelId,
-					max_tokens: this.getModel().info.maxTokens,
+					max_tokens: this.getModel().info.maxTokens || 8192,
 					temperature: 0,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,

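The same guard is applied in the Bedrock and Vertex handlers below. Since `ModelInfo.maxTokens` becomes optional in this commit (see the `ModelInfo` interface change further down), every call site now needs a concrete default. A minimal sketch of the pattern, with a locally declared type and illustrative `info` object:

```ts
// Minimal sketch of the fallback; the type and values here are illustrative.
interface ModelInfo {
	maxTokens?: number // optional: dynamically fetched models may not report a limit
}

const info: ModelInfo = {} // e.g. an OpenRouter model without max_completion_tokens

// `||` falls back when maxTokens is undefined (or 0), so the request always
// carries a valid max_tokens value.
const maxTokens = info.maxTokens || 8192
console.log(maxTokens) // 8192
```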
View File

@@ -27,7 +27,7 @@ export class AwsBedrockHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,

View File

@@ -21,7 +21,7 @@ export class VertexHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,

View File

@@ -709,7 +709,7 @@ export class ClaudeDev {
 	async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
 		try {
-			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
+			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
 			if (this.customInstructions && this.customInstructions.trim()) {
 				// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
 				systemPrompt += addCustomInstructions(this.customInstructions)
@@ -723,7 +723,7 @@ export class ClaudeDev {
 					previousRequest.text
 				)
 				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow
+				const contextWindow = this.api.getModel().info.contextWindow || 128_000
 				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
 				if (totalTokens >= maxAllowedSize) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)

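The 128_000 fallback feeds directly into the truncation threshold on the next line. A small sketch of the arithmetic, assuming only the two window sizes shown:

```ts
// Sketch: the threshold keeps 40k tokens of headroom, but never dips below 80% of the window.
const maxAllowedSize = (contextWindow: number) => Math.max(contextWindow - 40_000, contextWindow * 0.8)

console.log(maxAllowedSize(200_000)) // 160000 (both terms agree at 160k)
console.log(maxAllowedSize(128_000)) // 102400 (the 80% floor wins over 128000 - 40000 = 88000)
```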
View File

@@ -537,9 +537,17 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 		// await this.postMessageToWebview({ type: "action", action: "settingsButtonTapped" }) // bad ux if user is on welcome
 	}

-	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+	private async ensureCacheDirectoryExists(): Promise<string> {
 		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		await fs.mkdir(cacheDir, { recursive: true })
+		return cacheDir
+	}
+
+	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 		const fileExists = await fileExistsAtPath(openRouterModelsFilePath)
 		if (fileExists) {
 			const fileContents = await fs.readFile(openRouterModelsFilePath, "utf8")
@@ -549,8 +557,10 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 	}

 	async refreshOpenRouterModels() {
-		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)

 		let models: Record<string, ModelInfo> = {}
 		try {
@@ -581,16 +591,22 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
"per_request_limits": null "per_request_limits": null
}, },
*/ */
if (response.data) { if (response.data?.data) {
const rawModels = response.data const rawModels = response.data.data
const parsePrice = (price: any) => {
if (price) {
return parseFloat(price) * 1_000_000
}
return undefined
}
for (const rawModel of rawModels) { for (const rawModel of rawModels) {
const modelInfo: ModelInfo = { const modelInfo: ModelInfo = {
maxTokens: rawModel.top_provider?.max_completion_tokens || 2048, maxTokens: rawModel.top_provider?.max_completion_tokens,
contextWindow: rawModel.context_length || 128_000, contextWindow: rawModel.context_length,
supportsImages: rawModel.architecture?.modality?.includes("image") ?? false, supportsImages: rawModel.architecture?.modality?.includes("image"),
supportsPromptCache: false, supportsPromptCache: false,
inputPrice: parseFloat(rawModel.pricing?.prompt || 0) * 1_000_000, inputPrice: parsePrice(rawModel.pricing?.prompt),
outputPrice: parseFloat(rawModel.pricing?.completion || 0) * 1_000_000, outputPrice: parsePrice(rawModel.pricing?.completion),
description: rawModel.description, description: rawModel.description,
} }
@@ -621,6 +637,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
console.error("Invalid response from OpenRouter API") console.error("Invalid response from OpenRouter API")
} }
await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models)) await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models))
console.log("OpenRouter models fetched and saved", models)
} catch (error) { } catch (error) {
console.error("Error fetching OpenRouter models:", error) console.error("Error fetching OpenRouter models:", error)
} }

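Two small patterns worth noting in this file: `fs.mkdir` with `recursive: true` is a no-op when the cache directory already exists, so both `readOpenRouterModels` and `refreshOpenRouterModels` can call `ensureCacheDirectoryExists` unconditionally. And `parsePrice` converts OpenRouter's per-token price strings into the per-million-token numbers `ModelInfo` expects, returning `undefined` for missing prices instead of coercing them to 0. A standalone sketch with illustrative values:

```ts
// OpenRouter reports prices as USD-per-token strings; ModelInfo stores USD per million tokens.
const parsePrice = (price: any) => {
	if (price) {
		return parseFloat(price) * 1_000_000
	}
	return undefined
}

console.log(parsePrice("0.000003")) // ≈ 3, i.e. $3 per million input tokens
console.log(parsePrice("0.000015")) // ≈ 15, i.e. $15 per million output tokens
console.log(parsePrice(undefined)) // undefined: unknown price, not a fake 0
```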
View File

@@ -38,12 +38,12 @@ export type ApiConfiguration = ApiHandlerOptions & {
 // Models

 export interface ModelInfo {
-	maxTokens: number
-	contextWindow: number
-	supportsImages: boolean
-	supportsPromptCache: boolean
-	inputPrice: number
-	outputPrice: number
+	maxTokens?: number
+	contextWindow?: number
+	supportsImages?: boolean
+	supportsPromptCache: boolean // this value is hardcoded for now
+	inputPrice?: number
+	outputPrice?: number
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
@@ -130,6 +130,8 @@ export const openRouterDefaultModelInfo: ModelInfo = {
 	outputPrice: 15.0,
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
+	description:
+		"Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
 }

 const openRouterModels = {
 	"anthropic/claude-3.5-sonnet:beta": {

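With most fields optional, a dynamically fetched model that omits limits or prices can now be represented without invented defaults. A hypothetical entry using the `ModelInfo` interface defined above:

```ts
// Hypothetical OpenRouter entry: only what the listing actually reported is filled in.
const sparseModel: ModelInfo = {
	supportsPromptCache: false, // still required, hardcoded for now
	contextWindow: 32_768,
	inputPrice: 0.5, // $0.50 per million input tokens
	// maxTokens, supportsImages, outputPrice, and description stay undefined
}
```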
View File

@@ -17,8 +17,8 @@ export function calculateApiCost(
 	if (cacheReadInputTokens && modelCacheReadsPrice) {
 		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
 	}
-	const baseInputCost = (modelInfo.inputPrice / 1_000_000) * inputTokens
-	const outputCost = (modelInfo.outputPrice / 1_000_000) * outputTokens
+	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
+	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }

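A quick worked example of the guarded arithmetic, using Claude 3.5 Sonnet's $3/$15 per-million prices and made-up token counts:

```ts
// Sketch: an undefined price would make the division produce NaN; `|| 0` keeps the sum finite.
const inputPrice: number | undefined = 3.0 // $/million tokens
const outputPrice: number | undefined = 15.0

const baseInputCost = ((inputPrice || 0) / 1_000_000) * 10_000 // ≈ $0.03 for 10k input tokens
const outputCost = ((outputPrice || 0) / 1_000_000) * 2_000 // ≈ $0.03 for 2k output tokens
console.log(baseInputCost + outputCost) // ≈ 0.06
```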
View File

@@ -7,7 +7,7 @@ import {
 	VSCodeRadioGroup,
 	VSCodeTextField,
 } from "@vscode/webview-ui-toolkit/react"
-import { memo, useCallback, useEffect, useMemo, useState } from "react"
+import { Fragment, memo, useCallback, useEffect, useMemo, useState } from "react"
 import { useEvent, useInterval } from "react-use"
 import {
 	ApiConfiguration,
@@ -31,7 +31,7 @@ import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
 import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
 import VSCodeButtonLink from "../common/VSCodeButtonLink"
-import OpenRouterModelPicker from "./OpenRouterModelPicker"
+import OpenRouterModelPicker, { ModelDescriptionMarkdown } from "./OpenRouterModelPicker"

 interface ApiOptionsProps {
 	showModelOptions: boolean
@@ -440,7 +440,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							marginTop: 3,
 							color: "var(--vscode-descriptionForeground)",
 						}}>
-						You can use any OpenAI compatible API with models that support tool use.{" "}
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
 							best with Claude models. Less capable models may not work as expected.)
@@ -504,12 +503,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
href="https://github.com/ollama/ollama/blob/main/README.md" href="https://github.com/ollama/ollama/blob/main/README.md"
style={{ display: "inline", fontSize: "inherit" }}> style={{ display: "inline", fontSize: "inherit" }}>
quickstart guide. quickstart guide.
</VSCodeLink>{" "}
You can use any model that supports{" "}
<VSCodeLink
href="https://ollama.com/search?c=tools"
style={{ display: "inline", fontSize: "inherit" }}>
tool use.
</VSCodeLink> </VSCodeLink>
<span style={{ color: "var(--vscode-errorForeground)" }}> <span style={{ color: "var(--vscode-errorForeground)" }}>
(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works (<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
@@ -570,80 +563,70 @@ export const formatPrice = (price: number) => {
 export const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
 	const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-	const isO1 = selectedModelId && selectedModelId.includes("o1")
+
+	const infoItems = [
+		modelInfo.description && <ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />,
+		<ModelInfoSupportsItem
+			key="supportsImages"
+			isSupported={modelInfo.supportsImages ?? false}
+			supportsLabel="Supports images"
+			doesNotSupportLabel="Does not support images"
+		/>,
+		!isGemini && (
+			<ModelInfoSupportsItem
+				key="supportsPromptCache"
+				isSupported={modelInfo.supportsPromptCache}
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>
+		),
+		modelInfo.maxTokens !== undefined && modelInfo.maxTokens > 0 && (
+			<span key="maxTokens">
+				<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens?.toLocaleString()} tokens
+			</span>
+		),
+		modelInfo.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+			<span key="inputPrice">
+				<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && (
+			<span key="cacheWritesPrice">
+				<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+				{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheReadsPrice && (
+			<span key="cacheReadsPrice">
+				<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+				{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+			<span key="outputPrice">
+				<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
+				tokens
+			</span>
+		),
+		isGemini && (
+			<span key="geminiInfo" style={{ fontStyle: "italic" }}>
+				* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per minute.
+				After that, billing depends on prompt size.{" "}
+				<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
+					For more info, see pricing details.
+				</VSCodeLink>
+			</span>
+		),
+	].filter(Boolean)

 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsImages}
-				supportsLabel="Supports images"
-				doesNotSupportLabel="Does not support images"
-			/>
-			<br />
-			{!isGemini && (
-				<>
-					<ModelInfoSupportsItem
-						isSupported={modelInfo.supportsPromptCache}
-						supportsLabel="Supports prompt caching"
-						doesNotSupportLabel="Does not support prompt caching"
-					/>
-					<br />
-				</>
-			)}
-			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo?.maxTokens?.toLocaleString()} tokens
-			{modelInfo.inputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million
-					tokens
-				</>
-			)}
-			{modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
-					{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
-					{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
-				</>
-			)}
-			{modelInfo.outputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
-					tokens
-				</>
-			)}
-			{isGemini && (
-				<>
-					<br />
-					<span
-						style={{
-							fontStyle: "italic",
-						}}>
-						* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per
-						minute. After that, billing depends on prompt size.{" "}
-						<VSCodeLink
-							href="https://ai.google.dev/pricing"
-							style={{ display: "inline", fontSize: "inherit" }}>
-							For more info, see pricing details.
-						</VSCodeLink>
-					</span>
-				</>
-			)}
-			{isO1 && (
-				<>
-					<br />
-					<span
-						style={{
-							fontStyle: "italic",
-							color: "var(--vscode-errorForeground)",
-						}}>
-						* This model does not support tool use or system prompts, so Claude Dev uses structured output
-						prompting to achieve similar results. Your mileage may vary.
-					</span>
-				</>
-			)}
+			{infoItems.map((item, index) => (
+				<Fragment key={index}>
+					{item}
+					{index < infoItems.length - 1 && <br />}
+				</Fragment>
+			))}
 		</p>
 	)
 }

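The rewrite collects each row into an `infoItems` array, drops falsy entries with `filter(Boolean)`, and joins the survivors with `<br />` separators, replacing the hand-placed `<br />` bookkeeping of the old JSX. A stripped-down sketch of the same pattern with two hypothetical rows:

```tsx
import { Fragment } from "react"

// Hypothetical two-row version of the conditional-row pattern used above.
const Rows = ({ description, price }: { description?: string; price?: number }) => {
	const items = [
		description && <span key="description">{description}</span>,
		price !== undefined && price > 0 && <span key="price">{price}/million tokens</span>,
	].filter(Boolean)

	return (
		<p>
			{items.map((item, index) => (
				<Fragment key={index}>
					{item}
					{index < items.length - 1 && <br />}
				</Fragment>
			))}
		</p>
	)
}
```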
View File

@@ -1,9 +1,12 @@
 import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
 import React, { useMemo } from "react"
-import { useExtensionState } from "../../context/ExtensionStateContext"
-import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
 import { useMount } from "react-use"
+import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
+import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
+import { memo, useEffect } from "react"
+import { useRemark } from "react-remark"
+import styled from "styled-components"

 interface OpenRouterModelPickerProps {}
@@ -27,8 +30,12 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
vscode.postMessage({ type: "refreshOpenRouterModels" }) vscode.postMessage({ type: "refreshOpenRouterModels" })
}) })
const modelIds = useMemo(() => {
return Object.keys(openRouterModels).sort((a, b) => a.localeCompare(b))
}, [openRouterModels])
return ( return (
<div style={{ display: "flex", flexDirection: "column", gap: 5 }}> <>
<div className="dropdown-container"> <div className="dropdown-container">
<label htmlFor="model-id"> <label htmlFor="model-id">
<span style={{ fontWeight: 500 }}>Model</span> <span style={{ fontWeight: 500 }}>Model</span>
@@ -39,7 +46,7 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 					onChange={handleModelChange}
 					style={{ width: "100%" }}>
 					<VSCodeOption value="">Select a model...</VSCodeOption>
-					{Object.keys(openRouterModels).map((modelId) => (
+					{modelIds.map((modelId) => (
 						<VSCodeOption
 							key={modelId}
 							value={modelId}
@@ -54,15 +61,48 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 				</VSCodeDropdown>
 			</div>

-			{selectedModelInfo.description && (
-				<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-					{selectedModelInfo.description}
-				</p>
-			)}
 			<ModelInfoView selectedModelId={selectedModelId} modelInfo={selectedModelInfo} />
-		</div>
+		</>
 	)
 }

 export default OpenRouterModelPicker

+const StyledMarkdown = styled.div`
+	font-family: var(--vscode-font-family), system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen,
+		Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+	font-size: 12px;
+	color: var(--vscode-descriptionForeground);
+
+	p,
+	li,
+	ol,
+	ul {
+		line-height: 1.25;
+		margin: 0;
+	}
+
+	ol,
+	ul {
+		padding-left: 1.5em;
+		margin-left: 0;
+	}
+
+	p {
+		white-space: pre-wrap;
+	}
+`
+
+export const ModelDescriptionMarkdown = memo(({ markdown, key }: { markdown?: string; key: string }) => {
+	const [reactContent, setMarkdown] = useRemark()
+
+	useEffect(() => {
+		setMarkdown(markdown || "")
+	}, [markdown, setMarkdown])
+
+	return (
+		<StyledMarkdown key={key} style={{ display: "inline-block", marginBottom: 5 }}>
+			{reactContent}
+		</StyledMarkdown>
+	)
+})
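Consuming the new component is a single prop pass-through: `useRemark` (from react-remark) returns the rendered React tree and a setter, and the effect re-parses whenever the markdown string changes, so the description stays in sync with the selected model. A minimal hypothetical consumer, assuming the exports above; the sample description text is illustrative:

```tsx
// Hypothetical consumer of ModelDescriptionMarkdown.
const description = "Supports **tool use** and `JSON` mode.\n\n- fast\n- cheap"

export const DescriptionPreview = () => (
	<div>{description && <ModelDescriptionMarkdown key="description" markdown={description} />}</div>
)
```

Parsing happens entirely inside the webview, so the extension host only ever ships the raw description string.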