Add markdown rendering for model description; fixes

Saoud Rizwan
2024-10-03 21:23:49 -04:00
parent 9ffe01888a
commit f6a14fdfb9
9 changed files with 159 additions and 117 deletions

View File

@@ -41,7 +41,7 @@ export class AnthropicHandler implements ApiHandler {
stream = await this.client.beta.promptCaching.messages.create(
{
model: modelId,
- max_tokens: this.getModel().info.maxTokens,
+ max_tokens: this.getModel().info.maxTokens || 8192,
temperature: 0,
system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
messages: messages.map((message, index) => {
@@ -96,7 +96,7 @@ export class AnthropicHandler implements ApiHandler {
default: {
stream = (await this.client.messages.create({
model: modelId,
- max_tokens: this.getModel().info.maxTokens,
+ max_tokens: this.getModel().info.maxTokens || 8192,
temperature: 0,
system: [{ text: systemPrompt, type: "text" }],
messages,
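
Note on these fallbacks: `maxTokens` becomes optional in the `ModelInfo` interface below, so call sites now default to 8192. A minimal sketch of the operator semantics (values are illustrative only) — `||` also replaces a literal `0`, whereas `??` would only replace null/undefined:

```typescript
const a = undefined || 8192 // 8192
const b = 0 || 8192         // 8192 (`||` treats 0 as falsy)
const c = 0 ?? 8192         // 0    (`??` replaces only null/undefined)
```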

View File

@@ -27,7 +27,7 @@ export class AwsBedrockHandler implements ApiHandler {
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
const stream = await this.client.messages.create({
model: this.getModel().id,
- max_tokens: this.getModel().info.maxTokens,
+ max_tokens: this.getModel().info.maxTokens || 8192,
temperature: 0,
system: systemPrompt,
messages,

View File

@@ -21,7 +21,7 @@ export class VertexHandler implements ApiHandler {
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
const stream = await this.client.messages.create({
model: this.getModel().id,
- max_tokens: this.getModel().info.maxTokens,
+ max_tokens: this.getModel().info.maxTokens || 8192,
temperature: 0,
system: systemPrompt,
messages,

View File

@@ -709,7 +709,7 @@ export class ClaudeDev {
async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
try {
- let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
+ let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
if (this.customInstructions && this.customInstructions.trim()) {
// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
systemPrompt += addCustomInstructions(this.customInstructions)
@@ -723,7 +723,7 @@ export class ClaudeDev {
previousRequest.text
)
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
- const contextWindow = this.api.getModel().info.contextWindow
+ const contextWindow = this.api.getModel().info.contextWindow || 128_000
const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
if (totalTokens >= maxAllowedSize) {
const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
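
For reference, a worked example of the truncation threshold under the `128_000` fallback above, plus a 200_000-token window for comparison (token counts assumed for illustration):

```typescript
// maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
Math.max(128_000 - 40_000, 128_000 * 0.8) // Math.max(88_000, 102_400) = 102_400
Math.max(200_000 - 40_000, 200_000 * 0.8) // Math.max(160_000, 160_000) = 160_000
// The flat 40_000-token buffer dominates for windows above 200_000 tokens;
// the 80% floor protects smaller windows from over-aggressive truncation.
```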

View File

@@ -537,9 +537,17 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
// await this.postMessageToWebview({ type: "action", action: "settingsButtonTapped" }) // bad ux if user is on welcome
}
- async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+ private async ensureCacheDirectoryExists(): Promise<string> {
const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
- const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+ await fs.mkdir(cacheDir, { recursive: true })
+ return cacheDir
+ }
+ async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+ const openRouterModelsFilePath = path.join(
+ await this.ensureCacheDirectoryExists(),
+ GlobalFileNames.openRouterModels
+ )
const fileExists = await fileExistsAtPath(openRouterModelsFilePath)
if (fileExists) {
const fileContents = await fs.readFile(openRouterModelsFilePath, "utf8")
@@ -549,8 +557,10 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
}
async refreshOpenRouterModels() {
- const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
- const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+ const openRouterModelsFilePath = path.join(
+ await this.ensureCacheDirectoryExists(),
+ GlobalFileNames.openRouterModels
+ )
let models: Record<string, ModelInfo> = {}
try {
@@ -581,16 +591,22 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
"per_request_limits": null
},
*/
- if (response.data) {
- const rawModels = response.data
+ if (response.data?.data) {
+ const rawModels = response.data.data
+ const parsePrice = (price: any) => {
+ if (price) {
+ return parseFloat(price) * 1_000_000
+ }
+ return undefined
+ }
for (const rawModel of rawModels) {
const modelInfo: ModelInfo = {
- maxTokens: rawModel.top_provider?.max_completion_tokens || 2048,
- contextWindow: rawModel.context_length || 128_000,
- supportsImages: rawModel.architecture?.modality?.includes("image") ?? false,
+ maxTokens: rawModel.top_provider?.max_completion_tokens,
+ contextWindow: rawModel.context_length,
+ supportsImages: rawModel.architecture?.modality?.includes("image"),
supportsPromptCache: false,
- inputPrice: parseFloat(rawModel.pricing?.prompt || 0) * 1_000_000,
- outputPrice: parseFloat(rawModel.pricing?.completion || 0) * 1_000_000,
+ inputPrice: parsePrice(rawModel.pricing?.prompt),
+ outputPrice: parsePrice(rawModel.pricing?.completion),
description: rawModel.description,
}
@@ -621,6 +637,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
console.error("Invalid response from OpenRouter API")
}
await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models))
+ console.log("OpenRouter models fetched and saved", models)
} catch (error) {
console.error("Error fetching OpenRouter models:", error)
}
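
OpenRouter reports prices as per-token strings, so `parsePrice` converts them to USD per million tokens and, unlike the old `parseFloat(... || 0)` path, keeps an unknown price as `undefined` instead of coercing it to 0. The input values below are illustrative, not taken from the API:

```typescript
parsePrice("0.000003") // 3  -> $3 per million tokens (modulo float rounding)
parsePrice("0.000015") // 15 -> $15 per million tokens
parsePrice(undefined)  // undefined (price unknown, not free)
```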

View File

@@ -38,12 +38,12 @@ export type ApiConfiguration = ApiHandlerOptions & {
// Models
export interface ModelInfo {
- maxTokens: number
- contextWindow: number
- supportsImages: boolean
- supportsPromptCache: boolean
- inputPrice: number
- outputPrice: number
+ maxTokens?: number
+ contextWindow?: number
+ supportsImages?: boolean
+ supportsPromptCache: boolean // this value is hardcoded for now
+ inputPrice?: number
+ outputPrice?: number
cacheWritesPrice?: number
cacheReadsPrice?: number
description?: string
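
With most `ModelInfo` fields now optional, entries built from the OpenRouter API can carry only the data the endpoint actually returned, and consumers supply fallbacks at the point of use (as the `|| 8192` and `|| 128_000` changes above do). A sketch with hypothetical values:

```typescript
// Hypothetical entry: pricing and max output unpublished, so those fields stay undefined.
const example: ModelInfo = {
	supportsPromptCache: false, // still required; hardcoded for now
	contextWindow: 32_768,
	description: "Example model with unpublished pricing",
}
```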
@@ -130,6 +130,8 @@ export const openRouterDefaultModelInfo: ModelInfo = {
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
+ description:
+ "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
}
const openRouterModels = {
"anthropic/claude-3.5-sonnet:beta": {

View File

@@ -17,8 +17,8 @@ export function calculateApiCost(
if (cacheReadInputTokens && modelCacheReadsPrice) {
cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
}
- const baseInputCost = (modelInfo.inputPrice / 1_000_000) * inputTokens
- const outputCost = (modelInfo.outputPrice / 1_000_000) * outputTokens
+ const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
+ const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
return totalCost
}
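
A worked example of `calculateApiCost` using the Claude 3.5 Sonnet prices from `openRouterDefaultModelInfo` above ($15/M output, $3.75/M cache writes, $0.30/M cache reads, and assuming the standard $3/M input price); the token counts are made up:

```typescript
// inputTokens=10_000, outputTokens=2_000, cacheWrites=5_000, cacheReads=20_000
// cacheWritesCost = (3.75 / 1_000_000) * 5_000  = 0.01875
// cacheReadsCost  = (0.3  / 1_000_000) * 20_000 = 0.006
// baseInputCost   = (3    / 1_000_000) * 10_000 = 0.03
// outputCost      = (15   / 1_000_000) * 2_000  = 0.03
// totalCost       = 0.08475 USD
```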

View File

@@ -7,7 +7,7 @@ import {
VSCodeRadioGroup,
VSCodeTextField,
} from "@vscode/webview-ui-toolkit/react"
- import { memo, useCallback, useEffect, useMemo, useState } from "react"
+ import { Fragment, memo, useCallback, useEffect, useMemo, useState } from "react"
import { useEvent, useInterval } from "react-use"
import {
ApiConfiguration,
@@ -31,7 +31,7 @@ import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
import { useExtensionState } from "../../context/ExtensionStateContext"
import { vscode } from "../../utils/vscode"
import VSCodeButtonLink from "../common/VSCodeButtonLink"
- import OpenRouterModelPicker from "./OpenRouterModelPicker"
+ import OpenRouterModelPicker, { ModelDescriptionMarkdown } from "./OpenRouterModelPicker"
interface ApiOptionsProps {
showModelOptions: boolean
@@ -440,7 +440,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
marginTop: 3,
color: "var(--vscode-descriptionForeground)",
}}>
- You can use any OpenAI compatible API with models that support tool use.{" "}
<span style={{ color: "var(--vscode-errorForeground)" }}>
(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
best with Claude models. Less capable models may not work as expected.)
@@ -504,12 +503,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
href="https://github.com/ollama/ollama/blob/main/README.md"
style={{ display: "inline", fontSize: "inherit" }}>
quickstart guide.
</VSCodeLink>{" "}
You can use any model that supports{" "}
<VSCodeLink
href="https://ollama.com/search?c=tools"
style={{ display: "inline", fontSize: "inherit" }}>
tool use.
</VSCodeLink>
<span style={{ color: "var(--vscode-errorForeground)" }}>
(<span style={{ fontWeight: 500 }}>Note:</span> Claude Dev uses complex prompts and works
@@ -570,80 +563,70 @@ export const formatPrice = (price: number) => {
export const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
const isGemini = Object.keys(geminiModels).includes(selectedModelId)
const isO1 = selectedModelId && selectedModelId.includes("o1")
+ const infoItems = [
+ modelInfo.description && <ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />,
+ <ModelInfoSupportsItem
+ key="supportsImages"
+ isSupported={modelInfo.supportsImages ?? false}
+ supportsLabel="Supports images"
+ doesNotSupportLabel="Does not support images"
+ />,
+ !isGemini && (
+ <ModelInfoSupportsItem
+ key="supportsPromptCache"
+ isSupported={modelInfo.supportsPromptCache}
+ supportsLabel="Supports prompt caching"
+ doesNotSupportLabel="Does not support prompt caching"
+ />
+ ),
+ modelInfo.maxTokens !== undefined && modelInfo.maxTokens > 0 && (
+ <span key="maxTokens">
+ <span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens?.toLocaleString()} tokens
+ </span>
+ ),
+ modelInfo.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+ <span key="inputPrice">
+ <span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+ </span>
+ ),
+ modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && (
+ <span key="cacheWritesPrice">
+ <span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+ {formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
+ </span>
+ ),
+ modelInfo.supportsPromptCache && modelInfo.cacheReadsPrice && (
+ <span key="cacheReadsPrice">
+ <span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+ {formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
+ </span>
+ ),
+ modelInfo.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+ <span key="outputPrice">
+ <span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
+ tokens
+ </span>
+ ),
+ isGemini && (
+ <span key="geminiInfo" style={{ fontStyle: "italic" }}>
+ * Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per minute.
+ After that, billing depends on prompt size.{" "}
+ <VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
+ For more info, see pricing details.
+ </VSCodeLink>
+ </span>
+ ),
+ ].filter(Boolean)
return (
<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
- <ModelInfoSupportsItem
- isSupported={modelInfo.supportsImages}
- supportsLabel="Supports images"
- doesNotSupportLabel="Does not support images"
- />
- <br />
- {!isGemini && (
- <>
- <ModelInfoSupportsItem
- isSupported={modelInfo.supportsPromptCache}
- supportsLabel="Supports prompt caching"
- doesNotSupportLabel="Does not support prompt caching"
- />
- <br />
- </>
- )}
- <span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo?.maxTokens?.toLocaleString()} tokens
- {modelInfo.inputPrice > 0 && (
- <>
- <br />
- <span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million
- tokens
- </>
- )}
- {modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
- <>
- <br />
- <span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
- {formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
- <br />
- <span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
- {formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
- </>
- )}
- {modelInfo.outputPrice > 0 && (
- <>
- <br />
- <span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
- tokens
- </>
- )}
- {isGemini && (
- <>
- <br />
- <span
- style={{
- fontStyle: "italic",
- }}>
- * Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per
- minute. After that, billing depends on prompt size.{" "}
- <VSCodeLink
- href="https://ai.google.dev/pricing"
- style={{ display: "inline", fontSize: "inherit" }}>
- For more info, see pricing details.
- </VSCodeLink>
- </span>
- </>
- )}
- {isO1 && (
- <>
- <br />
- <span
- style={{
- fontStyle: "italic",
- color: "var(--vscode-errorForeground)",
- }}>
- * This model does not support tool use or system prompts, so Claude Dev uses structured output
- prompting to achieve similar results. Your mileage may vary.
- </span>
- </>
- )}
+ {infoItems.map((item, index) => (
+ <Fragment key={index}>
+ {item}
+ {index < infoItems.length - 1 && <br />}
+ </Fragment>
+ ))}
</p>
)
}

View File

@@ -1,9 +1,12 @@
import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
import React, { useMemo } from "react"
- import { useExtensionState } from "../../context/ExtensionStateContext"
- import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
import { useMount } from "react-use"
+ import { useExtensionState } from "../../context/ExtensionStateContext"
import { vscode } from "../../utils/vscode"
+ import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
+ import { memo, useEffect } from "react"
+ import { useRemark } from "react-remark"
+ import styled from "styled-components"
interface OpenRouterModelPickerProps {}
@@ -27,8 +30,12 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
vscode.postMessage({ type: "refreshOpenRouterModels" })
})
+ const modelIds = useMemo(() => {
+ return Object.keys(openRouterModels).sort((a, b) => a.localeCompare(b))
+ }, [openRouterModels])
return (
- <div style={{ display: "flex", flexDirection: "column", gap: 5 }}>
+ <>
<div className="dropdown-container">
<label htmlFor="model-id">
<span style={{ fontWeight: 500 }}>Model</span>
@@ -39,7 +46,7 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
onChange={handleModelChange}
style={{ width: "100%" }}>
<VSCodeOption value="">Select a model...</VSCodeOption>
- {Object.keys(openRouterModels).map((modelId) => (
+ {modelIds.map((modelId) => (
<VSCodeOption
key={modelId}
value={modelId}
@@ -54,15 +61,48 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
</VSCodeDropdown>
</div>
- {selectedModelInfo.description && (
- <p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
- {selectedModelInfo.description}
- </p>
- )}
<ModelInfoView selectedModelId={selectedModelId} modelInfo={selectedModelInfo} />
- </div>
+ </>
)
}
export default OpenRouterModelPicker
+ const StyledMarkdown = styled.div`
+ font-family: var(--vscode-font-family), system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen,
+ Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+ font-size: 12px;
+ color: var(--vscode-descriptionForeground);
+ p,
+ li,
+ ol,
+ ul {
+ line-height: 1.25;
+ margin: 0;
+ }
+ ol,
+ ul {
+ padding-left: 1.5em;
+ margin-left: 0;
+ }
+ p {
+ white-space: pre-wrap;
+ }
+ `
+ export const ModelDescriptionMarkdown = memo(({ markdown, key }: { markdown?: string; key: string }) => {
+ const [reactContent, setMarkdown] = useRemark()
+ useEffect(() => {
+ setMarkdown(markdown || "")
+ }, [markdown, setMarkdown])
+ return (
+ <StyledMarkdown key={key} style={{ display: "inline-block", marginBottom: 5 }}>
+ {reactContent}
+ </StyledMarkdown>
+ )
+ })
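
One caveat on the new component, worth noting for callers: React treats `key` as a reserved prop and does not forward it into the component, so the destructured `key` above is `undefined` at render time (the `key="description"` in `ModelInfoView`'s `infoItems` still does its job, since React consumes it for list identity during reconciliation). A minimal usage sketch:

```tsx
// `key` here is consumed by React for reconciliation; it is never received
// as a prop inside ModelDescriptionMarkdown. A non-reserved prop name would
// be needed to actually pass a value through.
<ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />
```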