Update announcement/settings with details about prompt caching

This commit is contained in:
Saoud Rizwan
2024-08-15 04:21:28 -04:00
parent ec2bfa352a
commit 6989779dd6
7 changed files with 48 additions and 18 deletions

View File

@@ -4,6 +4,10 @@ All notable changes to the "claude-dev" extension will be documented in this fil
<!-- Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. --> <!-- Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. -->
## [1.2.0]
- Adds support for Prompt Caching to significantly reduce costs and response times (currently only available through the Anthropic API for Claude 3.5 Sonnet and Claude 3 Haiku)
## [1.1.1] ## [1.1.1]
- Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter) - Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)

View File

@@ -2,7 +2,7 @@
"name": "claude-dev", "name": "claude-dev",
"displayName": "Claude Dev", "displayName": "Claude Dev",
"description": "Autonomous software engineer right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.", "description": "Autonomous software engineer right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.",
"version": "1.1.15", "version": "1.2.0",
"icon": "icon.png", "icon": "icon.png",
"engines": { "engines": {
"vscode": "^1.84.0" "vscode": "^1.84.0"

View File

@@ -417,12 +417,12 @@ export class ClaudeDev {
cacheCreationInputTokens?: number, cacheCreationInputTokens?: number,
cacheReadInputTokens?: number cacheReadInputTokens?: number
): number { ): number {
const modelCacheWritesPrice = this.api.getModel().info.cacheWrites const modelCacheWritesPrice = this.api.getModel().info.cacheWritesPrice
let cacheWritesCost = 0 let cacheWritesCost = 0
if (cacheCreationInputTokens && modelCacheWritesPrice) { if (cacheCreationInputTokens && modelCacheWritesPrice) {
cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens
} }
const modelCacheReadsPrice = this.api.getModel().info.cacheReads const modelCacheReadsPrice = this.api.getModel().info.cacheReadsPrice
let cacheReadsCost = 0 let cacheReadsCost = 0
if (cacheReadInputTokens && modelCacheReadsPrice) { if (cacheReadInputTokens && modelCacheReadsPrice) {
cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens

View File

@@ -25,7 +25,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
private disposables: vscode.Disposable[] = [] private disposables: vscode.Disposable[] = []
private view?: vscode.WebviewView | vscode.WebviewPanel private view?: vscode.WebviewView | vscode.WebviewPanel
private claudeDev?: ClaudeDev private claudeDev?: ClaudeDev
private latestAnnouncementId = "aug-11-2024" // update to some unique identifier when we add a new announcement private latestAnnouncementId = "aug-15-2024" // update to some unique identifier when we add a new announcement
constructor( constructor(
private readonly context: vscode.ExtensionContext, private readonly context: vscode.ExtensionContext,

View File

@@ -21,8 +21,8 @@ export interface ModelInfo {
supportsPromptCache: boolean supportsPromptCache: boolean
inputPrice: number inputPrice: number
outputPrice: number outputPrice: number
cacheWrites?: number cacheWritesPrice?: number
cacheReads?: number cacheReadsPrice?: number
} }
export type ApiModelId = AnthropicModelId | OpenRouterModelId | BedrockModelId export type ApiModelId = AnthropicModelId | OpenRouterModelId | BedrockModelId
@@ -38,8 +38,8 @@ export const anthropicModels = {
supportsPromptCache: true, supportsPromptCache: true,
inputPrice: 3.0, // $3 per million input tokens inputPrice: 3.0, // $3 per million input tokens
outputPrice: 15.0, // $15 per million output tokens outputPrice: 15.0, // $15 per million output tokens
cacheWrites: 3.75, // $3.75 per million tokens cacheWritesPrice: 3.75, // $3.75 per million tokens
cacheReads: 0.3, // $0.30 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens
}, },
"claude-3-opus-20240229": { "claude-3-opus-20240229": {
maxTokens: 4096, maxTokens: 4096,
@@ -47,8 +47,8 @@ export const anthropicModels = {
supportsPromptCache: false, supportsPromptCache: false,
inputPrice: 15.0, inputPrice: 15.0,
outputPrice: 75.0, outputPrice: 75.0,
cacheWrites: 18.75, cacheWritesPrice: 18.75,
cacheReads: 1.5, cacheReadsPrice: 1.5,
}, },
"claude-3-sonnet-20240229": { "claude-3-sonnet-20240229": {
maxTokens: 4096, maxTokens: 4096,
@@ -63,8 +63,8 @@ export const anthropicModels = {
supportsPromptCache: true, supportsPromptCache: true,
inputPrice: 0.25, inputPrice: 0.25,
outputPrice: 1.25, outputPrice: 1.25,
cacheWrites: 0.3, cacheWritesPrice: 0.3,
cacheReads: 0.03, cacheReadsPrice: 0.03,
}, },
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly } as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly

View File

@@ -27,18 +27,25 @@ const Announcement = ({ version, hideAnnouncement }: AnnouncementProps) => {
🎉{" "}New in v{version} 🎉{" "}New in v{version}
</h3> </h3>
<ul style={{ margin: "0 0 8px", paddingLeft: "20px" }}> <ul style={{ margin: "0 0 8px", paddingLeft: "20px" }}>
<li>
Adds support for{" "}
<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
Prompt Caching
</VSCodeLink>{" "}
to reduce costs by up to 90% and latency by up to 85% (currently only available through Anthropic
API for Claude 3.5 Sonnet and Claude 3.0 Haiku)
</li>
<li> <li>
Paste images in chat and turn mockups into fully functional applications or fix bugs with Paste images in chat and turn mockups into fully functional applications or fix bugs with
screenshots screenshots
</li> </li>
<li> <li>
Added option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter) Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)
</li> </li>
<li> <li>
You can now add custom instructions to the end of the system prompt (e.g. "Always use Python", You can now add custom instructions to the end of the system prompt (e.g. "Always use Python",
"Speak in Spanish") "Speak in Spanish")
</li> </li>
<li>Improved support for running interactive terminal commands and servers</li>
</ul> </ul>
<p style={{ margin: "0" }}> <p style={{ margin: "0" }}>
Follow me for more updates!{" "} Follow me for more updates!{" "}

View File

@@ -215,13 +215,19 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
}).format(price) }).format(price)
} }
const showPromptCachingPrices =
modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice
return ( return (
<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}> <p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
<ModelInfoSupportsItem <ModelInfoSupportsItem
isSupported={modelInfo.supportsPromptCache} isSupported={modelInfo.supportsPromptCache}
supportsLabel="Supports prompt cache" supportsLabel="Supports prompt caching"
doesNotSupportLabel="Does not support prompt cache" doesNotSupportLabel="Does not support prompt caching"
/> />{" "}
<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
(what is this?)
</VSCodeLink>
<br /> <br />
<ModelInfoSupportsItem <ModelInfoSupportsItem
isSupported={modelInfo.supportsImages} isSupported={modelInfo.supportsImages}
@@ -231,7 +237,20 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
<br /> <br />
<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens.toLocaleString()} tokens <span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens.toLocaleString()} tokens
<br /> <br />
<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)} per million tokens <span style={{ fontWeight: 500 }}>
{showPromptCachingPrices ? "Base input price:" : "Input price:"}
</span>{" "}
{formatPrice(modelInfo.inputPrice)} per million tokens
{showPromptCachingPrices && (
<>
<br />
<span style={{ fontWeight: 500 }}>Prompt caching write price:</span>{" "}
{formatPrice(modelInfo.cacheWritesPrice || 0)} per million tokens
<br />
<span style={{ fontWeight: 500 }}>Prompt caching read price:</span>{" "}
{formatPrice(modelInfo.cacheReadsPrice || 0)} per million tokens
</>
)}
<br /> <br />
<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)} per million <span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)} per million
tokens tokens