Update announcement/settings with details about prompt caching

This commit is contained in:
Saoud Rizwan
2024-08-15 04:21:28 -04:00
parent ec2bfa352a
commit 6989779dd6
7 changed files with 48 additions and 18 deletions

View File

@@ -4,6 +4,10 @@ All notable changes to the "claude-dev" extension will be documented in this fil
<!-- Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. -->
## [1.2.0]
- Add support for Prompt Caching to significantly reduce costs and response times (currently only available through the Anthropic API for Claude 3.5 Sonnet and Claude 3 Haiku)
## [1.1.1]
- Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)

View File

@@ -2,7 +2,7 @@
"name": "claude-dev",
"displayName": "Claude Dev",
"description": "Autonomous software engineer right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.",
"version": "1.1.15",
"version": "1.2.0",
"icon": "icon.png",
"engines": {
"vscode": "^1.84.0"

View File

@@ -417,12 +417,12 @@ export class ClaudeDev {
cacheCreationInputTokens?: number,
cacheReadInputTokens?: number
): number {
const modelCacheWritesPrice = this.api.getModel().info.cacheWrites
const modelCacheWritesPrice = this.api.getModel().info.cacheWritesPrice
let cacheWritesCost = 0
if (cacheCreationInputTokens && modelCacheWritesPrice) {
cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens
}
const modelCacheReadsPrice = this.api.getModel().info.cacheReads
const modelCacheReadsPrice = this.api.getModel().info.cacheReadsPrice
let cacheReadsCost = 0
if (cacheReadInputTokens && modelCacheReadsPrice) {
cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens

View File

@@ -25,7 +25,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
private disposables: vscode.Disposable[] = []
private view?: vscode.WebviewView | vscode.WebviewPanel
private claudeDev?: ClaudeDev
private latestAnnouncementId = "aug-11-2024" // update to some unique identifier when we add a new announcement
private latestAnnouncementId = "aug-15-2024" // update to some unique identifier when we add a new announcement
constructor(
private readonly context: vscode.ExtensionContext,

View File

@@ -21,8 +21,8 @@ export interface ModelInfo {
supportsPromptCache: boolean
inputPrice: number
outputPrice: number
cacheWrites?: number
cacheReads?: number
cacheWritesPrice?: number
cacheReadsPrice?: number
}
export type ApiModelId = AnthropicModelId | OpenRouterModelId | BedrockModelId
@@ -38,8 +38,8 @@ export const anthropicModels = {
supportsPromptCache: true,
inputPrice: 3.0, // $3 per million input tokens
outputPrice: 15.0, // $15 per million output tokens
cacheWrites: 3.75, // $3.75 per million tokens
cacheReads: 0.3, // $0.30 per million tokens
cacheWritesPrice: 3.75, // $3.75 per million tokens
cacheReadsPrice: 0.3, // $0.30 per million tokens
},
"claude-3-opus-20240229": {
maxTokens: 4096,
@@ -47,8 +47,8 @@ export const anthropicModels = {
supportsPromptCache: false,
inputPrice: 15.0,
outputPrice: 75.0,
cacheWrites: 18.75,
cacheReads: 1.5,
cacheWritesPrice: 18.75,
cacheReadsPrice: 1.5,
},
"claude-3-sonnet-20240229": {
maxTokens: 4096,
@@ -63,8 +63,8 @@ export const anthropicModels = {
supportsPromptCache: true,
inputPrice: 0.25,
outputPrice: 1.25,
cacheWrites: 0.3,
cacheReads: 0.03,
cacheWritesPrice: 0.3,
cacheReadsPrice: 0.03,
},
} as const satisfies Record<string, ModelInfo> // as const assertion makes the object deeply readonly

View File

@@ -27,18 +27,25 @@ const Announcement = ({ version, hideAnnouncement }: AnnouncementProps) => {
🎉{" "}New in v{version}
</h3>
<ul style={{ margin: "0 0 8px", paddingLeft: "20px" }}>
<li>
Adds support for{" "}
<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
Prompt Caching
</VSCodeLink>{" "}
to reduce costs by up to 90% and latency by up to 85% (currently only available through the Anthropic
API for Claude 3.5 Sonnet and Claude 3 Haiku)
</li>
<li>
Paste images in chat and turn mockups into fully functional applications or fix bugs with
screenshots
</li>
<li>
Added option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)
Adds option to choose other Claude models (+ GPT-4o, DeepSeek, and Mistral if you use OpenRouter)
</li>
<li>
You can now add custom instructions to the end of the system prompt (e.g. "Always use Python",
"Speak in Spanish")
</li>
<li>Improved support for running interactive terminal commands and servers</li>
</ul>
<p style={{ margin: "0" }}>
Follow me for more updates!{" "}

View File

@@ -215,13 +215,19 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
}).format(price)
}
const showPromptCachingPrices =
modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice
return (
<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
<ModelInfoSupportsItem
isSupported={modelInfo.supportsPromptCache}
supportsLabel="Supports prompt cache"
doesNotSupportLabel="Does not support prompt cache"
/>
supportsLabel="Supports prompt caching"
doesNotSupportLabel="Does not support prompt caching"
/>{" "}
<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
(what is this?)
</VSCodeLink>
<br />
<ModelInfoSupportsItem
isSupported={modelInfo.supportsImages}
@@ -231,7 +237,20 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
<br />
<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens.toLocaleString()} tokens
<br />
<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)} per million tokens
<span style={{ fontWeight: 500 }}>
{showPromptCachingPrices ? "Base input price:" : "Input price:"}
</span>{" "}
{formatPrice(modelInfo.inputPrice)} per million tokens
{showPromptCachingPrices && (
<>
<br />
<span style={{ fontWeight: 500 }}>Prompt caching write price:</span>{" "}
{formatPrice(modelInfo.cacheWritesPrice || 0)} per million tokens
<br />
<span style={{ fontWeight: 500 }}>Prompt caching read price:</span>{" "}
{formatPrice(modelInfo.cacheReadsPrice || 0)} per million tokens
</>
)}
<br />
<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)} per million
tokens