Fix Haiku prompt caching when tool prompt doesn't meet minimum required tokens

Saoud Rizwan
2024-08-15 20:37:15 -04:00
parent 6989779dd6
commit a0f2a08f35
2 changed files with 14 additions and 26 deletions

@@ -56,9 +56,7 @@ export class AnthropicHandler implements ApiHandler {
 					}
 					return message
 				}),
-				tools: tools.map((tool, index) =>
-					index === tools.length - 1 ? { ...tool, cache_control: { type: "ephemeral" } } : tool
-				),
+				tools, // cache breakpoints go from tools > system > messages, and since tools don't change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools, which by itself does not meet the minimum token requirement for Haiku caching)
 				tool_choice: { type: "auto" },
 			},
 			(() => {
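
For context on the comment above: Anthropic resolves cache breakpoints over the prompt prefix in the order tools > system > messages, and a cache_control marker caches everything up to and including the block it is attached to. At the time of this commit, the minimum cacheable prefix was 2048 tokens for Claude 3 Haiku (1024 for Claude 3.5 Sonnet and Claude 3 Opus), so a breakpoint on the last tool alone could fall short, while one at the end of the system prompt covers tools + system together. A minimal sketch of that placement, assuming the SDK's beta prompt-caching surface from this era; systemPrompt, tools, and messages are hypothetical stand-ins for the extension's real values:

import Anthropic from "@anthropic-ai/sdk"

const client = new Anthropic() // reads ANTHROPIC_API_KEY from the environment

async function createWithCachedPrefix(
	systemPrompt: string, // hypothetical stand-in for the extension's real system prompt
	tools: Anthropic.Messages.Tool[],
	messages: Anthropic.Messages.MessageParam[]
) {
	// client.beta.promptCaching sets the anthropic-beta prompt-caching header for us
	return client.beta.promptCaching.messages.create({
		model: "claude-3-haiku-20240307",
		max_tokens: 4096,
		// Breakpoint on the (last) system block: the cached prefix then spans
		// tools + system, which together clear Haiku's minimum cacheable
		// length even when the tools block alone would not.
		system: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
		tools, // plain tool definitions; no per-tool cache_control needed
		tool_choice: { type: "auto" },
		messages,
	})
}

Because the tool definitions precede the breakpoint, they are still part of the cached prefix; leaving them unmarked costs nothing while keeping the single breakpoint large enough to qualify for caching.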

@@ -215,45 +215,35 @@ const ModelInfoView = ({ modelInfo }: { modelInfo: ModelInfo }) => {
 		}).format(price)
 	}
-	const showPromptCachingPrices =
-		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice
 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsPromptCache}
-				supportsLabel="Supports prompt caching"
-				doesNotSupportLabel="Does not support prompt caching"
-			/>{" "}
-			<VSCodeLink href="https://www.anthropic.com/news/prompt-caching" style={{ display: "inline" }}>
-				(what is this?)
-			</VSCodeLink>
-			<br />
 			<ModelInfoSupportsItem
 				isSupported={modelInfo.supportsImages}
 				supportsLabel="Supports images"
 				doesNotSupportLabel="Does not support images"
 			/>
 			<br />
+			<ModelInfoSupportsItem
+				isSupported={modelInfo.supportsPromptCache}
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>
+			<br />
 			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens.toLocaleString()} tokens
 			<br />
-			<span style={{ fontWeight: 500 }}>
-				{showPromptCachingPrices ? "Base input price:" : "Input price:"}
-			</span>{" "}
-			{formatPrice(modelInfo.inputPrice)} per million tokens
-			{showPromptCachingPrices && (
+			<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+			{modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
 				<>
 					<br />
-					<span style={{ fontWeight: 500 }}>Prompt caching write price:</span>{" "}
-					{formatPrice(modelInfo.cacheWritesPrice || 0)} per million tokens
+					<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+					{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
 					<br />
-					<span style={{ fontWeight: 500 }}>Prompt caching read price:</span>{" "}
-					{formatPrice(modelInfo.cacheReadsPrice || 0)} per million tokens
+					<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+					{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
 				</>
 			)}
 			<br />
-			<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)} per million
-			tokens
+			<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million tokens
 		</p>
 	)
 }