diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index 168d9d5..506e879 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -41,7 +41,7 @@ export class AnthropicHandler implements ApiHandler {
 				stream = await this.client.beta.promptCaching.messages.create(
 					{
 						model: modelId,
-						max_tokens: this.getModel().info.maxTokens,
+						max_tokens: this.getModel().info.maxTokens || 8192,
 						temperature: 0,
 						system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
 						messages: messages.map((message, index) => {
@@ -96,7 +96,7 @@ export class AnthropicHandler implements ApiHandler {
 			default: {
 				stream = (await this.client.messages.create({
 					model: modelId,
-					max_tokens: this.getModel().info.maxTokens,
+					max_tokens: this.getModel().info.maxTokens || 8192,
 					temperature: 0,
 					system: [{ text: systemPrompt, type: "text" }],
 					messages,
diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index f098426..ad6e8df 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -27,7 +27,7 @@ export class AwsBedrockHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,
diff --git a/src/api/providers/vertex.ts b/src/api/providers/vertex.ts
index 6e02b80..60e6967 100644
--- a/src/api/providers/vertex.ts
+++ b/src/api/providers/vertex.ts
@@ -21,7 +21,7 @@ export class VertexHandler implements ApiHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
+			max_tokens: this.getModel().info.maxTokens || 8192,
 			temperature: 0,
 			system: systemPrompt,
 			messages,
diff --git a/src/core/ClaudeDev.ts b/src/core/ClaudeDev.ts
index 92823e6..6ad2aa6 100644
--- a/src/core/ClaudeDev.ts
+++ b/src/core/ClaudeDev.ts
@@ -709,7 +709,7 @@ export class ClaudeDev {
 
	async *attemptApiRequest(previousApiReqIndex: number): ApiStream {
 		try {
-			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages)
+			let systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsImages ?? false)
 			if (this.customInstructions && this.customInstructions.trim()) {
 				// altering the system prompt mid-task will break the prompt cache, but in the grand scheme this will not change often so it's better to not pollute user messages with it the way we have to with <potentially relevant details>
 				systemPrompt += addCustomInstructions(this.customInstructions)
@@ -723,7 +723,7 @@ export class ClaudeDev {
 					previousRequest.text
 				)
 				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow
+				const contextWindow = this.api.getModel().info.contextWindow || 128_000
 				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
 				if (totalTokens >= maxAllowedSize) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
diff --git a/src/core/webview/ClaudeDevProvider.ts b/src/core/webview/ClaudeDevProvider.ts
index 2ad96ab..b5d1489 100644
--- a/src/core/webview/ClaudeDevProvider.ts
+++ b/src/core/webview/ClaudeDevProvider.ts
@@ -537,9 +537,17 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 		// await this.postMessageToWebview({ type: "action", action: "settingsButtonTapped" }) // bad ux if user is on welcome
 	}
 
-	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
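+	// fs.mkdir with recursive: true is a no-op when the directory already exists, so this is safe to await before every cache read/write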
+	private async ensureCacheDirectoryExists(): Promise<string> {
 		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		await fs.mkdir(cacheDir, { recursive: true })
+		return cacheDir
+	}
+
+	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 		const fileExists = await fileExistsAtPath(openRouterModelsFilePath)
 		if (fileExists) {
 			const fileContents = await fs.readFile(openRouterModelsFilePath, "utf8")
@@ -549,8 +557,10 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 	}
 
 	async refreshOpenRouterModels() {
-		const cacheDir = path.join(this.context.globalStorageUri.fsPath, "cache")
-		const openRouterModelsFilePath = path.join(cacheDir, GlobalFileNames.openRouterModels)
+		const openRouterModelsFilePath = path.join(
+			await this.ensureCacheDirectoryExists(),
+			GlobalFileNames.openRouterModels
+		)
 
 		let models: Record<string, ModelInfo> = {}
 		try {
@@ -581,16 +591,22 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 					"per_request_limits": null
 				},
 			*/
-			if (response.data) {
-				const rawModels = response.data
+			if (response.data?.data) {
+				const rawModels = response.data.data
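+				// OpenRouter reports prices as strings in USD per token; scale to USD per million tokens to match ModelInfo's pricing fields, leaving missing prices undefined instead of defaulting them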
+				const parsePrice = (price: any) => {
+					if (price) {
+						return parseFloat(price) * 1_000_000
+					}
+					return undefined
+				}
 				for (const rawModel of rawModels) {
 					const modelInfo: ModelInfo = {
-						maxTokens: rawModel.top_provider?.max_completion_tokens || 2048,
-						contextWindow: rawModel.context_length || 128_000,
-						supportsImages: rawModel.architecture?.modality?.includes("image") ?? false,
+						maxTokens: rawModel.top_provider?.max_completion_tokens,
+						contextWindow: rawModel.context_length,
+						supportsImages: rawModel.architecture?.modality?.includes("image"),
 						supportsPromptCache: false,
-						inputPrice: parseFloat(rawModel.pricing?.prompt || 0) * 1_000_000,
-						outputPrice: parseFloat(rawModel.pricing?.completion || 0) * 1_000_000,
+						inputPrice: parsePrice(rawModel.pricing?.prompt),
+						outputPrice: parsePrice(rawModel.pricing?.completion),
 						description: rawModel.description,
 					}
 
@@ -621,6 +637,7 @@ export class ClaudeDevProvider implements vscode.WebviewViewProvider {
 				console.error("Invalid response from OpenRouter API")
 			}
 			await fs.writeFile(openRouterModelsFilePath, JSON.stringify(models))
+			console.log("OpenRouter models fetched and saved", models)
 		} catch (error) {
 			console.error("Error fetching OpenRouter models:", error)
 		}
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 4bac61b..12e6573 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -38,12 +38,12 @@ export type ApiConfiguration = ApiHandlerOptions & {
 // Models
 
 export interface ModelInfo {
-	maxTokens: number
-	contextWindow: number
-	supportsImages: boolean
-	supportsPromptCache: boolean
-	inputPrice: number
-	outputPrice: number
+	maxTokens?: number
+	contextWindow?: number
+	supportsImages?: boolean
+	supportsPromptCache: boolean // this value is hardcoded for now
+	inputPrice?: number
+	outputPrice?: number
 	cacheWritesPrice?: number
 	cacheReadsPrice?: number
 	description?: string
@@ -130,6 +130,8 @@ export const openRouterDefaultModelInfo: ModelInfo = {
 	outputPrice: 15.0,
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
+	description:
+		"Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
 }
 const openRouterModels = {
 	"anthropic/claude-3.5-sonnet:beta": {
diff --git a/src/utils/cost.ts b/src/utils/cost.ts
index 309da6e..6a777d8 100644
--- a/src/utils/cost.ts
+++ b/src/utils/cost.ts
@@ -17,8 +17,8 @@ export function calculateApiCost(
 	if (cacheReadInputTokens && modelCacheReadsPrice) {
 		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
 	}
-	const baseInputCost = (modelInfo.inputPrice / 1_000_000) * inputTokens
-	const outputCost = (modelInfo.outputPrice / 1_000_000) * outputTokens
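+	// input/output prices may now be undefined for dynamically fetched models; treat a missing price as 0 so the total never becomes NaN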
+	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
+	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index cbf12c5..dba15ee 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -7,7 +7,7 @@ import {
 	VSCodeRadioGroup,
 	VSCodeTextField,
 } from "@vscode/webview-ui-toolkit/react"
-import { memo, useCallback, useEffect, useMemo, useState } from "react"
+import { Fragment, memo, useCallback, useEffect, useMemo, useState } from "react"
 import { useEvent, useInterval } from "react-use"
 import {
 	ApiConfiguration,
@@ -31,7 +31,7 @@ import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
 import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
 import VSCodeButtonLink from "../common/VSCodeButtonLink"
-import OpenRouterModelPicker from "./OpenRouterModelPicker"
+import OpenRouterModelPicker, { ModelDescriptionMarkdown } from "./OpenRouterModelPicker"
 
 interface ApiOptionsProps {
 	showModelOptions: boolean
@@ -440,7 +440,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							marginTop: 3,
 							color: "var(--vscode-descriptionForeground)",
 						}}>
-						You can use any OpenAI compatible API with models that support tool use.{" "}
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(Note: Claude Dev uses complex prompts and works best with Claude models. Less capable
 							models may not work as expected.)
@@ -504,12 +503,6 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage }: ApiOptionsProps) => {
 							href="https://github.com/ollama/ollama/blob/main/README.md"
 							style={{ display: "inline", fontSize: "inherit" }}>
 							quickstart guide.
-						</VSCodeLink>{" "}
-						You can use any model that supports{" "}
-						<VSCodeLink
-							href="https://ollama.com/search?c=tools"
-							style={{ display: "inline", fontSize: "inherit" }}>
-							tool use.
 						</VSCodeLink>
 						<span style={{ color: "var(--vscode-errorForeground)" }}>
 							(Note: Claude Dev uses complex prompts and works
@@ -570,80 +563,70 @@ export const formatPrice = (price: number) => {
 
 export const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
 	const isGemini = Object.keys(geminiModels).includes(selectedModelId)
-	const isO1 = selectedModelId && selectedModelId.includes("o1")
+
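+	// gather the visible facts in a filtered array first, so separators can be rendered only between items that actually appear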
+	const infoItems = [
+		modelInfo.description && <ModelDescriptionMarkdown key="description" markdown={modelInfo.description} />,
+		<ModelInfoSupportsItem
+			key="supportsImages"
+			isSupported={modelInfo.supportsImages ?? false}
+			supportsLabel="Supports images"
+			doesNotSupportLabel="Does not support images"
+		/>,
+		!isGemini && (
+			<ModelInfoSupportsItem
+				key="supportsPromptCache"
+				isSupported={modelInfo.supportsPromptCache}
+				supportsLabel="Supports prompt caching"
+				doesNotSupportLabel="Does not support prompt caching"
+			/>
+		),
+		modelInfo.maxTokens !== undefined && modelInfo.maxTokens > 0 && (
+			<span key="maxTokens">
+				<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo.maxTokens?.toLocaleString()} tokens
+			</span>
+		),
+		modelInfo.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+			<span key="inputPrice">
+				<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && (
+			<span key="cacheWritesPrice">
+				<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
+				{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.supportsPromptCache && modelInfo.cacheReadsPrice && (
+			<span key="cacheReadsPrice">
+				<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
+				{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
+			</span>
+		),
+		modelInfo.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+			<span key="outputPrice">
+				<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
+				tokens
+			</span>
+		),
+		isGemini && (
+			<span key="geminiInfo" style={{ fontStyle: "italic" }}>
+				* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per minute.
+				After that, billing depends on prompt size.{" "}
+				<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
+					For more info, see pricing details.
+				</VSCodeLink>
+			</span>
+		),
+	].filter(Boolean)
+
 	return (
 		<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-			<ModelInfoSupportsItem
-				isSupported={modelInfo.supportsImages}
-				supportsLabel="Supports images"
-				doesNotSupportLabel="Does not support images"
-			/>
-			<br />
-			{!isGemini && (
-				<>
-					<ModelInfoSupportsItem
-						isSupported={modelInfo.supportsPromptCache}
-						supportsLabel="Supports prompt caching"
-						doesNotSupportLabel="Does not support prompt caching"
-					/>
-					<br />
-				</>
-			)}
-			<span style={{ fontWeight: 500 }}>Max output:</span> {modelInfo?.maxTokens?.toLocaleString()} tokens
-			{modelInfo.inputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Input price:</span> {formatPrice(modelInfo.inputPrice)}/million
-					tokens
-				</>
-			)}
-			{modelInfo.supportsPromptCache && modelInfo.cacheWritesPrice && modelInfo.cacheReadsPrice && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache writes price:</span>{" "}
-					{formatPrice(modelInfo.cacheWritesPrice || 0)}/million tokens
-					<br />
-					<span style={{ fontWeight: 500 }}>Cache reads price:</span>{" "}
-					{formatPrice(modelInfo.cacheReadsPrice || 0)}/million tokens
-				</>
-			)}
-			{modelInfo.outputPrice > 0 && (
-				<>
-					<br />
-					<span style={{ fontWeight: 500 }}>Output price:</span> {formatPrice(modelInfo.outputPrice)}/million
-					tokens
-				</>
-			)}
-			{isGemini && (
-				<>
-					<br />
-					<span style={{ fontStyle: "italic" }}>
-						* Free up to {selectedModelId && selectedModelId.includes("flash") ? "15" : "2"} requests per
-						minute. After that, billing depends on prompt size.{" "}
-						<VSCodeLink href="https://ai.google.dev/pricing" style={{ display: "inline", fontSize: "inherit" }}>
-							For more info, see pricing details.
-						</VSCodeLink>
-					</span>
-				</>
-			)}
-			{isO1 && (
-				<>
-					<br />
-					<span style={{ fontStyle: "italic" }}>
-						* This model does not support tool use or system prompts, so Claude Dev uses structured output
-						prompting to achieve similar results. Your mileage may vary.
-					</span>
-				</>
-			)}
+			{infoItems.map((item, index) => (
+				<Fragment key={index}>
+					{item}
+					{index < infoItems.length - 1 && <br />}
+				</Fragment>
+			))}
 		</p>
 	)
 }
diff --git a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
index 13fd6f9..c94d953 100644
--- a/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
+++ b/webview-ui/src/components/settings/OpenRouterModelPicker.tsx
@@ -1,9 +1,12 @@
 import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
 import React, { useMemo } from "react"
-import { useExtensionState } from "../../context/ExtensionStateContext"
-import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
 import { useMount } from "react-use"
+import { useExtensionState } from "../../context/ExtensionStateContext"
 import { vscode } from "../../utils/vscode"
+import { ModelInfoView, normalizeApiConfiguration } from "./ApiOptions"
+import { memo, useEffect } from "react"
+import { useRemark } from "react-remark"
+import styled from "styled-components"
 
 interface OpenRouterModelPickerProps {}
 
@@ -27,8 +30,12 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 		vscode.postMessage({ type: "refreshOpenRouterModels" })
 	})
 
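+	// alphabetize once per model-list update so the dropdown order stays stable across re-renders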
+	const modelIds = useMemo(() => {
+		return Object.keys(openRouterModels).sort((a, b) => a.localeCompare(b))
+	}, [openRouterModels])
+
 	return (
-		<div style={{ width: "100%" }}>
+		<>
 			<div className="dropdown-container">
 				<VSCodeDropdown
 					id="model-id"
@@ -57,13 +64,46 @@ const OpenRouterModelPicker: React.FC<OpenRouterModelPickerProps> = () => {
 			</VSCodeDropdown>
-			{selectedModelInfo.description && (
-				<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
-					{selectedModelInfo.description}
-				</p>
-			)}
-
 			<ModelInfoView selectedModelId={selectedModelId} modelInfo={selectedModelInfo} />
-		</div>
+		</>
 	)
 }
 
 export default OpenRouterModelPicker
+
+const StyledMarkdown = styled.div`
+	font-family: var(--vscode-font-family), system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen,
+		Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+	font-size: 12px;
+	color: var(--vscode-descriptionForeground);
+
+	p,
+	li,
+	ol,
+	ul {
+		line-height: 1.25;
+		margin: 0;
+	}
+
+	ol,
+	ul {
+		padding-left: 1.5em;
+		margin-left: 0;
+	}
+
+	p {
+		white-space: pre-wrap;
+	}
+`
+
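+// memoized so an unchanged description string doesn't re-render or re-parse on every parent render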
+export const ModelDescriptionMarkdown = memo(({ markdown, key }: { markdown?: string; key: string }) => {
+	const [reactContent, setMarkdown] = useRemark()
+
+	useEffect(() => {
+		setMarkdown(markdown || "")
+	}, [markdown, setMarkdown])
+
+	return (
+		<StyledMarkdown key={key}>
+			{reactContent}
+		</StyledMarkdown>
+	)
+})