feat: add Glama gateway

Author: Frank
Date: 2025-01-04 21:25:33 -06:00
Committed by: Matt Rubens
Parent: 5e099e2960
Commit: e5e700ffcb

14 changed files with 765 additions and 6 deletions

src/api/index.ts

@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { GlamaHandler } from "./providers/glama"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -26,6 +27,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":

src/api/providers/glama.ts (new file, 134 lines)

@@ -0,0 +1,134 @@
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"

export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format.
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// This is specifically for Claude models (some models may support prompt caching automatically without it).
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages.
			// (Note: this works because we only ever add one user message at a time;
			// if we added multiple, we'd need to mark the user message before the last assistant message.)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since environment details are always appended at the end.
					// If they weren't, and the user added an image_url part, this would grab a text
					// part from before it instead of from the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// max_tokens is required by Anthropic models;
		// other providers default to the maximum tokens allowed.
		let maxTokens: number | undefined
		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions
			.create({
				model: this.getModel().id,
				max_tokens: maxTokens,
				temperature: 0,
				messages: openAiMessages,
				stream: true,
			})
			.withResponse()

		const completionRequestUuid = response.headers.get("x-completion-request-uuid")

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta
			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		// Usage information only becomes available a few moments after the completion finishes.
		await delay(1000)

		try {
			const response = await axios.get(
				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestUuid}`,
				{
					headers: {
						Authorization: `Bearer ${this.options.glamaApiKey}`,
					},
				},
			)

			const completionRequest = response.data

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: completionRequest.totalCostUsd,
				}
			}
		} catch (error) {
			// Ignore failures; usage reporting is best-effort.
			console.error("Error fetching Glama generation details:", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
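A hedged usage sketch, not part of the commit, showing how a caller might drive the handler end to end, assuming the chunk shapes yielded above:

// assuming: import { GlamaHandler } from "src/api/providers/glama"
const handler = new GlamaHandler({ glamaApiKey: process.env.GLAMA_API_KEY })
// with no glamaModelId set, getModel() falls back to glamaDefaultModelId

for await (const chunk of handler.createMessage("You are a concise assistant.", [
	{ role: "user", content: "Say hello." },
])) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text) // streamed completion text
	} else if (chunk.type === "usage") {
		// arrives last, after the 1s delay and the completion-requests lookup
		console.log(`\n${chunk.inputTokens} in / ${chunk.outputTokens} out, ~$${chunk.totalCost}`)
	}
}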

src/core/webview/ClineProvider.ts

@@ -33,6 +33,7 @@ https://github.com/KumarVariable/vscode-extension-sidebar-html/blob/master/src/c
type SecretKey =
	| "apiKey"
	| "glamaApiKey"
	| "openRouterApiKey"
	| "awsAccessKey"
	| "awsSecretKey"
@@ -44,6 +45,8 @@ type SecretKey =
type GlobalStateKey =
	| "apiProvider"
	| "apiModelId"
	| "glamaModelId"
	| "glamaModelInfo"
	| "awsRegion"
	| "awsUseCrossRegionInference"
	| "vertexProjectId"
@@ -82,6 +85,7 @@ type GlobalStateKey =
export const GlobalFileNames = {
	apiConversationHistory: "api_conversation_history.json",
	uiMessages: "ui_messages.json",
	glamaModels: "glama_models.json",
	openRouterModels: "openrouter_models.json",
	mcpSettings: "cline_mcp_settings.json",
}
@@ -385,6 +389,24 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			}
		}
	})
	this.readGlamaModels().then((glamaModels) => {
		if (glamaModels) {
			this.postMessageToWebview({ type: "glamaModels", glamaModels })
		}
	})
	this.refreshGlamaModels().then(async (glamaModels) => {
		if (glamaModels) {
			// Update model info in state (this needs to be done here since we don't want
			// to update state while settings is open, and we may refresh models there).
			const { apiConfiguration } = await this.getState()
			if (apiConfiguration.glamaModelId) {
				await this.updateGlobalState(
					"glamaModelInfo",
					glamaModels[apiConfiguration.glamaModelId],
				)
				await this.postStateToWebview()
			}
		}
	})
	break
case "newTask":
	// Code that should run in response to the hello message command
@@ -403,6 +425,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	apiProvider,
	apiModelId,
	apiKey,
	glamaModelId,
	glamaModelInfo,
	glamaApiKey,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -430,6 +455,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	await this.updateGlobalState("apiProvider", apiProvider)
	await this.updateGlobalState("apiModelId", apiModelId)
	await this.storeSecret("apiKey", apiKey)
	await this.updateGlobalState("glamaModelId", glamaModelId)
	await this.updateGlobalState("glamaModelInfo", glamaModelInfo)
	await this.storeSecret("glamaApiKey", glamaApiKey)
	await this.storeSecret("openRouterApiKey", openRouterApiKey)
	await this.storeSecret("awsAccessKey", awsAccessKey)
	await this.storeSecret("awsSecretKey", awsSecretKey)
@@ -525,6 +553,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	const lmStudioModels = await this.getLmStudioModels(message.text)
	this.postMessageToWebview({ type: "lmStudioModels", lmStudioModels })
	break
case "refreshGlamaModels":
	await this.refreshGlamaModels()
	break
case "refreshOpenRouterModels":
	await this.refreshOpenRouterModels()
	break
@@ -831,6 +862,93 @@ export class ClineProvider implements vscode.WebviewViewProvider {
		return cacheDir
	}

	async readGlamaModels(): Promise<Record<string, ModelInfo> | undefined> {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)
		const fileExists = await fileExistsAtPath(glamaModelsFilePath)
		if (fileExists) {
			const fileContents = await fs.readFile(glamaModelsFilePath, "utf8")
			return JSON.parse(fileContents)
		}
		return undefined
	}

	async refreshGlamaModels() {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)

		let models: Record<string, ModelInfo> = {}
		try {
			const response = await axios.get("https://glama.ai/api/gateway/v1/models")
			/*
			{
				"added": "2024-12-24T15:12:49.324Z",
				"capabilities": [
					"adjustable_safety_settings",
					"caching",
					"code_execution",
					"function_calling",
					"json_mode",
					"json_schema",
					"system_instructions",
					"tuning",
					"input:audio",
					"input:image",
					"input:text",
					"input:video",
					"output:text"
				],
				"id": "google-vertex/gemini-1.5-flash-002",
				"maxTokensInput": 1048576,
				"maxTokensOutput": 8192,
				"pricePerToken": {
					"cacheRead": null,
					"cacheWrite": null,
					"input": "0.000000075",
					"output": "0.0000003"
				}
			}
			*/
			if (response.data) {
				const rawModels = response.data
				const parsePrice = (price: any) => {
					if (price) {
						return parseFloat(price) * 1_000_000
					}
					return undefined
				}
				for (const rawModel of rawModels) {
					const modelInfo: ModelInfo = {
						maxTokens: rawModel.maxTokensOutput,
						contextWindow: rawModel.maxTokensInput,
						supportsImages: rawModel.capabilities?.includes("input:image"),
						supportsPromptCache: rawModel.capabilities?.includes("caching"),
						inputPrice: parsePrice(rawModel.pricePerToken?.input),
						outputPrice: parsePrice(rawModel.pricePerToken?.output),
						description: undefined,
						cacheWritesPrice: parsePrice(rawModel.pricePerToken?.cacheWrite),
						cacheReadsPrice: parsePrice(rawModel.pricePerToken?.cacheRead),
					}
					models[rawModel.id] = modelInfo
				}
			} else {
				console.error("Invalid response from Glama API")
			}
			await fs.writeFile(glamaModelsFilePath, JSON.stringify(models))
			console.log("Glama models fetched and saved", models)
		} catch (error) {
			console.error("Error fetching Glama models:", error)
		}

		await this.postMessageToWebview({ type: "glamaModels", glamaModels: models })
		return models
	}

	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
		const openRouterModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
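One detail in the refreshGlamaModels hunk above deserves a note: Glama reports pricePerToken as USD-per-token strings, while ModelInfo stores prices in USD per million tokens, hence the parseFloat(price) * 1_000_000 in parsePrice. A standalone sketch of the same conversion (hypothetical helper name, mirroring the diff):

const perMillionUsd = (pricePerToken?: string | null): number | undefined =>
	pricePerToken ? parseFloat(pricePerToken) * 1_000_000 : undefined

perMillionUsd("0.000000075") // 0.075 (the sample Gemini input price, USD per 1M tokens)
perMillionUsd("0.0000003") // 0.3 (the sample output price)
perMillionUsd(null) // undefined (matches the null cacheRead/cacheWrite fields above)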
@@ -1153,6 +1271,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	storedApiProvider,
	apiModelId,
	apiKey,
	glamaApiKey,
	glamaModelId,
	glamaModelInfo,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -1200,6 +1321,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	this.getGlobalState("apiProvider") as Promise<ApiProvider | undefined>,
	this.getGlobalState("apiModelId") as Promise<string | undefined>,
	this.getSecret("apiKey") as Promise<string | undefined>,
	this.getSecret("glamaApiKey") as Promise<string | undefined>,
	this.getGlobalState("glamaModelId") as Promise<string | undefined>,
	this.getGlobalState("glamaModelInfo") as Promise<ModelInfo | undefined>,
	this.getSecret("openRouterApiKey") as Promise<string | undefined>,
	this.getSecret("awsAccessKey") as Promise<string | undefined>,
	this.getSecret("awsSecretKey") as Promise<string | undefined>,
@@ -1264,6 +1388,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	apiProvider,
	apiModelId,
	apiKey,
	glamaApiKey,
	glamaModelId,
	glamaModelInfo,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -1402,6 +1529,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	}
	const secretKeys: SecretKey[] = [
		"apiKey",
		"glamaApiKey",
		"openRouterApiKey",
		"awsAccessKey",
		"awsSecretKey",

src/shared/ExtensionMessage.ts

@@ -16,6 +16,7 @@ export interface ExtensionMessage {
| "workspaceUpdated"
| "invoke"
| "partialMessage"
| "glamaModels"
| "openRouterModels"
| "openAiModels"
| "mcpServers"
@@ -34,6 +35,7 @@ export interface ExtensionMessage {
	lmStudioModels?: string[]
	filePaths?: string[]
	partialMessage?: ClineMessage
	glamaModels?: Record<string, ModelInfo>
	openRouterModels?: Record<string, ModelInfo>
	openAiModels?: string[]
	mcpServers?: McpServer[]

src/shared/WebviewMessage.ts

@@ -27,6 +27,7 @@ export interface WebviewMessage {
| "openFile"
| "openMention"
| "cancelTask"
| "refreshGlamaModels"
| "refreshOpenRouterModels"
| "refreshOpenAiModels"
| "alwaysAllowBrowser"

src/shared/api.ts

@@ -1,5 +1,6 @@
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
@@ -14,6 +15,9 @@ export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
@@ -309,6 +313,23 @@ export const bedrockModels = {
	},
} as const satisfies Record<string, ModelInfo>

// Glama
// https://glama.ai/models
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet" // will always exist in glamaModels
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
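Because these prices are stored per million tokens, a quick sanity check of what a request against the default model would cost (hypothetical helper, not in the commit, ignoring cache reads and writes):

const costUsd = (info: ModelInfo, inputTokens: number, outputTokens: number): number =>
	((info.inputPrice ?? 0) * inputTokens + (info.outputPrice ?? 0) * outputTokens) / 1_000_000

costUsd(glamaDefaultModelInfo, 1_000, 500) // (3.0 * 1000 + 15.0 * 500) / 1e6 = $0.0105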
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels