feat: add Glama gateway

2026-03-23 09:39:29 -04:00 · 2025-01-04 21:25:33 -06:00
parent 5e099e2960
commit e5e700ffcb
14 changed files with 765 additions and 6 deletions
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -1,4 +1,5 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { GlamaHandler } from "./providers/glama"
 import { ApiConfiguration, ModelInfo } from "../shared/api"
 import { AnthropicHandler } from "./providers/anthropic"
 import { AwsBedrockHandler } from "./providers/bedrock"
@@ -26,6 +27,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
 	switch (apiProvider) {
 		case "anthropic":
 			return new AnthropicHandler(options)
+		case "glama":
+			return new GlamaHandler(options)
 		case "openrouter":
 			return new OpenRouterHandler(options)
 		case "bedrock":
--- a/src/api/providers/glama.ts
+++ b/src/api/providers/glama.ts
@@ -0,0 +1,134 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import axios from "axios"
+import OpenAI from "openai"
+import { ApiHandler } from "../"
+import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { ApiStream } from "../transform/stream"
+import delay from "delay"
+
+/**
+ * ApiHandler for the Glama gateway, accessed through its OpenAI-compatible chat completions endpoint.
+ * Streams text chunks, then looks up token usage and cost via Glama's completion-requests endpoint.
+ */
+export class GlamaHandler implements ApiHandler {
+	private options: ApiHandlerOptions
+	private client: OpenAI
+
+	/** @param options Handler options; `glamaApiKey` authenticates both the gateway and the usage lookup. */
+	constructor(options: ApiHandlerOptions) {
+		this.options = options
+		this.client = new OpenAI({
+			baseURL: "https://glama.ai/api/gateway/openai/v1",
+			apiKey: this.options.glamaApiKey,
+		})
+	}
+
+	/**
+	 * Streams a completion for the given conversation.
+	 * @param systemPrompt Text sent as the leading system message.
+	 * @param messages Conversation history in Anthropic format; converted to OpenAI format before sending.
+	 * @returns Stream of `text` chunks, followed by one `usage` chunk when Glama reports token usage.
+	 */
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const modelId = this.getModel().id
+		// Convert Anthropic messages to OpenAI format
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
+		if (modelId.startsWith("anthropic/claude-3")) {
+			openAiMessages[0] = {
+				role: "system",
+				content: [
+					{
+						type: "text",
+						text: systemPrompt,
+						// @ts-ignore-next-line
+						cache_control: { type: "ephemeral" },
+					},
+				],
+			}
+
+			// Add cache_control to the last two user messages
+			// (note: this works because we only ever add one user message at a time,
+			// but if we added multiple we'd need to mark the user message before the last assistant message)
+			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+			lastTwoUserMessages.forEach((msg) => {
+				if (typeof msg.content === "string") {
+					msg.content = [{ type: "text", text: msg.content }]
+				}
+				if (Array.isArray(msg.content)) {
+					// NOTE: this is fine since env details will always be added at the end.
+					// but if it weren't there, and the user added a image_url type message,
+					// it would pop a text part before it and then move it after to the end.
+					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+					if (!lastTextPart) {
+						lastTextPart = { type: "text", text: "..." }
+						msg.content.push(lastTextPart)
+					}
+					// @ts-ignore-next-line
+					lastTextPart["cache_control"] = { type: "ephemeral" }
+				}
+			})
+		}
+
+		// max_tokens is required by Anthropic; other providers default to the max tokens allowed.
+		const maxTokens = modelId.startsWith("anthropic/") ? 8_192 : undefined
+
+		const { data: completion, response } = await this.client.chat.completions
+			.create({ model: modelId, max_tokens: maxTokens, temperature: 0, messages: openAiMessages, stream: true })
+			.withResponse()
+
+		const completionRequestUuid = response.headers.get("x-completion-request-uuid")
+
+		for await (const chunk of completion) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield { type: "text", text: delta.content }
+			}
+		}
+
+		// Without the request id header there is nothing to look up, so skip the usage query.
+		if (!completionRequestUuid) {
+			return
+		}
+
+		// The usage information is only available after a few moments after the completion
+		await delay(1000)
+
+		try {
+			const usageResponse = await axios.get(
+				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestUuid}`,
+				{ headers: { Authorization: `Bearer ${this.options.glamaApiKey}` } },
+			)
+			const completionRequest = usageResponse.data
+			if (completionRequest?.tokenUsage) {
+				yield {
+					type: "usage",
+					inputTokens: completionRequest.tokenUsage.promptTokens,
+					outputTokens: completionRequest.tokenUsage.completionTokens,
+					totalCost: completionRequest.totalCostUsd,
+				}
+			}
+		} catch (error) {
+			// Usage reporting is best-effort: log and finish the stream without a usage chunk.
+			console.error("Error fetching Glama generation details:", error)
+		}
+	}
+
+	/** Returns the configured model id and info, or the Glama defaults unless both are set. */
+	getModel(): { id: string; info: ModelInfo } {
+		const modelId = this.options.glamaModelId
+		const modelInfo = this.options.glamaModelInfo
+
+		if (modelId && modelInfo) {
+			return { id: modelId, info: modelInfo }
+		}
+
+		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
+	}
+}