import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"

export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// This is specifically for Claude models (some models may 'support prompt caching' automatically without this).
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages.
			// (Note: this works because we only ever add one user message at a time,
			// but if we added multiple we'd need to mark the user message before the last assistant message.)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since env details will always be added at the end,
					// but if it weren't there, and the user added an image_url type message,
					// it would pop a text part before it and then move it to the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// Required by Anthropic; other providers default to the maximum tokens allowed.
		let maxTokens: number | undefined
		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions
			.create({
				model: this.getModel().id,
				max_tokens: maxTokens,
				temperature: 0,
				messages: openAiMessages,
				stream: true,
			})
			.withResponse()

		const completionRequestId = response.headers.get("x-completion-request-id")

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta
			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		try {
			// Fetch token usage and cost for the completed request from the Glama gateway.
			const response = await axios.get(
				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestId}`,
				{
					headers: {
						Authorization: `Bearer ${this.options.glamaApiKey}`,
					},
				},
			)

			const completionRequest = response.data

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					cacheWriteTokens: completionRequest.tokenUsage.cacheCreationInputTokens,
					cacheReadTokens: completionRequest.tokenUsage.cacheReadInputTokens,
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: parseFloat(completionRequest.totalCostUsd),
				}
			}
		} catch (error) {
			// Ignore failures here; usage reporting is best-effort.
			console.error("Error fetching Glama generation details:", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
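
// Usage sketch (illustrative assumption, not part of this handler): how a caller might
// consume the stream returned by createMessage. The API key, prompt, and message shown
// here are placeholders; chunk shapes ("text" / "usage") follow the yields above.
//
// const handler = new GlamaHandler({ glamaApiKey: process.env.GLAMA_API_KEY })
// const stream = handler.createMessage("You are a helpful assistant.", [
// 	{ role: "user", content: "Hello" },
// ])
// for await (const chunk of stream) {
// 	if (chunk.type === "text") {
// 		process.stdout.write(chunk.text)
// 	} else if (chunk.type === "usage") {
// 		console.log("tokens in/out:", chunk.inputTokens, chunk.outputTokens, "cost:", chunk.totalCost)
// 	}
// }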