feat: add Glama gateway

Author: Frank
Date: 2025-01-04 21:25:33 -06:00
Committed by: Matt Rubens
Parent: 5e099e2960
Commit: e5e700ffcb

14 changed files with 765 additions and 6 deletions

src/api/index.ts

@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { GlamaHandler } from "./providers/glama"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -26,6 +27,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":

src/api/providers/glama.ts (new file, 134 lines)

@@ -0,0 +1,134 @@
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"

export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format.
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// This is specifically for Claude models (some models may support prompt caching automatically without it).
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages.
			// (Note: this works because we only ever add one user message at a time;
			// if we added multiple, we'd need to mark the user message before the last assistant message.)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since environment details are always appended at the end.
					// If they weren't, and the user added an image_url part, this would grab a text
					// part from before it instead of from the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// max_tokens is required by Anthropic models;
		// other providers default to the maximum tokens allowed.
		let maxTokens: number | undefined
		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions
			.create({
				model: this.getModel().id,
				max_tokens: maxTokens,
				temperature: 0,
				messages: openAiMessages,
				stream: true,
			})
			.withResponse()

		const completionRequestUuid = response.headers.get("x-completion-request-uuid")

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta
			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		// Usage information only becomes available a few moments after the completion finishes.
		await delay(1000)

		try {
			const response = await axios.get(
				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestUuid}`,
				{
					headers: {
						Authorization: `Bearer ${this.options.glamaApiKey}`,
					},
				},
			)

			const completionRequest = response.data

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: completionRequest.totalCostUsd,
				}
			}
		} catch (error) {
			// Ignore failures; usage reporting is best-effort.
			console.error("Error fetching Glama generation details:", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
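A hedged usage sketch, not part of the commit, showing how a caller might drive the handler end to end, assuming the chunk shapes yielded above:

// assuming: import { GlamaHandler } from "src/api/providers/glama"
const handler = new GlamaHandler({ glamaApiKey: process.env.GLAMA_API_KEY })
// with no glamaModelId set, getModel() falls back to glamaDefaultModelId

for await (const chunk of handler.createMessage("You are a concise assistant.", [
	{ role: "user", content: "Say hello." },
])) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text) // streamed completion text
	} else if (chunk.type === "usage") {
		// arrives last, after the 1s delay and the completion-requests lookup
		console.log(`\n${chunk.inputTokens} in / ${chunk.outputTokens} out, ~$${chunk.totalCost}`)
	}
}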

src/core/webview/ClineProvider.ts

@@ -33,6 +33,7 @@ https://github.com/KumarVariable/vscode-extension-sidebar-html/blob/master/src/c
type SecretKey =
	| "apiKey"
	| "glamaApiKey"
	| "openRouterApiKey"
	| "awsAccessKey"
	| "awsSecretKey"
@@ -44,6 +45,8 @@ type SecretKey =
type GlobalStateKey =
	| "apiProvider"
	| "apiModelId"
	| "glamaModelId"
	| "glamaModelInfo"
	| "awsRegion"
	| "awsUseCrossRegionInference"
	| "vertexProjectId"
@@ -82,6 +85,7 @@ type GlobalStateKey =
export const GlobalFileNames = {
	apiConversationHistory: "api_conversation_history.json",
	uiMessages: "ui_messages.json",
	glamaModels: "glama_models.json",
	openRouterModels: "openrouter_models.json",
	mcpSettings: "cline_mcp_settings.json",
}
@@ -385,6 +389,24 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			}
		}
	})
	this.readGlamaModels().then((glamaModels) => {
		if (glamaModels) {
			this.postMessageToWebview({ type: "glamaModels", glamaModels })
		}
	})
	this.refreshGlamaModels().then(async (glamaModels) => {
		if (glamaModels) {
			// Update model info in state (this needs to be done here since we don't want
			// to update state while settings is open, and we may refresh models there).
			const { apiConfiguration } = await this.getState()
			if (apiConfiguration.glamaModelId) {
				await this.updateGlobalState(
					"glamaModelInfo",
					glamaModels[apiConfiguration.glamaModelId],
				)
				await this.postStateToWebview()
			}
		}
	})
	break
case "newTask":
	// Code that should run in response to the hello message command
@@ -403,6 +425,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	apiProvider,
	apiModelId,
	apiKey,
	glamaModelId,
	glamaModelInfo,
	glamaApiKey,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -430,6 +455,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	await this.updateGlobalState("apiProvider", apiProvider)
	await this.updateGlobalState("apiModelId", apiModelId)
	await this.storeSecret("apiKey", apiKey)
	await this.updateGlobalState("glamaModelId", glamaModelId)
	await this.updateGlobalState("glamaModelInfo", glamaModelInfo)
	await this.storeSecret("glamaApiKey", glamaApiKey)
	await this.storeSecret("openRouterApiKey", openRouterApiKey)
	await this.storeSecret("awsAccessKey", awsAccessKey)
	await this.storeSecret("awsSecretKey", awsSecretKey)
@@ -525,6 +553,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	const lmStudioModels = await this.getLmStudioModels(message.text)
	this.postMessageToWebview({ type: "lmStudioModels", lmStudioModels })
	break
case "refreshGlamaModels":
	await this.refreshGlamaModels()
	break
case "refreshOpenRouterModels":
	await this.refreshOpenRouterModels()
	break
@@ -831,6 +862,93 @@ export class ClineProvider implements vscode.WebviewViewProvider {
		return cacheDir
	}

	async readGlamaModels(): Promise<Record<string, ModelInfo> | undefined> {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)
		const fileExists = await fileExistsAtPath(glamaModelsFilePath)
		if (fileExists) {
			const fileContents = await fs.readFile(glamaModelsFilePath, "utf8")
			return JSON.parse(fileContents)
		}
		return undefined
	}

	async refreshGlamaModels() {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)

		let models: Record<string, ModelInfo> = {}
		try {
			const response = await axios.get("https://glama.ai/api/gateway/v1/models")
			/*
			{
				"added": "2024-12-24T15:12:49.324Z",
				"capabilities": [
					"adjustable_safety_settings",
					"caching",
					"code_execution",
					"function_calling",
					"json_mode",
					"json_schema",
					"system_instructions",
					"tuning",
					"input:audio",
					"input:image",
					"input:text",
					"input:video",
					"output:text"
				],
				"id": "google-vertex/gemini-1.5-flash-002",
				"maxTokensInput": 1048576,
				"maxTokensOutput": 8192,
				"pricePerToken": {
					"cacheRead": null,
					"cacheWrite": null,
					"input": "0.000000075",
					"output": "0.0000003"
				}
			}
			*/
			if (response.data) {
				const rawModels = response.data
				const parsePrice = (price: any) => {
					if (price) {
						return parseFloat(price) * 1_000_000
					}
					return undefined
				}
				for (const rawModel of rawModels) {
					const modelInfo: ModelInfo = {
						maxTokens: rawModel.maxTokensOutput,
						contextWindow: rawModel.maxTokensInput,
						supportsImages: rawModel.capabilities?.includes("input:image"),
						supportsPromptCache: rawModel.capabilities?.includes("caching"),
						inputPrice: parsePrice(rawModel.pricePerToken?.input),
						outputPrice: parsePrice(rawModel.pricePerToken?.output),
						description: undefined,
						cacheWritesPrice: parsePrice(rawModel.pricePerToken?.cacheWrite),
						cacheReadsPrice: parsePrice(rawModel.pricePerToken?.cacheRead),
					}
					models[rawModel.id] = modelInfo
				}
			} else {
				console.error("Invalid response from Glama API")
			}
			await fs.writeFile(glamaModelsFilePath, JSON.stringify(models))
			console.log("Glama models fetched and saved", models)
		} catch (error) {
			console.error("Error fetching Glama models:", error)
		}

		await this.postMessageToWebview({ type: "glamaModels", glamaModels: models })
		return models
	}

	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
		const openRouterModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
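One detail in the refreshGlamaModels hunk above deserves a note: Glama reports pricePerToken as USD-per-token strings, while ModelInfo stores prices in USD per million tokens, hence the parseFloat(price) * 1_000_000 in parsePrice. A standalone sketch of the same conversion (hypothetical helper name, mirroring the diff):

const perMillionUsd = (pricePerToken?: string | null): number | undefined =>
	pricePerToken ? parseFloat(pricePerToken) * 1_000_000 : undefined

perMillionUsd("0.000000075") // 0.075 (the sample Gemini input price, USD per 1M tokens)
perMillionUsd("0.0000003") // 0.3 (the sample output price)
perMillionUsd(null) // undefined (matches the null cacheRead/cacheWrite fields above)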
@@ -1153,6 +1271,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	storedApiProvider,
	apiModelId,
	apiKey,
	glamaApiKey,
	glamaModelId,
	glamaModelInfo,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -1200,6 +1321,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	this.getGlobalState("apiProvider") as Promise<ApiProvider | undefined>,
	this.getGlobalState("apiModelId") as Promise<string | undefined>,
	this.getSecret("apiKey") as Promise<string | undefined>,
	this.getSecret("glamaApiKey") as Promise<string | undefined>,
	this.getGlobalState("glamaModelId") as Promise<string | undefined>,
	this.getGlobalState("glamaModelInfo") as Promise<ModelInfo | undefined>,
	this.getSecret("openRouterApiKey") as Promise<string | undefined>,
	this.getSecret("awsAccessKey") as Promise<string | undefined>,
	this.getSecret("awsSecretKey") as Promise<string | undefined>,
@@ -1264,6 +1388,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	apiProvider,
	apiModelId,
	apiKey,
	glamaApiKey,
	glamaModelId,
	glamaModelInfo,
	openRouterApiKey,
	awsAccessKey,
	awsSecretKey,
@@ -1402,6 +1529,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
	}
	const secretKeys: SecretKey[] = [
		"apiKey",
		"glamaApiKey",
		"openRouterApiKey",
		"awsAccessKey",
		"awsSecretKey",

src/shared/ExtensionMessage.ts

@@ -16,6 +16,7 @@ export interface ExtensionMessage {
| "workspaceUpdated"
| "invoke"
| "partialMessage"
| "glamaModels"
| "openRouterModels"
| "openAiModels"
| "mcpServers"
@@ -34,6 +35,7 @@ export interface ExtensionMessage {
	lmStudioModels?: string[]
	filePaths?: string[]
	partialMessage?: ClineMessage
	glamaModels?: Record<string, ModelInfo>
	openRouterModels?: Record<string, ModelInfo>
	openAiModels?: string[]
	mcpServers?: McpServer[]

src/shared/WebviewMessage.ts

@@ -27,6 +27,7 @@ export interface WebviewMessage {
| "openFile"
| "openMention"
| "cancelTask"
| "refreshGlamaModels"
| "refreshOpenRouterModels"
| "refreshOpenAiModels"
| "alwaysAllowBrowser"

src/shared/api.ts

@@ -1,5 +1,6 @@
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
@@ -14,6 +15,9 @@ export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
@@ -309,6 +313,23 @@ export const bedrockModels = {
	},
} as const satisfies Record<string, ModelInfo>

// Glama
// https://glama.ai/models
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet" // will always exist in glamaModels
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}
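Because these prices are stored per million tokens, a quick sanity check of what a request against the default model would cost (hypothetical helper, not in the commit, ignoring cache reads and writes):

const costUsd = (info: ModelInfo, inputTokens: number, outputTokens: number): number =>
	((info.inputPrice ?? 0) * inputTokens + (info.outputPrice ?? 0) * outputTokens) / 1_000_000

costUsd(glamaDefaultModelInfo, 1_000, 500) // (3.0 * 1000 + 15.0 * 500) / 1e6 = $0.0105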
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels