Mirror of https://github.com/pacnpal/Roo-Code.git (synced 2025-12-21 21:01:06 -05:00)
feat: add Glama gateway
@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { GlamaHandler } from "./providers/glama"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -26,6 +27,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":
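For orientation, here is a minimal sketch of how a caller reaches the new branch. The configuration literal and import path are illustrative, not part of the commit; only `buildApiHandler`, `apiProvider`, and the `glama*` option names come from this diff.

import { buildApiHandler } from "./src/api"

// Hypothetical configuration: apiProvider selects the new "glama" case;
// the glama* fields are consumed by GlamaHandler (see the new file below).
const handler = buildApiHandler({
	apiProvider: "glama",
	glamaApiKey: process.env.GLAMA_API_KEY,
	glamaModelId: "anthropic/claude-3-5-sonnet",
})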
134  src/api/providers/glama.ts  Normal file
@@ -0,0 +1,134 @@
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"

export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format.
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// This is specific to Claude models (some models may support prompt caching automatically, without this).
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages.
			// (Note: this works because we only ever add one user message at a time,
			// but if we added multiple we'd need to mark the user message before the last assistant message.)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since env details will always be added at the end,
					// but if they weren't, and the user added an image_url part last,
					// this would append a placeholder text part after it to carry the cache marker.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()

					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// max_tokens is required by Anthropic; other providers default to the maximum tokens allowed.
		let maxTokens: number | undefined

		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions
			.create({
				model: this.getModel().id,
				max_tokens: maxTokens,
				temperature: 0,
				messages: openAiMessages,
				stream: true,
			})
			.withResponse()

		const completionRequestUuid = response.headers.get("x-completion-request-uuid")

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta

			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		// Usage information only becomes available a few moments after the completion finishes.
		await delay(1000)

		try {
			const response = await axios.get(
				`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestUuid}`,
				{
					headers: {
						Authorization: `Bearer ${this.options.glamaApiKey}`,
					},
				},
			)

			const completionRequest = response.data

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: completionRequest.totalCostUsd,
				}
			}
		} catch (error) {
			// Best-effort: ignore failures here and just log them.
			console.error("Error fetching Glama generation details:", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo

		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}

		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
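A rough usage sketch for the new handler. The options and the "text"/"usage" chunk shapes come from the code above; the import path, env var, and driver function are illustrative assumptions.

import { GlamaHandler } from "./src/api/providers/glama"

// Hypothetical driver: stream a reply and collect text plus the trailing usage chunk.
async function main() {
	const handler = new GlamaHandler({
		glamaApiKey: process.env.GLAMA_API_KEY,
		glamaModelId: "anthropic/claude-3-5-sonnet",
	})

	let reply = ""
	const stream = handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: "Say hello." },
	])

	for await (const chunk of stream) {
		if (chunk.type === "text") {
			reply += chunk.text
		} else if (chunk.type === "usage") {
			// Yielded last, after the handler polls the completion-requests endpoint.
			console.log(`tokens in/out: ${chunk.inputTokens}/${chunk.outputTokens}, cost: $${chunk.totalCost}`)
		}
	}

	console.log(reply)
}

main()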
@@ -33,6 +33,7 @@ https://github.com/KumarVariable/vscode-extension-sidebar-html/blob/master/src/c

type SecretKey =
	| "apiKey"
	| "glamaApiKey"
	| "openRouterApiKey"
	| "awsAccessKey"
	| "awsSecretKey"
@@ -44,6 +45,8 @@ type SecretKey =
type GlobalStateKey =
	| "apiProvider"
	| "apiModelId"
	| "glamaModelId"
	| "glamaModelInfo"
	| "awsRegion"
	| "awsUseCrossRegionInference"
	| "vertexProjectId"
@@ -82,6 +85,7 @@ type GlobalStateKey =
export const GlobalFileNames = {
	apiConversationHistory: "api_conversation_history.json",
	uiMessages: "ui_messages.json",
	glamaModels: "glama_models.json",
	openRouterModels: "openrouter_models.json",
	mcpSettings: "cline_mcp_settings.json",
}
@@ -385,6 +389,24 @@ export class ClineProvider implements vscode.WebviewViewProvider {
						}
					}
				})
				this.readGlamaModels().then((glamaModels) => {
					if (glamaModels) {
						this.postMessageToWebview({ type: "glamaModels", glamaModels })
					}
				})
				this.refreshGlamaModels().then(async (glamaModels) => {
					if (glamaModels) {
						// Update model info in state (this needs to be done here since we don't
						// want to update state while settings is open, and we may refresh models there).
						const { apiConfiguration } = await this.getState()
						if (apiConfiguration.glamaModelId) {
							await this.updateGlobalState(
								"glamaModelInfo",
								glamaModels[apiConfiguration.glamaModelId],
							)
							await this.postStateToWebview()
						}
					}
				})
				break
			case "newTask":
				// Code that should run in response to the hello message command
@@ -403,6 +425,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
					apiProvider,
					apiModelId,
					apiKey,
					glamaModelId,
					glamaModelInfo,
					glamaApiKey,
					openRouterApiKey,
					awsAccessKey,
					awsSecretKey,
@@ -430,6 +455,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
				await this.updateGlobalState("apiProvider", apiProvider)
				await this.updateGlobalState("apiModelId", apiModelId)
				await this.storeSecret("apiKey", apiKey)
				await this.updateGlobalState("glamaModelId", glamaModelId)
				await this.updateGlobalState("glamaModelInfo", glamaModelInfo)
				await this.storeSecret("glamaApiKey", glamaApiKey)
				await this.storeSecret("openRouterApiKey", openRouterApiKey)
				await this.storeSecret("awsAccessKey", awsAccessKey)
				await this.storeSecret("awsSecretKey", awsSecretKey)
@@ -525,6 +553,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
				const lmStudioModels = await this.getLmStudioModels(message.text)
				this.postMessageToWebview({ type: "lmStudioModels", lmStudioModels })
				break
			case "refreshGlamaModels":
				await this.refreshGlamaModels()
				break
			case "refreshOpenRouterModels":
				await this.refreshOpenRouterModels()
				break
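On the webview side, the round trip for the new message types might look like the following hypothetical sketch. The "refreshGlamaModels" and "glamaModels" message types come from the WebviewMessage and ExtensionMessage changes later in this diff; `acquireVsCodeApi` is the standard VS Code webview API, and everything else is illustrative.

const vscode = acquireVsCodeApi()

// Ask the extension host to refresh the Glama model list...
vscode.postMessage({ type: "refreshGlamaModels" })

// ...and receive the "glamaModels" message that refreshGlamaModels posts back.
window.addEventListener("message", (event) => {
	const message = event.data
	if (message.type === "glamaModels") {
		console.log("available Glama models:", Object.keys(message.glamaModels ?? {}))
	}
})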
@@ -831,6 +862,93 @@ export class ClineProvider implements vscode.WebviewViewProvider {
		return cacheDir
	}

	async readGlamaModels(): Promise<Record<string, ModelInfo> | undefined> {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)
		const fileExists = await fileExistsAtPath(glamaModelsFilePath)
		if (fileExists) {
			const fileContents = await fs.readFile(glamaModelsFilePath, "utf8")
			return JSON.parse(fileContents)
		}
		return undefined
	}

	async refreshGlamaModels() {
		const glamaModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
			GlobalFileNames.glamaModels,
		)

		let models: Record<string, ModelInfo> = {}
		try {
			const response = await axios.get("https://glama.ai/api/gateway/v1/models")
			/*
				{
					"added": "2024-12-24T15:12:49.324Z",
					"capabilities": [
						"adjustable_safety_settings",
						"caching",
						"code_execution",
						"function_calling",
						"json_mode",
						"json_schema",
						"system_instructions",
						"tuning",
						"input:audio",
						"input:image",
						"input:text",
						"input:video",
						"output:text"
					],
					"id": "google-vertex/gemini-1.5-flash-002",
					"maxTokensInput": 1048576,
					"maxTokensOutput": 8192,
					"pricePerToken": {
						"cacheRead": null,
						"cacheWrite": null,
						"input": "0.000000075",
						"output": "0.0000003"
					}
				}
			*/
			if (response.data) {
				const rawModels = response.data
				const parsePrice = (price: any) => {
					if (price) {
						return parseFloat(price) * 1_000_000
					}
					return undefined
				}
				for (const rawModel of rawModels) {
					const modelInfo: ModelInfo = {
						maxTokens: rawModel.maxTokensOutput,
						contextWindow: rawModel.maxTokensInput,
						supportsImages: rawModel.capabilities?.includes("input:image"),
						supportsPromptCache: rawModel.capabilities?.includes("caching"),
						inputPrice: parsePrice(rawModel.pricePerToken?.input),
						outputPrice: parsePrice(rawModel.pricePerToken?.output),
						description: undefined,
						cacheWritesPrice: parsePrice(rawModel.pricePerToken?.cacheWrite),
						cacheReadsPrice: parsePrice(rawModel.pricePerToken?.cacheRead),
					}

					models[rawModel.id] = modelInfo
				}
			} else {
				console.error("Invalid response from Glama API")
			}
			await fs.writeFile(glamaModelsFilePath, JSON.stringify(models))
			console.log("Glama models fetched and saved", models)
		} catch (error) {
			console.error("Error fetching Glama models:", error)
		}

		await this.postMessageToWebview({ type: "glamaModels", glamaModels: models })
		return models
	}

	async readOpenRouterModels(): Promise<Record<string, ModelInfo> | undefined> {
		const openRouterModelsFilePath = path.join(
			await this.ensureCacheDirectoryExists(),
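Note the unit conversion in parsePrice: the gateway reports pricePerToken as USD-per-token strings, while ModelInfo prices (see glamaDefaultModelInfo later in this diff, e.g. inputPrice 3.0 for Claude 3.5 Sonnet) are USD per million tokens, hence the 1_000_000 factor. A standalone check of the arithmetic:

// Mirrors the parsePrice helper above: "0.000000075" USD/token
// becomes 0.075 USD per million tokens.
const parsePrice = (price?: string | null) =>
	price ? parseFloat(price) * 1_000_000 : undefined

console.log(parsePrice("0.000000075")) // 0.075
console.log(parsePrice("0.0000003"))   // 0.3
console.log(parsePrice(null))          // undefined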
@@ -1153,6 +1271,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			storedApiProvider,
			apiModelId,
			apiKey,
			glamaApiKey,
			glamaModelId,
			glamaModelInfo,
			openRouterApiKey,
			awsAccessKey,
			awsSecretKey,
@@ -1200,6 +1321,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			this.getGlobalState("apiProvider") as Promise<ApiProvider | undefined>,
			this.getGlobalState("apiModelId") as Promise<string | undefined>,
			this.getSecret("apiKey") as Promise<string | undefined>,
			this.getSecret("glamaApiKey") as Promise<string | undefined>,
			this.getGlobalState("glamaModelId") as Promise<string | undefined>,
			this.getGlobalState("glamaModelInfo") as Promise<ModelInfo | undefined>,
			this.getSecret("openRouterApiKey") as Promise<string | undefined>,
			this.getSecret("awsAccessKey") as Promise<string | undefined>,
			this.getSecret("awsSecretKey") as Promise<string | undefined>,
@@ -1264,6 +1388,9 @@ export class ClineProvider implements vscode.WebviewViewProvider {
			apiProvider,
			apiModelId,
			apiKey,
			glamaApiKey,
			glamaModelId,
			glamaModelInfo,
			openRouterApiKey,
			awsAccessKey,
			awsSecretKey,
@@ -1402,6 +1529,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
		}
		const secretKeys: SecretKey[] = [
			"apiKey",
			"glamaApiKey",
			"openRouterApiKey",
			"awsAccessKey",
			"awsSecretKey",
@@ -16,6 +16,7 @@ export interface ExtensionMessage {
		| "workspaceUpdated"
		| "invoke"
		| "partialMessage"
		| "glamaModels"
		| "openRouterModels"
		| "openAiModels"
		| "mcpServers"
@@ -34,6 +35,7 @@ export interface ExtensionMessage {
	lmStudioModels?: string[]
	filePaths?: string[]
	partialMessage?: ClineMessage
	glamaModels?: Record<string, ModelInfo>
	openRouterModels?: Record<string, ModelInfo>
	openAiModels?: string[]
	mcpServers?: McpServer[]
@@ -27,6 +27,7 @@ export interface WebviewMessage {
		| "openFile"
		| "openMention"
		| "cancelTask"
		| "refreshGlamaModels"
		| "refreshOpenRouterModels"
		| "refreshOpenAiModels"
		| "alwaysAllowBrowser"
@@ -1,5 +1,6 @@
export type ApiProvider =
	| "anthropic"
	| "glama"
	| "openrouter"
	| "bedrock"
	| "vertex"
@@ -14,6 +15,9 @@ export interface ApiHandlerOptions {
	apiModelId?: string
	apiKey?: string // anthropic
	anthropicBaseUrl?: string
	glamaModelId?: string
	glamaModelInfo?: ModelInfo
	glamaApiKey?: string
	openRouterApiKey?: string
	openRouterModelId?: string
	openRouterModelInfo?: ModelInfo
@@ -309,6 +313,23 @@ export const bedrockModels = {
	},
} as const satisfies Record<string, ModelInfo>

// Glama
// https://glama.ai/models
export const glamaDefaultModelId = "anthropic/claude-3-5-sonnet" // will always exist in glamaModels
export const glamaDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/anthropic/claude-3.5-sonnet) variant._",
}

// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet:beta" // will always exist in openRouterModels
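Since these prices are USD per million tokens, a hypothetical cost estimate from token counts looks like this (standalone sketch; the import path and token counts are illustrative):

import { glamaDefaultModelInfo } from "./src/shared/api"

// Scale token counts down by a million before multiplying by the per-million prices.
const inputTokens = 12_000
const outputTokens = 1_500

const cost =
	(inputTokens / 1_000_000) * (glamaDefaultModelInfo.inputPrice ?? 0) +
	(outputTokens / 1_000_000) * (glamaDefaultModelInfo.outputPrice ?? 0)

console.log(cost.toFixed(4)) // 0.0585 (= 0.036 input + 0.0225 output)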