merge(upstream): merge upstream changes keeping VSCode LM provider and adding Glama support

RaySinner
2025-01-07 01:54:46 +03:00
29 changed files with 2040 additions and 280 deletions


@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { GlamaHandler } from "./providers/glama"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -28,6 +29,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":


@@ -0,0 +1,192 @@
import { OpenAiHandler } from '../openai'
import { ApiHandlerOptions, openAiModelInfoSaneDefaults } from '../../../shared/api'
import OpenAI, { AzureOpenAI } from 'openai'
import { Anthropic } from '@anthropic-ai/sdk'
// Mock dependencies
jest.mock('openai')
describe('OpenAiHandler', () => {
	const mockOptions: ApiHandlerOptions = {
		openAiApiKey: 'test-key',
		openAiModelId: 'gpt-4',
		openAiStreamingEnabled: true,
		openAiBaseUrl: 'https://api.openai.com/v1'
	}

	beforeEach(() => {
		jest.clearAllMocks()
	})

	test('constructor initializes with correct options', () => {
		const handler = new OpenAiHandler(mockOptions)
		expect(handler).toBeInstanceOf(OpenAiHandler)
		expect(OpenAI).toHaveBeenCalledWith({
			apiKey: mockOptions.openAiApiKey,
			baseURL: mockOptions.openAiBaseUrl
		})
	})

	test('constructor initializes Azure client when Azure URL is provided', () => {
		const azureOptions: ApiHandlerOptions = {
			...mockOptions,
			openAiBaseUrl: 'https://example.azure.com',
			azureApiVersion: '2023-05-15'
		}
		const handler = new OpenAiHandler(azureOptions)
		expect(handler).toBeInstanceOf(OpenAiHandler)
		expect(AzureOpenAI).toHaveBeenCalledWith({
			baseURL: azureOptions.openAiBaseUrl,
			apiKey: azureOptions.openAiApiKey,
			apiVersion: azureOptions.azureApiVersion
		})
	})

	test('getModel returns correct model info', () => {
		const handler = new OpenAiHandler(mockOptions)
		const result = handler.getModel()
		expect(result).toEqual({
			id: mockOptions.openAiModelId,
			info: openAiModelInfoSaneDefaults
		})
	})

	test('createMessage handles streaming correctly when enabled', async () => {
		const handler = new OpenAiHandler({
			...mockOptions,
			openAiStreamingEnabled: true,
			includeMaxTokens: true
		})
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield {
					choices: [{
						delta: {
							content: 'test response'
						}
					}],
					usage: {
						prompt_tokens: 10,
						completion_tokens: 5
					}
				}
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockStream)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any

		const systemPrompt = 'test system prompt'
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: 'user', content: 'test message' }
		]
		const generator = handler.createMessage(systemPrompt, messages)
		const chunks = []
		for await (const chunk of generator) {
			chunks.push(chunk)
		}

		expect(chunks).toEqual([
			{
				type: 'text',
				text: 'test response'
			},
			{
				type: 'usage',
				inputTokens: 10,
				outputTokens: 5
			}
		])
		expect(mockCreate).toHaveBeenCalledWith({
			model: mockOptions.openAiModelId,
			messages: [
				{ role: 'system', content: systemPrompt },
				{ role: 'user', content: 'test message' }
			],
			temperature: 0,
			stream: true,
			stream_options: { include_usage: true },
			max_tokens: openAiModelInfoSaneDefaults.maxTokens
		})
	})

	test('createMessage handles non-streaming correctly when disabled', async () => {
		const handler = new OpenAiHandler({
			...mockOptions,
			openAiStreamingEnabled: false
		})
		const mockResponse = {
			choices: [{
				message: {
					content: 'test response'
				}
			}],
			usage: {
				prompt_tokens: 10,
				completion_tokens: 5
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockResponse)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any

		const systemPrompt = 'test system prompt'
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: 'user', content: 'test message' }
		]
		const generator = handler.createMessage(systemPrompt, messages)
		const chunks = []
		for await (const chunk of generator) {
			chunks.push(chunk)
		}

		expect(chunks).toEqual([
			{
				type: 'text',
				text: 'test response'
			},
			{
				type: 'usage',
				inputTokens: 10,
				outputTokens: 5
			}
		])
		expect(mockCreate).toHaveBeenCalledWith({
			model: mockOptions.openAiModelId,
			messages: [
				{ role: 'user', content: systemPrompt },
				{ role: 'user', content: 'test message' }
			]
		})
	})

	test('createMessage handles API errors', async () => {
		const handler = new OpenAiHandler(mockOptions)
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				throw new Error('API Error')
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockStream)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any
		const generator = handler.createMessage('test', [])
		await expect(generator.next()).rejects.toThrow('API Error')
	})
})
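The streaming tests above stub the SDK stream with a plain object exposing an async generator via Symbol.asyncIterator, which is all that `for await` (and therefore the handler) needs. A standalone sketch of that pattern with illustrative values, not taken from the diff:

// Any object with [Symbol.asyncIterator] can be consumed with `for await`.
const fakeStream = {
	async *[Symbol.asyncIterator]() {
		yield { choices: [{ delta: { content: "hello" } }] }
		yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } }
	},
}

async function demo() {
	for await (const chunk of fakeStream) {
		console.log(chunk) // delta chunks first; usage attached to the final chunk here
	}
}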

src/api/providers/glama.ts (new file, 132 lines)

@@ -0,0 +1,132 @@
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"
export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages
			// (note: this works because we only ever add one user message at a time,
			// but if we added multiple we'd need to mark the user message before the last assistant message)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since env details will always be added at the end.
					// but if it weren't there, and the user added a image_url type message,
					// it would pop a text part before it and then move it after to the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// Required by Anthropic
		// Other providers default to max tokens allowed.
		let maxTokens: number | undefined
		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions.create({
			model: this.getModel().id,
			max_tokens: maxTokens,
			temperature: 0,
			messages: openAiMessages,
			stream: true,
		}).withResponse();

		const completionRequestId = response.headers.get(
			'x-completion-request-id',
		);

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta
			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		try {
			const response = await axios.get(`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestId}`, {
				headers: {
					Authorization: `Bearer ${this.options.glamaApiKey}`,
				},
			})
			const completionRequest = response.data;

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					cacheWriteTokens: completionRequest.tokenUsage.cacheCreationInputTokens,
					cacheReadTokens: completionRequest.tokenUsage.cacheReadInputTokens,
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: parseFloat(completionRequest.totalCostUsd),
				}
			}
		} catch (error) {
			console.error("Error fetching Glama completion details", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
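To make the prompt-caching branch above concrete: for an anthropic/claude-3* model, the system prompt and the last text part of the last two user messages end up marked as ephemeral cache breakpoints. A hedged sketch of the resulting message shape (the literal strings are illustrative, not from the diff):

// Shape after the cache_control rewrite in createMessage().
const transformedMessages = [
	{
		role: "system",
		content: [
			{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
		],
	},
	{
		role: "user",
		content: [
			{ type: "text", text: "Summarize this file.", cache_control: { type: "ephemeral" } },
		],
	},
]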


@@ -32,42 +32,64 @@ export class OpenAiHandler implements ApiHandler {
 		}
 	}
 
-	// Include stream_options for OpenAI Compatible providers if the checkbox is checked
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
-		]
 		const modelInfo = this.getModel().info
-		const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
-			model: this.options.openAiModelId ?? "",
-			messages: openAiMessages,
-			temperature: 0,
-			stream: true,
-		}
-		if (this.options.includeMaxTokens) {
-			requestOptions.max_tokens = modelInfo.maxTokens
-		}
-		if (this.options.includeStreamOptions ?? true) {
-			requestOptions.stream_options = { include_usage: true }
-		}
-		const stream = await this.client.chat.completions.create(requestOptions)
-		for await (const chunk of stream) {
-			const delta = chunk.choices[0]?.delta
-			if (delta?.content) {
-				yield {
-					type: "text",
-					text: delta.content,
-				}
-			}
-			if (chunk.usage) {
-				yield {
-					type: "usage",
-					inputTokens: chunk.usage.prompt_tokens || 0,
-					outputTokens: chunk.usage.completion_tokens || 0,
-				}
-			}
-		}
+		const modelId = this.options.openAiModelId ?? ""
+		if (this.options.openAiStreamingEnabled ?? true) {
+			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
+				role: "system",
+				content: systemPrompt
+			}
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+				model: modelId,
+				temperature: 0,
+				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				stream: true as const,
+				stream_options: { include_usage: true },
+			}
+			if (this.options.includeMaxTokens) {
+				requestOptions.max_tokens = modelInfo.maxTokens
+			}
+			const stream = await this.client.chat.completions.create(requestOptions)
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
+		} else {
+			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
+			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
+				role: "user",
+				content: systemPrompt
+			}
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+				model: modelId,
+				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+			}
+			const response = await this.client.chat.completions.create(requestOptions)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+			yield {
+				type: "usage",
+				inputTokens: response.usage?.prompt_tokens || 0,
+				outputTokens: response.usage?.completion_tokens || 0,
+			}
+		}
 	}
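For reference, a small sketch of how the two branches above are selected, mirroring the unit tests earlier in this commit; the import path, keys, and model ids are illustrative placeholders, not part of the commit:

// Hypothetical usage sketch.
import { OpenAiHandler } from "./src/api/providers/openai"

// Streaming path (default): the system prompt keeps role "system" and
// usage is read from the final stream chunk.
const streaming = new OpenAiHandler({
	openAiApiKey: "<key>",
	openAiBaseUrl: "https://api.openai.com/v1",
	openAiModelId: "gpt-4",
	openAiStreamingEnabled: true,
})

// Non-streaming path: for models such as o1 that reject streaming, custom
// temperature, and system prompts; the system prompt is sent as a "user"
// message and usage comes from the response body.
const nonStreaming = new OpenAiHandler({
	openAiApiKey: "<key>",
	openAiBaseUrl: "https://api.openai.com/v1",
	openAiModelId: "o1",
	openAiStreamingEnabled: false,
})

async function run() {
	for await (const chunk of nonStreaming.createMessage("You are concise.", [])) {
		// yields one "text" chunk, then one "usage" chunk
	}
}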