merge(upstream): merge upstream changes keeping VSCode LM provider and adding Glama support

RaySinner
2025-01-07 01:54:46 +03:00
29 changed files with 2040 additions and 280 deletions


@@ -1,4 +1,5 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { GlamaHandler } from "./providers/glama"
import { ApiConfiguration, ModelInfo } from "../shared/api"
import { AnthropicHandler } from "./providers/anthropic"
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -28,6 +29,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
	switch (apiProvider) {
		case "anthropic":
			return new AnthropicHandler(options)
		case "glama":
			return new GlamaHandler(options)
		case "openrouter":
			return new OpenRouterHandler(options)
		case "bedrock":


@@ -0,0 +1,192 @@
import { OpenAiHandler } from '../openai'
import { ApiHandlerOptions, openAiModelInfoSaneDefaults } from '../../../shared/api'
import OpenAI, { AzureOpenAI } from 'openai'
import { Anthropic } from '@anthropic-ai/sdk'
// Mock dependencies
jest.mock('openai')
describe('OpenAiHandler', () => {
	const mockOptions: ApiHandlerOptions = {
		openAiApiKey: 'test-key',
		openAiModelId: 'gpt-4',
		openAiStreamingEnabled: true,
		openAiBaseUrl: 'https://api.openai.com/v1'
	}

	beforeEach(() => {
		jest.clearAllMocks()
	})

	test('constructor initializes with correct options', () => {
		const handler = new OpenAiHandler(mockOptions)
		expect(handler).toBeInstanceOf(OpenAiHandler)
		expect(OpenAI).toHaveBeenCalledWith({
			apiKey: mockOptions.openAiApiKey,
			baseURL: mockOptions.openAiBaseUrl
		})
	})

	test('constructor initializes Azure client when Azure URL is provided', () => {
		const azureOptions: ApiHandlerOptions = {
			...mockOptions,
			openAiBaseUrl: 'https://example.azure.com',
			azureApiVersion: '2023-05-15'
		}
		const handler = new OpenAiHandler(azureOptions)
		expect(handler).toBeInstanceOf(OpenAiHandler)
		expect(AzureOpenAI).toHaveBeenCalledWith({
			baseURL: azureOptions.openAiBaseUrl,
			apiKey: azureOptions.openAiApiKey,
			apiVersion: azureOptions.azureApiVersion
		})
	})

	test('getModel returns correct model info', () => {
		const handler = new OpenAiHandler(mockOptions)
		const result = handler.getModel()
		expect(result).toEqual({
			id: mockOptions.openAiModelId,
			info: openAiModelInfoSaneDefaults
		})
	})

	test('createMessage handles streaming correctly when enabled', async () => {
		const handler = new OpenAiHandler({
			...mockOptions,
			openAiStreamingEnabled: true,
			includeMaxTokens: true
		})
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield {
					choices: [{
						delta: {
							content: 'test response'
						}
					}],
					usage: {
						prompt_tokens: 10,
						completion_tokens: 5
					}
				}
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockStream)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any

		const systemPrompt = 'test system prompt'
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: 'user', content: 'test message' }
		]
		const generator = handler.createMessage(systemPrompt, messages)
		const chunks = []
		for await (const chunk of generator) {
			chunks.push(chunk)
		}

		expect(chunks).toEqual([
			{
				type: 'text',
				text: 'test response'
			},
			{
				type: 'usage',
				inputTokens: 10,
				outputTokens: 5
			}
		])
		expect(mockCreate).toHaveBeenCalledWith({
			model: mockOptions.openAiModelId,
			messages: [
				{ role: 'system', content: systemPrompt },
				{ role: 'user', content: 'test message' }
			],
			temperature: 0,
			stream: true,
			stream_options: { include_usage: true },
			max_tokens: openAiModelInfoSaneDefaults.maxTokens
		})
	})

	test('createMessage handles non-streaming correctly when disabled', async () => {
		const handler = new OpenAiHandler({
			...mockOptions,
			openAiStreamingEnabled: false
		})
		const mockResponse = {
			choices: [{
				message: {
					content: 'test response'
				}
			}],
			usage: {
				prompt_tokens: 10,
				completion_tokens: 5
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockResponse)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any

		const systemPrompt = 'test system prompt'
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: 'user', content: 'test message' }
		]
		const generator = handler.createMessage(systemPrompt, messages)
		const chunks = []
		for await (const chunk of generator) {
			chunks.push(chunk)
		}

		expect(chunks).toEqual([
			{
				type: 'text',
				text: 'test response'
			},
			{
				type: 'usage',
				inputTokens: 10,
				outputTokens: 5
			}
		])
		expect(mockCreate).toHaveBeenCalledWith({
			model: mockOptions.openAiModelId,
			messages: [
				{ role: 'user', content: systemPrompt },
				{ role: 'user', content: 'test message' }
			]
		})
	})

	test('createMessage handles API errors', async () => {
		const handler = new OpenAiHandler(mockOptions)
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				throw new Error('API Error')
			}
		}
		const mockCreate = jest.fn().mockResolvedValue(mockStream)
		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
			completions: { create: mockCreate }
		} as any
		const generator = handler.createMessage('test', [])
		await expect(generator.next()).rejects.toThrow('API Error')
	})
})
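The streaming tests above stub the SDK stream with a plain object exposing an async generator via Symbol.asyncIterator, which is all that `for await` (and therefore the handler) needs. A standalone sketch of that pattern with illustrative values, not taken from the diff:

// Any object with [Symbol.asyncIterator] can be consumed with `for await`.
const fakeStream = {
	async *[Symbol.asyncIterator]() {
		yield { choices: [{ delta: { content: "hello" } }] }
		yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } }
	},
}

async function demo() {
	for await (const chunk of fakeStream) {
		console.log(chunk) // delta chunks first; usage attached to the final chunk here
	}
}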

src/api/providers/glama.ts (new file, 132 lines)

@@ -0,0 +1,132 @@
import { Anthropic } from "@anthropic-ai/sdk"
import axios from "axios"
import OpenAI from "openai"
import { ApiHandler } from "../"
import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInfo } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import delay from "delay"
export class GlamaHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: OpenAI

	constructor(options: ApiHandlerOptions) {
		this.options = options
		this.client = new OpenAI({
			baseURL: "https://glama.ai/api/gateway/openai/v1",
			apiKey: this.options.glamaApiKey,
		})
	}

	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Convert Anthropic messages to OpenAI format
		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
			{ role: "system", content: systemPrompt },
			...convertToOpenAiMessages(messages),
		]

		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
		if (this.getModel().id.startsWith("anthropic/claude-3")) {
			openAiMessages[0] = {
				role: "system",
				content: [
					{
						type: "text",
						text: systemPrompt,
						// @ts-ignore-next-line
						cache_control: { type: "ephemeral" },
					},
				],
			}

			// Add cache_control to the last two user messages
			// (note: this works because we only ever add one user message at a time,
			// but if we added multiple we'd need to mark the user message before the last assistant message)
			const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
			lastTwoUserMessages.forEach((msg) => {
				if (typeof msg.content === "string") {
					msg.content = [{ type: "text", text: msg.content }]
				}
				if (Array.isArray(msg.content)) {
					// NOTE: this is fine since env details will always be added at the end.
					// but if it weren't there, and the user added a image_url type message,
					// it would pop a text part before it and then move it after to the end.
					let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
					if (!lastTextPart) {
						lastTextPart = { type: "text", text: "..." }
						msg.content.push(lastTextPart)
					}
					// @ts-ignore-next-line
					lastTextPart["cache_control"] = { type: "ephemeral" }
				}
			})
		}

		// Required by Anthropic
		// Other providers default to max tokens allowed.
		let maxTokens: number | undefined
		if (this.getModel().id.startsWith("anthropic/")) {
			maxTokens = 8_192
		}

		const { data: completion, response } = await this.client.chat.completions.create({
			model: this.getModel().id,
			max_tokens: maxTokens,
			temperature: 0,
			messages: openAiMessages,
			stream: true,
		}).withResponse();

		const completionRequestId = response.headers.get(
			'x-completion-request-id',
		);

		for await (const chunk of completion) {
			const delta = chunk.choices[0]?.delta
			if (delta?.content) {
				yield {
					type: "text",
					text: delta.content,
				}
			}
		}

		try {
			const response = await axios.get(`https://glama.ai/api/gateway/v1/completion-requests/${completionRequestId}`, {
				headers: {
					Authorization: `Bearer ${this.options.glamaApiKey}`,
				},
			})
			const completionRequest = response.data;

			if (completionRequest.tokenUsage) {
				yield {
					type: "usage",
					cacheWriteTokens: completionRequest.tokenUsage.cacheCreationInputTokens,
					cacheReadTokens: completionRequest.tokenUsage.cacheReadInputTokens,
					inputTokens: completionRequest.tokenUsage.promptTokens,
					outputTokens: completionRequest.tokenUsage.completionTokens,
					totalCost: parseFloat(completionRequest.totalCostUsd),
				}
			}
		} catch (error) {
			console.error("Error fetching Glama completion details", error)
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.glamaModelId
		const modelInfo = this.options.glamaModelInfo
		if (modelId && modelInfo) {
			return { id: modelId, info: modelInfo }
		}
		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
	}
}
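To make the prompt-caching branch above concrete: for an anthropic/claude-3* model, the system prompt and the last text part of the last two user messages end up marked as ephemeral cache breakpoints. A hedged sketch of the resulting message shape (the literal strings are illustrative, not from the diff):

// Shape after the cache_control rewrite in createMessage().
const transformedMessages = [
	{
		role: "system",
		content: [
			{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
		],
	},
	{
		role: "user",
		content: [
			{ type: "text", text: "Summarize this file.", cache_control: { type: "ephemeral" } },
		],
	},
]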


@@ -32,42 +32,64 @@ export class OpenAiHandler implements ApiHandler {
 		}
 	}
 
-	// Include stream_options for OpenAI Compatible providers if the checkbox is checked
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
-		]
 		const modelInfo = this.getModel().info
-		const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
-			model: this.options.openAiModelId ?? "",
-			messages: openAiMessages,
-			temperature: 0,
-			stream: true,
-		}
-		if (this.options.includeMaxTokens) {
-			requestOptions.max_tokens = modelInfo.maxTokens
-		}
-		if (this.options.includeStreamOptions ?? true) {
-			requestOptions.stream_options = { include_usage: true }
-		}
-		const stream = await this.client.chat.completions.create(requestOptions)
-		for await (const chunk of stream) {
-			const delta = chunk.choices[0]?.delta
-			if (delta?.content) {
-				yield {
-					type: "text",
-					text: delta.content,
-				}
-			}
-			if (chunk.usage) {
-				yield {
-					type: "usage",
-					inputTokens: chunk.usage.prompt_tokens || 0,
-					outputTokens: chunk.usage.completion_tokens || 0,
-				}
-			}
-		}
+		const modelId = this.options.openAiModelId ?? ""
+		if (this.options.openAiStreamingEnabled ?? true) {
+			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
+				role: "system",
+				content: systemPrompt
+			}
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+				model: modelId,
+				temperature: 0,
+				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				stream: true as const,
+				stream_options: { include_usage: true },
+			}
+			if (this.options.includeMaxTokens) {
+				requestOptions.max_tokens = modelInfo.maxTokens
+			}
+			const stream = await this.client.chat.completions.create(requestOptions)
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
+		} else {
+			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
+			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
+				role: "user",
+				content: systemPrompt
+			}
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+				model: modelId,
+				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+			}
+			const response = await this.client.chat.completions.create(requestOptions)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+			yield {
+				type: "usage",
+				inputTokens: response.usage?.prompt_tokens || 0,
+				outputTokens: response.usage?.completion_tokens || 0,
+			}
+		}
 	}
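For reference, a small sketch of how the two branches above are selected, mirroring the unit tests earlier in this commit; the import path, keys, and model ids are illustrative placeholders, not part of the commit:

// Hypothetical usage sketch.
import { OpenAiHandler } from "./src/api/providers/openai"

// Streaming path (default): the system prompt keeps role "system" and
// usage is read from the final stream chunk.
const streaming = new OpenAiHandler({
	openAiApiKey: "<key>",
	openAiBaseUrl: "https://api.openai.com/v1",
	openAiModelId: "gpt-4",
	openAiStreamingEnabled: true,
})

// Non-streaming path: for models such as o1 that reject streaming, custom
// temperature, and system prompts; the system prompt is sent as a "user"
// message and usage comes from the response body.
const nonStreaming = new OpenAiHandler({
	openAiApiKey: "<key>",
	openAiBaseUrl: "https://api.openai.com/v1",
	openAiModelId: "o1",
	openAiStreamingEnabled: false,
})

async function run() {
	for await (const chunk of nonStreaming.createMessage("You are concise.", [])) {
		// yields one "text" chunk, then one "usage" chunk
	}
}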