Implement streaming for all providers

Saoud Rizwan
2024-09-29 01:32:58 -04:00
parent 59c188019a
commit 06ccaf6f67
11 changed files with 201 additions and 161 deletions


@@ -12,7 +12,6 @@ import { ApiStream } from "./transform/stream"
 export interface ApiHandler {
 	createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
 	getModel(): { id: string; info: ModelInfo }
 }
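
Every handler's createMessage now returns an ApiStream rather than a Promise of a single message. As a rough sketch of the contract the handlers below implement: the AsyncGenerator alias and the text-chunk name are inferred from the yields in this diff and from src/api/transform/stream.ts further down, so treat the exact identifiers as assumptions.

// Sketch of the streaming contract; names inferred from this diff, not authoritative.
interface ApiStreamTextChunk {
	type: "text"
	text: string
}

interface ApiStreamUsageChunk {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number // only yielded by providers with prompt caching
	cacheReadTokens?: number
	totalCost?: number // openrouter
}

type ApiStreamChunk = ApiStreamTextChunk | ApiStreamUsageChunk
export type ApiStream = AsyncGenerator<ApiStreamChunk>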


@@ -117,8 +117,8 @@ export class AnthropicHandler implements ApiHandler {
type: "usage", type: "usage",
inputTokens: usage.input_tokens || 0, inputTokens: usage.input_tokens || 0,
outputTokens: usage.output_tokens || 0, outputTokens: usage.output_tokens || 0,
cacheWriteTokens: usage.cache_creation_input_tokens || 0, cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
cacheReadTokens: usage.cache_read_input_tokens || 0, cacheReadTokens: usage.cache_read_input_tokens || undefined,
} }
break break
case "message_delta": case "message_delta":
@@ -128,8 +128,6 @@ export class AnthropicHandler implements ApiHandler {
type: "usage", type: "usage",
inputTokens: 0, inputTokens: 0,
outputTokens: chunk.usage.output_tokens || 0, outputTokens: chunk.usage.output_tokens || 0,
cacheWriteTokens: 0,
cacheReadTokens: 0,
} }
break break
case "message_stop": case "message_stop":


@@ -1,7 +1,8 @@
 import AnthropicBedrock from "@anthropic-ai/bedrock-sdk"
 import { Anthropic } from "@anthropic-ai/sdk"
-import { ApiHandler, ApiHandlerMessageResponse } from "../"
+import { ApiHandler } from "../"
 import { ApiHandlerOptions, bedrockDefaultModelId, BedrockModelId, bedrockModels, ModelInfo } from "../../shared/api"
+import { ApiStream } from "../transform/stream"
 // https://docs.anthropic.com/en/api/claude-on-amazon-bedrock
 export class AwsBedrockHandler implements ApiHandler {
@@ -23,21 +24,61 @@ export class AwsBedrockHandler implements ApiHandler {
 		})
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
-		const message = await this.client.messages.create({
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const stream = await this.client.messages.create({
 			model: this.getModel().id,
 			max_tokens: this.getModel().info.maxTokens,
-			temperature: 0.2,
+			temperature: 0,
 			system: systemPrompt,
 			messages,
-			tools,
-			tool_choice: { type: "auto" },
+			stream: true,
 		})
-		return { message }
+		for await (const chunk of stream) {
+			switch (chunk.type) {
+				case "message_start":
+					const usage = chunk.message.usage
+					yield {
+						type: "usage",
+						inputTokens: usage.input_tokens || 0,
+						outputTokens: usage.output_tokens || 0,
+					}
+					break
+				case "message_delta":
+					yield {
+						type: "usage",
+						inputTokens: 0,
+						outputTokens: chunk.usage.output_tokens || 0,
+					}
+					break
+				case "content_block_start":
+					switch (chunk.content_block.type) {
+						case "text":
+							if (chunk.index > 0) {
+								yield {
+									type: "text",
+									text: "\n",
+								}
+							}
+							yield {
+								type: "text",
+								text: chunk.content_block.text,
+							}
+							break
+					}
+					break
+				case "content_block_delta":
+					switch (chunk.delta.type) {
+						case "text_delta":
+							yield {
+								type: "text",
+								text: chunk.delta.text,
+							}
+							break
+					}
+					break
+			}
+		}
 	}
 	getModel(): { id: BedrockModelId; info: ModelInfo } {


@@ -1,12 +1,9 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import { FunctionCallingMode, GoogleGenerativeAI } from "@google/generative-ai"
-import { ApiHandler, ApiHandlerMessageResponse } from "../"
+import { GoogleGenerativeAI } from "@google/generative-ai"
+import { ApiHandler } from "../"
 import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
-import {
-	convertAnthropicMessageToGemini,
-	convertAnthropicToolToGemini,
-	convertGeminiResponseToAnthropic,
-} from "../transform/gemini-format"
+import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
+import { ApiStream } from "../transform/stream"
 export class GeminiHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -20,31 +17,32 @@ export class GeminiHandler implements ApiHandler {
 		this.client = new GoogleGenerativeAI(options.geminiApiKey)
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.client.getGenerativeModel({
 			model: this.getModel().id,
 			systemInstruction: systemPrompt,
-			tools: [{ functionDeclarations: tools.map(convertAnthropicToolToGemini) }],
-			toolConfig: {
-				functionCallingConfig: {
-					mode: FunctionCallingMode.AUTO,
-				},
-			},
 		})
-		const result = await model.generateContent({
+		const result = await model.generateContentStream({
 			contents: messages.map(convertAnthropicMessageToGemini),
 			generationConfig: {
 				maxOutputTokens: this.getModel().info.maxTokens,
-				temperature: 0.2,
+				temperature: 0,
 			},
 		})
-		const message = convertGeminiResponseToAnthropic(result.response)
-		return { message }
+		for await (const chunk of result.stream) {
+			yield {
+				type: "text",
+				text: chunk.text(),
+			}
+		}
+		const response = await result.response
+		yield {
+			type: "usage",
+			inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
+			outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
+		}
 	}
 	getModel(): { id: GeminiModelId; info: ModelInfo } {


@@ -1,8 +1,9 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandler, ApiHandlerMessageResponse } from "../"
+import { ApiHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { ApiStream } from "../transform/stream"
 export class OllamaHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -16,37 +17,27 @@ export class OllamaHandler implements ApiHandler {
 		})
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
-		const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
-			type: "function",
-			function: {
-				name: tool.name,
-				description: tool.description,
-				parameters: tool.input_schema,
-			},
-		}))
-		const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+		const stream = await this.client.chat.completions.create({
 			model: this.options.ollamaModelId ?? "",
 			messages: openAiMessages,
-			temperature: 0.2,
-			tools: openAiTools,
-			tool_choice: "auto",
-		}
-		const completion = await this.client.chat.completions.create(createParams)
-		const errorMessage = (completion as any).error?.message
-		if (errorMessage) {
-			throw new Error(errorMessage)
-		}
-		const anthropicMessage = convertToAnthropicMessage(completion)
-		return { message: anthropicMessage }
+			temperature: 0,
+			stream: true,
+		})
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+		}
 	}
 	getModel(): { id: string; info: ModelInfo } {


@@ -1,6 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandler, ApiHandlerMessageResponse } from "../"
+import { ApiHandler } from "../"
 import {
 	ApiHandlerOptions,
 	ModelInfo,
@@ -8,8 +8,8 @@ import {
 	OpenAiNativeModelId,
 	openAiNativeModels,
 } from "../../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
-import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../transform/o1-format"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { ApiStream } from "../transform/stream"
 export class OpenAiNativeHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -22,65 +22,39 @@ export class OpenAiNativeHandler implements ApiHandler {
 		})
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
-		const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
-			type: "function",
-			function: {
-				name: tool.name,
-				description: tool.description,
-				parameters: tool.input_schema,
-			},
-		}))
-		let createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
-		switch (this.getModel().id) {
-			case "o1-preview":
-			case "o1-mini":
-				createParams = {
-					model: this.getModel().id,
-					max_completion_tokens: this.getModel().info.maxTokens,
-					messages: convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt),
-				}
-				break
-			default:
-				createParams = {
-					model: this.getModel().id,
-					max_completion_tokens: this.getModel().info.maxTokens,
-					temperature: 0.2,
-					messages: openAiMessages,
-					tools: openAiTools,
-					tool_choice: "auto",
-				}
-				break
-		}
-		const completion = await this.client.chat.completions.create(createParams)
-		const errorMessage = (completion as any).error?.message
-		if (errorMessage) {
-			throw new Error(errorMessage)
-		}
-		let anthropicMessage: Anthropic.Messages.Message
-		switch (this.getModel().id) {
-			case "o1-preview":
-			case "o1-mini":
-				anthropicMessage = convertO1ResponseToAnthropicMessage(completion)
-				break
-			default:
-				anthropicMessage = convertToAnthropicMessage(completion)
-				break
-		}
-		return { message: anthropicMessage }
+		const stream = await this.client.chat.completions.create({
+			model: this.getModel().id,
+			max_completion_tokens: this.getModel().info.maxTokens,
+			temperature: 0,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: { include_usage: true },
+		})
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+			// contains a null value except for the last chunk which contains the token usage statistics for the entire request
+			if (chunk.usage) {
+				yield {
+					type: "usage",
+					inputTokens: chunk.usage.prompt_tokens || 0,
+					outputTokens: chunk.usage.completion_tokens || 0,
+				}
+			}
+		}
 	}
 	getModel(): { id: OpenAiNativeModelId; info: ModelInfo } {


@@ -1,13 +1,14 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI, { AzureOpenAI } from "openai"
-import { ApiHandler, ApiHandlerMessageResponse } from "../index"
 import {
 	ApiHandlerOptions,
 	azureOpenAiDefaultApiVersion,
 	ModelInfo,
 	openAiModelInfoSaneDefaults,
 } from "../../shared/api"
-import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
+import { ApiHandler } from "../index"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { ApiStream } from "../transform/stream"
 export class OpenAiHandler implements ApiHandler {
 	private options: ApiHandlerOptions
@@ -30,37 +31,34 @@ export class OpenAiHandler implements ApiHandler {
 		}
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
-		const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
-			type: "function",
-			function: {
-				name: tool.name,
-				description: tool.description,
-				parameters: tool.input_schema,
-			},
-		}))
-		const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+		const stream = await this.client.chat.completions.create({
 			model: this.options.openAiModelId ?? "",
 			messages: openAiMessages,
-			temperature: 0.2,
-			tools: openAiTools,
-			tool_choice: "auto",
-		}
-		const completion = await this.client.chat.completions.create(createParams)
-		const errorMessage = (completion as any).error?.message
-		if (errorMessage) {
-			throw new Error(errorMessage)
-		}
-		const anthropicMessage = convertToAnthropicMessage(completion)
-		return { message: anthropicMessage }
+			temperature: 0,
+			stream: true,
+			stream_options: { include_usage: true },
+		})
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+			if (chunk.usage) {
+				yield {
+					type: "usage",
+					inputTokens: chunk.usage.prompt_tokens || 0,
+					outputTokens: chunk.usage.completion_tokens || 0,
+				}
+			}
+		}
 	}
 	getModel(): { id: string; info: ModelInfo } {


@@ -124,8 +124,8 @@ export class OpenRouterHandler implements ApiHandler {
type: "usage", type: "usage",
inputTokens: generation?.native_tokens_prompt || 0, inputTokens: generation?.native_tokens_prompt || 0,
outputTokens: generation?.native_tokens_completion || 0, outputTokens: generation?.native_tokens_completion || 0,
cacheWriteTokens: 0, // cacheWriteTokens: 0,
cacheReadTokens: 0, // cacheReadTokens: 0,
totalCost: generation?.total_cost || 0, totalCost: generation?.total_cost || 0,
} }
} catch (error) { } catch (error) {


@@ -1,7 +1,8 @@
-import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { Anthropic } from "@anthropic-ai/sdk"
-import { ApiHandler, ApiHandlerMessageResponse } from "../"
+import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
+import { ApiHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
+import { ApiStream } from "../transform/stream"
 // https://docs.anthropic.com/en/api/claude-on-vertex-ai
 export class VertexHandler implements ApiHandler {
@@ -17,21 +18,61 @@ export class VertexHandler implements ApiHandler {
 		})
 	}
-	async createMessage(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		tools: Anthropic.Messages.Tool[]
-	): Promise<ApiHandlerMessageResponse> {
-		const message = await this.client.messages.create({
+	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const stream = await this.client.messages.create({
 			model: this.getModel().id,
 			max_tokens: this.getModel().info.maxTokens,
-			temperature: 0.2,
+			temperature: 0,
 			system: systemPrompt,
 			messages,
-			tools,
-			tool_choice: { type: "auto" },
+			stream: true,
 		})
-		return { message }
+		for await (const chunk of stream) {
+			switch (chunk.type) {
+				case "message_start":
+					const usage = chunk.message.usage
+					yield {
+						type: "usage",
+						inputTokens: usage.input_tokens || 0,
+						outputTokens: usage.output_tokens || 0,
+					}
+					break
+				case "message_delta":
+					yield {
+						type: "usage",
+						inputTokens: 0,
+						outputTokens: chunk.usage.output_tokens || 0,
+					}
+					break
+				case "content_block_start":
+					switch (chunk.content_block.type) {
+						case "text":
+							if (chunk.index > 0) {
+								yield {
+									type: "text",
+									text: "\n",
+								}
+							}
+							yield {
+								type: "text",
+								text: chunk.content_block.text,
+							}
+							break
+					}
+					break
+				case "content_block_delta":
+					switch (chunk.delta.type) {
+						case "text_delta":
+							yield {
+								type: "text",
+								text: chunk.delta.text,
+							}
+							break
+					}
+					break
+			}
+		}
 	}
 	getModel(): { id: VertexModelId; info: ModelInfo } {


@@ -10,7 +10,7 @@ export interface ApiStreamUsageChunk {
type: "usage" type: "usage"
inputTokens: number inputTokens: number
outputTokens: number outputTokens: number
cacheWriteTokens: number cacheWriteTokens?: number
cacheReadTokens: number cacheReadTokens?: number
totalCost?: number // openrouter totalCost?: number // openrouter
} }


@@ -2397,8 +2397,8 @@ ${this.customInstructions.trim()}
case "usage": case "usage":
inputTokens += chunk.inputTokens inputTokens += chunk.inputTokens
outputTokens += chunk.outputTokens outputTokens += chunk.outputTokens
cacheWriteTokens += chunk.cacheWriteTokens cacheWriteTokens += chunk.cacheWriteTokens ?? 0
cacheReadTokens += chunk.cacheReadTokens cacheReadTokens += chunk.cacheReadTokens ?? 0
totalCost = chunk.totalCost totalCost = chunk.totalCost
break break
case "text": case "text":