mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 12:21:13 -05:00
Implement streaming for all providers
This commit is contained in:
@@ -12,7 +12,6 @@ import { ApiStream } from "./transform/stream"
|
|||||||
|
|
||||||
export interface ApiHandler {
|
export interface ApiHandler {
|
||||||
createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
|
createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
|
||||||
|
|
||||||
getModel(): { id: string; info: ModelInfo }
|
getModel(): { id: string; info: ModelInfo }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -117,8 +117,8 @@ export class AnthropicHandler implements ApiHandler {
|
|||||||
type: "usage",
|
type: "usage",
|
||||||
inputTokens: usage.input_tokens || 0,
|
inputTokens: usage.input_tokens || 0,
|
||||||
outputTokens: usage.output_tokens || 0,
|
outputTokens: usage.output_tokens || 0,
|
||||||
cacheWriteTokens: usage.cache_creation_input_tokens || 0,
|
cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
|
||||||
cacheReadTokens: usage.cache_read_input_tokens || 0,
|
cacheReadTokens: usage.cache_read_input_tokens || undefined,
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
case "message_delta":
|
case "message_delta":
|
||||||
@@ -128,8 +128,6 @@ export class AnthropicHandler implements ApiHandler {
|
|||||||
type: "usage",
|
type: "usage",
|
||||||
inputTokens: 0,
|
inputTokens: 0,
|
||||||
outputTokens: chunk.usage.output_tokens || 0,
|
outputTokens: chunk.usage.output_tokens || 0,
|
||||||
cacheWriteTokens: 0,
|
|
||||||
cacheReadTokens: 0,
|
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
case "message_stop":
|
case "message_stop":
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import AnthropicBedrock from "@anthropic-ai/bedrock-sdk"
|
import AnthropicBedrock from "@anthropic-ai/bedrock-sdk"
|
||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../"
|
import { ApiHandler } from "../"
|
||||||
import { ApiHandlerOptions, bedrockDefaultModelId, BedrockModelId, bedrockModels, ModelInfo } from "../../shared/api"
|
import { ApiHandlerOptions, bedrockDefaultModelId, BedrockModelId, bedrockModels, ModelInfo } from "../../shared/api"
|
||||||
|
import { ApiStream } from "../transform/stream"
|
||||||
|
|
||||||
// https://docs.anthropic.com/en/api/claude-on-amazon-bedrock
|
// https://docs.anthropic.com/en/api/claude-on-amazon-bedrock
|
||||||
export class AwsBedrockHandler implements ApiHandler {
|
export class AwsBedrockHandler implements ApiHandler {
|
||||||
@@ -23,21 +24,61 @@ export class AwsBedrockHandler implements ApiHandler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
const stream = await this.client.messages.create({
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const message = await this.client.messages.create({
|
|
||||||
model: this.getModel().id,
|
model: this.getModel().id,
|
||||||
max_tokens: this.getModel().info.maxTokens,
|
max_tokens: this.getModel().info.maxTokens,
|
||||||
temperature: 0.2,
|
temperature: 0,
|
||||||
system: systemPrompt,
|
system: systemPrompt,
|
||||||
messages,
|
messages,
|
||||||
tools,
|
stream: true,
|
||||||
tool_choice: { type: "auto" },
|
|
||||||
})
|
})
|
||||||
return { message }
|
for await (const chunk of stream) {
|
||||||
|
switch (chunk.type) {
|
||||||
|
case "message_start":
|
||||||
|
const usage = chunk.message.usage
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
case "message_delta":
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: 0,
|
||||||
|
outputTokens: chunk.usage.output_tokens || 0,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
|
||||||
|
case "content_block_start":
|
||||||
|
switch (chunk.content_block.type) {
|
||||||
|
case "text":
|
||||||
|
if (chunk.index > 0) {
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: "\n",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: chunk.content_block.text,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
break
|
||||||
|
case "content_block_delta":
|
||||||
|
switch (chunk.delta.type) {
|
||||||
|
case "text_delta":
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: chunk.delta.text,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: BedrockModelId; info: ModelInfo } {
|
getModel(): { id: BedrockModelId; info: ModelInfo } {
|
||||||
|
|||||||
@@ -1,12 +1,9 @@
|
|||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import { FunctionCallingMode, GoogleGenerativeAI } from "@google/generative-ai"
|
import { GoogleGenerativeAI } from "@google/generative-ai"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../"
|
import { ApiHandler } from "../"
|
||||||
import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
|
import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
|
||||||
import {
|
import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
|
||||||
convertAnthropicMessageToGemini,
|
import { ApiStream } from "../transform/stream"
|
||||||
convertAnthropicToolToGemini,
|
|
||||||
convertGeminiResponseToAnthropic,
|
|
||||||
} from "../transform/gemini-format"
|
|
||||||
|
|
||||||
export class GeminiHandler implements ApiHandler {
|
export class GeminiHandler implements ApiHandler {
|
||||||
private options: ApiHandlerOptions
|
private options: ApiHandlerOptions
|
||||||
@@ -20,31 +17,32 @@ export class GeminiHandler implements ApiHandler {
|
|||||||
this.client = new GoogleGenerativeAI(options.geminiApiKey)
|
this.client = new GoogleGenerativeAI(options.geminiApiKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const model = this.client.getGenerativeModel({
|
const model = this.client.getGenerativeModel({
|
||||||
model: this.getModel().id,
|
model: this.getModel().id,
|
||||||
systemInstruction: systemPrompt,
|
systemInstruction: systemPrompt,
|
||||||
tools: [{ functionDeclarations: tools.map(convertAnthropicToolToGemini) }],
|
|
||||||
toolConfig: {
|
|
||||||
functionCallingConfig: {
|
|
||||||
mode: FunctionCallingMode.AUTO,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
const result = await model.generateContent({
|
const result = await model.generateContentStream({
|
||||||
contents: messages.map(convertAnthropicMessageToGemini),
|
contents: messages.map(convertAnthropicMessageToGemini),
|
||||||
generationConfig: {
|
generationConfig: {
|
||||||
maxOutputTokens: this.getModel().info.maxTokens,
|
maxOutputTokens: this.getModel().info.maxTokens,
|
||||||
temperature: 0.2,
|
temperature: 0,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
const message = convertGeminiResponseToAnthropic(result.response)
|
|
||||||
|
|
||||||
return { message }
|
for await (const chunk of result.stream) {
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: chunk.text(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await result.response
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
|
||||||
|
outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: GeminiModelId; info: ModelInfo } {
|
getModel(): { id: GeminiModelId; info: ModelInfo } {
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import OpenAI from "openai"
|
import OpenAI from "openai"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../"
|
import { ApiHandler } from "../"
|
||||||
import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
|
import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
|
||||||
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
|
import { convertToOpenAiMessages } from "../transform/openai-format"
|
||||||
|
import { ApiStream } from "../transform/stream"
|
||||||
|
|
||||||
export class OllamaHandler implements ApiHandler {
|
export class OllamaHandler implements ApiHandler {
|
||||||
private options: ApiHandlerOptions
|
private options: ApiHandlerOptions
|
||||||
@@ -16,37 +17,27 @@ export class OllamaHandler implements ApiHandler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
||||||
{ role: "system", content: systemPrompt },
|
{ role: "system", content: systemPrompt },
|
||||||
...convertToOpenAiMessages(messages),
|
...convertToOpenAiMessages(messages),
|
||||||
]
|
]
|
||||||
const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
|
|
||||||
type: "function",
|
const stream = await this.client.chat.completions.create({
|
||||||
function: {
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
parameters: tool.input_schema,
|
|
||||||
},
|
|
||||||
}))
|
|
||||||
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
|
|
||||||
model: this.options.ollamaModelId ?? "",
|
model: this.options.ollamaModelId ?? "",
|
||||||
messages: openAiMessages,
|
messages: openAiMessages,
|
||||||
temperature: 0.2,
|
temperature: 0,
|
||||||
tools: openAiTools,
|
stream: true,
|
||||||
tool_choice: "auto",
|
})
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
const delta = chunk.choices[0]?.delta
|
||||||
|
if (delta?.content) {
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: delta.content,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const completion = await this.client.chat.completions.create(createParams)
|
|
||||||
const errorMessage = (completion as any).error?.message
|
|
||||||
if (errorMessage) {
|
|
||||||
throw new Error(errorMessage)
|
|
||||||
}
|
|
||||||
const anthropicMessage = convertToAnthropicMessage(completion)
|
|
||||||
return { message: anthropicMessage }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: string; info: ModelInfo } {
|
getModel(): { id: string; info: ModelInfo } {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import OpenAI from "openai"
|
import OpenAI from "openai"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../"
|
import { ApiHandler } from "../"
|
||||||
import {
|
import {
|
||||||
ApiHandlerOptions,
|
ApiHandlerOptions,
|
||||||
ModelInfo,
|
ModelInfo,
|
||||||
@@ -8,8 +8,8 @@ import {
|
|||||||
OpenAiNativeModelId,
|
OpenAiNativeModelId,
|
||||||
openAiNativeModels,
|
openAiNativeModels,
|
||||||
} from "../../shared/api"
|
} from "../../shared/api"
|
||||||
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
|
import { convertToOpenAiMessages } from "../transform/openai-format"
|
||||||
import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../transform/o1-format"
|
import { ApiStream } from "../transform/stream"
|
||||||
|
|
||||||
export class OpenAiNativeHandler implements ApiHandler {
|
export class OpenAiNativeHandler implements ApiHandler {
|
||||||
private options: ApiHandlerOptions
|
private options: ApiHandlerOptions
|
||||||
@@ -22,65 +22,39 @@ export class OpenAiNativeHandler implements ApiHandler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
||||||
{ role: "system", content: systemPrompt },
|
{ role: "system", content: systemPrompt },
|
||||||
...convertToOpenAiMessages(messages),
|
...convertToOpenAiMessages(messages),
|
||||||
]
|
]
|
||||||
const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
|
|
||||||
type: "function",
|
|
||||||
function: {
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
parameters: tool.input_schema,
|
|
||||||
},
|
|
||||||
}))
|
|
||||||
|
|
||||||
let createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
|
const stream = await this.client.chat.completions.create({
|
||||||
|
model: this.getModel().id,
|
||||||
|
max_completion_tokens: this.getModel().info.maxTokens,
|
||||||
|
temperature: 0,
|
||||||
|
messages: openAiMessages,
|
||||||
|
stream: true,
|
||||||
|
stream_options: { include_usage: true },
|
||||||
|
})
|
||||||
|
|
||||||
switch (this.getModel().id) {
|
for await (const chunk of stream) {
|
||||||
case "o1-preview":
|
const delta = chunk.choices[0]?.delta
|
||||||
case "o1-mini":
|
if (delta?.content) {
|
||||||
createParams = {
|
yield {
|
||||||
model: this.getModel().id,
|
type: "text",
|
||||||
max_completion_tokens: this.getModel().info.maxTokens,
|
text: delta.content,
|
||||||
messages: convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt),
|
|
||||||
}
|
}
|
||||||
break
|
}
|
||||||
default:
|
|
||||||
createParams = {
|
// contains a null value except for the last chunk which contains the token usage statistics for the entire request
|
||||||
model: this.getModel().id,
|
if (chunk.usage) {
|
||||||
max_completion_tokens: this.getModel().info.maxTokens,
|
yield {
|
||||||
temperature: 0.2,
|
type: "usage",
|
||||||
messages: openAiMessages,
|
inputTokens: chunk.usage.prompt_tokens || 0,
|
||||||
tools: openAiTools,
|
outputTokens: chunk.usage.completion_tokens || 0,
|
||||||
tool_choice: "auto",
|
|
||||||
}
|
}
|
||||||
break
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const completion = await this.client.chat.completions.create(createParams)
|
|
||||||
const errorMessage = (completion as any).error?.message
|
|
||||||
if (errorMessage) {
|
|
||||||
throw new Error(errorMessage)
|
|
||||||
}
|
|
||||||
|
|
||||||
let anthropicMessage: Anthropic.Messages.Message
|
|
||||||
switch (this.getModel().id) {
|
|
||||||
case "o1-preview":
|
|
||||||
case "o1-mini":
|
|
||||||
anthropicMessage = convertO1ResponseToAnthropicMessage(completion)
|
|
||||||
break
|
|
||||||
default:
|
|
||||||
anthropicMessage = convertToAnthropicMessage(completion)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
return { message: anthropicMessage }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: OpenAiNativeModelId; info: ModelInfo } {
|
getModel(): { id: OpenAiNativeModelId; info: ModelInfo } {
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import OpenAI, { AzureOpenAI } from "openai"
|
import OpenAI, { AzureOpenAI } from "openai"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../index"
|
|
||||||
import {
|
import {
|
||||||
ApiHandlerOptions,
|
ApiHandlerOptions,
|
||||||
azureOpenAiDefaultApiVersion,
|
azureOpenAiDefaultApiVersion,
|
||||||
ModelInfo,
|
ModelInfo,
|
||||||
openAiModelInfoSaneDefaults,
|
openAiModelInfoSaneDefaults,
|
||||||
} from "../../shared/api"
|
} from "../../shared/api"
|
||||||
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../transform/openai-format"
|
import { ApiHandler } from "../index"
|
||||||
|
import { convertToOpenAiMessages } from "../transform/openai-format"
|
||||||
|
import { ApiStream } from "../transform/stream"
|
||||||
|
|
||||||
export class OpenAiHandler implements ApiHandler {
|
export class OpenAiHandler implements ApiHandler {
|
||||||
private options: ApiHandlerOptions
|
private options: ApiHandlerOptions
|
||||||
@@ -30,37 +31,34 @@ export class OpenAiHandler implements ApiHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
|
||||||
{ role: "system", content: systemPrompt },
|
{ role: "system", content: systemPrompt },
|
||||||
...convertToOpenAiMessages(messages),
|
...convertToOpenAiMessages(messages),
|
||||||
]
|
]
|
||||||
const openAiTools: OpenAI.Chat.ChatCompletionTool[] = tools.map((tool) => ({
|
const stream = await this.client.chat.completions.create({
|
||||||
type: "function",
|
|
||||||
function: {
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
parameters: tool.input_schema,
|
|
||||||
},
|
|
||||||
}))
|
|
||||||
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
|
|
||||||
model: this.options.openAiModelId ?? "",
|
model: this.options.openAiModelId ?? "",
|
||||||
messages: openAiMessages,
|
messages: openAiMessages,
|
||||||
temperature: 0.2,
|
temperature: 0,
|
||||||
tools: openAiTools,
|
stream: true,
|
||||||
tool_choice: "auto",
|
stream_options: { include_usage: true },
|
||||||
|
})
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
const delta = chunk.choices[0]?.delta
|
||||||
|
if (delta?.content) {
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: delta.content,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (chunk.usage) {
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: chunk.usage.prompt_tokens || 0,
|
||||||
|
outputTokens: chunk.usage.completion_tokens || 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const completion = await this.client.chat.completions.create(createParams)
|
|
||||||
const errorMessage = (completion as any).error?.message
|
|
||||||
if (errorMessage) {
|
|
||||||
throw new Error(errorMessage)
|
|
||||||
}
|
|
||||||
const anthropicMessage = convertToAnthropicMessage(completion)
|
|
||||||
return { message: anthropicMessage }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: string; info: ModelInfo } {
|
getModel(): { id: string; info: ModelInfo } {
|
||||||
|
|||||||
@@ -124,8 +124,8 @@ export class OpenRouterHandler implements ApiHandler {
|
|||||||
type: "usage",
|
type: "usage",
|
||||||
inputTokens: generation?.native_tokens_prompt || 0,
|
inputTokens: generation?.native_tokens_prompt || 0,
|
||||||
outputTokens: generation?.native_tokens_completion || 0,
|
outputTokens: generation?.native_tokens_completion || 0,
|
||||||
cacheWriteTokens: 0,
|
// cacheWriteTokens: 0,
|
||||||
cacheReadTokens: 0,
|
// cacheReadTokens: 0,
|
||||||
totalCost: generation?.total_cost || 0,
|
totalCost: generation?.total_cost || 0,
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
|
|
||||||
import { Anthropic } from "@anthropic-ai/sdk"
|
import { Anthropic } from "@anthropic-ai/sdk"
|
||||||
import { ApiHandler, ApiHandlerMessageResponse } from "../"
|
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
|
||||||
|
import { ApiHandler } from "../"
|
||||||
import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
|
import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
|
||||||
|
import { ApiStream } from "../transform/stream"
|
||||||
|
|
||||||
// https://docs.anthropic.com/en/api/claude-on-vertex-ai
|
// https://docs.anthropic.com/en/api/claude-on-vertex-ai
|
||||||
export class VertexHandler implements ApiHandler {
|
export class VertexHandler implements ApiHandler {
|
||||||
@@ -17,21 +18,61 @@ export class VertexHandler implements ApiHandler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async createMessage(
|
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
|
||||||
systemPrompt: string,
|
const stream = await this.client.messages.create({
|
||||||
messages: Anthropic.Messages.MessageParam[],
|
|
||||||
tools: Anthropic.Messages.Tool[]
|
|
||||||
): Promise<ApiHandlerMessageResponse> {
|
|
||||||
const message = await this.client.messages.create({
|
|
||||||
model: this.getModel().id,
|
model: this.getModel().id,
|
||||||
max_tokens: this.getModel().info.maxTokens,
|
max_tokens: this.getModel().info.maxTokens,
|
||||||
temperature: 0.2,
|
temperature: 0,
|
||||||
system: systemPrompt,
|
system: systemPrompt,
|
||||||
messages,
|
messages,
|
||||||
tools,
|
stream: true,
|
||||||
tool_choice: { type: "auto" },
|
|
||||||
})
|
})
|
||||||
return { message }
|
for await (const chunk of stream) {
|
||||||
|
switch (chunk.type) {
|
||||||
|
case "message_start":
|
||||||
|
const usage = chunk.message.usage
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
case "message_delta":
|
||||||
|
yield {
|
||||||
|
type: "usage",
|
||||||
|
inputTokens: 0,
|
||||||
|
outputTokens: chunk.usage.output_tokens || 0,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
|
||||||
|
case "content_block_start":
|
||||||
|
switch (chunk.content_block.type) {
|
||||||
|
case "text":
|
||||||
|
if (chunk.index > 0) {
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: "\n",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: chunk.content_block.text,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
break
|
||||||
|
case "content_block_delta":
|
||||||
|
switch (chunk.delta.type) {
|
||||||
|
case "text_delta":
|
||||||
|
yield {
|
||||||
|
type: "text",
|
||||||
|
text: chunk.delta.text,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getModel(): { id: VertexModelId; info: ModelInfo } {
|
getModel(): { id: VertexModelId; info: ModelInfo } {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ export interface ApiStreamUsageChunk {
|
|||||||
type: "usage"
|
type: "usage"
|
||||||
inputTokens: number
|
inputTokens: number
|
||||||
outputTokens: number
|
outputTokens: number
|
||||||
cacheWriteTokens: number
|
cacheWriteTokens?: number
|
||||||
cacheReadTokens: number
|
cacheReadTokens?: number
|
||||||
totalCost?: number // openrouter
|
totalCost?: number // openrouter
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2397,8 +2397,8 @@ ${this.customInstructions.trim()}
|
|||||||
case "usage":
|
case "usage":
|
||||||
inputTokens += chunk.inputTokens
|
inputTokens += chunk.inputTokens
|
||||||
outputTokens += chunk.outputTokens
|
outputTokens += chunk.outputTokens
|
||||||
cacheWriteTokens += chunk.cacheWriteTokens
|
cacheWriteTokens += chunk.cacheWriteTokens ?? 0
|
||||||
cacheReadTokens += chunk.cacheReadTokens
|
cacheReadTokens += chunk.cacheReadTokens ?? 0
|
||||||
totalCost = chunk.totalCost
|
totalCost = chunk.totalCost
|
||||||
break
|
break
|
||||||
case "text":
|
case "text":
|
||||||
|
|||||||
Reference in New Issue
Block a user