Extend deepseek-r1 support

Piotr Rogowski
2025-01-27 21:27:24 +01:00
parent db0ec64d1c
commit cb23be6346
4 changed files with 129 additions and 17 deletions

View File

@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI, { AzureOpenAI } from "openai"
 import {
     ApiHandlerOptions,
     azureOpenAiDefaultApiVersion,
@@ -8,6 +9,7 @@ import {
} from "../../shared/api" } from "../../shared/api"
import { ApiHandler, SingleCompletionHandler } from "../index" import { ApiHandler, SingleCompletionHandler } from "../index"
import { convertToOpenAiMessages } from "../transform/openai-format" import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToR1Format } from "../transform/r1-format"
import { ApiStream } from "../transform/stream" import { ApiStream } from "../transform/stream"
export class OpenAiHandler implements ApiHandler, SingleCompletionHandler { export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
@@ -16,7 +18,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
     constructor(options: ApiHandlerOptions) {
         this.options = options
-        // Azure API shape slightly differs from the core API shape: https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+        // Azure API shape slightly differs from the core API shape:
+        // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
         const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
         if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
             this.client = new AzureOpenAI({
@@ -38,7 +41,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
         const deepseekReasoner = modelId.includes("deepseek-reasoner")
-        if (!deepseekReasoner && (this.options.openAiStreamingEnabled ?? true)) {
+        if (this.options.openAiStreamingEnabled ?? true) {
             const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
                 role: "system",
                 content: systemPrompt,
@@ -46,7 +49,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
             const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
                 model: modelId,
                 temperature: 0,
-                messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+                messages: deepseekReasoner
+                    ? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+                    : [systemMessage, ...convertToOpenAiMessages(messages)],
                 stream: true as const,
                 stream_options: { include_usage: true },
             }
@@ -64,6 +69,12 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
                         text: delta.content,
                     }
                 }
+                if ("reasoning_content" in delta && delta.reasoning_content) {
+                    yield {
+                        type: "reasoning",
+                        text: (delta.reasoning_content as string | undefined) || "",
+                    }
+                }
                 if (chunk.usage) {
                     yield {
                         type: "usage",
@@ -73,24 +84,19 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
                 }
             }
         } else {
-            let systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam | OpenAI.Chat.ChatCompletionSystemMessageParam
             // o1 for instance doesnt support streaming, non-1 temp, or system prompt
-            // deepseek reasoner supports system prompt
-            systemMessage = deepseekReasoner
-                ? {
-                        role: "system",
-                        content: systemPrompt,
-                    }
-                : {
-                        role: "user",
-                        content: systemPrompt,
-                    }
+            const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
+                role: "user",
+                content: systemPrompt,
+            }
             const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
                 model: modelId,
-                messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+                messages: deepseekReasoner
+                    ? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+                    : [systemMessage, ...convertToOpenAiMessages(messages)],
             }
             const response = await this.client.chat.completions.create(requestOptions)
             yield {
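
To make the scattered hunks easier to follow, here is a condensed sketch (not the handler verbatim; the function name streamDeepSeekR1 is illustrative) of the streaming path after this commit: for deepseek-reasoner models the system prompt is folded into the first user message via convertToR1Format, and DeepSeek's extra `reasoning_content` delta field is surfaced as a dedicated "reasoning" chunk alongside normal text.

import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
import { convertToR1Format } from "../transform/r1-format"

// Condensed sketch of the deepseek-reasoner streaming path after this commit.
async function* streamDeepSeekR1(
    client: OpenAI,
    modelId: string,
    systemPrompt: string,
    messages: Anthropic.Messages.MessageParam[],
) {
    const stream = await client.chat.completions.create({
        model: modelId,
        temperature: 0,
        // System prompt travels as a user message; consecutive roles get merged.
        messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
        stream: true,
        stream_options: { include_usage: true },
    })
    for await (const chunk of stream) {
        const delta = chunk.choices[0]?.delta
        if (delta?.content) {
            yield { type: "text", text: delta.content }
        }
        // reasoning_content is not in the OpenAI SDK types, hence the `in` guard
        if (delta && "reasoning_content" in delta && delta.reasoning_content) {
            yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
        }
    }
}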

View File

@@ -19,6 +19,7 @@ interface OpenRouterApiStreamUsageChunk extends ApiStreamUsageChunk {
 }

 import { SingleCompletionHandler } from ".."
+import { convertToR1Format } from "../transform/r1-format"

 export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
     private options: ApiHandlerOptions
@@ -41,7 +42,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
         messages: Anthropic.Messages.MessageParam[],
     ): AsyncGenerator<ApiStreamChunk> {
         // Convert Anthropic messages to OpenAI format
-        const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+        let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
             { role: "system", content: systemPrompt },
             ...convertToOpenAiMessages(messages),
         ]
@@ -117,6 +118,9 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
case "deepseek/deepseek-r1": case "deepseek/deepseek-r1":
// Recommended temperature for DeepSeek reasoning models // Recommended temperature for DeepSeek reasoning models
temperature = 0.6 temperature = 0.6
// DeepSeek highly recommends using user instead of system role
openAiMessages[0].role = "user"
openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
} }
// https://openrouter.ai/docs/transforms // https://openrouter.ai/docs/transforms
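
The effect of the new branch is easiest to see on a tiny hypothetical conversation (the prompt text below is invented; convertToR1Format is the helper added in the new file that follows):

// Hypothetical example of the r1 conversion used above: the system prompt
// is re-sent as a user message and merged with the first real user turn,
// so the request contains no system role and no successive same-role messages.
const r1Messages = convertToR1Format([
    { role: "user", content: "You are a helpful coding assistant." }, // was the system prompt
    { role: "user", content: "Refactor this function." },
])
// => [{ role: "user", content: "You are a helpful coding assistant.\nRefactor this function." }]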

View File

@@ -0,0 +1,98 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText
+type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage
+type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam
+type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam
+type Message = OpenAI.Chat.ChatCompletionMessageParam
+type AnthropicMessage = Anthropic.Messages.MessageParam
+
+/**
+ * Converts Anthropic messages to OpenAI format while merging consecutive messages with the same role.
+ * This is required for DeepSeek Reasoner which does not support successive messages with the same role.
+ *
+ * @param messages Array of Anthropic messages
+ * @returns Array of OpenAI messages where consecutive messages with the same role are combined
+ */
+export function convertToR1Format(messages: AnthropicMessage[]): Message[] {
+    return messages.reduce<Message[]>((merged, message) => {
+        const lastMessage = merged[merged.length - 1]
+        let messageContent: string | (ContentPartText | ContentPartImage)[] = ""
+        let hasImages = false
+
+        // Convert content to appropriate format
+        if (Array.isArray(message.content)) {
+            const textParts: string[] = []
+            const imageParts: ContentPartImage[] = []
+
+            message.content.forEach((part) => {
+                if (part.type === "text") {
+                    textParts.push(part.text)
+                }
+                if (part.type === "image") {
+                    hasImages = true
+                    imageParts.push({
+                        type: "image_url",
+                        image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+                    })
+                }
+            })
+
+            if (hasImages) {
+                const parts: (ContentPartText | ContentPartImage)[] = []
+                if (textParts.length > 0) {
+                    parts.push({ type: "text", text: textParts.join("\n") })
+                }
+                parts.push(...imageParts)
+                messageContent = parts
+            } else {
+                messageContent = textParts.join("\n")
+            }
+        } else {
+            messageContent = message.content
+        }
+
+        // If last message has same role, merge the content
+        if (lastMessage?.role === message.role) {
+            if (typeof lastMessage.content === "string" && typeof messageContent === "string") {
+                lastMessage.content += `\n${messageContent}`
+            }
+            // If either has image content, convert both to array format
+            else {
+                const lastContent = Array.isArray(lastMessage.content)
+                    ? lastMessage.content
+                    : [{ type: "text" as const, text: lastMessage.content || "" }]
+
+                const newContent = Array.isArray(messageContent)
+                    ? messageContent
+                    : [{ type: "text" as const, text: messageContent }]
+
+                if (message.role === "assistant") {
+                    const mergedContent = [...lastContent, ...newContent] as AssistantMessage["content"]
+                    lastMessage.content = mergedContent
+                } else {
+                    const mergedContent = [...lastContent, ...newContent] as UserMessage["content"]
+                    lastMessage.content = mergedContent
+                }
+            }
+        } else {
+            // Add as new message with the correct type based on role
+            if (message.role === "assistant") {
+                const newMessage: AssistantMessage = {
+                    role: "assistant",
+                    content: messageContent as AssistantMessage["content"],
+                }
+                merged.push(newMessage)
+            } else {
+                const newMessage: UserMessage = {
+                    role: "user",
+                    content: messageContent as UserMessage["content"],
+                }
+                merged.push(newMessage)
+            }
+        }
+
+        return merged
+    }, [])
+}
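
Since convertToR1Format is new in this commit, a small usage sketch may help; the conversation content below is hypothetical. When one of two consecutive same-role messages carries an image, the merged message is promoted to an array of text and image_url parts:

import { convertToR1Format } from "../transform/r1-format"

// Hypothetical conversation: two consecutive user messages, the second
// carrying a base64 image (data string truncated for the example).
const merged = convertToR1Format([
    { role: "user", content: "Describe this screenshot." },
    {
        role: "user",
        content: [{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0KG..." } }],
    },
    { role: "assistant", content: "It shows a settings page." },
])
// => [
//   {
//     role: "user",
//     content: [
//       { type: "text", text: "Describe this screenshot." },
//       { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KG..." } },
//     ],
//   },
//   { role: "assistant", content: "It shows a settings page." },
// ]

Note that the output strictly alternates roles, which is exactly the invariant deepseek-reasoner requires.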

View File

@@ -2391,6 +2391,10 @@ export class Cline {
         let reasoningMessage = ""
         try {
             for await (const chunk of stream) {
+                if (!chunk) {
+                    // Sometimes chunk is undefined; no idea what causes it, but this workaround seems to fix it
+                    continue
+                }
                 switch (chunk.type) {
                     case "reasoning":
                         reasoningMessage += chunk.text