From 25987dd40b4726c579cd4a896b4ed6165d78bde4 Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Sun, 29 Dec 2024 11:27:24 -0800
Subject: [PATCH] Remove truncation logic for now

---
 .changeset/modern-carrots-applaud.md         |   2 +-
 README.md                                    |   2 +-
 src/api/providers/__tests__/deepseek.test.ts |  98 +-----------------
 src/api/providers/deepseek.ts                | 102 ++++++++-----------
 4 files changed, 47 insertions(+), 157 deletions(-)

diff --git a/.changeset/modern-carrots-applaud.md b/.changeset/modern-carrots-applaud.md
index b56027d..fd0da56 100644
--- a/.changeset/modern-carrots-applaud.md
+++ b/.changeset/modern-carrots-applaud.md
@@ -2,4 +2,4 @@
 "roo-cline": patch
 ---
 
-Add the DeepSeek provider along with logic to trim messages when it hits the context window
+Add the DeepSeek provider
diff --git a/README.md b/README.md
index c8ab5fa..2a191a7 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ A fork of Cline, an autonomous coding agent, with some additional experimental f
 - Includes current time in the system prompt
 - Uses a file system watcher to more reliably watch for file system changes
 - Language selection for Cline's communication (English, Japanese, Spanish, French, German, and more)
-- Support for DeepSeek V3 with logic to trim messages when it hits the context window
+- Support for DeepSeek V3
 - Support for Meta 3, 3.1, and 3.2 models via AWS Bedrock
 - Support for listing models from OpenAI-compatible providers
 - Per-tool MCP auto-approval
diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts
index 669ab2d..dd3d30f 100644
--- a/src/api/providers/__tests__/deepseek.test.ts
+++ b/src/api/providers/__tests__/deepseek.test.ts
@@ -5,19 +5,6 @@ import { Anthropic } from '@anthropic-ai/sdk'
 
 // Mock dependencies
 jest.mock('openai')
-jest.mock('../../../shared/api', () => ({
-	...jest.requireActual('../../../shared/api'),
-	deepSeekModels: {
-		'deepseek-chat': {
-			maxTokens: 1000,
-			contextWindow: 2000,
-			supportsImages: false,
-			supportsPromptCache: false,
-			inputPrice: 0.014,
-			outputPrice: 0.28,
-		}
-	}
-}))
 
 describe('DeepSeekHandler', () => {
 
@@ -46,8 +33,8 @@ describe('DeepSeekHandler', () => {
 		expect(result).toEqual({
 			id: mockOptions.deepSeekModelId,
 			info: expect.objectContaining({
-				maxTokens: 1000,
-				contextWindow: 2000,
+				maxTokens: 8192,
+				contextWindow: 64000,
 				supportsPromptCache: false,
 				supportsImages: false,
 				inputPrice: 0.014,
@@ -61,7 +48,7 @@ describe('DeepSeekHandler', () => {
 
 		const result = handler.getModel()
 		expect(result.id).toBe('deepseek-chat')
-		expect(result.info.maxTokens).toBe(1000)
+		expect(result.info.maxTokens).toBe(8192)
 	})
 
 	test('createMessage handles string content correctly', async () => {
@@ -109,7 +96,7 @@ describe('DeepSeekHandler', () => {
 			],
 			temperature: 0,
 			stream: true,
-			max_tokens: 1000,
+			max_tokens: 8192,
 			stream_options: { include_usage: true }
 		}))
 	})
@@ -155,83 +142,6 @@ describe('DeepSeekHandler', () => {
 		}))
 	})
 
-	test('createMessage truncates messages when exceeding context window', async () => {
-		const handler = new DeepSeekHandler(mockOptions)
-		const longString = 'a'.repeat(1000) // ~300 tokens
-		const shortString = 'b'.repeat(100) // ~30 tokens
-
-		const systemPrompt = 'test system prompt'
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{ role: 'user', content: longString }, // Old message
-			{ role: 'assistant', content: 'short response' },
-			{ role: 'user', content: shortString } // Recent message
-		]
-
-		const mockStream = {
-			async *[Symbol.asyncIterator]() {
-				yield {
-					choices: [{
-						delta: {
-							content: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-						}
-					}]
-				}
-				yield {
-					choices: [{
-						delta: {
-							content: 'test response'
-						}
-					}]
-				}
-			}
-		}
-
-		const mockCreate = jest.fn().mockResolvedValue(mockStream)
-		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
-			completions: { create: mockCreate }
-		} as any
-
-		const generator = handler.createMessage(systemPrompt, messages)
-		const chunks = []
-		for await (const chunk of generator) {
-			chunks.push(chunk)
-		}
-
-		// Should get two chunks: truncation notice and response
-		expect(chunks).toHaveLength(2)
-		expect(chunks[0]).toEqual({
-			type: 'text',
-			text: expect.stringContaining('truncated')
-		})
-		expect(chunks[1]).toEqual({
-			type: 'text',
-			text: 'test response'
-		})
-
-		// Verify API call includes system prompt and recent messages, but not old message
-		expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
-			messages: expect.arrayContaining([
-				{ role: 'system', content: systemPrompt },
-				{ role: 'assistant', content: 'short response' },
-				{ role: 'user', content: shortString }
-			])
-		}))
-
-		// Verify truncation notice was included
-		expect(chunks[0]).toEqual({
-			type: 'text',
-			text: expect.stringContaining('truncated')
-		})
-
-		// Verify the messages array contains the expected messages
-		const calledMessages = mockCreate.mock.calls[0][0].messages
-		expect(calledMessages).toHaveLength(4)
-		expect(calledMessages[0]).toEqual({ role: 'system', content: systemPrompt })
-		expect(calledMessages[1]).toEqual({ role: 'user', content: longString })
-		expect(calledMessages[2]).toEqual({ role: 'assistant', content: 'short response' })
-		expect(calledMessages[3]).toEqual({ role: 'user', content: shortString })
-	})
-
 	test('createMessage handles API errors', async () => {
 		const handler = new DeepSeekHandler(mockOptions)
 		const mockStream = {
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 0cffc7d..5b0c1a2 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -20,64 +20,36 @@ export class DeepSeekHandler implements ApiHandler {
 	}
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		// Convert messages to simple format that DeepSeek expects
-		const formattedMessages = messages.map(msg => {
+		const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
+		// Format all messages
+		const messagesToInclude: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: 'system' as const, content: systemPrompt }
+		]
+
+		// Add the rest of the messages
+		for (const msg of messages) {
+			let messageContent = ""
 			if (typeof msg.content === "string") {
-				return { role: msg.role, content: msg.content }
-			}
-			// For array content, concatenate text parts
-			return {
-				role: msg.role,
-				content: msg.content.reduce((acc, part) => {
+				messageContent = msg.content
+			} else if (Array.isArray(msg.content)) {
+				messageContent = msg.content.reduce((acc, part) => {
 					if (part.type === "text") {
 						return acc + part.text
 					}
 					return acc
 				}, "")
 			}
-		})
-
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...formattedMessages,
-		]
-		const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
-
-		const contextWindow = modelInfo.contextWindow || 64_000
-		const getTokenCount = (content: string) => Math.ceil(content.length * 0.3)
-
-		// Always keep system prompt
-		const systemMsg = openAiMessages[0]
-		let availableTokens = contextWindow - getTokenCount(typeof systemMsg.content === 'string' ? systemMsg.content : '')
-
-		// Start with most recent messages and work backwards
-		const userMessages = openAiMessages.slice(1).reverse()
-		const includedMessages = []
-		let truncated = false
-
-		for (const msg of userMessages) {
-			const content = typeof msg.content === 'string' ? msg.content : ''
-			const tokens = getTokenCount(content)
-			if (tokens <= availableTokens) {
-				includedMessages.unshift(msg)
-				availableTokens -= tokens
-			} else {
-				truncated = true
-				break
-			}
-		}
-
-		if (truncated) {
-			yield {
-				type: 'text',
-				text: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-			}
+			messagesToInclude.push({
+				role: msg.role === 'user' ? 'user' as const : 'assistant' as const,
+				content: messageContent
+			})
 		}
 
 		const requestOptions: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
 			model: this.options.deepSeekModelId ?? "deepseek-chat",
-			messages: [systemMsg, ...includedMessages],
+			messages: messagesToInclude,
 			temperature: 0,
 			stream: true,
 			max_tokens: modelInfo.maxTokens,
@@ -87,22 +59,30 @@ export class DeepSeekHandler implements ApiHandler {
 			requestOptions.stream_options = { include_usage: true }
 		}
 
-		const stream = await this.client.chat.completions.create(requestOptions)
-		for await (const chunk of stream) {
-			const delta = chunk.choices[0]?.delta
-			if (delta?.content) {
-				yield {
-					type: "text",
-					text: delta.content,
-				}
-			}
-			if (chunk.usage) {
-				yield {
-					type: "usage",
-					inputTokens: chunk.usage.prompt_tokens || 0,
-					outputTokens: chunk.usage.completion_tokens || 0,
+		let totalInputTokens = 0;
+		let totalOutputTokens = 0;
+
+		try {
+			const stream = await this.client.chat.completions.create(requestOptions)
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
 				}
 			}
+		} catch (error) {
+			console.error("DeepSeek API Error:", error)
+			throw error
 		}
 	}
 
@@ -113,4 +93,4 @@ export class DeepSeekHandler implements ApiHandler {
 			info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId],
 		}
 	}
-}
\ No newline at end of file
+}