Remove truncation logic for now

Matt Rubens
2024-12-29 11:27:24 -08:00
parent eb8c4cc50f
commit 25987dd40b
4 changed files with 47 additions and 157 deletions

View File

@@ -2,4 +2,4 @@
"roo-cline": patch "roo-cline": patch
--- ---
Add the DeepSeek provider along with logic to trim messages when it hits the context window Add the DeepSeek provider

View File

@@ -13,7 +13,7 @@ A fork of Cline, an autonomous coding agent, with some additional experimental features
 - Includes current time in the system prompt
 - Uses a file system watcher to more reliably watch for file system changes
 - Language selection for Cline's communication (English, Japanese, Spanish, French, German, and more)
-- Support for DeepSeek V3 with logic to trim messages when it hits the context window
+- Support for DeepSeek V3
 - Support for Meta 3, 3.1, and 3.2 models via AWS Bedrock
 - Support for listing models from OpenAI-compatible providers
 - Per-tool MCP auto-approval

View File

@@ -5,19 +5,6 @@ import { Anthropic } from '@anthropic-ai/sdk'
 
 // Mock dependencies
 jest.mock('openai')
-jest.mock('../../../shared/api', () => ({
-    ...jest.requireActual('../../../shared/api'),
-    deepSeekModels: {
-        'deepseek-chat': {
-            maxTokens: 1000,
-            contextWindow: 2000,
-            supportsImages: false,
-            supportsPromptCache: false,
-            inputPrice: 0.014,
-            outputPrice: 0.28,
-        }
-    }
-}))
 
 describe('DeepSeekHandler', () => {
@@ -46,8 +33,8 @@ describe('DeepSeekHandler', () => {
             expect(result).toEqual({
                 id: mockOptions.deepSeekModelId,
                 info: expect.objectContaining({
-                    maxTokens: 1000,
-                    contextWindow: 2000,
+                    maxTokens: 8192,
+                    contextWindow: 64000,
                     supportsPromptCache: false,
                     supportsImages: false,
                     inputPrice: 0.014,
@@ -61,7 +48,7 @@ describe('DeepSeekHandler', () => {
             const result = handler.getModel()
             expect(result.id).toBe('deepseek-chat')
-            expect(result.info.maxTokens).toBe(1000)
+            expect(result.info.maxTokens).toBe(8192)
         })
 
         test('createMessage handles string content correctly', async () => {
@@ -109,7 +96,7 @@ describe('DeepSeekHandler', () => {
                 ],
                 temperature: 0,
                 stream: true,
-                max_tokens: 1000,
+                max_tokens: 8192,
                 stream_options: { include_usage: true }
             }))
         })
@@ -155,83 +142,6 @@ describe('DeepSeekHandler', () => {
             }))
         })
 
-        test('createMessage truncates messages when exceeding context window', async () => {
-            const handler = new DeepSeekHandler(mockOptions)
-            const longString = 'a'.repeat(1000) // ~300 tokens
-            const shortString = 'b'.repeat(100) // ~30 tokens
-            const systemPrompt = 'test system prompt'
-            const messages: Anthropic.Messages.MessageParam[] = [
-                { role: 'user', content: longString }, // Old message
-                { role: 'assistant', content: 'short response' },
-                { role: 'user', content: shortString } // Recent message
-            ]
-
-            const mockStream = {
-                async *[Symbol.asyncIterator]() {
-                    yield {
-                        choices: [{
-                            delta: {
-                                content: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-                            }
-                        }]
-                    }
-                    yield {
-                        choices: [{
-                            delta: {
-                                content: 'test response'
-                            }
-                        }]
-                    }
-                }
-            }
-
-            const mockCreate = jest.fn().mockResolvedValue(mockStream)
-            ;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
-                completions: { create: mockCreate }
-            } as any
-
-            const generator = handler.createMessage(systemPrompt, messages)
-            const chunks = []
-            for await (const chunk of generator) {
-                chunks.push(chunk)
-            }
-
-            // Should get two chunks: truncation notice and response
-            expect(chunks).toHaveLength(2)
-            expect(chunks[0]).toEqual({
-                type: 'text',
-                text: expect.stringContaining('truncated')
-            })
-            expect(chunks[1]).toEqual({
-                type: 'text',
-                text: 'test response'
-            })
-
-            // Verify API call includes system prompt and recent messages, but not old message
-            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
-                messages: expect.arrayContaining([
-                    { role: 'system', content: systemPrompt },
-                    { role: 'assistant', content: 'short response' },
-                    { role: 'user', content: shortString }
-                ])
-            }))
-
-            // Verify truncation notice was included
-            expect(chunks[0]).toEqual({
-                type: 'text',
-                text: expect.stringContaining('truncated')
-            })
-
-            // Verify the messages array contains the expected messages
-            const calledMessages = mockCreate.mock.calls[0][0].messages
-            expect(calledMessages).toHaveLength(4)
-            expect(calledMessages[0]).toEqual({ role: 'system', content: systemPrompt })
-            expect(calledMessages[1]).toEqual({ role: 'user', content: longString })
-            expect(calledMessages[2]).toEqual({ role: 'assistant', content: 'short response' })
-            expect(calledMessages[3]).toEqual({ role: 'user', content: shortString })
-        })
-
         test('createMessage handles API errors', async () => {
             const handler = new DeepSeekHandler(mockOptions)
             const mockStream = {
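
The fixture sizes in the removed test ('a'.repeat(1000) annotated as "~300 tokens", 'b'.repeat(100) as "~30 tokens") follow the rough character-based token estimate the handler used before this commit (removed in the next file's diff); a minimal sketch of that heuristic:

```typescript
// Character-count token estimate from the removed truncation pass:
// roughly 0.3 tokens per character, rounded up.
const getTokenCount = (content: string) => Math.ceil(content.length * 0.3)

getTokenCount('a'.repeat(1000)) // 300, matching the "~300 tokens" fixture above
getTokenCount('b'.repeat(100))  // 30, matching the "~30 tokens" fixture above
```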

View File

@@ -20,64 +20,36 @@ export class DeepSeekHandler implements ApiHandler {
     }
 
     async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-        // Convert messages to simple format that DeepSeek expects
-        const formattedMessages = messages.map(msg => {
-            if (typeof msg.content === "string") {
-                return { role: msg.role, content: msg.content }
-            }
-            // For array content, concatenate text parts
-            return {
-                role: msg.role,
-                content: msg.content.reduce((acc, part) => {
-                    if (part.type === "text") {
-                        return acc + part.text
-                    }
-                    return acc
-                }, "")
-            }
-        })
-        const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-            { role: "system", content: systemPrompt },
-            ...formattedMessages,
-        ]
-        const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
-        const contextWindow = modelInfo.contextWindow || 64_000
-        const getTokenCount = (content: string) => Math.ceil(content.length * 0.3)
-        // Always keep system prompt
-        const systemMsg = openAiMessages[0]
-        let availableTokens = contextWindow - getTokenCount(typeof systemMsg.content === 'string' ? systemMsg.content : '')
-        // Start with most recent messages and work backwards
-        const userMessages = openAiMessages.slice(1).reverse()
-        const includedMessages = []
-        let truncated = false
-        for (const msg of userMessages) {
-            const content = typeof msg.content === 'string' ? msg.content : ''
-            const tokens = getTokenCount(content)
-            if (tokens <= availableTokens) {
-                includedMessages.unshift(msg)
-                availableTokens -= tokens
-            } else {
-                truncated = true
-                break
-            }
-        }
-        if (truncated) {
-            yield {
-                type: 'text',
-                text: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-            }
+        const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
+        // Format all messages
+        const messagesToInclude: OpenAI.Chat.ChatCompletionMessageParam[] = [
+            { role: 'system' as const, content: systemPrompt }
+        ]
+
+        // Add the rest of the messages
+        for (const msg of messages) {
+            let messageContent = ""
+            if (typeof msg.content === "string") {
+                messageContent = msg.content
+            } else if (Array.isArray(msg.content)) {
+                messageContent = msg.content.reduce((acc, part) => {
+                    if (part.type === "text") {
+                        return acc + part.text
+                    }
+                    return acc
+                }, "")
+            }
+            messagesToInclude.push({
+                role: msg.role === 'user' ? 'user' as const : 'assistant' as const,
+                content: messageContent
+            })
         }
 
         const requestOptions: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
             model: this.options.deepSeekModelId ?? "deepseek-chat",
-            messages: [systemMsg, ...includedMessages],
+            messages: messagesToInclude,
             temperature: 0,
             stream: true,
             max_tokens: modelInfo.maxTokens,
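
With the truncation pass removed, this hunk reduces to flattening each Anthropic message into the plain string the OpenAI-compatible client expects; a minimal standalone sketch of that flattening step, assuming only the types used above (the helper name is illustrative, not part of the handler):

```typescript
import { Anthropic } from "@anthropic-ai/sdk"

// Illustrative helper, not part of this commit: collapse an Anthropic message's
// content (a string or an array of content blocks) into a single string,
// keeping only the text parts, as the new loop above does inline.
function flattenContent(content: Anthropic.Messages.MessageParam["content"]): string {
    if (typeof content === "string") return content
    return content.reduce((acc, part) => (part.type === "text" ? acc + part.text : acc), "")
}
```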
@@ -87,22 +59,30 @@ export class DeepSeekHandler implements ApiHandler {
             requestOptions.stream_options = { include_usage: true }
         }
 
-        const stream = await this.client.chat.completions.create(requestOptions)
-        for await (const chunk of stream) {
-            const delta = chunk.choices[0]?.delta
-            if (delta?.content) {
-                yield {
-                    type: "text",
-                    text: delta.content,
-                }
-            }
-            if (chunk.usage) {
-                yield {
-                    type: "usage",
-                    inputTokens: chunk.usage.prompt_tokens || 0,
-                    outputTokens: chunk.usage.completion_tokens || 0,
-                }
-            }
-        }
+        let totalInputTokens = 0;
+        let totalOutputTokens = 0;
+
+        try {
+            const stream = await this.client.chat.completions.create(requestOptions)
+            for await (const chunk of stream) {
+                const delta = chunk.choices[0]?.delta
+                if (delta?.content) {
+                    yield {
+                        type: "text",
+                        text: delta.content,
+                    }
+                }
+                if (chunk.usage) {
+                    yield {
+                        type: "usage",
+                        inputTokens: chunk.usage.prompt_tokens || 0,
+                        outputTokens: chunk.usage.completion_tokens || 0,
+                    }
+                }
+            }
+        } catch (error) {
+            console.error("DeepSeek API Error:", error)
+            throw error
+        }
     }
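
For context, createMessage() remains an async generator (ApiStream) that yields text chunks followed by a usage chunk, as shown above; a minimal sketch of draining it, where the constructor options are assumed rather than taken from this diff:

```typescript
// Hypothetical usage, not part of this commit: collect streamed text and
// the final token usage from the ApiStream returned by createMessage().
const handler = new DeepSeekHandler({ deepSeekModelId: "deepseek-chat" } as any) // option shape assumed
let output = ""
let usage = { inputTokens: 0, outputTokens: 0 }

for await (const chunk of handler.createMessage("You are a helpful assistant.", [
    { role: "user", content: "Hello" },
])) {
    if (chunk.type === "text") {
        output += chunk.text
    } else if (chunk.type === "usage") {
        usage = { inputTokens: chunk.inputTokens, outputTokens: chunk.outputTokens }
    }
}
```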