Use predicted length as input for detecting omissions

This commit is contained in:
Matt Rubens
2024-12-18 23:34:03 -05:00
parent a9775c0eb3
commit ef9c468f17
7 changed files with 178 additions and 19 deletions

View File

@@ -0,0 +1,5 @@
---
"roo-cline": patch
---
Take predicted file length into account when detecting omissions

1
.clinerules Normal file
View File

@@ -0,0 +1 @@
- Before attempting completion, always make sure that any code changes have test coverage and that the tests pass.

View File

@@ -1125,11 +1125,12 @@ export class Cline {
this.diffViewProvider.scrollToFirstDiff() this.diffViewProvider.scrollToFirstDiff()
// Check for code omissions before proceeding // Check for code omissions before proceeding
if (detectCodeOmission(this.diffViewProvider.originalContent || "", newContent)) { const predictedLineCount = parseInt(block.params.line_count ?? "0")
if (detectCodeOmission(this.diffViewProvider.originalContent || "", newContent, predictedLineCount)) {
if (this.diffStrategy) { if (this.diffStrategy) {
await this.diffViewProvider.revertChanges() await this.diffViewProvider.revertChanges()
pushToolResult(formatResponse.toolError( pushToolResult(formatResponse.toolError(
"Content appears to be truncated. Found comments indicating omitted code (e.g., '// rest of code unchanged', '/* previous code */'). Please provide the complete file content without any omissions if possible, or otherwise use the 'apply_diff' tool to apply the diff to the original file." `Content appears to be truncated (file has ${newContent.split("\n").length} lines but was predicted to have ${predictedLineCount} lines). Please provide the complete file content without any omissions if possible, or otherwise use the 'apply_diff' tool to apply the diff to the original file.`
)) ))
break break
} else { } else {

View File

@@ -30,6 +30,7 @@ export const toolParamNames = [
"command", "command",
"path", "path",
"content", "content",
"line_count",
"regex", "regex",
"file_pattern", "file_pattern",
"recursive", "recursive",
@@ -71,7 +72,7 @@ export interface ReadFileToolUse extends ToolUse {
export interface WriteToFileToolUse extends ToolUse { export interface WriteToFileToolUse extends ToolUse {
name: "write_to_file" name: "write_to_file"
params: Partial<Pick<Record<ToolParamName, string>, "path" | "content">> params: Partial<Pick<Record<ToolParamName, string>, "path" | "content" | "line_count">>
} }
export interface SearchFilesToolUse extends ToolUse { export interface SearchFilesToolUse extends ToolUse {

View File

@@ -62,9 +62,11 @@ Usage:
Description: Request to write full content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file. Description: Request to write full content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.
Parameters: Parameters:
- path: (required) The path of the file to write to (relative to the current working directory ${cwd.toPosix()}) - path: (required) The path of the file to write to (relative to the current working directory ${cwd.toPosix()})
- line_count: (required) The number of lines in the content. This is used to determine if the user needs to provide more content to complete the file.
- content: (required) The content to write to the file. ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. Do NOT include the line numbers in the content though, just the actual content of the file. - content: (required) The content to write to the file. ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. Do NOT include the line numbers in the content though, just the actual content of the file.
Usage: Usage:
<write_to_file> <write_to_file>
<line_count>total number of lines in the content, including empty lines</line_count>
<path>File path here</path> <path>File path here</path>
<content> <content>
Your file content here Your file content here
@@ -207,6 +209,7 @@ Your final result description here
## Example 2: Requesting to write to a file ## Example 2: Requesting to write to a file
<write_to_file> <write_to_file>
<line_count>14</line_count>
<path>frontend-config.json</path> <path>frontend-config.json</path>
<content> <content>
{ {

View File

@@ -8,40 +8,40 @@ describe('detectCodeOmission', () => {
return x + y; return x + y;
}` }`
it('should detect square bracket line range omission', () => { it('should skip square bracket checks for files under 100 lines', () => {
const newContent = `[Previous content from line 1-305 remains exactly the same] const newContent = `[Previous content from line 1-305 remains exactly the same]
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should detect single-line comment omission', () => { it('should skip single-line comment checks for files under 100 lines', () => {
const newContent = `// Lines 1-50 remain unchanged const newContent = `// Lines 1-50 remain unchanged
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should detect multi-line comment omission', () => { it('should skip multi-line comment checks for files under 100 lines', () => {
const newContent = `/* Previous content remains the same */ const newContent = `/* Previous content remains the same */
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should detect HTML-style comment omission', () => { it('should skip HTML-style comment checks for files under 100 lines', () => {
const newContent = `<!-- Existing content unchanged --> const newContent = `<!-- Existing content unchanged -->
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should detect JSX-style comment omission', () => { it('should skip JSX-style comment checks for files under 100 lines', () => {
const newContent = `{/* Rest of the code remains the same */} const newContent = `{/* Rest of the code remains the same */}
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should detect Python-style comment omission', () => { it('should skip Python-style comment checks for files under 100 lines', () => {
const newContent = `# Previous content remains unchanged const newContent = `# Previous content remains unchanged
const z = 3;` const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
it('should not detect regular comments without omission keywords', () => { it('should not detect regular comments without omission keywords', () => {
@@ -63,4 +63,130 @@ const z = 3;`
const unchanged = true;` const unchanged = true;`
expect(detectCodeOmission(originalContent, newContent)).toBe(false) expect(detectCodeOmission(originalContent, newContent)).toBe(false)
}) })
describe('with predicted line count', () => {
describe('length-based detection', () => {
it('should skip length checks for files under 100 lines', () => {
const newContent = `const x = 1;`
const predictedLineCount = 50 // Less than 100 lines
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect truncation for files with exactly 100 lines', () => {
const newContent = `const x = 1;`
const predictedLineCount = 100 // Exactly 100 lines
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should detect truncation for files with more than 100 lines', () => {
const newContent = `const x = 1;`
const predictedLineCount = 150 // More than 100 lines
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
})
describe('comment-based detection for large files', () => {
const generateLongContent = (commentLine: string) => {
return `${commentLine}
${Array.from({ length: 90 }, (_, i) => `const x${i} = ${i};`).join('\n')}
const y = 2;`
}
it('should detect suspicious single-line comment when content is more than 15% shorter', () => {
const newContent = `// Previous content remains here
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious single-line comment when content is less than 15% shorter', () => {
const newContent = generateLongContent('// Previous content remains here')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect suspicious Python-style comment when content is more than 15% shorter', () => {
const newContent = `# Previous content remains here
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious Python-style comment when content is less than 15% shorter', () => {
const newContent = generateLongContent('# Previous content remains here')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect suspicious multi-line comment when content is more than 15% shorter', () => {
const newContent = `/* Previous content remains the same */
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious multi-line comment when content is less than 15% shorter', () => {
const newContent = generateLongContent('/* Previous content remains the same */')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect suspicious JSX comment when content is more than 15% shorter', () => {
const newContent = `{/* Rest of the code remains the same */}
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious JSX comment when content is less than 15% shorter', () => {
const newContent = generateLongContent('{/* Rest of the code remains the same */}')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect suspicious HTML comment when content is more than 15% shorter', () => {
const newContent = `<!-- Existing content unchanged -->
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious HTML comment when content is less than 15% shorter', () => {
const newContent = generateLongContent('<!-- Existing content unchanged -->')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should detect suspicious square bracket notation when content is more than 15% shorter', () => {
const newContent = `[Previous content from line 1-305 remains exactly the same]
const x = 1;`
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
})
it('should not flag suspicious square bracket notation when content is less than 15% shorter', () => {
const newContent = generateLongContent('[Previous content from line 1-305 remains exactly the same]')
const predictedLineCount = 100
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
})
it('should not flag content very close to predicted length', () => {
const newContent = `const x = 1;
const y = 2;
// This is a legitimate comment that remains here`
const predictedLineCount = newContent.split('\n').length // Exact line count match
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
it('should not flag when content is longer than predicted', () => {
const newContent = `const x = 1;
const y = 2;
// Previous content remains here but we added more
const z = 3;
const w = 4;`
const predictedLineCount = 3 // Content has 5 lines (longer than predicted)
expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
})
})
}) })

View File

@@ -2,9 +2,27 @@
* Detects potential AI-generated code omissions in the given file content. * Detects potential AI-generated code omissions in the given file content.
* @param originalFileContent The original content of the file. * @param originalFileContent The original content of the file.
* @param newFileContent The new content of the file to check. * @param newFileContent The new content of the file to check.
* @param predictedLineCount Optional predicted number of lines in the new content.
* @returns True if a potential omission is detected, false otherwise. * @returns True if a potential omission is detected, false otherwise.
*/ */
export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean { export function detectCodeOmission(
originalFileContent: string,
newFileContent: string,
predictedLineCount?: number
): boolean {
// Skip all checks if predictedLineCount is missing, zero, NaN, or less than 100
if (!predictedLineCount || predictedLineCount < 100) {
return false
}
const actualLineCount = newFileContent.split("\n").length
const lengthRatio = actualLineCount / predictedLineCount
// If content is at least 25% shorter than predicted, this is suspicious
if (lengthRatio <= 0.75) {
return true
}
const originalLines = originalFileContent.split("\n") const originalLines = originalFileContent.split("\n")
const newLines = newFileContent.split("\n") const newLines = newFileContent.split("\n")
const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."] const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."]
@@ -18,17 +36,21 @@ export function detectCodeOmission(originalFileContent: string, newFileContent:
/^\s*\[/, // Square bracket notation /^\s*\[/, // Square bracket notation
] ]
// Consider comments as suspicious if they weren't in the original file
// and contain omission keywords
for (const line of newLines) { for (const line of newLines) {
if (commentPatterns.some((pattern) => pattern.test(line))) { if (commentPatterns.some((pattern) => pattern.test(line))) {
const words = line.toLowerCase().split(/\s+/) const words = line.toLowerCase().split(/\s+/)
if (omissionKeywords.some((keyword) => words.includes(keyword))) { if (omissionKeywords.some((keyword) => words.includes(keyword))) {
if (!originalLines.includes(line)) { if (!originalLines.includes(line)) {
return true // For files with 100+ lines, only flag if content is at least 15% shorter
if (lengthRatio <= 0.85) {
return true
}
} }
} }
} }
} }
return false return false
} }