Use predicted length as input for detecting omissions

This commit is contained in:
Matt Rubens
2024-12-18 23:34:03 -05:00
parent a9775c0eb3
commit ef9c468f17
7 changed files with 178 additions and 19 deletions

View File

@@ -8,40 +8,40 @@ describe('detectCodeOmission', () => {
return x + y;
}`
it('should detect square bracket line range omission', () => {
it('should skip square bracket checks for files under 100 lines', () => {
const newContent = `[Previous content from line 1-305 remains exactly the same]
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should detect single-line comment omission', () => {
it('should skip single-line comment checks for files under 100 lines', () => {
const newContent = `// Lines 1-50 remain unchanged
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should detect multi-line comment omission', () => {
it('should skip multi-line comment checks for files under 100 lines', () => {
const newContent = `/* Previous content remains the same */
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should detect HTML-style comment omission', () => {
it('should skip HTML-style comment checks for files under 100 lines', () => {
const newContent = `<!-- Existing content unchanged -->
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should detect JSX-style comment omission', () => {
it('should skip JSX-style comment checks for files under 100 lines', () => {
const newContent = `{/* Rest of the code remains the same */}
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should detect Python-style comment omission', () => {
it('should skip Python-style comment checks for files under 100 lines', () => {
const newContent = `# Previous content remains unchanged
const z = 3;`
expect(detectCodeOmission(originalContent, newContent)).toBe(true)
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
it('should not detect regular comments without omission keywords', () => {
@@ -63,4 +63,130 @@ const z = 3;`
const unchanged = true;`
expect(detectCodeOmission(originalContent, newContent)).toBe(false)
})
describe('with predicted line count', () => {
	// detectCodeOmission returns false outright when predictedLineCount is
	// missing or below 100, so every positive case here predicts >= 100 lines.
	describe('length-based detection', () => {
		it('should skip length checks for files under 100 lines', () => {
			const newContent = `const x = 1;`
			const predictedLineCount = 50 // Less than 100 lines
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect truncation for files with exactly 100 lines', () => {
			const newContent = `const x = 1;`
			const predictedLineCount = 100 // Exactly 100 lines
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should detect truncation for files with more than 100 lines', () => {
			const newContent = `const x = 1;`
			const predictedLineCount = 150 // More than 100 lines
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})
	})

	describe('comment-based detection for large files', () => {
		/**
		 * Builds content consisting of `firstLine`, then `codeLineCount`
		 * generated declarations, then one trailing declaration.
		 * The result therefore has `codeLineCount + 2` lines (92 by default).
		 */
		const generateLongContent = (firstLine: string, codeLineCount = 90) => {
			return `${firstLine}
${Array.from({ length: codeLineCount }, (_, i) => `const x${i} = ${i};`).join('\n')}
const y = 2;`
		}

		// 78 generated lines + 2 fixed lines = 80 lines. Against a prediction of
		// 100 that is a ratio of 0.80: above the 0.75 cut-off that flags content
		// on length alone, but inside the 0.85 cut-off applied to omission
		// comments. A `true` result can therefore only come from the comment scan.
		const inBandCodeLines = 78

		it('should detect suspicious single-line comment when content is more than 15% shorter', () => {
			const newContent = generateLongContent('// Previous content remains here', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious single-line comment when content is less than 15% shorter', () => {
			const newContent = generateLongContent('// Previous content remains here')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect suspicious Python-style comment when content is more than 15% shorter', () => {
			const newContent = generateLongContent('# Previous content remains here', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious Python-style comment when content is less than 15% shorter', () => {
			const newContent = generateLongContent('# Previous content remains here')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect suspicious multi-line comment when content is more than 15% shorter', () => {
			const newContent = generateLongContent('/* Previous content remains the same */', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious multi-line comment when content is less than 15% shorter', () => {
			const newContent = generateLongContent('/* Previous content remains the same */')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect suspicious JSX comment when content is more than 15% shorter', () => {
			const newContent = generateLongContent('{/* Rest of the code remains the same */}', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious JSX comment when content is less than 15% shorter', () => {
			const newContent = generateLongContent('{/* Rest of the code remains the same */}')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect suspicious HTML comment when content is more than 15% shorter', () => {
			const newContent = generateLongContent('<!-- Existing content unchanged -->', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious HTML comment when content is less than 15% shorter', () => {
			const newContent = generateLongContent('<!-- Existing content unchanged -->')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should detect suspicious square bracket notation when content is more than 15% shorter', () => {
			const newContent = generateLongContent(
				'[Previous content from line 1-305 remains exactly the same]',
				inBandCodeLines
			)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
		})

		it('should not flag suspicious square bracket notation when content is less than 15% shorter', () => {
			const newContent = generateLongContent('[Previous content from line 1-305 remains exactly the same]')
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})

		it('should not flag moderately shorter content that has no omission comment', () => {
			// Same 80-of-100 length as the "detect" cases above, but no line
			// contains an omission keyword, so nothing should be reported.
			const newContent = generateLongContent('const w = 0;', inBandCodeLines)
			const predictedLineCount = 100
			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
		})
	})

	it('should not flag content very close to predicted length', () => {
		const newContent = `const x = 1;
const y = 2;
// This is a legitimate comment that remains here`
		const predictedLineCount = newContent.split('\n').length // Exact line count match
		expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
	})

	it('should not flag when content is longer than predicted', () => {
		const newContent = `const x = 1;
const y = 2;
// Previous content remains here but we added more
const z = 3;
const w = 4;`
		const predictedLineCount = 3 // Content has 5 lines (longer than predicted)
		expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
	})
})
})

View File

@@ -2,9 +2,27 @@
* Detects potential AI-generated code omissions in the given file content.
* @param originalFileContent The original content of the file.
* @param newFileContent The new content of the file to check.
* @param predictedLineCount Optional predicted number of lines in the new content.
* @returns True if a potential omission is detected, false otherwise.
*/
export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean {
export function detectCodeOmission(
originalFileContent: string,
newFileContent: string,
predictedLineCount?: number
): boolean {
// Skip all checks if predictedLineCount is missing (undefined or 0) or less than 100
if (!predictedLineCount || predictedLineCount < 100) {
return false
}
const actualLineCount = newFileContent.split("\n").length
const lengthRatio = actualLineCount / predictedLineCount
// If content is at least 25% shorter than predicted (ratio <= 0.75), this is suspicious
if (lengthRatio <= 0.75) {
return true
}
const originalLines = originalFileContent.split("\n")
const newLines = newFileContent.split("\n")
const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."]
@@ -18,17 +36,21 @@ export function detectCodeOmission(originalFileContent: string, newFileContent:
/^\s*\[/, // Square bracket notation
]
// Consider comments as suspicious if they weren't in the original file
// and contain omission keywords
for (const line of newLines) {
if (commentPatterns.some((pattern) => pattern.test(line))) {
const words = line.toLowerCase().split(/\s+/)
if (omissionKeywords.some((keyword) => words.includes(keyword))) {
if (!originalLines.includes(line)) {
return true
// For files with 100+ predicted lines, only flag if content is at least 15% shorter (ratio <= 0.85)
if (lengthRatio <= 0.85) {
return true
}
}
}
}
}
return false
}
}