Use predicted length as input for detecting omissions

2026-03-23 09:39:29 -04:00 · 2024-12-18 23:34:03 -05:00
parent a9775c0eb3
commit ef9c468f17
7 changed files with 178 additions and 19 deletions
--- a/src/integrations/editor/tests/detect-omission.test.ts
+++ b/src/integrations/editor/tests/detect-omission.test.ts
@@ -8,40 +8,40 @@ describe('detectCodeOmission', () => {
  return x + y;
 }`

-	it('should detect square bracket line range omission', () => {
+	it('should skip square bracket checks for files under 100 lines', () => {
 		const newContent = `[Previous content from line 1-305 remains exactly the same]
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

-	it('should detect single-line comment omission', () => {
+	it('should skip single-line comment checks for files under 100 lines', () => {
 		const newContent = `// Lines 1-50 remain unchanged
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

-	it('should detect multi-line comment omission', () => {
+	it('should skip multi-line comment checks for files under 100 lines', () => {
 		const newContent = `/* Previous content remains the same */
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

-	it('should detect HTML-style comment omission', () => {
+	it('should skip HTML-style comment checks for files under 100 lines', () => {
 		const newContent = `<!-- Existing content unchanged -->
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

-	it('should detect JSX-style comment omission', () => {
+	it('should skip JSX-style comment checks for files under 100 lines', () => {
 		const newContent = `{/* Rest of the code remains the same */}
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

-	it('should detect Python-style comment omission', () => {
+	it('should skip Python-style comment checks for files under 100 lines', () => {
 		const newContent = `# Previous content remains unchanged
 const z = 3;`
-		expect(detectCodeOmission(originalContent, newContent)).toBe(true)
+		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})

 	it('should not detect regular comments without omission keywords', () => {
@@ -63,4 +63,130 @@ const z = 3;`
 const unchanged = true;`
 		expect(detectCodeOmission(originalContent, newContent)).toBe(false)
 	})
+
+	describe('with predicted line count', () => {
+		describe('length-based detection', () => {
+			it('should skip length checks for files under 100 lines', () => {
+				const newContent = `const x = 1;`
+				const predictedLineCount = 50 // Less than 100 lines
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect truncation for files with exactly 100 lines', () => {
+				const newContent = `const x = 1;`
+				const predictedLineCount = 100 // Exactly 100 lines
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should detect truncation for files with more than 100 lines', () => {
+				const newContent = `const x = 1;`
+				const predictedLineCount = 150 // More than 100 lines
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+		})
+
+		describe('comment-based detection for large files', () => {
+			const generateLongContent = (commentLine: string) => {
+				return `${commentLine}
+${Array.from({ length: 90 }, (_, i) => `const x${i} = ${i};`).join('\n')}
+const y = 2;`
+			}
+
+			it('should detect suspicious single-line comment when content is more than 15% shorter', () => {
+				const newContent = `// Previous content remains here
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious single-line comment when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('// Previous content remains here')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect suspicious Python-style comment when content is more than 15% shorter', () => {
+				const newContent = `# Previous content remains here
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious Python-style comment when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('# Previous content remains here')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect suspicious multi-line comment when content is more than 15% shorter', () => {
+				const newContent = `/* Previous content remains the same */
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious multi-line comment when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('/* Previous content remains the same */')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect suspicious JSX comment when content is more than 15% shorter', () => {
+				const newContent = `{/* Rest of the code remains the same */}
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious JSX comment when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('{/* Rest of the code remains the same */}')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect suspicious HTML comment when content is more than 15% shorter', () => {
+				const newContent = `<!-- Existing content unchanged -->
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious HTML comment when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('<!-- Existing content unchanged -->')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+
+			it('should detect suspicious square bracket notation when content is more than 15% shorter', () => {
+				const newContent = `[Previous content from line 1-305 remains exactly the same]
+const x = 1;`
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(true)
+			})
+
+			it('should not flag suspicious square bracket notation when content is less than 15% shorter', () => {
+				const newContent = generateLongContent('[Previous content from line 1-305 remains exactly the same]')
+				const predictedLineCount = 100
+				expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+			})
+		})
+
+		it('should not flag content very close to predicted length', () => {
+			const newContent = `const x = 1;
+const y = 2;
+// This is a legitimate comment that remains here`
+			const predictedLineCount = newContent.split('\n').length // Exact line count match
+			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+		})
+
+		it('should not flag when content is longer than predicted', () => {
+			const newContent = `const x = 1;
+const y = 2;
+// Previous content remains here but we added more
+const z = 3;
+const w = 4;`
+			const predictedLineCount = 3 // Content has 5 lines (longer than predicted)
+			expect(detectCodeOmission(originalContent, newContent, predictedLineCount)).toBe(false)
+		})
+	})
 })
--- a/src/integrations/editor/detect-omission.ts
+++ b/src/integrations/editor/detect-omission.ts
@@ -2,9 +2,27 @@
 * Detects potential AI-generated code omissions in the given file content.
 * @param originalFileContent The original content of the file.
 * @param newFileContent The new content of the file to check.
+ * @param predictedLineCount Optional predicted number of lines in the new content; when omitted or below 100, no checks run and the result is false.
 * @returns True if a potential omission is detected, false otherwise.
 */
-export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean {
+export function detectCodeOmission(
+	originalFileContent: string,
+	newFileContent: string,
+	predictedLineCount?: number
+): boolean {
+	// Skip all checks if predictedLineCount is missing, zero, or less than 100
+	if (!predictedLineCount || predictedLineCount < 100) {
+		return false
+	}
+
+	const actualLineCount = newFileContent.split("\n").length
+	const lengthRatio = actualLineCount / predictedLineCount
+	
+	// If content is at least 25% shorter than predicted (ratio <= 0.75), this is suspicious
+	if (lengthRatio <= 0.75) {
+		return true
+	}
+
 	const originalLines = originalFileContent.split("\n")
 	const newLines = newFileContent.split("\n")
 	const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."]
@@ -18,17 +36,21 @@ export function detectCodeOmission(originalFileContent: string, newFileContent:
 		/^\s*\[/, // Square bracket notation
 	]

+	// Consider comments as suspicious if they weren't in the original file
+	// and contain omission keywords
 	for (const line of newLines) {
 		if (commentPatterns.some((pattern) => pattern.test(line))) {
 			const words = line.toLowerCase().split(/\s+/)
 			if (omissionKeywords.some((keyword) => words.includes(keyword))) {
 				if (!originalLines.includes(line)) {
-					return true
+					// With 100+ predicted lines, only flag if content is at least 15% shorter than predicted
+					if (lengthRatio <= 0.85) {
+						return true
+					}
 				}
 			}
 		}
 	}

 	return false
-}
-
+}