Use predicted length as input for detecting omissions

2025-12-20 12:21:13 -05:00 · 2024-12-18 23:34:03 -05:00
parent a9775c0eb3
commit ef9c468f17
7 changed files with 178 additions and 19 deletions
--- a/src/integrations/editor/detect-omission.ts
+++ b/src/integrations/editor/detect-omission.ts
@@ -2,9 +2,27 @@
 * Detects potential AI-generated code omissions in the given file content.
 * @param originalFileContent The original content of the file.
 * @param newFileContent The new content of the file to check.
+ * @param predictedLineCount Optional predicted number of lines in the new content; when omitted or below 100, no checks run and false is returned.
 * @returns True if a potential omission is detected, false otherwise.
 */
-export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean {
+export function detectCodeOmission(
+	originalFileContent: string,
+	newFileContent: string,
+	predictedLineCount?: number
+): boolean {
+	// Skip all checks if predictedLineCount is not provided or is less than 100
+	if (!predictedLineCount || predictedLineCount < 100) {
+		return false
+	}
+
+	const actualLineCount = newFileContent.split("\n").length
+	const lengthRatio = actualLineCount / predictedLineCount
+	
+	// If content is at least 25% shorter than predicted, this is suspicious
+	if (lengthRatio <= 0.75) {
+		return true
+	}
+
 	const originalLines = originalFileContent.split("\n")
 	const newLines = newFileContent.split("\n")
 	const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."]
@@ -18,17 +36,21 @@ export function detectCodeOmission(originalFileContent: string, newFileContent:
 		/^\s*\[/, // Square bracket notation
 	]

+	// Consider comments as suspicious if they weren't in the original file
+	// and contain omission keywords
 	for (const line of newLines) {
 		if (commentPatterns.some((pattern) => pattern.test(line))) {
 			const words = line.toLowerCase().split(/\s+/)
 			if (omissionKeywords.some((keyword) => words.includes(keyword))) {
 				if (!originalLines.includes(line)) {
-					return true
+					// With a predicted length of 100+ lines, only flag if content is at least 15% shorter than predicted
+					if (lengthRatio <= 0.85) {
+						return true
+					}
 				}
 			}
 		}
 	}

 	return false
-}
-
+}