Use predicted length as input for detecting omissions

This commit is contained in:
Matt Rubens
2024-12-18 23:34:03 -05:00
parent a9775c0eb3
commit ef9c468f17
7 changed files with 178 additions and 19 deletions

View File

@@ -2,9 +2,27 @@
* Detects potential AI-generated code omissions in the given file content.
* @param originalFileContent The original content of the file.
* @param newFileContent The new content of the file to check.
* @param predictedLineCount Optional predicted number of lines in the new content.
* @returns True if a potential omission is detected, false otherwise.
*/
export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean {
export function detectCodeOmission(
originalFileContent: string,
newFileContent: string,
predictedLineCount?: number
): boolean {
// Skip all checks if predictedLineCount is not provided or is less than 100
if (!predictedLineCount || predictedLineCount < 100) {
return false
}
const actualLineCount = newFileContent.split("\n").length
const lengthRatio = actualLineCount / predictedLineCount
// If content is at least 25% shorter than predicted, this is suspicious
if (lengthRatio <= 0.75) {
return true
}
const originalLines = originalFileContent.split("\n")
const newLines = newFileContent.split("\n")
const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."]
@@ -18,17 +36,21 @@ export function detectCodeOmission(originalFileContent: string, newFileContent:
/^\s*\[/, // Square bracket notation
]
// Consider comments as suspicious if they weren't in the original file
// and contain omission keywords
for (const line of newLines) {
if (commentPatterns.some((pattern) => pattern.test(line))) {
const words = line.toLowerCase().split(/\s+/)
if (omissionKeywords.some((keyword) => words.includes(keyword))) {
if (!originalLines.includes(line)) {
return true
// When 100+ lines were predicted, only flag if content is at least 15% shorter than predicted
if (lengthRatio <= 0.85) {
return true
}
}
}
}
}
return false
}
}