Refactor edit strategies and confidence validation in unified diff processing

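- Reworked the applyContextMatching and applyDMP functions: context matching now tracks whether the previous change was a removal before advancing the source index, and applyDMP builds explicit "before" (context + removals) and "after" (context + additions) blocks to create its patch.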
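- Tightened confidence validation: the MIN_CONFIDENCE used when applying a parsed diff is raised from 0.9 to 1 (now compared with >=), and applyEdit's normal mode accepts a strategy result only when its confidence is exactly 1.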
- Refined the way changes are processed, including better tracking of removal and addition changes.
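- Improved the validation of edit results: validateEditResult now takes the strategy name and uses it to label its log output for easier debugging, and the "too similar to original" threshold is raised from 0.9 to 0.97 (returning 0.5 when the result is identical to the original and 0.8 otherwise).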
- Adjusted the applyEdit function: the dmp strategy is now tried before context matching, and the debug parameter changes from a boolean to a string naming the strategy whose result is returned in debug mode.
This commit is contained in:
Daniel Riccio
2025-01-08 17:13:46 -05:00
parent 995692c48e
commit 9591ae062a
3 changed files with 136 additions and 105 deletions

View File

@@ -36,35 +36,43 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
const newResult = [...content.slice(0, matchPosition)]; const newResult = [...content.slice(0, matchPosition)];
let sourceIndex = matchPosition; let sourceIndex = matchPosition;
let previousIndent = ''; let previousIndent = '';
let lastChangeWasRemove = false; // Track if last change was a remove
const hunkChanges = hunk.changes.filter(c => c.type !== 'context');
for (const change of hunk.changes) { for (const change of hunk.changes) {
if (change.type === 'context') { if (change.type === 'context') {
newResult.push(change.originalLine || (change.indent + change.content)); newResult.push(change.originalLine || (change.indent + change.content));
previousIndent = change.indent; previousIndent = change.indent;
sourceIndex++; if (!lastChangeWasRemove) { // Only increment if we didn't just remove a line
sourceIndex++;
}
lastChangeWasRemove = false;
} else if (change.type === 'add') { } else if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content, const indent = change.indent || inferIndentation(change.content,
hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''),
previousIndent previousIndent
); );
newResult.push(indent + change.content); newResult.push(indent + change.content);
previousIndent = indent; previousIndent = indent;
lastChangeWasRemove = false;
} else if (change.type === 'remove') { } else if (change.type === 'remove') {
sourceIndex++; sourceIndex++;
lastChangeWasRemove = true;
} }
} }
newResult.push(...content.slice(sourceIndex)); newResult.push(...content.slice(sourceIndex));
// Validate the result // Calculate the window size based on all changes
const windowSize = hunk.changes.length;
// Validate the result using the full window size
const similarity = getDMPSimilarity( const similarity = getDMPSimilarity(
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), content.slice(matchPosition, matchPosition + windowSize).join('\n'),
newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') newResult.slice(matchPosition, matchPosition + windowSize).join('\n')
) )
const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n')); const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context');
return { return {
confidence: similarity * confidence, confidence: similarity * confidence,
@@ -80,43 +88,43 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
} }
const dmp = new diff_match_patch(); const dmp = new diff_match_patch();
const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
const editText = editRegion.join('\n');
// Build the target text sequentially like in applyContextMatching // Build BEFORE block (context + removals)
let targetText = ''; const beforeLines = hunk.changes
let previousIndent = ''; .filter(change => change.type === 'context' || change.type === 'remove')
.map(change => change.originalLine || (change.indent + change.content));
for (const change of hunk.changes) { // Build AFTER block (context + additions)
if (change.type === 'context') { const afterLines = hunk.changes
targetText += (change.originalLine || (change.indent + change.content)) + '\n'; .filter(change => change.type === 'context' || change.type === 'add')
previousIndent = change.indent; .map(change => change.originalLine || (change.indent + change.content));
} else if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content,
hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
previousIndent
);
targetText += indent + change.content + '\n';
previousIndent = indent;
}
// Skip remove changes as they shouldn't appear in target
}
// Trim the trailing newline // Convert to text
targetText = targetText.replace(/\n$/, ''); const beforeText = beforeLines.join('\n');
const afterText = afterLines.join('\n');
const patch = dmp.patch_make(editText, targetText); // Create the patch
const [patchedText] = dmp.patch_apply(patch, editText); const patch = dmp.patch_make(beforeText, afterText);
// Construct result with edited portion // Get the target text from content
const targetText = content.slice(matchPosition, matchPosition + beforeLines.length).join('\n');
// Apply the patch
const [patchedText] = dmp.patch_apply(patch, targetText);
// Split patched text back into lines
const patchedLines = patchedText.split('\n');
// Construct the final result
const newResult = [ const newResult = [
...content.slice(0, matchPosition), ...content.slice(0, matchPosition),
...patchedText.split('\n'), ...patchedLines,
...content.slice(matchPosition + hunk.changes.length) ...content.slice(matchPosition + beforeLines.length)
]; ];
const similarity = getDMPSimilarity(editText, patchedText) // Calculate confidence
const confidence = validateEditResult(hunk, patchedText); const similarity = getDMPSimilarity(beforeText, targetText);
const confidence = validateEditResult(hunk, patchedText, 'dmp');
return { return {
confidence: similarity * confidence, confidence: similarity * confidence,
@@ -228,7 +236,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
const osrResult = (await memfs.promises.readFile('/file.txt')).toString(); const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
const osrSimilarity = getDMPSimilarity(editText, osrResult) const osrSimilarity = getDMPSimilarity(editText, osrResult)
const confidence = validateEditResult(hunk, osrResult); const confidence = validateEditResult(hunk, osrResult, 'git-osr');
if (osrSimilarity * confidence > 0.9) { if (osrSimilarity * confidence > 0.9) {
// Construct result with edited portion // Construct result with edited portion
@@ -273,7 +281,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); const srsoResult = (await memfs.promises.readFile('/file.txt')).toString();
const srsoSimilarity = getDMPSimilarity(editText, srsoResult) const srsoSimilarity = getDMPSimilarity(editText, srsoResult)
const confidence = validateEditResult(hunk, srsoResult); const confidence = validateEditResult(hunk, srsoResult, 'git-srso');
// Construct result with edited portion // Construct result with edited portion
const newResult = [ const newResult = [
@@ -299,7 +307,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
} }
// Main edit function that tries strategies sequentially // Main edit function that tries strategies sequentially
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> { export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
// Don't attempt any edits if confidence is too low and not in debug mode // Don't attempt any edits if confidence is too low and not in debug mode
const MIN_CONFIDENCE = 0.9; const MIN_CONFIDENCE = 0.9;
@@ -310,12 +318,12 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
// Try each strategy in sequence until one succeeds // Try each strategy in sequence until one succeeds
const strategies = [ const strategies = [
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
{ name: 'git', apply: () => applyGit(hunk, content, matchPosition) } { name: 'git', apply: () => applyGit(hunk, content, matchPosition) }
]; ];
if (debug) { if (debug !== '') {
// In debug mode, try all strategies and return the first success // In debug mode, try all strategies and return the first success
const results = await Promise.all(strategies.map(async strategy => { const results = await Promise.all(strategies.map(async strategy => {
console.log(`Attempting edit with ${strategy.name} strategy...`); console.log(`Attempting edit with ${strategy.name} strategy...`);
@@ -324,18 +332,19 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
return result; return result;
})); }));
const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); /*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
if (successfulResults.length > 0) { if (successfulResults.length > 0) {
const bestResult = successfulResults.reduce((best, current) => const bestResult = successfulResults.reduce((best, current) =>
current.confidence > best.confidence ? current : best current.confidence > best.confidence ? current : best
); );
return bestResult; return bestResult;
} }*/
return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
} else { } else {
// Normal mode - try strategies sequentially until one succeeds // Normal mode - try strategies sequentially until one succeeds
for (const strategy of strategies) { for (const strategy of strategies) {
const result = await strategy.apply(); const result = await strategy.apply();
if (result.confidence > MIN_CONFIDENCE) { if (result.confidence === 1) {
return result; return result;
} }
} }

View File

@@ -160,7 +160,7 @@ Your diff here
startLine?: number, startLine?: number,
endLine?: number endLine?: number
): Promise<DiffResult> { ): Promise<DiffResult> {
const MIN_CONFIDENCE = 0.9 const MIN_CONFIDENCE = 1
const parsedDiff = this.parseUnifiedDiff(diffContent) const parsedDiff = this.parseUnifiedDiff(diffContent)
const originalLines = originalContent.split("\n") const originalLines = originalContent.split("\n")
let result = [...originalLines] let result = [...originalLines]
@@ -170,7 +170,7 @@ Your diff here
const { index: matchPosition, confidence } = findBestMatch(contextStr, result) const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
const editResult = await applyEdit(hunk, result, matchPosition, confidence) const editResult = await applyEdit(hunk, result, matchPosition, confidence)
if (editResult.confidence > MIN_CONFIDENCE) { if (editResult.confidence >= MIN_CONFIDENCE) {
result = editResult.result result = editResult.result
} else { } else {
return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` } return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` }

View File

@@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95;
// Helper function to prepare search string from context // Helper function to prepare search string from context
export function prepareSearchString(changes: Change[]): string { export function prepareSearchString(changes: Change[]): string {
const lines = changes const lines = changes
.filter(c => c.type === 'context' || c.type === 'remove') .filter((c) => c.type === 'context' || c.type === 'remove')
.map(c => c.content); .map((c) => c.content);
return lines.join('\n'); return lines.join('\n');
} }
@@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number {
// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, // Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. // returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). // If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
export function validateEditResult(hunk: Hunk, result: string): number { export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)); const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
// Create skeleton of original content (context + removed lines) // Create skeleton of original content (context + removed lines)
const originalSkeleton = hunkDeepCopy.changes const originalSkeleton = hunkDeepCopy.changes
.filter(change => change.type === 'context' || change.type === 'remove') .filter((change) => change.type === 'context' || change.type === 'remove')
.map(change => change.content) .map((change) => change.content)
.join('\n'); .join('\n');
// Create skeleton of expected result (context + added lines) // Create skeleton of expected result (context + added lines)
const expectedSkeleton = hunkDeepCopy.changes const expectedSkeleton = hunkDeepCopy.changes
.filter(change => change.type === 'context' || change.type === 'add') .filter((change) => change.type === 'context' || change.type === 'add')
.map(change => change.content) .map((change) => change.content)
.join('\n'); .join('\n');
// Compare with original content // Compare with original content
const originalSimilarity = evaluateSimilarity(originalSkeleton, result); const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
console.log('originalSimilarity ', strategy, originalSimilarity);
// If result is too similar to original, it means changes weren't applied // If original similarity is 1, it means changes weren't applied
if (originalSimilarity > 0.9) { if (originalSimilarity > 0.97) {
console.log('Result too similar to original content:', originalSimilarity); if (originalSimilarity === 1) {
return 0.5; // Significant confidence reduction return 0.5; // Significant confidence reduction
} else {
return 0.8;
}
} }
// Compare with expected result // Compare with expected result
const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result); const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
console.log('Original similarity:', originalSimilarity);
console.log('Expected similarity:', expectedSimilarity);
console.log('originalSkeleton:', originalSkeleton); console.log('expectedSimilarity', strategy, expectedSimilarity);
console.log('expectedSkeleton:', expectedSkeleton);
console.log('result:', result);
// Scale between 0.98 and 1.0 (4% impact) based on expected similarity // Scale between 0.98 and 1.0 (4% impact) based on expected similarity
const multiplier = expectedSimilarity < MIN_CONFIDENCE const multiplier =
? 0.96 + (0.04 * expectedSimilarity) expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1;
: 1;
return multiplier; return multiplier;
} }
@@ -86,8 +85,9 @@ export function validateEditResult(hunk: Hunk, result: string): number {
// Helper function to validate context lines against original content // Helper function to validate context lines against original content
function validateContextLines(searchStr: string, content: string): number { function validateContextLines(searchStr: string, content: string): number {
// Extract just the context lines from the search string // Extract just the context lines from the search string
const contextLines = searchStr.split('\n') const contextLines = searchStr
.filter(line => !line.startsWith('-')); // Exclude removed lines .split('\n')
.filter((line) => !line.startsWith('-')); // Exclude removed lines
// Compare context lines with content // Compare context lines with content
const similarity = evaluateSimilarity(contextLines.join('\n'), content); const similarity = evaluateSimilarity(contextLines.join('\n'), content);
@@ -97,25 +97,35 @@ function validateContextLines(searchStr: string, content: string): number {
} }
// Exact match strategy // Exact match strategy
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findExactMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const contentStr = content.slice(startIndex).join('\n'); const contentStr = content.slice(startIndex).join('\n');
const searchLines = searchStr.split('\n'); const searchLines = searchStr.split('\n');
const exactMatch = contentStr.indexOf(searchStr); const exactMatch = contentStr.indexOf(searchStr);
if (exactMatch !== -1) { if (exactMatch !== -1) {
const matchedContent = content.slice( const matchedContent = content
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, .slice(
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
).join('\n'); startIndex +
contentStr.slice(0, exactMatch).split('\n').length -
1 +
searchLines.length
)
.join('\n');
const similarity = getDMPSimilarity(searchStr, matchedContent); const similarity = getDMPSimilarity(searchStr, matchedContent);
const contextSimilarity = validateContextLines(searchStr, matchedContent); const contextSimilarity = validateContextLines(searchStr, matchedContent);
const confidence = Math.min(similarity, contextSimilarity); const confidence = Math.min(similarity, contextSimilarity);
return { return {
index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, index:
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
confidence, confidence,
strategy: 'exact' strategy: 'exact',
}; };
} }
@@ -123,7 +133,11 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
} }
// String similarity strategy // String similarity strategy
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findSimilarityMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const searchLines = searchStr.split('\n'); const searchLines = searchStr.split('\n');
let bestScore = 0; let bestScore = 0;
let bestIndex = -1; let bestIndex = -1;
@@ -147,12 +161,16 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
return { return {
index: bestIndex, index: bestIndex,
confidence: bestIndex !== -1 ? bestScore : 0, confidence: bestIndex !== -1 ? bestScore : 0,
strategy: 'similarity' strategy: 'similarity',
}; };
} }
// Levenshtein strategy // Levenshtein strategy
export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findLevenshteinMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const searchLines = searchStr.split('\n'); const searchLines = searchStr.split('\n');
const candidates = []; const candidates = [];
@@ -165,12 +183,12 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
const index = startIndex + candidates.indexOf(closestMatch); const index = startIndex + candidates.indexOf(closestMatch);
const similarity = getDMPSimilarity(searchStr, closestMatch); const similarity = getDMPSimilarity(searchStr, closestMatch);
const contextSimilarity = validateContextLines(searchStr, closestMatch); const contextSimilarity = validateContextLines(searchStr, closestMatch);
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
return { return {
index, index,
confidence, confidence,
strategy: 'levenshtein' strategy: 'levenshtein',
}; };
} }
@@ -178,11 +196,15 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
} }
// Main search function that tries all strategies // Main search function that tries all strategies
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findBestMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const strategies = [ const strategies = [
findExactMatch, findExactMatch,
findSimilarityMatch, findSimilarityMatch,
findLevenshteinMatch findLevenshteinMatch,
]; ];
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' }; let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };