Refactor edit strategies and confidence validation in unified diff processing

- Enhanced the applyContextMatching and applyDMP functions to improve handling of context and edit changes.
- Updated the confidence validation logic to be stricter: a successful edit now requires a confidence of 1.
- Refined the way changes are processed, including better tracking of removal and addition changes.
- Improved the validation of edit results by incorporating strategy-specific checks and logging for easier debugging.
- Adjusted the applyEdit function so that strategies are applied more robustly, with clearer handling of debug mode.
2026-02-05 12:05:16 -05:00 · 2025-01-08 17:13:46 -05:00
parent 995692c48e
commit 9591ae062a
3 changed files with 136 additions and 105 deletions
--- a/src/core/diff/strategies/new-unified/edit-strategies.ts
+++ b/src/core/diff/strategies/new-unified/edit-strategies.ts
@@ -36,35 +36,43 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
  const newResult = [...content.slice(0, matchPosition)];
  let sourceIndex = matchPosition;
  let previousIndent = '';
-
+  let lastChangeWasRemove = false;  // Track if last change was a remove
  const hunkChanges = hunk.changes.filter(c => c.type !== 'context');
  for (const change of hunk.changes) {
    if (change.type === 'context') {
      newResult.push(change.originalLine || (change.indent + change.content));
      previousIndent = change.indent;
-      sourceIndex++;
+      if (!lastChangeWasRemove) {  // Only increment if we didn't just remove a line
        sourceIndex++;
      }
      lastChangeWasRemove = false;
    } else if (change.type === 'add') {
      const indent = change.indent || inferIndentation(change.content, 
-        hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
+        hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''),
        previousIndent
      );
      newResult.push(indent + change.content);
      previousIndent = indent;
      lastChangeWasRemove = false;
    } else if (change.type === 'remove') {
      sourceIndex++;
      lastChangeWasRemove = true;
    }
  }
  newResult.push(...content.slice(sourceIndex));
-  // Validate the result
+  // Calculate the window size based on all changes
  const windowSize = hunk.changes.length;
  // Validate the result using the full window size
  const similarity = getDMPSimilarity(
-    content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'),
+    content.slice(matchPosition, matchPosition + windowSize).join('\n'),
-    newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n')
+    newResult.slice(matchPosition, matchPosition + windowSize).join('\n')
  )
-  const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n'));
+  const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context');
  return { 
    confidence: similarity * confidence,
@@ -80,43 +88,43 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
  }
  const dmp = new diff_match_patch();
  const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
  const editText = editRegion.join('\n');
-  // Build the target text sequentially like in applyContextMatching
+  // Build BEFORE block (context + removals)
-  let targetText = '';
+  const beforeLines = hunk.changes
-  let previousIndent = '';
+    .filter(change => change.type === 'context' || change.type === 'remove')
    .map(change => change.originalLine || (change.indent + change.content));
-  for (const change of hunk.changes) {
+  // Build AFTER block (context + additions)
-    if (change.type === 'context') {
+  const afterLines = hunk.changes
-      targetText += (change.originalLine || (change.indent + change.content)) + '\n';
+    .filter(change => change.type === 'context' || change.type === 'add')
-      previousIndent = change.indent;
+    .map(change => change.originalLine || (change.indent + change.content));
    } else if (change.type === 'add') {
      const indent = change.indent || inferIndentation(change.content, 
        hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
        previousIndent
      );
      targetText += indent + change.content + '\n';
      previousIndent = indent;
    }
    // Skip remove changes as they shouldn't appear in target
  }
-  // Trim the trailing newline
+  // Convert to text
-  targetText = targetText.replace(/\n$/, '');
+  const beforeText = beforeLines.join('\n');
  const afterText = afterLines.join('\n');
-  const patch = dmp.patch_make(editText, targetText);
+  // Create the patch
-  const [patchedText] = dmp.patch_apply(patch, editText);
+  const patch = dmp.patch_make(beforeText, afterText);
-  // Construct result with edited portion
+  // Get the target text from content
  const targetText = content.slice(matchPosition, matchPosition + beforeLines.length).join('\n');
  // Apply the patch
  const [patchedText] = dmp.patch_apply(patch, targetText);
  // Split patched text back into lines
  const patchedLines = patchedText.split('\n');
  // Construct the final result
  const newResult = [
    ...content.slice(0, matchPosition),
-    ...patchedText.split('\n'),
+    ...patchedLines,
-    ...content.slice(matchPosition + hunk.changes.length)
+    ...content.slice(matchPosition + beforeLines.length)
  ];
-  const similarity = getDMPSimilarity(editText, patchedText)
+  // Calculate confidence
-  const confidence = validateEditResult(hunk, patchedText);
+  const similarity = getDMPSimilarity(beforeText, targetText);
  const confidence = validateEditResult(hunk, patchedText, 'dmp');
  return {
    confidence: similarity * confidence,
@@ -228,7 +236,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
      const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
      const osrSimilarity = getDMPSimilarity(editText, osrResult)
-      const confidence = validateEditResult(hunk, osrResult);
+      const confidence = validateEditResult(hunk, osrResult, 'git-osr');
      if (osrSimilarity * confidence > 0.9) {
        // Construct result with edited portion
@@ -273,7 +281,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
      const srsoResult = (await memfs.promises.readFile('/file.txt')).toString();
      const srsoSimilarity = getDMPSimilarity(editText, srsoResult)
-      const confidence = validateEditResult(hunk, srsoResult);
+      const confidence = validateEditResult(hunk, srsoResult, 'git-srso');
      // Construct result with edited portion
      const newResult = [
@@ -299,7 +307,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
 }
 // Main edit function that tries strategies sequentially
-export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
+export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
  // Don't attempt any edits if confidence is too low and not in debug mode
  const MIN_CONFIDENCE = 0.9;
@@ -310,12 +318,12 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
  // Try each strategy in sequence until one succeeds
  const strategies = [
    { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
    { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
    { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
    { name: 'git', apply: () => applyGit(hunk, content, matchPosition) }
  ];
-  if (debug) {
+  if (debug !== '') {
    // In debug mode, try all strategies and return the first success
    const results = await Promise.all(strategies.map(async strategy => {
      console.log(`Attempting edit with ${strategy.name} strategy...`);
@@ -324,18 +332,19 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
      return result;
    }));
-    const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
+    /*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
    if (successfulResults.length > 0) {
      const bestResult = successfulResults.reduce((best, current) => 
        current.confidence > best.confidence ? current : best
      );
      return bestResult;
-    }
+    }*/
    return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
  } else {
    // Normal mode - try strategies sequentially until one succeeds
    for (const strategy of strategies) {
      const result = await strategy.apply();
-      if (result.confidence > MIN_CONFIDENCE) {
+      if (result.confidence === 1) {
        return result;
      }
    }
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -160,7 +160,7 @@ Your diff here
 		startLine?: number,
 		endLine?: number
 	): Promise<DiffResult> {
-		const MIN_CONFIDENCE = 0.9
+		const MIN_CONFIDENCE = 1
 		const parsedDiff = this.parseUnifiedDiff(diffContent)
    const originalLines = originalContent.split("\n")
 		let result = [...originalLines]
@@ -170,7 +170,7 @@ Your diff here
 			const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
 			const editResult = await applyEdit(hunk, result, matchPosition, confidence)
-			if (editResult.confidence > MIN_CONFIDENCE) {
+			if (editResult.confidence >= MIN_CONFIDENCE) {
 				result = editResult.result
 			} else {
 				return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` }
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95;
 // Helper function to prepare search string from context
 export function prepareSearchString(changes: Change[]): string {
  const lines = changes
-    .filter(c => c.type === 'context' || c.type === 'remove')
+    .filter((c) => c.type === 'context' || c.type === 'remove')
-    .map(c => c.content);
+    .map((c) => c.content);
  return lines.join('\n');
 }
@@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number {
 // Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
 // returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
 // If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
-export function validateEditResult(hunk: Hunk, result: string): number {
+export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
  const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
  // Create skeleton of original content (context + removed lines)
  const originalSkeleton = hunkDeepCopy.changes
-    .filter(change => change.type === 'context' || change.type === 'remove')
+    .filter((change) => change.type === 'context' || change.type === 'remove')
-    .map(change => change.content)
+    .map((change) => change.content)
    .join('\n');
  // Create skeleton of expected result (context + added lines)
  const expectedSkeleton = hunkDeepCopy.changes
-    .filter(change => change.type === 'context' || change.type === 'add')
+    .filter((change) => change.type === 'context' || change.type === 'add')
-    .map(change => change.content)
+    .map((change) => change.content)
    .join('\n');
  // Compare with original content
  const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
-  
+  console.log('originalSimilarity ', strategy, originalSimilarity);
-  // If result is too similar to original, it means changes weren't applied
+  // If original similarity is 1, it means changes weren't applied
-  if (originalSimilarity > 0.9) {
+  if (originalSimilarity > 0.97) {
-    console.log('Result too similar to original content:', originalSimilarity);
+    if (originalSimilarity === 1) {
-    return 0.5; // Significant confidence reduction
+      return 0.5; // Significant confidence reduction
    } else {
      return 0.8;
    }
  }
  // Compare with expected result
  const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
  console.log('Original similarity:', originalSimilarity);
  console.log('Expected similarity:', expectedSimilarity);
-  console.log('originalSkeleton:', originalSkeleton);
+  console.log('expectedSimilarity', strategy, expectedSimilarity);
-  console.log('expectedSkeleton:', expectedSkeleton);
+  
  console.log('result:', result);
  // Scale between 0.98 and 1.0 (4% impact) based on expected similarity
-  const multiplier = expectedSimilarity < MIN_CONFIDENCE 
+  const multiplier =
-    ? 0.96 + (0.04 * expectedSimilarity) 
+    expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1;
    : 1;
  return multiplier;
 }
@@ -86,8 +85,9 @@ export function validateEditResult(hunk: Hunk, result: string): number {
 // Helper function to validate context lines against original content
 function validateContextLines(searchStr: string, content: string): number {
  // Extract just the context lines from the search string
-  const contextLines = searchStr.split('\n')
+  const contextLines = searchStr
-    .filter(line => !line.startsWith('-'));  // Exclude removed lines
+    .split('\n')
    .filter((line) => !line.startsWith('-')); // Exclude removed lines
  // Compare context lines with content
  const similarity = evaluateSimilarity(contextLines.join('\n'), content);
@@ -97,25 +97,35 @@ function validateContextLines(searchStr: string, content: string): number {
 }
 // Exact match strategy
-export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findExactMatch(
  searchStr: string,
  content: string[],
  startIndex: number = 0
 ): SearchResult {
  const contentStr = content.slice(startIndex).join('\n');
  const searchLines = searchStr.split('\n');
  const exactMatch = contentStr.indexOf(searchStr);
  if (exactMatch !== -1) {
-    const matchedContent = content.slice(
+    const matchedContent = content
-      startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
+      .slice(
-      startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
+        startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
-    ).join('\n');
+        startIndex +
          contentStr.slice(0, exactMatch).split('\n').length -
          1 +
          searchLines.length
      )
      .join('\n');
    const similarity = getDMPSimilarity(searchStr, matchedContent);
    const contextSimilarity = validateContextLines(searchStr, matchedContent);
    const confidence = Math.min(similarity, contextSimilarity);
    return {
-      index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
+      index:
        startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
      confidence,
-      strategy: 'exact'
+      strategy: 'exact',
    };
  }
@@ -123,7 +133,11 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
 }
 // String similarity strategy
-export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findSimilarityMatch(
  searchStr: string,
  content: string[],
  startIndex: number = 0
 ): SearchResult {
  const searchLines = searchStr.split('\n');
  let bestScore = 0;
  let bestIndex = -1;
@@ -147,12 +161,16 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
  return {
    index: bestIndex,
    confidence: bestIndex !== -1 ? bestScore : 0,
-    strategy: 'similarity'
+    strategy: 'similarity',
  };
 }
 // Levenshtein strategy
-export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findLevenshteinMatch(
  searchStr: string,
  content: string[],
  startIndex: number = 0
 ): SearchResult {
  const searchLines = searchStr.split('\n');
  const candidates = [];
@@ -165,12 +183,12 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
    const index = startIndex + candidates.indexOf(closestMatch);
    const similarity = getDMPSimilarity(searchStr, closestMatch);
    const contextSimilarity = validateContextLines(searchStr, closestMatch);
-    const confidence = Math.min(similarity, contextSimilarity) * 0.7;  // Still apply Levenshtein penalty
+    const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
    return {
      index,
      confidence,
-      strategy: 'levenshtein'
+      strategy: 'levenshtein',
    };
  }
@@ -178,11 +196,15 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
 }
 // Main search function that tries all strategies
-export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findBestMatch(
  searchStr: string,
  content: string[],
  startIndex: number = 0
 ): SearchResult {
  const strategies = [
    findExactMatch,
    findSimilarityMatch,
-    findLevenshteinMatch
+    findLevenshteinMatch,
  ];
  let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };