Refactor edit strategies and confidence validation in unified diff processing

- Enhanced the applyContextMatching and applyDMP functions to improve handling of context and edit changes. - Updated confidence validation logic to ensure stricter checks, now requiring a minimum confidence of 1 for successful edits. - Refined the way changes are processed, including better tracking of removal and addition changes. - Improved the validation of edit results by incorporating strategy-specific checks and logging for better debugging. - Adjusted the applyEdit function to ensure strategies are applied in a more robust manner, with clearer handling of debug mode.
2025-12-21 04:41:16 -05:00 · 2025-01-08 17:13:46 -05:00
parent 995692c48e
commit 9591ae062a
3 changed files with 136 additions and 105 deletions
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95;
 // Helper function to prepare search string from context
 export function prepareSearchString(changes: Change[]): string {
  const lines = changes
-    .filter(c => c.type === 'context' || c.type === 'remove')
-    .map(c => c.content);
+    .filter((c) => c.type === 'context' || c.type === 'remove')
+    .map((c) => c.content);
  return lines.join('\n');
 }

@@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number {
 // Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
 // returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
 // If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
-export function validateEditResult(hunk: Hunk, result: string): number {
+export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
  const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
-  
+
  // Create skeleton of original content (context + removed lines)
  const originalSkeleton = hunkDeepCopy.changes
-    .filter(change => change.type === 'context' || change.type === 'remove')
-    .map(change => change.content)
+    .filter((change) => change.type === 'context' || change.type === 'remove')
+    .map((change) => change.content)
    .join('\n');

  // Create skeleton of expected result (context + added lines)
  const expectedSkeleton = hunkDeepCopy.changes
-    .filter(change => change.type === 'context' || change.type === 'add')
-    .map(change => change.content)
+    .filter((change) => change.type === 'context' || change.type === 'add')
+    .map((change) => change.content)
    .join('\n');

  // Compare with original content
  const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
-  
-  // If result is too similar to original, it means changes weren't applied
-  if (originalSimilarity > 0.9) {
-    console.log('Result too similar to original content:', originalSimilarity);
-    return 0.5; // Significant confidence reduction
+  console.log('originalSimilarity ', strategy, originalSimilarity);
+  // If original similarity is 1, it means changes weren't applied
+  if (originalSimilarity > 0.97) {
+    if (originalSimilarity === 1) {
+      return 0.5; // Significant confidence reduction
+    } else {
+      return 0.8;
+    }
  }

  // Compare with expected result
  const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
-  console.log('Original similarity:', originalSimilarity);
-  console.log('Expected similarity:', expectedSimilarity);

-  console.log('originalSkeleton:', originalSkeleton);
-  console.log('expectedSkeleton:', expectedSkeleton);
-  console.log('result:', result);
+  console.log('expectedSimilarity', strategy, expectedSimilarity);
  
+
  // Scale between 0.98 and 1.0 (4% impact) based on expected similarity
-  const multiplier = expectedSimilarity < MIN_CONFIDENCE 
-    ? 0.96 + (0.04 * expectedSimilarity) 
-    : 1;
+  const multiplier =
+    expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1;

  return multiplier;
 }
@@ -86,44 +85,59 @@ export function validateEditResult(hunk: Hunk, result: string): number {
 // Helper function to validate context lines against original content
 function validateContextLines(searchStr: string, content: string): number {
  // Extract just the context lines from the search string
-  const contextLines = searchStr.split('\n')
-    .filter(line => !line.startsWith('-'));  // Exclude removed lines
-  
+  const contextLines = searchStr
+    .split('\n')
+    .filter((line) => !line.startsWith('-')); // Exclude removed lines
+
  // Compare context lines with content
  const similarity = evaluateSimilarity(contextLines.join('\n'), content);
-  
+
  // Context lines must match very closely, or confidence drops significantly
  return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity;
 }

 // Exact match strategy
-export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findExactMatch(
+  searchStr: string,
+  content: string[],
+  startIndex: number = 0
+): SearchResult {
  const contentStr = content.slice(startIndex).join('\n');
  const searchLines = searchStr.split('\n');
-  
+
  const exactMatch = contentStr.indexOf(searchStr);
  if (exactMatch !== -1) {
-    const matchedContent = content.slice(
-      startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
-      startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
-    ).join('\n');
-    
+    const matchedContent = content
+      .slice(
+        startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
+        startIndex +
+          contentStr.slice(0, exactMatch).split('\n').length -
+          1 +
+          searchLines.length
+      )
+      .join('\n');
+
    const similarity = getDMPSimilarity(searchStr, matchedContent);
    const contextSimilarity = validateContextLines(searchStr, matchedContent);
    const confidence = Math.min(similarity, contextSimilarity);
-    
+
    return {
-      index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
+      index:
+        startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
      confidence,
-      strategy: 'exact'
+      strategy: 'exact',
    };
  }
-  
+
  return { index: -1, confidence: 0, strategy: 'exact' };
 }

 // String similarity strategy
-export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findSimilarityMatch(
+  searchStr: string,
+  content: string[],
+  startIndex: number = 0
+): SearchResult {
  const searchLines = searchStr.split('\n');
  let bestScore = 0;
  let bestIndex = -1;
@@ -136,7 +150,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
      const similarity = getDMPSimilarity(searchStr, windowStr);
      const contextSimilarity = validateContextLines(searchStr, windowStr);
      const adjustedScore = Math.min(similarity, contextSimilarity) * score;
-      
+
      if (adjustedScore > bestScore) {
        bestScore = adjustedScore;
        bestIndex = i;
@@ -144,33 +158,37 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
    }
  }

-  return { 
-    index: bestIndex, 
+  return {
+    index: bestIndex,
    confidence: bestIndex !== -1 ? bestScore : 0,
-    strategy: 'similarity'
+    strategy: 'similarity',
  };
 }

 // Levenshtein strategy
-export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findLevenshteinMatch(
+  searchStr: string,
+  content: string[],
+  startIndex: number = 0
+): SearchResult {
  const searchLines = searchStr.split('\n');
  const candidates = [];
-  
+
  for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
    candidates.push(content.slice(i, i + searchLines.length).join('\n'));
  }
-  
+
  if (candidates.length > 0) {
    const closestMatch = closest(searchStr, candidates);
    const index = startIndex + candidates.indexOf(closestMatch);
    const similarity = getDMPSimilarity(searchStr, closestMatch);
    const contextSimilarity = validateContextLines(searchStr, closestMatch);
-    const confidence = Math.min(similarity, contextSimilarity) * 0.7;  // Still apply Levenshtein penalty
-    
-    return { 
-      index, 
+    const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
+
+    return {
+      index,
      confidence,
-      strategy: 'levenshtein'
+      strategy: 'levenshtein',
    };
  }

@@ -178,21 +196,25 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
 }

 // Main search function that tries all strategies
-export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findBestMatch(
+  searchStr: string,
+  content: string[],
+  startIndex: number = 0
+): SearchResult {
  const strategies = [
    findExactMatch,
    findSimilarityMatch,
-    findLevenshteinMatch
+    findLevenshteinMatch,
  ];
-  
+
  let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };
-  
+
  for (const strategy of strategies) {
    const result = strategy(searchStr, content, startIndex);
    if (result.confidence > bestResult.confidence) {
      bestResult = result;
    }
  }
-  
+
  return bestResult;
-} 
+}