Improve strategies and confidence system

2025-12-21 04:41:16 -05:00 · 2025-01-08 12:28:31 -05:00
parent 594481643b
commit 995692c48e
4 changed files with 265 additions and 81 deletions
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -1,7 +1,7 @@
 import { compareTwoStrings } from 'string-similarity';
 import { closest } from 'fastest-levenshtein';
 import { diff_match_patch } from 'diff-match-patch';
-import { Change } from './types';
+import { Change, Hunk } from './types';

 export type SearchResult = {
  index: number;
@@ -32,10 +32,70 @@ export function getDMPSimilarity(original: string, modified: string): number {
  dmp.diff_cleanupSemantic(diffs);
  const patches = dmp.patch_make(original, diffs);
  const [expectedText] = dmp.patch_apply(patches, original);
+
  const similarity = evaluateSimilarity(expectedText, modified);
  return similarity;
 }

+// Helper function to validate edit results using hunk information
+// Returns a confidence reduction value between 0 and 1
+// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
+// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
+// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
+export function validateEditResult(hunk: Hunk, result: string): number {
+  const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
+  
+  // Create skeleton of original content (context + removed lines)
+  const originalSkeleton = hunkDeepCopy.changes
+    .filter(change => change.type === 'context' || change.type === 'remove')
+    .map(change => change.content)
+    .join('\n');
+
+  // Create skeleton of expected result (context + added lines)
+  const expectedSkeleton = hunkDeepCopy.changes
+    .filter(change => change.type === 'context' || change.type === 'add')
+    .map(change => change.content)
+    .join('\n');
+
+  // Compare with original content
+  const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
+  
+  // If result is too similar to original, it means changes weren't applied
+  if (originalSimilarity > 0.9) {
+    console.log('Result too similar to original content:', originalSimilarity);
+    return 0.5; // Significant confidence reduction
+  }
+
+  // Compare with expected result
+  const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
+  console.log('Original similarity:', originalSimilarity);
+  console.log('Expected similarity:', expectedSimilarity);
+
+  console.log('originalSkeleton:', originalSkeleton);
+  console.log('expectedSkeleton:', expectedSkeleton);
+  console.log('result:', result);
+  
+  // Scale between 0.98 and 1.0 (4% impact) based on expected similarity
+  const multiplier = expectedSimilarity < MIN_CONFIDENCE 
+    ? 0.96 + (0.04 * expectedSimilarity) 
+    : 1;
+
+  return multiplier;
+}
+
+// Helper function to validate context lines against original content
+function validateContextLines(searchStr: string, content: string): number {
+  // Extract just the context lines from the search string
+  const contextLines = searchStr.split('\n')
+    .filter(line => !line.startsWith('-'));  // Exclude removed lines
+  
+  // Compare context lines with content
+  const similarity = evaluateSimilarity(contextLines.join('\n'), content);
+  
+  // Context lines must match very closely, or confidence drops significantly
+  return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity;
+}
+
 // Exact match strategy
 export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const contentStr = content.slice(startIndex).join('\n');
@@ -48,10 +108,13 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
      startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
    ).join('\n');
    
-    const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE;
+    const similarity = getDMPSimilarity(searchStr, matchedContent);
+    const contextSimilarity = validateContextLines(searchStr, matchedContent);
+    const confidence = Math.min(similarity, contextSimilarity);
+    
    return {
      index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
-      confidence: dmpValid ? 1.0 : 0.9,
+      confidence,
      strategy: 'exact'
    };
  }
@@ -70,8 +133,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
    const windowStr = content.slice(i, i + searchLines.length).join('\n');
    const score = compareTwoStrings(searchStr, windowStr);
    if (score > bestScore && score >= minScore) {
-      const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE;
-      const adjustedScore = dmpValid ? score : score * 0.9;
+      const similarity = getDMPSimilarity(searchStr, windowStr);
+      const contextSimilarity = validateContextLines(searchStr, windowStr);
+      const adjustedScore = Math.min(similarity, contextSimilarity) * score;
      
      if (adjustedScore > bestScore) {
        bestScore = adjustedScore;
@@ -99,10 +163,13 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
  if (candidates.length > 0) {
    const closestMatch = closest(searchStr, candidates);
    const index = startIndex + candidates.indexOf(closestMatch);
-    const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE;
+    const similarity = getDMPSimilarity(searchStr, closestMatch);
+    const contextSimilarity = validateContextLines(searchStr, closestMatch);
+    const confidence = Math.min(similarity, contextSimilarity) * 0.7;  // Still apply Levenshtein penalty
+    
    return { 
      index, 
-      confidence: dmpValid ? 0.7 : 0.6,
+      confidence,
      strategy: 'levenshtein'
    };
  }