From e6d3db6075d51ab6b1759e5604899f2c5c89dd6d Mon Sep 17 00:00:00 2001
From: Daniel Riccio <ricciodaniel98@gmail.com>
Date: Tue, 14 Jan 2025 17:59:46 -0500
Subject: [PATCH] refactor: use confidenceThreshold from the settings in new
 diff strategy

---
 .../strategies/new-unified/edit-strategies.ts | 88 ++++++++++---------
 src/core/diff/strategies/new-unified/index.ts | 17 ++--
 .../new-unified/search-strategies.ts          | 21 ++---
 3 files changed, 69 insertions(+), 57 deletions(-)
diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts
index e64173e..05f5732 100644
--- a/src/core/diff/strategies/new-unified/edit-strategies.ts
+++ b/src/core/diff/strategies/new-unified/edit-strategies.ts
@@ -249,48 +249,54 @@ async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResu
 }
 
 // Main edit function that tries strategies sequentially
-export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
-  // Don't attempt regular edits if confidence is too low
-  const MIN_CONFIDENCE = 0.9;
-  if (confidence < MIN_CONFIDENCE && debug === '') {
-    console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`);
-    return applyGitFallback(hunk, content);
-  }
+export async function applyEdit(
+	hunk: Hunk, 
+	content: string[], 
+	matchPosition: number, 
+	confidence: number, 
+	debug: string = '',
+	minConfidence: number = 0.9
+): Promise<EditResult> {
+	// Don't attempt regular edits if confidence is too low
+	if (confidence < minConfidence && debug === '') {
+		console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`);
+		return applyGitFallback(hunk, content);
+	}
 
-  // Try each strategy in sequence until one succeeds
-  const strategies = [
-    { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
-    { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
-    { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
-  ];
+	// Try each strategy in sequence until one succeeds
+	const strategies = [
+		{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
+		{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
+		{ name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
+	];
 
-  if (debug !== '') {
-    // In debug mode, try all strategies including git fallback
-    const results = await Promise.all([
-      ...strategies.map(async strategy => {
-        console.log(`Attempting edit with ${strategy.name} strategy...`);
-        const result = await strategy.apply();
-        console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`);
-        return result;
-      })
-    ]);
-    
-    return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
-  } else {
-    // Normal mode - try strategies sequentially until one succeeds
-    for (const strategy of strategies) {
-      const result = await strategy.apply();
-      if (result.confidence === 1) {
-        return result;
-      }
-    }
-    // If all strategies fail, try git fallback
-    
-    const result = await applyGitFallback(hunk, content);
-    if(result.confidence === 1) {
-      return result;
-    }
-  }
+	if (debug !== '') {
+		// In debug mode, try all strategies including git fallback
+		const results = await Promise.all([
+			...strategies.map(async strategy => {
+				console.log(`Attempting edit with ${strategy.name} strategy...`);
+				const result = await strategy.apply();
+				console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`);
+				return result;
+			})
+		]);
+		
+		return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
+	} else {
+		// Normal mode - try strategies sequentially until one succeeds
+		for (const strategy of strategies) {
+			const result = await strategy.apply();
+			if (result.confidence === 1) {
+				return result;
+			}
+		}
+		// If all strategies fail, try git fallback
+		
+		const result = await applyGitFallback(hunk, content);
+		if(result.confidence === 1) {
+			return result;
+		}
+	}
 
-  return { confidence: 0, result: content, strategy: 'none' };
+	return { confidence: 0, result: content, strategy: 'none' };
 }
diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts
index 86495f9..7737d90 100644
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -4,6 +4,12 @@ import { applyEdit } from "./edit-strategies"
 import { DiffResult, DiffStrategy } from "../../types"
 
 export class NewUnifiedDiffStrategy implements DiffStrategy {
+	private readonly confidenceThreshold: number
+
+	constructor(confidenceThreshold: number = 0.9) {
+		this.confidenceThreshold = Math.max(confidenceThreshold, 0.8)
+	}
+
 	private parseUnifiedDiff(diff: string): Diff {
 		const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
 		const lines = diff.split("\n")
@@ -185,7 +191,6 @@ Your diff here
 		startLine?: number,
 		endLine?: number
 	): Promise<DiffResult> {
-		const MIN_CONFIDENCE = 0.9
 		const parsedDiff = this.parseUnifiedDiff(diffContent)
 		const originalLines = originalContent.split("\n")
 		let result = [...originalLines]
@@ -199,20 +204,20 @@ Your diff here
 
 		for (const hunk of parsedDiff.hunks) {
 			const contextStr = prepareSearchString(hunk.changes)
-			const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result)
+			const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
 
-			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
-			if (editResult.confidence > MIN_CONFIDENCE) {
+			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold)
+			if (editResult.confidence >= this.confidenceThreshold) {
 				result = editResult.result
 			} else {
 				// Determine if the failure is due to search or edit
-				if (confidence < MIN_CONFIDENCE) {
+				if (confidence < this.confidenceThreshold) {
 					// Search failure - likely due to context not matching
 					const contextLines = hunk.changes.filter(c => c.type === "context").length
 					const totalLines = hunk.changes.length
 					const contextRatio = contextLines / totalLines
 
-					let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n`
+					let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
 					errorMsg += "Debug Info:\n"
 					errorMsg += `- Search Strategy Used: ${strategy}\n`
 					errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts
index 16b0ee8..05f2166 100644
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -18,11 +18,11 @@ const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
 const MAX_WINDOW_SIZE = 500 // maximum lines in a window
 
 // Helper function to calculate adaptive confidence threshold based on file size
-function getAdaptiveThreshold(contentLength: number): number {
+function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number {
 	if (contentLength <= LARGE_FILE_THRESHOLD) {
-		return MIN_CONFIDENCE
+		return baseThreshold
 	}
-	return MIN_CONFIDENCE_LARGE_FILE
+	return Math.max(baseThreshold - 0.07, 0.8) // Reduce threshold for large files but keep minimum at 80%
 }
 
 // Helper function to evaluate content uniqueness
@@ -109,7 +109,7 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string)
 }
 
 // Helper function to validate context lines against original content
-function validateContextLines(searchStr: string, content: string): number {
+function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number {
 	// Extract just the context lines from the search string
 	const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines
 
@@ -117,7 +117,7 @@ function validateContextLines(searchStr: string, content: string): number {
 	const similarity = evaluateSimilarity(contextLines.join("\n"), content)
 
 	// Get adaptive threshold based on content size
-	const threshold = getAdaptiveThreshold(content.split("\n").length)
+	const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold)
 
 	// Calculate uniqueness boost
 	const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
@@ -243,18 +243,17 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
 }
 
 // String similarity strategy
-export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult {
 	const searchLines = searchStr.split("\n")
 	let bestScore = 0
 	let bestIndex = -1
-	const minScore = 0.8
 
 	for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
 		const windowStr = content.slice(i, i + searchLines.length).join("\n")
 		const score = compareTwoStrings(searchStr, windowStr)
 		if (score > bestScore && score >= minScore) {
 			const similarity = getDMPSimilarity(searchStr, windowStr)
-			const contextSimilarity = validateContextLines(searchStr, windowStr)
+			const contextSimilarity = validateContextLines(searchStr, windowStr, minScore)
 			const adjustedScore = Math.min(similarity, contextSimilarity) * score
 
 			if (adjustedScore > bestScore) {
@@ -385,13 +384,15 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
 }
 
 // Main search function that tries all strategies
-export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult {
 	const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]
 
 	let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }
 
 	for (const strategy of strategies) {
-		const result = strategy(searchStr, content, startIndex)
+		const result = strategy === findSimilarityMatch 
+			? strategy(searchStr, content, startIndex, minConfidence)
+			: strategy(searchStr, content, startIndex)
 		if (result.confidence > bestResult.confidence) {
 			bestResult = result
 		}