refactor: use confidenceThreshold from the settings in the new unified diff strategy

2026-02-05 12:05:16 -05:00 · 2025-01-14 17:59:46 -05:00
parent e00ec0cc3f
commit e6d3db6075
3 changed files with 69 additions and 57 deletions
--- a/src/core/diff/strategies/new-unified/edit-strategies.ts
+++ b/src/core/diff/strategies/new-unified/edit-strategies.ts
@@ -249,11 +249,17 @@ async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResu
 }

 // Main edit function that tries strategies sequentially
-export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
+export async function applyEdit(
+	hunk: Hunk, 
+	content: string[], 
+	matchPosition: number, 
+	confidence: number, 
+	debug: string = '',
+	minConfidence: number = 0.9
+): Promise<EditResult> {
 	// Don't attempt regular edits if confidence is too low
-  const MIN_CONFIDENCE = 0.9;
-  if (confidence < MIN_CONFIDENCE && debug === '') {
-    console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`);
+	if (confidence < minConfidence && debug === '') {
+		console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`);
 		return applyGitFallback(hunk, content);
 	}

--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -4,6 +4,12 @@ import { applyEdit } from "./edit-strategies"
 import { DiffResult, DiffStrategy } from "../../types"

 export class NewUnifiedDiffStrategy implements DiffStrategy {
+	private readonly confidenceThreshold: number
+
+	constructor(confidenceThreshold: number = 0.9) {
+		this.confidenceThreshold = Math.max(confidenceThreshold, 0.8)
+	}
+
 	private parseUnifiedDiff(diff: string): Diff {
 		const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
 		const lines = diff.split("\n")
@@ -185,7 +191,6 @@ Your diff here
 		startLine?: number,
 		endLine?: number
 	): Promise<DiffResult> {
-		const MIN_CONFIDENCE = 0.9
 		const parsedDiff = this.parseUnifiedDiff(diffContent)
 		const originalLines = originalContent.split("\n")
 		let result = [...originalLines]
@@ -199,20 +204,20 @@ Your diff here

 		for (const hunk of parsedDiff.hunks) {
 			const contextStr = prepareSearchString(hunk.changes)
-			const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result)
+			const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold)

-			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
-			if (editResult.confidence > MIN_CONFIDENCE) {
+			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold)
+			if (editResult.confidence >= this.confidenceThreshold) {
 				result = editResult.result
 			} else {
 				// Determine if the failure is due to search or edit
-				if (confidence < MIN_CONFIDENCE) {
+				if (confidence < this.confidenceThreshold) {
 					// Search failure - likely due to context not matching
 					const contextLines = hunk.changes.filter(c => c.type === "context").length
 					const totalLines = hunk.changes.length
 					const contextRatio = contextLines / totalLines

-					let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n`
+					let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
 					errorMsg += "Debug Info:\n"
 					errorMsg += `- Search Strategy Used: ${strategy}\n`
 					errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -18,11 +18,11 @@ const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
 const MAX_WINDOW_SIZE = 500 // maximum lines in a window

 // Helper function to calculate adaptive confidence threshold based on file size
-function getAdaptiveThreshold(contentLength: number): number {
+function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number {
 	if (contentLength <= LARGE_FILE_THRESHOLD) {
-		return MIN_CONFIDENCE
+		return baseThreshold
 	}
-	return MIN_CONFIDENCE_LARGE_FILE
+	return Math.max(baseThreshold - 0.07, 0.8) // Reduce threshold for large files but keep minimum at 80%
 }

 // Helper function to evaluate content uniqueness
@@ -109,7 +109,7 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string)
 }

 // Helper function to validate context lines against original content
-function validateContextLines(searchStr: string, content: string): number {
+function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number {
 	// Extract just the context lines from the search string
 	const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines

@@ -117,7 +117,7 @@ function validateContextLines(searchStr: string, content: string): number {
 	const similarity = evaluateSimilarity(contextLines.join("\n"), content)

 	// Get adaptive threshold based on content size
-	const threshold = getAdaptiveThreshold(content.split("\n").length)
+	const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold)

 	// Calculate uniqueness boost
 	const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
@@ -243,18 +243,17 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
 }

 // String similarity strategy
-export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult {
 	const searchLines = searchStr.split("\n")
 	let bestScore = 0
 	let bestIndex = -1
-	const minScore = 0.8

 	for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
 		const windowStr = content.slice(i, i + searchLines.length).join("\n")
 		const score = compareTwoStrings(searchStr, windowStr)
 		if (score > bestScore && score >= minScore) {
 			const similarity = getDMPSimilarity(searchStr, windowStr)
-			const contextSimilarity = validateContextLines(searchStr, windowStr)
+			const contextSimilarity = validateContextLines(searchStr, windowStr, minScore)
 			const adjustedScore = Math.min(similarity, contextSimilarity) * score

 			if (adjustedScore > bestScore) {
@@ -385,13 +384,15 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
 }

 // Main search function that tries all strategies
-export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult {
 	const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]

 	let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }

 	for (const strategy of strategies) {
-		const result = strategy(searchStr, content, startIndex)
+		const result = strategy === findSimilarityMatch 
+			? strategy(searchStr, content, startIndex, minConfidence)
+			: strategy(searchStr, content, startIndex)
 		if (result.confidence > bestResult.confidence) {
 			bestResult = result
 		}