diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index f8dc166..6f14f31 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -30,7 +30,7 @@ function inferIndentation(line: string, contextLines: string[], previousIndent: } // Context matching edit strategy -export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { +export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'context' }; } @@ -71,7 +71,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio newResult.slice(matchPosition, matchPosition + windowSize).join('\n') ) - const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context'); + const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), confidenceThreshold); return { confidence: similarity * confidence, @@ -81,7 +81,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio } // DMP edit strategy -export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult { +export function applyDMP(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'dmp' }; } @@ -123,7 +123,7 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): // Calculate confidence const similarity = getDMPSimilarity(beforeText, targetText); - const confidence = validateEditResult(hunk, patchedText, 'dmp'); + const confidence = validateEditResult(hunk, patchedText, confidenceThreshold); return { confidence: similarity * confidence, @@ -254,25 +254,25 @@ export async function applyEdit( content: string[], matchPosition: number, confidence: number, - minConfidence: number = 0.9 + confidenceThreshold: number = 0.97 ): Promise { // Don't attempt regular edits if confidence is too low - if (confidence < minConfidence) { - console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); + if (confidence < confidenceThreshold) { + console.log(`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`); return applyGitFallback(hunk, content); } // Try each strategy in sequence until one succeeds const strategies = [ - { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition, confidenceThreshold) }, + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition, confidenceThreshold) }, { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } ]; // Try strategies sequentially until one succeeds for (const strategy of strategies) { const result = await strategy.apply(); - if (result.confidence >= minConfidence) { + if (result.confidence >= confidenceThreshold) { return result; } } diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 05f2166..8b2aa70 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -9,16 +9,13 @@ export type SearchResult = { strategy: string } -//TODO: this should be configurable -const MIN_CONFIDENCE = 0.97 -const MIN_CONFIDENCE_LARGE_FILE = 0.9 const LARGE_FILE_THRESHOLD = 1000 // lines const UNIQUE_CONTENT_BOOST = 0.05 const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows const MAX_WINDOW_SIZE = 500 // maximum lines in a window // Helper function to calculate adaptive confidence threshold based on file size -function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number { +function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number { if (contentLength <= LARGE_FILE_THRESHOLD) { return baseThreshold } @@ -69,11 +66,7 @@ export function getDMPSimilarity(original: string, modified: string): number { } // Helper function to validate edit results using hunk information -// Returns a confidence reduction value between 0 and 1 -// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, -// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. -// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). -export function validateEditResult(hunk: Hunk, result: string, strategy: string): number { +export function validateEditResult(hunk: Hunk, result: string, confidenceThreshold: number): number { const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) const originalSkeleton = hunkDeepCopy.changes @@ -90,26 +83,20 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) if (originalSimilarity > 0.97 && expectedSimilarity !== 1) { - if (originalSimilarity === 1) { - if (originalSimilarity > 0.97) { if (originalSimilarity === 1) { return 0.5 - } else { - return 0.8 - } - } } else { return 0.8 } } - const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1 + const multiplier = expectedSimilarity < confidenceThreshold ? expectedSimilarity : 1 return multiplier } // Helper function to validate context lines against original content -function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number { +function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number { // Extract just the context lines from the search string const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines @@ -117,7 +104,7 @@ function validateContextLines(searchStr: string, content: string, baseThreshold: const similarity = evaluateSimilarity(contextLines.join("\n"), content) // Get adaptive threshold based on content size - const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold) + const threshold = getAdaptiveThreshold(content.split("\n").length, confidenceThreshold) // Calculate uniqueness boost const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) @@ -207,8 +194,7 @@ function combineOverlappingMatches( return combinedMatches } -// Modified search functions to use sliding windows -export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length) const matches: (SearchResult & { windowIndex: number })[] = [] @@ -226,7 +212,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: .join("\n") const similarity = getDMPSimilarity(searchStr, matchedContent) - const contextSimilarity = validateContextLines(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) matches.push({ @@ -243,7 +229,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult { +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") let bestScore = 0 let bestIndex = -1 @@ -251,9 +237,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { const windowStr = content.slice(i, i + searchLines.length).join("\n") const score = compareTwoStrings(searchStr, windowStr) - if (score > bestScore && score >= minScore) { + if (score > bestScore && score >= confidenceThreshold) { const similarity = getDMPSimilarity(searchStr, windowStr) - const contextSimilarity = validateContextLines(searchStr, windowStr, minScore) + const contextSimilarity = validateContextLines(searchStr, windowStr, confidenceThreshold) const adjustedScore = Math.min(similarity, contextSimilarity) * score if (adjustedScore > bestScore) { @@ -271,7 +257,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI } // Levenshtein strategy -export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const candidates = [] @@ -283,7 +269,7 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start const closestMatch = closest(searchStr, candidates) const index = startIndex + candidates.indexOf(closestMatch) const similarity = getDMPSimilarity(searchStr, closestMatch) - const contextSimilarity = validateContextLines(searchStr, closestMatch) + const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) return { index, @@ -355,7 +341,7 @@ function validateAnchorPositions( } // Anchor-based search strategy -export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const anchors = identifyAnchors(searchStr, content.slice(startIndex)) @@ -370,7 +356,7 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex const matchPosition = startIndex + offset const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n") const similarity = getDMPSimilarity(searchStr, matchedContent) - const contextSimilarity = validateContextLines(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight return { @@ -384,15 +370,18 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult { - const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] +export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { + const strategies = [ + findExactMatch, + findAnchorMatch, + findSimilarityMatch, + findLevenshteinMatch + ] let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } for (const strategy of strategies) { - const result = strategy === findSimilarityMatch - ? strategy(searchStr, content, startIndex, minConfidence) - : strategy(searchStr, content, startIndex) + const result = strategy(searchStr, content, startIndex, confidenceThreshold) if (result.confidence > bestResult.confidence) { bestResult = result }