From e6d3db6075d51ab6b1759e5604899f2c5c89dd6d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:59:46 -0500 Subject: [PATCH] refactor: use confidenceThreshold from the settings in new diff strategy --- .../strategies/new-unified/edit-strategies.ts | 88 ++++++++++--------- src/core/diff/strategies/new-unified/index.ts | 17 ++-- .../new-unified/search-strategies.ts | 21 ++--- 3 files changed, 69 insertions(+), 57 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index e64173e..05f5732 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -249,48 +249,54 @@ async function applyGitFallback(hunk: Hunk, content: string[]): Promise { - // Don't attempt regular edits if confidence is too low - const MIN_CONFIDENCE = 0.9; - if (confidence < MIN_CONFIDENCE && debug === '') { - console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`); - return applyGitFallback(hunk, content); - } +export async function applyEdit( + hunk: Hunk, + content: string[], + matchPosition: number, + confidence: number, + debug: string = '', + minConfidence: number = 0.9 +): Promise { + // Don't attempt regular edits if confidence is too low + if (confidence < minConfidence && debug === '') { + console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); + return applyGitFallback(hunk, content); + } - // Try each strategy in sequence until one succeeds - const strategies = [ - { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, - { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } - ]; + // Try each strategy in sequence until one succeeds + const strategies = [ + { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } + ]; - if (debug !== '') { - // In debug mode, try all strategies including git fallback - const results = await Promise.all([ - ...strategies.map(async strategy => { - console.log(`Attempting edit with ${strategy.name} strategy...`); - const result = await strategy.apply(); - console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); - return result; - }) - ]); - - return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; - } else { - // Normal mode - try strategies sequentially until one succeeds - for (const strategy of strategies) { - const result = await strategy.apply(); - if (result.confidence === 1) { - return result; - } - } - // If all strategies fail, try git fallback - - const result = await applyGitFallback(hunk, content); - if(result.confidence === 1) { - return result; - } - } + if (debug !== '') { + // In debug mode, try all strategies including git fallback + const results = await Promise.all([ + ...strategies.map(async strategy => { + console.log(`Attempting edit with ${strategy.name} strategy...`); + const result = await strategy.apply(); + console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); + return result; + }) + ]); + + return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; + } else { + // Normal mode - try strategies sequentially until one succeeds + for (const strategy of strategies) { + const result = await strategy.apply(); + if (result.confidence === 1) { + return result; + } + } + // If all strategies fail, try git fallback + + const result = await applyGitFallback(hunk, content); + if(result.confidence === 1) { + return result; + } + } - return { confidence: 0, result: content, strategy: 'none' }; + return { confidence: 0, result: content, strategy: 'none' }; } diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 86495f9..7737d90 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -4,6 +4,12 @@ import { applyEdit } from "./edit-strategies" import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { + private readonly confidenceThreshold: number + + constructor(confidenceThreshold: number = 0.9) { + this.confidenceThreshold = Math.max(confidenceThreshold, 0.8) + } + private parseUnifiedDiff(diff: string): Diff { const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes const lines = diff.split("\n") @@ -185,7 +191,6 @@ Your diff here startLine?: number, endLine?: number ): Promise { - const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) const originalLines = originalContent.split("\n") let result = [...originalLines] @@ -199,20 +204,20 @@ Your diff here for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) - const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result) + const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) - const editResult = await applyEdit(hunk, result, matchPosition, confidence, '') - if (editResult.confidence > MIN_CONFIDENCE) { + const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold) + if (editResult.confidence >= this.confidenceThreshold) { result = editResult.result } else { // Determine if the failure is due to search or edit - if (confidence < MIN_CONFIDENCE) { + if (confidence < this.confidenceThreshold) { // Search failure - likely due to context not matching const contextLines = hunk.changes.filter(c => c.type === "context").length const totalLines = hunk.changes.length const contextRatio = contextLines / totalLines - let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n` + let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n` errorMsg += "Debug Info:\n" errorMsg += `- Search Strategy Used: ${strategy}\n` errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 16b0ee8..05f2166 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -18,11 +18,11 @@ const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows const MAX_WINDOW_SIZE = 500 // maximum lines in a window // Helper function to calculate adaptive confidence threshold based on file size -function getAdaptiveThreshold(contentLength: number): number { +function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number { if (contentLength <= LARGE_FILE_THRESHOLD) { - return MIN_CONFIDENCE + return baseThreshold } - return MIN_CONFIDENCE_LARGE_FILE + return Math.max(baseThreshold - 0.07, 0.8) // Reduce threshold for large files but keep minimum at 80% } // Helper function to evaluate content uniqueness @@ -109,7 +109,7 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) } // Helper function to validate context lines against original content -function validateContextLines(searchStr: string, content: string): number { +function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number { // Extract just the context lines from the search string const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines @@ -117,7 +117,7 @@ function validateContextLines(searchStr: string, content: string): number { const similarity = evaluateSimilarity(contextLines.join("\n"), content) // Get adaptive threshold based on content size - const threshold = getAdaptiveThreshold(content.split("\n").length) + const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold) // Calculate uniqueness boost const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) @@ -243,18 +243,17 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult { const searchLines = searchStr.split("\n") let bestScore = 0 let bestIndex = -1 - const minScore = 0.8 for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { const windowStr = content.slice(i, i + searchLines.length).join("\n") const score = compareTwoStrings(searchStr, windowStr) if (score > bestScore && score >= minScore) { const similarity = getDMPSimilarity(searchStr, windowStr) - const contextSimilarity = validateContextLines(searchStr, windowStr) + const contextSimilarity = validateContextLines(searchStr, windowStr, minScore) const adjustedScore = Math.min(similarity, contextSimilarity) * score if (adjustedScore > bestScore) { @@ -385,13 +384,15 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult { const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } for (const strategy of strategies) { - const result = strategy(searchStr, content, startIndex) + const result = strategy === findSimilarityMatch + ? strategy(searchStr, content, startIndex, minConfidence) + : strategy(searchStr, content, startIndex) if (result.confidence > bestResult.confidence) { bestResult = result }