refactor: update edit and search strategies to use configurable confidence thresholds

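- Modified applyContextMatching and applyDMP to accept a confidenceThreshold parameter so callers can tune how strict the edit strategies are; applyEdit's minConfidence parameter is renamed to confidenceThreshold and its default rises from 0.9 to 0.97.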
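- Updated validateEditResult to take confidenceThreshold in place of its strategy argument; its final multiplier is now expectedSimilarity itself (previously 0.96 + 0.04 * expectedSimilarity) whenever expectedSimilarity falls below the threshold. validateContextLines and getAdaptiveThreshold now require the threshold explicitly instead of defaulting to 0.97, and the module-level MIN_CONFIDENCE constants are removed.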
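- Adjusted findExactMatch, findSimilarityMatch, findLevenshteinMatch, and findAnchorMatch to accept a confidenceThreshold parameter (default 0.97) and pass it to validateContextLines; findSimilarityMatch uses it in place of minScore (which defaulted to 0.8), and findBestMatch now forwards the threshold to every strategy.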
This commit is contained in:
Daniel Riccio
2025-01-15 10:54:25 -05:00
parent f696f8e0f1
commit f9a453a44f
2 changed files with 32 additions and 43 deletions

View File

@@ -30,7 +30,7 @@ function inferIndentation(line: string, contextLines: string[], previousIndent:
} }
// Context matching edit strategy // Context matching edit strategy
export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult {
if (matchPosition === -1) { if (matchPosition === -1) {
return { confidence: 0, result: content, strategy: 'context' }; return { confidence: 0, result: content, strategy: 'context' };
} }
@@ -71,7 +71,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
newResult.slice(matchPosition, matchPosition + windowSize).join('\n') newResult.slice(matchPosition, matchPosition + windowSize).join('\n')
) )
const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context'); const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), confidenceThreshold);
return { return {
confidence: similarity * confidence, confidence: similarity * confidence,
@@ -81,7 +81,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
} }
// DMP edit strategy // DMP edit strategy
export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult { export function applyDMP(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult {
if (matchPosition === -1) { if (matchPosition === -1) {
return { confidence: 0, result: content, strategy: 'dmp' }; return { confidence: 0, result: content, strategy: 'dmp' };
} }
@@ -123,7 +123,7 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
// Calculate confidence // Calculate confidence
const similarity = getDMPSimilarity(beforeText, targetText); const similarity = getDMPSimilarity(beforeText, targetText);
const confidence = validateEditResult(hunk, patchedText, 'dmp'); const confidence = validateEditResult(hunk, patchedText, confidenceThreshold);
return { return {
confidence: similarity * confidence, confidence: similarity * confidence,
@@ -254,25 +254,25 @@ export async function applyEdit(
content: string[], content: string[],
matchPosition: number, matchPosition: number,
confidence: number, confidence: number,
minConfidence: number = 0.9 confidenceThreshold: number = 0.97
): Promise<EditResult> { ): Promise<EditResult> {
// Don't attempt regular edits if confidence is too low // Don't attempt regular edits if confidence is too low
if (confidence < minConfidence) { if (confidence < confidenceThreshold) {
console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); console.log(`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`);
return applyGitFallback(hunk, content); return applyGitFallback(hunk, content);
} }
// Try each strategy in sequence until one succeeds // Try each strategy in sequence until one succeeds
const strategies = [ const strategies = [
{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition, confidenceThreshold) },
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition, confidenceThreshold) },
{ name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
]; ];
// Try strategies sequentially until one succeeds // Try strategies sequentially until one succeeds
for (const strategy of strategies) { for (const strategy of strategies) {
const result = await strategy.apply(); const result = await strategy.apply();
if (result.confidence >= minConfidence) { if (result.confidence >= confidenceThreshold) {
return result; return result;
} }
} }

View File

@@ -9,16 +9,13 @@ export type SearchResult = {
strategy: string strategy: string
} }
//TODO: this should be configurable
const MIN_CONFIDENCE = 0.97
const MIN_CONFIDENCE_LARGE_FILE = 0.9
const LARGE_FILE_THRESHOLD = 1000 // lines const LARGE_FILE_THRESHOLD = 1000 // lines
const UNIQUE_CONTENT_BOOST = 0.05 const UNIQUE_CONTENT_BOOST = 0.05
const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
const MAX_WINDOW_SIZE = 500 // maximum lines in a window const MAX_WINDOW_SIZE = 500 // maximum lines in a window
// Helper function to calculate adaptive confidence threshold based on file size // Helper function to calculate adaptive confidence threshold based on file size
function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number { function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number {
if (contentLength <= LARGE_FILE_THRESHOLD) { if (contentLength <= LARGE_FILE_THRESHOLD) {
return baseThreshold return baseThreshold
} }
@@ -69,11 +66,7 @@ export function getDMPSimilarity(original: string, modified: string): number {
} }
// Helper function to validate edit results using hunk information // Helper function to validate edit results using hunk information
// Returns a confidence reduction value between 0 and 1 export function validateEditResult(hunk: Hunk, result: string, confidenceThreshold: number): number {
// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk))
const originalSkeleton = hunkDeepCopy.changes const originalSkeleton = hunkDeepCopy.changes
@@ -90,26 +83,20 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string)
const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result)
if (originalSimilarity > 0.97 && expectedSimilarity !== 1) { if (originalSimilarity > 0.97 && expectedSimilarity !== 1) {
if (originalSimilarity === 1) {
if (originalSimilarity > 0.97) {
if (originalSimilarity === 1) { if (originalSimilarity === 1) {
return 0.5 return 0.5
} else {
return 0.8
}
}
} else { } else {
return 0.8 return 0.8
} }
} }
const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1 const multiplier = expectedSimilarity < confidenceThreshold ? expectedSimilarity : 1
return multiplier return multiplier
} }
// Helper function to validate context lines against original content // Helper function to validate context lines against original content
function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number { function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number {
// Extract just the context lines from the search string // Extract just the context lines from the search string
const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines
@@ -117,7 +104,7 @@ function validateContextLines(searchStr: string, content: string, baseThreshold:
const similarity = evaluateSimilarity(contextLines.join("\n"), content) const similarity = evaluateSimilarity(contextLines.join("\n"), content)
// Get adaptive threshold based on content size // Get adaptive threshold based on content size
const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold) const threshold = getAdaptiveThreshold(content.split("\n").length, confidenceThreshold)
// Calculate uniqueness boost // Calculate uniqueness boost
const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
@@ -207,8 +194,7 @@ function combineOverlappingMatches(
return combinedMatches return combinedMatches
} }
// Modified search functions to use sliding windows export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
const searchLines = searchStr.split("\n") const searchLines = searchStr.split("\n")
const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length) const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length)
const matches: (SearchResult & { windowIndex: number })[] = [] const matches: (SearchResult & { windowIndex: number })[] = []
@@ -226,7 +212,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
.join("\n") .join("\n")
const similarity = getDMPSimilarity(searchStr, matchedContent) const similarity = getDMPSimilarity(searchStr, matchedContent)
const contextSimilarity = validateContextLines(searchStr, matchedContent) const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
const confidence = Math.min(similarity, contextSimilarity) const confidence = Math.min(similarity, contextSimilarity)
matches.push({ matches.push({
@@ -243,7 +229,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
} }
// String similarity strategy // String similarity strategy
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult { export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
const searchLines = searchStr.split("\n") const searchLines = searchStr.split("\n")
let bestScore = 0 let bestScore = 0
let bestIndex = -1 let bestIndex = -1
@@ -251,9 +237,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
const windowStr = content.slice(i, i + searchLines.length).join("\n") const windowStr = content.slice(i, i + searchLines.length).join("\n")
const score = compareTwoStrings(searchStr, windowStr) const score = compareTwoStrings(searchStr, windowStr)
if (score > bestScore && score >= minScore) { if (score > bestScore && score >= confidenceThreshold) {
const similarity = getDMPSimilarity(searchStr, windowStr) const similarity = getDMPSimilarity(searchStr, windowStr)
const contextSimilarity = validateContextLines(searchStr, windowStr, minScore) const contextSimilarity = validateContextLines(searchStr, windowStr, confidenceThreshold)
const adjustedScore = Math.min(similarity, contextSimilarity) * score const adjustedScore = Math.min(similarity, contextSimilarity) * score
if (adjustedScore > bestScore) { if (adjustedScore > bestScore) {
@@ -271,7 +257,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
} }
// Levenshtein strategy // Levenshtein strategy
export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
const searchLines = searchStr.split("\n") const searchLines = searchStr.split("\n")
const candidates = [] const candidates = []
@@ -283,7 +269,7 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
const closestMatch = closest(searchStr, candidates) const closestMatch = closest(searchStr, candidates)
const index = startIndex + candidates.indexOf(closestMatch) const index = startIndex + candidates.indexOf(closestMatch)
const similarity = getDMPSimilarity(searchStr, closestMatch) const similarity = getDMPSimilarity(searchStr, closestMatch)
const contextSimilarity = validateContextLines(searchStr, closestMatch) const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold)
const confidence = Math.min(similarity, contextSimilarity) const confidence = Math.min(similarity, contextSimilarity)
return { return {
index, index,
@@ -355,7 +341,7 @@ function validateAnchorPositions(
} }
// Anchor-based search strategy // Anchor-based search strategy
export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
const searchLines = searchStr.split("\n") const searchLines = searchStr.split("\n")
const anchors = identifyAnchors(searchStr, content.slice(startIndex)) const anchors = identifyAnchors(searchStr, content.slice(startIndex))
@@ -370,7 +356,7 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
const matchPosition = startIndex + offset const matchPosition = startIndex + offset
const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n") const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n")
const similarity = getDMPSimilarity(searchStr, matchedContent) const similarity = getDMPSimilarity(searchStr, matchedContent)
const contextSimilarity = validateContextLines(searchStr, matchedContent) const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight
return { return {
@@ -384,15 +370,18 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
} }
// Main search function that tries all strategies // Main search function that tries all strategies
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult { export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] const strategies = [
findExactMatch,
findAnchorMatch,
findSimilarityMatch,
findLevenshteinMatch
]
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }
for (const strategy of strategies) { for (const strategy of strategies) {
const result = strategy === findSimilarityMatch const result = strategy(searchStr, content, startIndex, confidenceThreshold)
? strategy(searchStr, content, startIndex, minConfidence)
: strategy(searchStr, content, startIndex)
if (result.confidence > bestResult.confidence) { if (result.confidence > bestResult.confidence) {
bestResult = result bestResult = result
} }