refactor: use confidenceThreshold from the settings in new diff strategy

This commit is contained in:
Daniel Riccio
2025-01-14 17:59:46 -05:00
parent e00ec0cc3f
commit e6d3db6075
3 changed files with 69 additions and 57 deletions

View File

@@ -249,11 +249,17 @@ async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResu
}
// Main edit function that tries strategies sequentially
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
export async function applyEdit(
hunk: Hunk,
content: string[],
matchPosition: number,
confidence: number,
debug: string = '',
minConfidence: number = 0.9
): Promise<EditResult> {
// Don't attempt regular edits if confidence is too low
const MIN_CONFIDENCE = 0.9;
if (confidence < MIN_CONFIDENCE && debug === '') {
console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`);
if (confidence < minConfidence && debug === '') {
console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`);
return applyGitFallback(hunk, content);
}

View File

@@ -4,6 +4,12 @@ import { applyEdit } from "./edit-strategies"
import { DiffResult, DiffStrategy } from "../../types"
export class NewUnifiedDiffStrategy implements DiffStrategy {
private readonly confidenceThreshold: number
/**
 * Creates a strategy with the given minimum match confidence.
 *
 * The stored threshold is handed to the search and edit steps and is also the
 * cut-off an edit result must reach to be accepted.
 *
 * @param confidenceThreshold - Requested minimum confidence on a 0–1 scale.
 *   Values are clamped to the range [0.8, 1]; `NaN` falls back to the default
 *   of 0.9, because a `NaN` threshold would make every comparison false and
 *   reject all edits.
 */
constructor(confidenceThreshold: number = 0.9) {
	const requested = Number.isNaN(confidenceThreshold) ? 0.9 : confidenceThreshold
	// Lower bound keeps matching strict; upper bound keeps the threshold reachable,
	// since confidence scores never exceed 1.
	this.confidenceThreshold = Math.min(Math.max(requested, 0.8), 1)
}
private parseUnifiedDiff(diff: string): Diff {
const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
const lines = diff.split("\n")
@@ -185,7 +191,6 @@ Your diff here
startLine?: number,
endLine?: number
): Promise<DiffResult> {
const MIN_CONFIDENCE = 0.9
const parsedDiff = this.parseUnifiedDiff(diffContent)
const originalLines = originalContent.split("\n")
let result = [...originalLines]
@@ -199,20 +204,20 @@ Your diff here
for (const hunk of parsedDiff.hunks) {
const contextStr = prepareSearchString(hunk.changes)
const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result)
const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
if (editResult.confidence > MIN_CONFIDENCE) {
const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold)
if (editResult.confidence >= this.confidenceThreshold) {
result = editResult.result
} else {
// Determine if the failure is due to search or edit
if (confidence < MIN_CONFIDENCE) {
if (confidence < this.confidenceThreshold) {
// Search failure - likely due to context not matching
const contextLines = hunk.changes.filter(c => c.type === "context").length
const totalLines = hunk.changes.length
const contextRatio = contextLines / totalLines
let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n`
let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += `- Search Strategy Used: ${strategy}\n`
errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`

View File

@@ -18,11 +18,11 @@ const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
const MAX_WINDOW_SIZE = 500 // maximum lines in a window
// Helper function to calculate adaptive confidence threshold based on file size
/**
 * Picks the confidence threshold to apply to content of a given size.
 *
 * Content with at most LARGE_FILE_THRESHOLD lines uses `baseThreshold` as is.
 * Larger content uses a threshold relaxed by 0.07 and floored at 0.8, but never
 * higher than `baseThreshold` itself, so a base below 0.8 is not silently
 * tightened for large files.
 *
 * @param contentLength - Number of lines in the content being compared.
 * @param baseThreshold - Threshold used for content at or under the large-file cut-off.
 * @returns The threshold that similarity scores should be compared against.
 */
function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number {
	if (contentLength <= LARGE_FILE_THRESHOLD) {
		return baseThreshold
	}
	// Reduce threshold for large files but keep minimum at 80%
	const relaxed = Math.max(baseThreshold - 0.07, 0.8)
	// The 80% floor must not push the large-file threshold above the base one.
	return Math.min(baseThreshold, relaxed)
}
// Helper function to evaluate content uniqueness
@@ -109,7 +109,7 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string)
}
// Helper function to validate context lines against original content
function validateContextLines(searchStr: string, content: string): number {
function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number {
// Extract just the context lines from the search string
const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines
@@ -117,7 +117,7 @@ function validateContextLines(searchStr: string, content: string): number {
const similarity = evaluateSimilarity(contextLines.join("\n"), content)
// Get adaptive threshold based on content size
const threshold = getAdaptiveThreshold(content.split("\n").length)
const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold)
// Calculate uniqueness boost
const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
@@ -243,18 +243,17 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
}
// String similarity strategy
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult {
const searchLines = searchStr.split("\n")
let bestScore = 0
let bestIndex = -1
const minScore = 0.8
for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
const windowStr = content.slice(i, i + searchLines.length).join("\n")
const score = compareTwoStrings(searchStr, windowStr)
if (score > bestScore && score >= minScore) {
const similarity = getDMPSimilarity(searchStr, windowStr)
const contextSimilarity = validateContextLines(searchStr, windowStr)
const contextSimilarity = validateContextLines(searchStr, windowStr, minScore)
const adjustedScore = Math.min(similarity, contextSimilarity) * score
if (adjustedScore > bestScore) {
@@ -385,13 +384,15 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
}
// Main search function that tries all strategies
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult {
const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }
for (const strategy of strategies) {
const result = strategy(searchStr, content, startIndex)
const result = strategy === findSimilarityMatch
? strategy(searchStr, content, startIndex, minConfidence)
: strategy(searchStr, content, startIndex)
if (result.confidence > bestResult.confidence) {
bestResult = result
}