feat: implement hunk splitting strategy in diff application so that large diffs can still be applied when the search for the full hunk fails

This commit is contained in:
Daniel Riccio
2025-01-16 12:49:02 -05:00
parent 36b7da07d2
commit 04f6e4f03f

View File

@@ -1,4 +1,4 @@
import { Diff, Hunk } from "./types"
import { Diff, Hunk, Change } from "./types"
import { findBestMatch, prepareSearchString } from "./search-strategies"
import { applyEdit } from "./edit-strategies"
import { DiffResult, DiffStrategy } from "../../types"
@@ -185,6 +185,59 @@ Your diff here
</apply_diff>`
}
/**
 * Splits a hunk into smaller hunks based on contiguous changes.
 *
 * Non-context changes that are separated by at most MAX_CONTEXT_LINES context
 * lines stay together in one sub-hunk (the context lines between them are kept).
 * Once more than MAX_CONTEXT_LINES context lines follow a change, the current
 * sub-hunk is closed and a new one is started at the next non-context change.
 *
 * Every sub-hunk carries at most MAX_CONTEXT_LINES context lines before its
 * first change and at most MAX_CONTEXT_LINES after its last change. Context
 * lines lying between two sub-hunks may therefore appear in both of them
 * (as trailing context of the first and leading context of the second).
 *
 * @param hunk - The hunk to split. Its `changes` array is only read; the change
 *   objects themselves are shared with the returned sub-hunks.
 * @returns The sub-hunks in their original order. The array is empty when the
 *   hunk contains no non-context change.
 */
private splitHunk(hunk: Hunk): Hunk[] {
	const MAX_CONTEXT_LINES = 3 // Keep 3 lines of context before/after changes
	const result: Hunk[] = []
	let currentHunk: Hunk | null = null
	let contextBefore: Change[] = []
	let contextAfter: Change[] = []

	for (const change of hunk.changes) {
		if (change.type === 'context') {
			if (!currentHunk) {
				// No open sub-hunk: keep a sliding window of the most recent context lines
				contextBefore.push(change)
				if (contextBefore.length > MAX_CONTEXT_LINES) {
					contextBefore.shift()
				}
			} else {
				contextAfter.push(change)
				if (contextAfter.length > MAX_CONTEXT_LINES) {
					// The gap is too long to bridge: close the sub-hunk with only the
					// first MAX_CONTEXT_LINES lines of the gap as trailing context.
					currentHunk.changes.push(...contextAfter.slice(0, MAX_CONTEXT_LINES))
					result.push(currentHunk)
					currentHunk = null
					// Seed the next sub-hunk's leading context with the last
					// MAX_CONTEXT_LINES lines seen so far, so the window stays capped.
					contextBefore = contextAfter.slice(-MAX_CONTEXT_LINES)
					contextAfter = []
				}
			}
		} else {
			if (!currentHunk) {
				// Start a new sub-hunk with a copy of the buffered leading context
				currentHunk = { changes: [...contextBefore] }
				contextAfter = []
			} else if (contextAfter.length > 0) {
				// Short gap between two changes: keep it inside the current sub-hunk
				currentHunk.changes.push(...contextAfter)
				contextAfter = []
			}
			currentHunk.changes.push(change)
		}
	}

	// Flush the sub-hunk that is still open, including any trailing context
	if (currentHunk) {
		if (contextAfter.length > 0) {
			currentHunk.changes.push(...contextAfter)
		}
		result.push(currentHunk)
	}

	return result
}
async applyDiff(
originalContent: string,
diffContent: string,
@@ -206,55 +259,82 @@ Your diff here
const contextStr = prepareSearchString(hunk.changes)
const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
if (confidence < 1.1) {
console.log('Full hunk application failed, trying sub-hunks strategy')
// Try splitting the hunk into smaller hunks
const subHunks = this.splitHunk(hunk)
let subHunkSuccess = true
let subHunkResult = [...result]
for (const subHunk of subHunks) {
const subContextStr = prepareSearchString(subHunk.changes)
console.log(subContextStr)
const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold)
if (subSearchResult.confidence >= this.confidenceThreshold) {
const subEditResult = await applyEdit(subHunk, subHunkResult, subSearchResult.index, subSearchResult.confidence, this.confidenceThreshold)
if (subEditResult.confidence >= this.confidenceThreshold) {
subHunkResult = subEditResult.result
continue
}
}
subHunkSuccess = false
break
}
if (subHunkSuccess) {
result = subHunkResult
continue
}
// If sub-hunks also failed, return the original error
const contextLines = hunk.changes.filter(c => c.type === "context").length
const totalLines = hunk.changes.length
const contextRatio = contextLines / totalLines
let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += `- Search Strategy Used: ${strategy}\n`
errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n`
if (contextRatio < 0.2) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Not enough context lines to uniquely identify the location\n"
errorMsg += "- Add a few more lines of unchanged code around your changes\n"
} else if (contextRatio > 0.5) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Too many context lines may reduce search accuracy\n"
errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
} else {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- The diff may be targeting a different version of the file\n"
errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
}
if (startLine && endLine) {
errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
}
return { success: false, error: errorMsg }
}
const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold)
if (editResult.confidence >= this.confidenceThreshold) {
result = editResult.result
} else {
// Determine if the failure is due to search or edit
if (confidence < this.confidenceThreshold) {
// Search failure - likely due to context not matching
const contextLines = hunk.changes.filter(c => c.type === "context").length
const totalLines = hunk.changes.length
const contextRatio = contextLines / totalLines
// Edit failure - likely due to content mismatch
let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += "- The location was found but the content didn't match exactly\n"
errorMsg += "- This usually means the file has been modified since the diff was created\n"
errorMsg += "- Or the diff may be targeting a different version of the file\n"
errorMsg += "\nPossible Solutions:\n"
errorMsg += "1. Refresh your view of the file and create a new diff\n"
errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
errorMsg += "3. Ensure your diff targets the correct version of the file"
let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += `- Search Strategy Used: ${strategy}\n`
errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
if (contextRatio < 0.2) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Not enough context lines to uniquely identify the location\n"
errorMsg += "- Add a few more lines of unchanged code around your changes\n"
} else if (contextRatio > 0.5) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Too many context lines may reduce search accuracy\n"
errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
} else {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- The diff may be targeting a different version of the file\n"
errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
}
if (startLine && endLine) {
errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
}
return { success: false, error: errorMsg }
} else {
// Edit failure - likely due to content mismatch
let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += "- The location was found but the content didn't match exactly\n"
errorMsg += "- This usually means the file has been modified since the diff was created\n"
errorMsg += "- Or the diff may be targeting a different version of the file\n"
errorMsg += "\nPossible Solutions:\n"
errorMsg += "1. Refresh your view of the file and create a new diff\n"
errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
errorMsg += "3. Ensure your diff targets the correct version of the file"
return { success: false, error: errorMsg }
}
return { success: false, error: errorMsg }
}
}