feat: implement hunk-splitting strategy in diff application so that large hunks whose search fails can be retried as smaller sub-hunks

2026-02-04 19:45:16 -05:00 · 2025-01-16 12:49:02 -05:00
parent 36b7da07d2
commit 04f6e4f03f
1 changed files with 125 additions and 45 deletions
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -1,4 +1,4 @@
-import { Diff, Hunk } from "./types"
+import { Diff, Hunk, Change } from "./types"
 import { findBestMatch, prepareSearchString } from "./search-strategies"
 import { applyEdit } from "./edit-strategies"
 import { DiffResult, DiffStrategy } from "../../types"
@@ -185,6 +185,59 @@ Your diff here
 </apply_diff>`
 	}
 	/**
 	 * Splits a hunk into smaller sub-hunks, one per group of nearby changes.
 	 *
 	 * Walks `hunk.changes` in order. Every non-context change is added to the
 	 * sub-hunk currently being built; context lines are buffered so that each
 	 * sub-hunk carries at most `MAX_CONTEXT_LINES` context lines before its first
 	 * change and at most `MAX_CONTEXT_LINES` after its last change. Changes that
 	 * are separated by fewer than `MAX_CONTEXT_LINES` context lines stay in the
 	 * same sub-hunk, with the separating context kept between them.
 	 *
 	 * Neighbouring sub-hunks may share context lines: the trailing context that
 	 * closes one sub-hunk is reused as the leading context of the next one.
 	 * The `Change` objects are reused by reference, not cloned, and the input
 	 * hunk itself is not modified.
 	 *
 	 * @param hunk - The hunk to split.
 	 * @returns The sub-hunks in their original order. The array is empty when the
 	 *          hunk contains no non-context changes.
 	 */
 	private splitHunk(hunk: Hunk): Hunk[] {
 		const MAX_CONTEXT_LINES = 3 // Keep at most 3 lines of context before/after changes
 		const result: Hunk[] = []
 		let currentHunk: Hunk | null = null
 		let contextBefore: Change[] = []
 		let contextAfter: Change[] = []
 		for (const change of hunk.changes) {
 			if (change.type === "context") {
 				if (!currentHunk) {
 					// No sub-hunk is open: remember only the most recent context lines
 					contextBefore.push(change)
 					if (contextBefore.length > MAX_CONTEXT_LINES) {
 						contextBefore.shift()
 					}
 				} else {
 					contextAfter.push(change)
 					// Use >= so the sub-hunk is closed with exactly MAX_CONTEXT_LINES trailing
 					// lines; a strict > would let one extra line into this sub-hunk and into
 					// the leading context handed to the next one.
 					if (contextAfter.length >= MAX_CONTEXT_LINES) {
 						currentHunk.changes.push(...contextAfter)
 						result.push(currentHunk)
 						currentHunk = null
 						// Reuse the trailing context as leading context for the next sub-hunk
 						contextBefore = contextAfter
 						contextAfter = []
 					}
 				}
 			} else {
 				if (!currentHunk) {
 					// First change after a gap: open a sub-hunk seeded with a copy of the leading context
 					currentHunk = { changes: [...contextBefore] }
 					contextAfter = []
 				} else if (contextAfter.length > 0) {
 					// The gap was short enough to stay in this sub-hunk: keep the context in between
 					currentHunk.changes.push(...contextAfter)
 					contextAfter = []
 				}
 				currentHunk.changes.push(change)
 			}
 		}
 		// Flush the sub-hunk that was still open when the input ended
 		if (currentHunk) {
 			if (contextAfter.length > 0) {
 				currentHunk.changes.push(...contextAfter)
 			}
 			result.push(currentHunk)
 		}
 		return result
 	}
 	async applyDiff(
 		originalContent: string,
 		diffContent: string,
@@ -206,55 +259,82 @@ Your diff here
 			const contextStr = prepareSearchString(hunk.changes)
 			const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
 			if (confidence < 1.1) {
        console.log('Full hunk application failed, trying sub-hunks strategy')
 				// Try splitting the hunk into smaller hunks
 				const subHunks = this.splitHunk(hunk)
 				let subHunkSuccess = true
 				let subHunkResult = [...result]
 				for (const subHunk of subHunks) {
 					const subContextStr = prepareSearchString(subHunk.changes)
          console.log(subContextStr)
 					const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold)
 					if (subSearchResult.confidence >= this.confidenceThreshold) {
 						const subEditResult = await applyEdit(subHunk, subHunkResult, subSearchResult.index, subSearchResult.confidence, this.confidenceThreshold)
 						if (subEditResult.confidence >= this.confidenceThreshold) {
 							subHunkResult = subEditResult.result
 							continue
 						}
 					}
 					subHunkSuccess = false
 					break
 				}
 				if (subHunkSuccess) {
 					result = subHunkResult
 					continue
 				}
 				// If sub-hunks also failed, return the original error
 				const contextLines = hunk.changes.filter(c => c.type === "context").length
 				const totalLines = hunk.changes.length
 				const contextRatio = contextLines / totalLines
 				let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
 				errorMsg += "Debug Info:\n"
 				errorMsg += `- Search Strategy Used: ${strategy}\n`
 				errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
 				errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n`
 				if (contextRatio < 0.2) {
 					errorMsg += "\nPossible Issues:\n"
 					errorMsg += "- Not enough context lines to uniquely identify the location\n"
 					errorMsg += "- Add a few more lines of unchanged code around your changes\n"
 				} else if (contextRatio > 0.5) {
 					errorMsg += "\nPossible Issues:\n"
 					errorMsg += "- Too many context lines may reduce search accuracy\n"
 					errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
 				} else {
 					errorMsg += "\nPossible Issues:\n"
 					errorMsg += "- The diff may be targeting a different version of the file\n"
 					errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
 				}
 				if (startLine && endLine) {
 					errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
 				}
 				return { success: false, error: errorMsg }
 			}
 			const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold)
 			if (editResult.confidence >= this.confidenceThreshold) {
 				result = editResult.result
 			} else {
-				// Determine if the failure is due to search or edit
+				// Edit failure - likely due to content mismatch
-				if (confidence < this.confidenceThreshold) {
+				let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n`
-					// Search failure - likely due to context not matching
+				errorMsg += "Debug Info:\n"
-					const contextLines = hunk.changes.filter(c => c.type === "context").length
+				errorMsg += "- The location was found but the content didn't match exactly\n"
-					const totalLines = hunk.changes.length
+				errorMsg += "- This usually means the file has been modified since the diff was created\n"
-					const contextRatio = contextLines / totalLines
+				errorMsg += "- Or the diff may be targeting a different version of the file\n"
 				errorMsg += "\nPossible Solutions:\n"
 				errorMsg += "1. Refresh your view of the file and create a new diff\n"
 				errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
 				errorMsg += "3. Ensure your diff targets the correct version of the file"
-					let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
+				return { success: false, error: errorMsg }
 					errorMsg += "Debug Info:\n"
 					errorMsg += `- Search Strategy Used: ${strategy}\n`
 					errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n`
 					if (contextRatio < 0.2) {
 						errorMsg += "\nPossible Issues:\n"
 						errorMsg += "- Not enough context lines to uniquely identify the location\n"
 						errorMsg += "- Add a few more lines of unchanged code around your changes\n"
 					} else if (contextRatio > 0.5) {
 						errorMsg += "\nPossible Issues:\n"
 						errorMsg += "- Too many context lines may reduce search accuracy\n"
 						errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
 					} else {
 						errorMsg += "\nPossible Issues:\n"
 						errorMsg += "- The diff may be targeting a different version of the file\n"
            errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
 					}
 					if (startLine && endLine) {
 						errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
 					}
 					return { success: false, error: errorMsg }
 				} else {
 					// Edit failure - likely due to content mismatch
 					let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n`
 					errorMsg += "Debug Info:\n"
 					errorMsg += "- The location was found but the content didn't match exactly\n"
 					errorMsg += "- This usually means the file has been modified since the diff was created\n"
 					errorMsg += "- Or the diff may be targeting a different version of the file\n"
 					errorMsg += "\nPossible Solutions:\n"
 					errorMsg += "1. Refresh your view of the file and create a new diff\n"
 					errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
 					errorMsg += "3. Ensure your diff targets the correct version of the file"
 					return { success: false, error: errorMsg }
 				}
 			}
 		}