refactor: enhance NewUnifiedDiffStrategy for improved diff parsing and context handling

- Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management.
- Added handling for empty context lines to ensure accurate representation in hunks.
- Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes.
- Improved overall readability and consistency of the code by standardizing formatting and indentation practices.
2025-12-21 04:41:16 -05:00 · 2025-01-14 12:01:09 -05:00
parent 258024aa5a
commit d25f370013
1 changed file with 119 additions and 113 deletions
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types"
 export class NewUnifiedDiffStrategy implements DiffStrategy {
 	private parseUnifiedDiff(diff: string): Diff {
-    const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes
+		const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
-    const lines = diff.split('\n');
+		const lines = diff.split("\n")
-    const hunks: Hunk[] = [];
+		const hunks: Hunk[] = []
-    let currentHunk: Hunk | null = null;
+		let currentHunk: Hunk | null = null
-    let i = 0;
+		let i = 0
-    while (i < lines.length && !lines[i].startsWith('@@')) {
+		while (i < lines.length && !lines[i].startsWith("@@")) {
-      i++;
+			i++
 		}
 		for (; i < lines.length; i++) {
-      const line = lines[i];
+			const line = lines[i]
-      if (line.startsWith('@@')) {
+			if (line.startsWith("@@")) {
-        if (currentHunk && currentHunk.changes.length > 0 && 
+				if (
-            currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
+					currentHunk &&
 					currentHunk.changes.length > 0 &&
 					currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
 				) {
 					// Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
-          const changes = currentHunk.changes;
+					const changes = currentHunk.changes
-          let startIdx = 0;
+					let startIdx = 0
-          let endIdx = changes.length - 1;
+					let endIdx = changes.length - 1
 					// Find first non-context line
 					for (let j = 0; j < changes.length; j++) {
-            if (changes[j].type !== 'context') {
+						if (changes[j].type !== "context") {
-              startIdx = Math.max(0, j - MAX_CONTEXT_LINES);
+							startIdx = Math.max(0, j - MAX_CONTEXT_LINES)
-              break;
+							break
 						}
 					}
 					// Find last non-context line
 					for (let j = changes.length - 1; j >= 0; j--) {
-            if (changes[j].type !== 'context') {
+						if (changes[j].type !== "context") {
-              endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES);
+							endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES)
-              break;
+							break
 						}
 					}
-          currentHunk.changes = changes.slice(startIdx, endIdx + 1);
+					currentHunk.changes = changes.slice(startIdx, endIdx + 1)
-          hunks.push(currentHunk);
+					hunks.push(currentHunk)
 				}
-        currentHunk = { changes: [] };
+				currentHunk = { changes: [] }
-        continue;
+				continue
 			}
-      if (!currentHunk) {continue};
+			if (!currentHunk) {
 				continue
 			}
 			// Extract the complete indentation for each line
-      const content = line.slice(1); // Remove the diff marker
+			const content = line.slice(1) // Remove the diff marker
-      const indentMatch = content.match(/^(\s*)/);
+			const indentMatch = content.match(/^(\s*)/)
-      const indent = indentMatch ? indentMatch[0] : '';
+			const indent = indentMatch ? indentMatch[0] : ""
-      const trimmedContent = content.slice(indent.length);
+			const trimmedContent = content.slice(indent.length)
-      if (line.startsWith(' ')) {
+			if (line.startsWith(" ")) {
 				currentHunk.changes.push({
-          type: 'context',
+					type: "context",
 					content: trimmedContent,
 					indent,
-          originalLine: content
+					originalLine: content,
-        });
+				})
-      } else if (line.startsWith('+')) {
+			} else if (line.startsWith("+")) {
 				currentHunk.changes.push({
-          type: 'add',
+					type: "add",
 					content: trimmedContent,
 					indent,
-          originalLine: content
+					originalLine: content,
-        });
+				})
-      } else if (line.startsWith('-')) {
+			} else if (line.startsWith("-")) {
 				currentHunk.changes.push({
-          type: 'remove',
+					type: "remove",
 					content: trimmedContent,
 					indent,
-          originalLine: content
+					originalLine: content,
-        });
+				})
 			} else {
 				// Assume is a context line and add a space if it's empty
 				const finalContent = trimmedContent ? " " + trimmedContent : " "
 				currentHunk.changes.push({
 					type: "context",
 					content: finalContent,
 					indent,
 					originalLine: content,
 				})
 			}
 		}
-    if (currentHunk && currentHunk.changes.length > 0 && 
+		if (
-        currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
+			currentHunk &&
-      hunks.push(currentHunk);
+			currentHunk.changes.length > 0 &&
 			currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
 		) {
 			hunks.push(currentHunk)
 		}
-    return { hunks };
+		return { hunks }
 	}
 	getToolDescription(cwd: string): string {
-		return `## apply_diff
+		return `# apply_diff Tool Rules:
-Description:
+Generate a unified diff similar to what "diff -U0" would produce. 
 Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together.
-Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them.
+The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. Do not include timestamps with the file paths.
-Key Requirements:
+Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool.
 1. Generate compact diffs with minimal context
   - Use reduced context similar to diff -U0
   - Only include hunks that contain actual changes (+ or - lines)
   - Skip hunks with only unchanged lines
-2. Use high-level, logical grouping
+Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch.
   - When modifying code blocks (functions, methods, loops), replace the entire block in one hunk
   - Delete the complete existing block with \`-\` lines
   - Add the complete updated block with \`+\` lines
   - Group related changes together rather than creating many small hunks
-3. Format requirements
+Indentation matters! Make sure to preserve the exact indentation of both removed and added lines.
   - Include file paths in the first 2 lines (without timestamps)
   - Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed)
   - Preserve exact indentation
   - The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included
-4. Common operations
+Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it.
   - To move code: Create one hunk to delete from original location, another to add at new location
   - To modify a block: Delete entire original block, then add entire new version
   - Order hunks in whatever logical sequence makes sense
-Parameters:
+Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense.
 - path: (required) File path relative to current working directory ${cwd}
 - diff: (required) Unified format diff content to apply
-The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines.
+When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs.
 If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location.
 To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext".
 Here’s an example of the desired format:
 Example:
 \`\`\`diff
 --- mathweb/flask/app.py
 +++ mathweb/flask/app.py
@@ ... @@
 -class MathWeb:
 +import sympy
 +
 +class MathWeb:
@@ ... @@
@@ -165,6 +169,8 @@ Example:
 +    return str(num)
 \`\`\`
 Be precise, consistent, and follow these rules carefully to generate correct diffs!
 Usage:
 <apply_diff>
 <path>File path here</path>
@@ -189,7 +195,7 @@ Your diff here
 			const contextStr = prepareSearchString(hunk.changes)
 			const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
-			const editResult = await applyEdit(hunk, result, matchPosition, confidence)
+			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
 			if (editResult.confidence > MIN_CONFIDENCE) {
 				result = editResult.result
 			} else {