From d25f37001367e74a604f34330f1a81df0bc45738 Mon Sep 17 00:00:00 2001
From: Daniel Riccio <ricciodaniel98@gmail.com>
Date: Tue, 14 Jan 2025 12:01:09 -0500
Subject: [PATCH] refactor: enhance NewUnifiedDiffStrategy for improved diff
 parsing and context handling

- Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management.
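- Added a fallback in parseUnifiedDiff that treats hunk lines lacking a leading " ", "+", or "-" marker (such as empty lines whose leading space was stripped) as context lines, so they are no longer silently dropped from hunks.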
- Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes.
- Improved readability and consistency by reformatting parseUnifiedDiff to match the rest of the file: tab indentation, double-quoted strings, and no trailing semicolons.
---
 src/core/diff/strategies/new-unified/index.ts | 232 +++++++++---------
 1 file changed, 119 insertions(+), 113 deletions(-)

diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts
index 17b5b3f..ad9fd81 100644
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types"
 
 export class NewUnifiedDiffStrategy implements DiffStrategy {
 	private parseUnifiedDiff(diff: string): Diff {
-    const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes
-    const lines = diff.split('\n');
-    const hunks: Hunk[] = [];
-    let currentHunk: Hunk | null = null;
-    
-    let i = 0;
-    while (i < lines.length && !lines[i].startsWith('@@')) {
-      i++;
-    }
-  
-    for (; i < lines.length; i++) {
-      const line = lines[i];
-      
-      if (line.startsWith('@@')) {
-        if (currentHunk && currentHunk.changes.length > 0 && 
-            currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
-          // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
-          const changes = currentHunk.changes;
-          let startIdx = 0;
-          let endIdx = changes.length - 1;
-          
-          // Find first non-context line
-          for (let j = 0; j < changes.length; j++) {
-            if (changes[j].type !== 'context') {
-              startIdx = Math.max(0, j - MAX_CONTEXT_LINES);
-              break;
-            }
-          }
-          
-          // Find last non-context line
-          for (let j = changes.length - 1; j >= 0; j--) {
-            if (changes[j].type !== 'context') {
-              endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES);
-              break;
-            }
-          }
-          
-          currentHunk.changes = changes.slice(startIdx, endIdx + 1);
-          hunks.push(currentHunk);
-        }
-        currentHunk = { changes: [] };
-        continue;
-      }
-  
-      if (!currentHunk) {continue};
-  
-      // Extract the complete indentation for each line
-      const content = line.slice(1); // Remove the diff marker
-      const indentMatch = content.match(/^(\s*)/);
-      const indent = indentMatch ? indentMatch[0] : '';
-      const trimmedContent = content.slice(indent.length);
-  
-      if (line.startsWith(' ')) {
-        currentHunk.changes.push({
-          type: 'context',
-          content: trimmedContent,
-          indent,
-          originalLine: content
-        });
-      } else if (line.startsWith('+')) {
-        currentHunk.changes.push({
-          type: 'add',
-          content: trimmedContent,
-          indent,
-          originalLine: content
-        });
-      } else if (line.startsWith('-')) {
-        currentHunk.changes.push({
-          type: 'remove',
-          content: trimmedContent,
-          indent,
-          originalLine: content
-        });
-      }
-    }
-  
-    if (currentHunk && currentHunk.changes.length > 0 && 
-        currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
-      hunks.push(currentHunk);
-    }
-  
-    return { hunks };
-  }
+		const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
+		const lines = diff.split("\n")
+		const hunks: Hunk[] = []
+		let currentHunk: Hunk | null = null
+
+		let i = 0
+		while (i < lines.length && !lines[i].startsWith("@@")) {
+			i++
+		}
+
+		for (; i < lines.length; i++) {
+			const line = lines[i]
+
+			if (line.startsWith("@@")) {
+				if (
+					currentHunk &&
+					currentHunk.changes.length > 0 &&
+					currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
+				) {
+					// Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
+					const changes = currentHunk.changes
+					let startIdx = 0
+					let endIdx = changes.length - 1
+
+					// Find first non-context line
+					for (let j = 0; j < changes.length; j++) {
+						if (changes[j].type !== "context") {
+							startIdx = Math.max(0, j - MAX_CONTEXT_LINES)
+							break
+						}
+					}
+
+					// Find last non-context line
+					for (let j = changes.length - 1; j >= 0; j--) {
+						if (changes[j].type !== "context") {
+							endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES)
+							break
+						}
+					}
+
+					currentHunk.changes = changes.slice(startIdx, endIdx + 1)
+					hunks.push(currentHunk)
+				}
+				currentHunk = { changes: [] }
+				continue
+			}
+
+			if (!currentHunk) {
+				continue
+			}
+
+			// Extract the complete indentation for each line
+			const content = line.slice(1) // Remove the diff marker
+			const indentMatch = content.match(/^(\s*)/)
+			const indent = indentMatch ? indentMatch[0] : ""
+			const trimmedContent = content.slice(indent.length)
+
+			if (line.startsWith(" ")) {
+				currentHunk.changes.push({
+					type: "context",
+					content: trimmedContent,
+					indent,
+					originalLine: content,
+				})
+			} else if (line.startsWith("+")) {
+				currentHunk.changes.push({
+					type: "add",
+					content: trimmedContent,
+					indent,
+					originalLine: content,
+				})
+			} else if (line.startsWith("-")) {
+				currentHunk.changes.push({
+					type: "remove",
+					content: trimmedContent,
+					indent,
+					originalLine: content,
+				})
+			} else {
+				// Assume is a context line and add a space if it's empty
+				const finalContent = trimmedContent ? " " + trimmedContent : " "
+				currentHunk.changes.push({
+					type: "context",
+					content: finalContent,
+					indent,
+					originalLine: content,
+				})
+			}
+		}
+
+		if (
+			currentHunk &&
+			currentHunk.changes.length > 0 &&
+			currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
+		) {
+			hunks.push(currentHunk)
+		}
+
+		return { hunks }
+	}
 
 	getToolDescription(cwd: string): string {
-		return `## apply_diff
+		return `# apply_diff Tool Rules:
 
-Description:
-Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together.
+Generate a unified diff similar to what "diff -U0" would produce. 
 
-Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them.
+The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. Do not include timestamps with the file paths.
 
-Key Requirements:
-1. Generate compact diffs with minimal context
-   - Use reduced context similar to diff -U0
-   - Only include hunks that contain actual changes (+ or - lines)
-   - Skip hunks with only unchanged lines
+Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool.
 
-2. Use high-level, logical grouping
-   - When modifying code blocks (functions, methods, loops), replace the entire block in one hunk
-   - Delete the complete existing block with \`-\` lines
-   - Add the complete updated block with \`+\` lines
-   - Group related changes together rather than creating many small hunks
+Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch.
 
-3. Format requirements
-   - Include file paths in the first 2 lines (without timestamps)
-   - Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed)
-   - Preserve exact indentation
-   - The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included
+Indentation matters! Make sure to preserve the exact indentation of both removed and added lines.
 
-4. Common operations
-   - To move code: Create one hunk to delete from original location, another to add at new location
-   - To modify a block: Delete entire original block, then add entire new version
-   - Order hunks in whatever logical sequence makes sense
+Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it.
 
-Parameters:
-- path: (required) File path relative to current working directory ${cwd}
-- diff: (required) Unified format diff content to apply
+Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense.
 
-The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines.
+When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs.
 
+If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location.
+
+To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext".
+
+Here’s an example of the desired format:
 
-Example:
 \`\`\`diff
 --- mathweb/flask/app.py
 +++ mathweb/flask/app.py
 @@ ... @@
 -class MathWeb:
 +import sympy
+
 +
 +class MathWeb:
 @@ ... @@
@@ -165,6 +169,8 @@ Example:
 +    return str(num)
 \`\`\`
 
+Be precise, consistent, and follow these rules carefully to generate correct diffs!
+
 Usage:
 <apply_diff>
 <path>File path here</path>
@@ -182,14 +188,14 @@ Your diff here
 	): Promise<DiffResult> {
 		const MIN_CONFIDENCE = 0.9
 		const parsedDiff = this.parseUnifiedDiff(diffContent)
-    const originalLines = originalContent.split("\n")
+		const originalLines = originalContent.split("\n")
 		let result = [...originalLines]
 
 		for (const hunk of parsedDiff.hunks) {
 			const contextStr = prepareSearchString(hunk.changes)
 			const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
 
-			const editResult = await applyEdit(hunk, result, matchPosition, confidence)
+			const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
 			if (editResult.confidence > MIN_CONFIDENCE) {
 				result = editResult.result
 			} else {