From d25f37001367e74a604f34330f1a81df0bc45738 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 12:01:09 -0500 Subject: [PATCH] refactor: enhance NewUnifiedDiffStrategy for improved diff parsing and context handling - Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management. - Added handling for empty context lines to ensure accurate representation in hunks. - Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes. - Improved overall readability and consistency of the code by standardizing formatting and indentation practices. --- src/core/diff/strategies/new-unified/index.ts | 232 +++++++++--------- 1 file changed, 119 insertions(+), 113 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 17b5b3f..ad9fd81 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { private parseUnifiedDiff(diff: string): Diff { - const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes - const lines = diff.split('\n'); - const hunks: Hunk[] = []; - let currentHunk: Hunk | null = null; - - let i = 0; - while (i < lines.length && !lines[i].startsWith('@@')) { - i++; - } - - for (; i < lines.length; i++) { - const line = lines[i]; - - if (line.startsWith('@@')) { - if (currentHunk && currentHunk.changes.length > 0 && - currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { - // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes - const changes = currentHunk.changes; - let startIdx = 0; - let endIdx = changes.length - 1; - - // Find first non-context line - for (let j = 0; j < changes.length; j++) { - if (changes[j].type !== 'context') { - startIdx = Math.max(0, j - MAX_CONTEXT_LINES); - break; - } - } - - // Find last non-context line - for (let j = changes.length - 1; j >= 0; j--) { - if (changes[j].type !== 'context') { - endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES); - break; - } - } - - currentHunk.changes = changes.slice(startIdx, endIdx + 1); - hunks.push(currentHunk); - } - currentHunk = { changes: [] }; - continue; - } - - if (!currentHunk) {continue}; - - // Extract the complete indentation for each line - const content = line.slice(1); // Remove the diff marker - const indentMatch = content.match(/^(\s*)/); - const indent = indentMatch ? indentMatch[0] : ''; - const trimmedContent = content.slice(indent.length); - - if (line.startsWith(' ')) { - currentHunk.changes.push({ - type: 'context', - content: trimmedContent, - indent, - originalLine: content - }); - } else if (line.startsWith('+')) { - currentHunk.changes.push({ - type: 'add', - content: trimmedContent, - indent, - originalLine: content - }); - } else if (line.startsWith('-')) { - currentHunk.changes.push({ - type: 'remove', - content: trimmedContent, - indent, - originalLine: content - }); - } - } - - if (currentHunk && currentHunk.changes.length > 0 && - currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { - hunks.push(currentHunk); - } - - return { hunks }; - } + const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes + const lines = diff.split("\n") + const hunks: Hunk[] = [] + let currentHunk: Hunk | null = null + + let i = 0 + while (i < lines.length && !lines[i].startsWith("@@")) { + i++ + } + + for (; i < lines.length; i++) { + const line = lines[i] + + if (line.startsWith("@@")) { + if ( + currentHunk && + currentHunk.changes.length > 0 && + currentHunk.changes.some((change) => change.type === "add" || change.type === "remove") + ) { + // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes + const changes = currentHunk.changes + let startIdx = 0 + let endIdx = changes.length - 1 + + // Find first non-context line + for (let j = 0; j < changes.length; j++) { + if (changes[j].type !== "context") { + startIdx = Math.max(0, j - MAX_CONTEXT_LINES) + break + } + } + + // Find last non-context line + for (let j = changes.length - 1; j >= 0; j--) { + if (changes[j].type !== "context") { + endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES) + break + } + } + + currentHunk.changes = changes.slice(startIdx, endIdx + 1) + hunks.push(currentHunk) + } + currentHunk = { changes: [] } + continue + } + + if (!currentHunk) { + continue + } + + // Extract the complete indentation for each line + const content = line.slice(1) // Remove the diff marker + const indentMatch = content.match(/^(\s*)/) + const indent = indentMatch ? indentMatch[0] : "" + const trimmedContent = content.slice(indent.length) + + if (line.startsWith(" ")) { + currentHunk.changes.push({ + type: "context", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("+")) { + currentHunk.changes.push({ + type: "add", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("-")) { + currentHunk.changes.push({ + type: "remove", + content: trimmedContent, + indent, + originalLine: content, + }) + } else { + // Assume is a context line and add a space if it's empty + const finalContent = trimmedContent ? " " + trimmedContent : " " + currentHunk.changes.push({ + type: "context", + content: finalContent, + indent, + originalLine: content, + }) + } + } + + if ( + currentHunk && + currentHunk.changes.length > 0 && + currentHunk.changes.some((change) => change.type === "add" || change.type === "remove") + ) { + hunks.push(currentHunk) + } + + return { hunks } + } getToolDescription(cwd: string): string { - return `## apply_diff + return `# apply_diff Tool Rules: -Description: -Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together. +Generate a unified diff similar to what "diff -U0" would produce. -Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them. +The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. Do not include timestamps with the file paths. -Key Requirements: -1. Generate compact diffs with minimal context - - Use reduced context similar to diff -U0 - - Only include hunks that contain actual changes (+ or - lines) - - Skip hunks with only unchanged lines +Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool. -2. Use high-level, logical grouping - - When modifying code blocks (functions, methods, loops), replace the entire block in one hunk - - Delete the complete existing block with \`-\` lines - - Add the complete updated block with \`+\` lines - - Group related changes together rather than creating many small hunks +Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch. -3. Format requirements - - Include file paths in the first 2 lines (without timestamps) - - Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed) - - Preserve exact indentation - - The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included +Indentation matters! Make sure to preserve the exact indentation of both removed and added lines. -4. Common operations - - To move code: Create one hunk to delete from original location, another to add at new location - - To modify a block: Delete entire original block, then add entire new version - - Order hunks in whatever logical sequence makes sense +Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it. -Parameters: -- path: (required) File path relative to current working directory ${cwd} -- diff: (required) Unified format diff content to apply +Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense. -The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines. +When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs. +If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location. + +To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext". + +Here’s an example of the desired format: -Example: \`\`\`diff --- mathweb/flask/app.py +++ mathweb/flask/app.py @@ ... @@ -class MathWeb: +import sympy + + +class MathWeb: @@ ... @@ @@ -165,6 +169,8 @@ Example: + return str(num) \`\`\` +Be precise, consistent, and follow these rules carefully to generate correct diffs! + Usage: File path here @@ -182,14 +188,14 @@ Your diff here ): Promise { const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) - const originalLines = originalContent.split("\n") + const originalLines = originalContent.split("\n") let result = [...originalLines] for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence } = findBestMatch(contextStr, result) - const editResult = await applyEdit(hunk, result, matchPosition, confidence) + const editResult = await applyEdit(hunk, result, matchPosition, confidence, '') if (editResult.confidence > MIN_CONFIDENCE) { result = editResult.result } else {