refactor: enhance NewUnifiedDiffStrategy for improved diff parsing and context handling

- Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management.
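- Added handling for lines that carry no diff marker (such as empty context lines): they are now treated as context lines, with an empty line represented as a single space, so they are still represented in hunks.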
- Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes.
- Improved overall readability and consistency of the code by standardizing formatting and indentation practices.
This commit is contained in:
Daniel Riccio
2025-01-14 12:01:09 -05:00
parent 258024aa5a
commit d25f370013

View File

@@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types"
export class NewUnifiedDiffStrategy implements DiffStrategy { export class NewUnifiedDiffStrategy implements DiffStrategy {
private parseUnifiedDiff(diff: string): Diff { private parseUnifiedDiff(diff: string): Diff {
const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
const lines = diff.split('\n'); const lines = diff.split("\n")
const hunks: Hunk[] = []; const hunks: Hunk[] = []
let currentHunk: Hunk | null = null; let currentHunk: Hunk | null = null
let i = 0; let i = 0
while (i < lines.length && !lines[i].startsWith('@@')) { while (i < lines.length && !lines[i].startsWith("@@")) {
i++; i++
} }
for (; i < lines.length; i++) { for (; i < lines.length; i++) {
const line = lines[i]; const line = lines[i]
if (line.startsWith('@@')) { if (line.startsWith("@@")) {
if (currentHunk && currentHunk.changes.length > 0 && if (
currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { currentHunk &&
currentHunk.changes.length > 0 &&
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
) {
// Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
const changes = currentHunk.changes; const changes = currentHunk.changes
let startIdx = 0; let startIdx = 0
let endIdx = changes.length - 1; let endIdx = changes.length - 1
// Find first non-context line // Find first non-context line
for (let j = 0; j < changes.length; j++) { for (let j = 0; j < changes.length; j++) {
if (changes[j].type !== 'context') { if (changes[j].type !== "context") {
startIdx = Math.max(0, j - MAX_CONTEXT_LINES); startIdx = Math.max(0, j - MAX_CONTEXT_LINES)
break; break
} }
} }
// Find last non-context line // Find last non-context line
for (let j = changes.length - 1; j >= 0; j--) { for (let j = changes.length - 1; j >= 0; j--) {
if (changes[j].type !== 'context') { if (changes[j].type !== "context") {
endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES); endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES)
break; break
} }
} }
currentHunk.changes = changes.slice(startIdx, endIdx + 1); currentHunk.changes = changes.slice(startIdx, endIdx + 1)
hunks.push(currentHunk); hunks.push(currentHunk)
} }
currentHunk = { changes: [] }; currentHunk = { changes: [] }
continue; continue
} }
if (!currentHunk) {continue}; if (!currentHunk) {
continue
}
// Extract the complete indentation for each line // Extract the complete indentation for each line
const content = line.slice(1); // Remove the diff marker const content = line.slice(1) // Remove the diff marker
const indentMatch = content.match(/^(\s*)/); const indentMatch = content.match(/^(\s*)/)
const indent = indentMatch ? indentMatch[0] : ''; const indent = indentMatch ? indentMatch[0] : ""
const trimmedContent = content.slice(indent.length); const trimmedContent = content.slice(indent.length)
if (line.startsWith(' ')) { if (line.startsWith(" ")) {
currentHunk.changes.push({ currentHunk.changes.push({
type: 'context', type: "context",
content: trimmedContent, content: trimmedContent,
indent, indent,
originalLine: content originalLine: content,
}); })
} else if (line.startsWith('+')) { } else if (line.startsWith("+")) {
currentHunk.changes.push({ currentHunk.changes.push({
type: 'add', type: "add",
content: trimmedContent, content: trimmedContent,
indent, indent,
originalLine: content originalLine: content,
}); })
} else if (line.startsWith('-')) { } else if (line.startsWith("-")) {
currentHunk.changes.push({ currentHunk.changes.push({
type: 'remove', type: "remove",
content: trimmedContent, content: trimmedContent,
indent, indent,
originalLine: content originalLine: content,
}); })
} else {
// Assume is a context line and add a space if it's empty
const finalContent = trimmedContent ? " " + trimmedContent : " "
currentHunk.changes.push({
type: "context",
content: finalContent,
indent,
originalLine: content,
})
} }
} }
if (currentHunk && currentHunk.changes.length > 0 && if (
currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { currentHunk &&
hunks.push(currentHunk); currentHunk.changes.length > 0 &&
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
) {
hunks.push(currentHunk)
} }
return { hunks }; return { hunks }
} }
getToolDescription(cwd: string): string { getToolDescription(cwd: string): string {
return `## apply_diff return `# apply_diff Tool Rules:
Description: Generate a unified diff similar to what "diff -U0" would produce.
Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together.
Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them. The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. Do not include timestamps with the file paths.
Key Requirements: Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool.
1. Generate compact diffs with minimal context
- Use reduced context similar to diff -U0
- Only include hunks that contain actual changes (+ or - lines)
- Skip hunks with only unchanged lines
2. Use high-level, logical grouping Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch.
- When modifying code blocks (functions, methods, loops), replace the entire block in one hunk
- Delete the complete existing block with \`-\` lines
- Add the complete updated block with \`+\` lines
- Group related changes together rather than creating many small hunks
3. Format requirements Indentation matters! Make sure to preserve the exact indentation of both removed and added lines.
- Include file paths in the first 2 lines (without timestamps)
- Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed)
- Preserve exact indentation
- The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included
4. Common operations Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it.
- To move code: Create one hunk to delete from original location, another to add at new location
- To modify a block: Delete entire original block, then add entire new version
- Order hunks in whatever logical sequence makes sense
Parameters: Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense.
- path: (required) File path relative to current working directory ${cwd}
- diff: (required) Unified format diff content to apply
The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines. When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs.
If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location.
To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext".
Heres an example of the desired format:
Example:
\`\`\`diff \`\`\`diff
--- mathweb/flask/app.py --- mathweb/flask/app.py
+++ mathweb/flask/app.py +++ mathweb/flask/app.py
@@ ... @@ @@ ... @@
-class MathWeb: -class MathWeb:
+import sympy +import sympy
+ +
+class MathWeb: +class MathWeb:
@@ ... @@ @@ ... @@
@@ -165,6 +169,8 @@ Example:
+ return str(num) + return str(num)
\`\`\` \`\`\`
Be precise, consistent, and follow these rules carefully to generate correct diffs!
Usage: Usage:
<apply_diff> <apply_diff>
<path>File path here</path> <path>File path here</path>
@@ -189,7 +195,7 @@ Your diff here
const contextStr = prepareSearchString(hunk.changes) const contextStr = prepareSearchString(hunk.changes)
const { index: matchPosition, confidence } = findBestMatch(contextStr, result) const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
const editResult = await applyEdit(hunk, result, matchPosition, confidence) const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
if (editResult.confidence > MIN_CONFIDENCE) { if (editResult.confidence > MIN_CONFIDENCE) {
result = editResult.result result = editResult.result
} else { } else {