refactor: enhance NewUnifiedDiffStrategy for improved diff parsing and context handling

- Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management.
- Added handling for empty context lines to ensure accurate representation in hunks.
- Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes.
- Improved overall readability and consistency of the code by standardizing formatting and indentation practices.
This commit is contained in:
Daniel Riccio
2025-01-14 12:01:09 -05:00
parent 258024aa5a
commit d25f370013

View File

@@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types"
export class NewUnifiedDiffStrategy implements DiffStrategy {
private parseUnifiedDiff(diff: string): Diff {
const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes
const lines = diff.split('\n');
const hunks: Hunk[] = [];
let currentHunk: Hunk | null = null;
const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
const lines = diff.split("\n")
const hunks: Hunk[] = []
let currentHunk: Hunk | null = null
let i = 0;
while (i < lines.length && !lines[i].startsWith('@@')) {
i++;
}
let i = 0
while (i < lines.length && !lines[i].startsWith("@@")) {
i++
}
for (; i < lines.length; i++) {
const line = lines[i];
for (; i < lines.length; i++) {
const line = lines[i]
if (line.startsWith('@@')) {
if (currentHunk && currentHunk.changes.length > 0 &&
currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
// Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
const changes = currentHunk.changes;
let startIdx = 0;
let endIdx = changes.length - 1;
if (line.startsWith("@@")) {
if (
currentHunk &&
currentHunk.changes.length > 0 &&
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
) {
// Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes
const changes = currentHunk.changes
let startIdx = 0
let endIdx = changes.length - 1
// Find first non-context line
for (let j = 0; j < changes.length; j++) {
if (changes[j].type !== 'context') {
startIdx = Math.max(0, j - MAX_CONTEXT_LINES);
break;
}
}
// Find first non-context line
for (let j = 0; j < changes.length; j++) {
if (changes[j].type !== "context") {
startIdx = Math.max(0, j - MAX_CONTEXT_LINES)
break
}
}
// Find last non-context line
for (let j = changes.length - 1; j >= 0; j--) {
if (changes[j].type !== 'context') {
endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES);
break;
}
}
// Find last non-context line
for (let j = changes.length - 1; j >= 0; j--) {
if (changes[j].type !== "context") {
endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES)
break
}
}
currentHunk.changes = changes.slice(startIdx, endIdx + 1);
hunks.push(currentHunk);
}
currentHunk = { changes: [] };
continue;
}
currentHunk.changes = changes.slice(startIdx, endIdx + 1)
hunks.push(currentHunk)
}
currentHunk = { changes: [] }
continue
}
if (!currentHunk) {continue};
if (!currentHunk) {
continue
}
// Extract the complete indentation for each line
const content = line.slice(1); // Remove the diff marker
const indentMatch = content.match(/^(\s*)/);
const indent = indentMatch ? indentMatch[0] : '';
const trimmedContent = content.slice(indent.length);
// Extract the complete indentation for each line
const content = line.slice(1) // Remove the diff marker
const indentMatch = content.match(/^(\s*)/)
const indent = indentMatch ? indentMatch[0] : ""
const trimmedContent = content.slice(indent.length)
if (line.startsWith(' ')) {
currentHunk.changes.push({
type: 'context',
content: trimmedContent,
indent,
originalLine: content
});
} else if (line.startsWith('+')) {
currentHunk.changes.push({
type: 'add',
content: trimmedContent,
indent,
originalLine: content
});
} else if (line.startsWith('-')) {
currentHunk.changes.push({
type: 'remove',
content: trimmedContent,
indent,
originalLine: content
});
}
}
if (line.startsWith(" ")) {
currentHunk.changes.push({
type: "context",
content: trimmedContent,
indent,
originalLine: content,
})
} else if (line.startsWith("+")) {
currentHunk.changes.push({
type: "add",
content: trimmedContent,
indent,
originalLine: content,
})
} else if (line.startsWith("-")) {
currentHunk.changes.push({
type: "remove",
content: trimmedContent,
indent,
originalLine: content,
})
} else {
// Assume is a context line and add a space if it's empty
const finalContent = trimmedContent ? " " + trimmedContent : " "
currentHunk.changes.push({
type: "context",
content: finalContent,
indent,
originalLine: content,
})
}
}
if (currentHunk && currentHunk.changes.length > 0 &&
currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) {
hunks.push(currentHunk);
}
if (
currentHunk &&
currentHunk.changes.length > 0 &&
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
) {
hunks.push(currentHunk)
}
return { hunks };
}
return { hunks }
}
getToolDescription(cwd: string): string {
return `## apply_diff
return `# apply_diff Tool Rules:
Description:
Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together.
Generate a unified diff similar to what "diff -U0" would produce.
Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them.
The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. Do not include timestamps with the file paths.
Key Requirements:
1. Generate compact diffs with minimal context
- Use reduced context similar to diff -U0
- Only include hunks that contain actual changes (+ or - lines)
- Skip hunks with only unchanged lines
Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool.
2. Use high-level, logical grouping
- When modifying code blocks (functions, methods, loops), replace the entire block in one hunk
- Delete the complete existing block with \`-\` lines
- Add the complete updated block with \`+\` lines
- Group related changes together rather than creating many small hunks
Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch.
3. Format requirements
- Include file paths in the first 2 lines (without timestamps)
- Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed)
- Preserve exact indentation
- The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included
Indentation matters! Make sure to preserve the exact indentation of both removed and added lines.
4. Common operations
- To move code: Create one hunk to delete from original location, another to add at new location
- To modify a block: Delete entire original block, then add entire new version
- Order hunks in whatever logical sequence makes sense
Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it.
Parameters:
- path: (required) File path relative to current working directory ${cwd}
- diff: (required) Unified format diff content to apply
Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense.
The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines.
When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs.
If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location.
To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext".
Heres an example of the desired format:
Example:
\`\`\`diff
--- mathweb/flask/app.py
+++ mathweb/flask/app.py
@@ ... @@
-class MathWeb:
+import sympy
+
+class MathWeb:
@@ ... @@
@@ -165,6 +169,8 @@ Example:
+ return str(num)
\`\`\`
Be precise, consistent, and follow these rules carefully to generate correct diffs!
Usage:
<apply_diff>
<path>File path here</path>
@@ -182,14 +188,14 @@ Your diff here
): Promise<DiffResult> {
const MIN_CONFIDENCE = 0.9
const parsedDiff = this.parseUnifiedDiff(diffContent)
const originalLines = originalContent.split("\n")
const originalLines = originalContent.split("\n")
let result = [...originalLines]
for (const hunk of parsedDiff.hunks) {
const contextStr = prepareSearchString(hunk.changes)
const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
const editResult = await applyEdit(hunk, result, matchPosition, confidence)
const editResult = await applyEdit(hunk, result, matchPosition, confidence, '')
if (editResult.confidence > MIN_CONFIDENCE) {
result = editResult.result
} else {