mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-21 04:41:16 -05:00
Add New Unified Diff Strategy Implementation
- Introduced a new unified diff strategy with support for context matching, DMP, and Git-based edits. - Implemented helper functions for parsing unified diffs and evaluating similarity. - Added types for changes, hunks, and diffs to enhance type safety. - Created a main edit function that applies strategies sequentially based on confidence levels. - Included detailed descriptions and usage examples for the new strategy.
This commit is contained in:
236
src/core/diff/strategies/new-unified/edit-strategies.ts
Normal file
236
src/core/diff/strategies/new-unified/edit-strategies.ts
Normal file
@@ -0,0 +1,236 @@
|
||||
import { diff_match_patch } from 'diff-match-patch';
|
||||
import * as git from 'isomorphic-git';
|
||||
import { fs as memfs, vol } from 'memfs';
|
||||
import { Hunk } from './types';
|
||||
import { getDMPSimilarity } from './search-strategies';
|
||||
|
||||
/**
 * Chooses the leading whitespace to use for `line`.
 *
 * Resolution order:
 *   1. the line's own leading whitespace, when it has any;
 *   2. the leading whitespace of the first indented entry in `contextLines`;
 *   3. `previousIndent`.
 *
 * @param line - Text of the line being placed.
 * @param contextLines - Lines scanned, in order, for the first one that starts with whitespace.
 * @param previousIndent - Fallback used when neither the line nor any context line is indented.
 * @returns The whitespace prefix to use (possibly the empty string).
 */
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
  const ownIndent = /^\s+/.exec(line);
  if (ownIndent) {
    return ownIndent[0];
  }

  // From here on `line` has no leading whitespace, i.e. its depth is 0. The
  // former "line is deeper than the context line" comparison could therefore
  // never be true and has been removed; results are unchanged.
  for (const contextLine of contextLines) {
    const contextIndent = /^\s+/.exec(contextLine);
    if (contextIndent) {
      return contextIndent[0];
    }
  }

  return previousIndent;
}
|
||||
|
||||
/** Outcome of running one edit strategy against a single hunk. */
export type EditResult = {
  /** Score reported by the strategy; callers compare it against a minimum threshold. */
  confidence: number;

  /** File content, one entry per line, as it stands after the strategy ran. */
  result: string[];

  /** Label of the strategy that produced this result ('context', 'dmp', 'git' or 'none'). */
  strategy: string;
};
|
||||
|
||||
/**
 * Context-matching edit strategy.
 *
 * Rebuilds the file by copying everything before `matchPosition`, replaying the
 * hunk's changes (context lines are re-emitted, added lines are inserted,
 * removed lines are skipped in the source), and then copying the remainder of
 * the file.
 *
 * @param hunk - Hunk whose changes are replayed.
 * @param content - Current file content, one entry per line. Not mutated.
 * @param matchPosition - Line index where the hunk starts, or -1 when no match was found.
 * @returns The rebuilt lines plus the score from `getDMPSimilarity`; when
 *          `matchPosition` is -1 the original `content` is returned with confidence 0.
 */
export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy: 'context' };
  }

  // Loop-invariant: previously this list was rebuilt for every added line.
  const contextOriginals = hunk.changes
    .filter(c => c.type === 'context')
    .map(c => c.originalLine || '');

  const replayed: string[] = [];
  let sourceIndex = matchPosition; // next line of `content` not yet consumed by the hunk
  let previousIndent = '';

  for (const change of hunk.changes) {
    if (change.type === 'context') {
      replayed.push(change.originalLine || (change.indent + change.content));
      previousIndent = change.indent;
      sourceIndex++;
    } else if (change.type === 'add') {
      const indent = change.indent || inferIndentation(change.content, contextOriginals, previousIndent);
      replayed.push(indent + change.content);
      previousIndent = indent;
    } else if (change.type === 'remove') {
      // Dropping a line only advances the read position in the source.
      sourceIndex++;
    }
  }

  // `concat` instead of `push(...spread)`: spreading a very large tail as call
  // arguments can exceed the engine's argument limit and throw a RangeError.
  const newResult = content.slice(0, matchPosition).concat(replayed, content.slice(sourceIndex));

  // Validate the result over a window as long as the hunk's change list.
  const windowEnd = matchPosition + hunk.changes.length;
  const similarity = getDMPSimilarity(
    content.slice(matchPosition, windowEnd).join('\n'),
    newResult.slice(matchPosition, windowEnd).join('\n')
  );

  return {
    confidence: similarity,
    result: newResult,
    strategy: 'context'
  };
}
|
||||
|
||||
/**
 * diff-match-patch edit strategy.
 *
 * Builds the hunk's "before" text (context + removed lines) and "after" text
 * (context + added lines), derives a patch between the two, and applies that
 * patch to the region of the file starting at `matchPosition`. The patched
 * region is then spliced back between the untouched head and tail of the file.
 *
 * The previous implementation patched from the whole file to the hunk text
 * alone, so the returned "file" contained only the hunk and everything else was
 * lost; it also stripped indentation from context lines and could delete
 * unrelated lines that happened to share text with a removed line.
 *
 * @param hunk - Hunk to apply.
 * @param content - Current file content, one entry per line. Not mutated.
 * @param matchPosition - Line index where the hunk starts, or -1 when no match was found.
 * @returns The patched file and the score from `getDMPSimilarity`; confidence 0
 *          with the original `content` when there is no match or a patch
 *          fragment could not be applied.
 */
export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy: 'dmp' };
  }

  // Use the untrimmed context lines so indentation can actually be inferred.
  const contextOriginals = hunk.changes
    .filter(c => c.type === 'context')
    .map(c => c.originalLine || '');

  const beforeLines: string[] = [];
  const afterLines: string[] = [];
  for (const change of hunk.changes) {
    if (change.type === 'add') {
      const indent = change.indent || inferIndentation(change.content, contextOriginals);
      afterLines.push(indent + change.content);
      continue;
    }
    const text = change.originalLine || (change.indent + change.content);
    beforeLines.push(text);
    if (change.type === 'context') {
      afterLines.push(text);
    }
  }

  // The hunk covers exactly as many source lines as it has context/remove entries.
  const regionEnd = matchPosition + beforeLines.length;
  const regionText = content.slice(matchPosition, regionEnd).join('\n');

  const dmp = new diff_match_patch();
  const patches = dmp.patch_make(beforeLines.join('\n'), afterLines.join('\n'));
  const [patchedRegion, applied] = dmp.patch_apply(patches, regionText);

  if (applied.some(ok => !ok)) {
    return { confidence: 0, result: content, strategy: 'dmp' };
  }

  // ''.split('\n') yields [''], which would leave a stray blank line behind
  // when the hunk deletes its whole region.
  const patchedLines = afterLines.length === 0 && patchedRegion === ''
    ? []
    : patchedRegion.split('\n');

  const similarity = getDMPSimilarity(regionText, patchedRegion);

  return {
    confidence: similarity,
    result: content.slice(0, matchPosition).concat(patchedLines, content.slice(regionEnd)),
    strategy: 'dmp'
  };
}
|
||||
|
||||
/**
 * Git edit strategy.
 *
 * Commits the original content to an in-memory repository (memfs +
 * isomorphic-git), applies the hunk line by line on a `patch-branch`, and
 * commits the modified content there.
 *
 * NOTE(review): the returned lines come from the in-memory splice below, not
 * from anything read back out of the repository; the commits look like
 * scaffolding for a future merge-based approach — confirm.
 * NOTE(review): `vol` is memfs's shared default volume, and it is reset before
 * and after every call.
 *
 * @param hunk - Hunk to apply.
 * @param content - Current file content, one entry per line. Not mutated.
 * @param matchPosition - Line index where the hunk starts, or -1 when no match was found.
 * @returns The edited lines and the score from `getDMPSimilarity`; confidence 0
 *          with the original `content` when there is no match or any step throws.
 */
export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> {
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy: 'git' };
  }

  const repo = { fs: memfs, dir: '/' };
  const author = { name: 'Temp', email: 'temp@example.com' };

  vol.reset();

  try {
    await git.init(repo);

    const originalContent = content.join('\n');
    await memfs.promises.writeFile('/file.txt', originalContent);

    await git.add({ ...repo, filepath: 'file.txt' });
    await git.commit({ ...repo, author, message: 'Initial commit' });

    await git.branch({ ...repo, ref: 'patch-branch' });
    await git.checkout({ ...repo, ref: 'patch-branch' });

    const newLines = originalContent.split('\n');
    let offset = matchPosition; // insertion point / lower bound for removals

    // Untrimmed context lines: the whitespace-stripped `content` field can
    // never reveal an indent, which made inference a no-op here before.
    const contextOriginals = hunk.changes
      .filter(c => c.type === 'context')
      .map(c => c.originalLine || '');

    for (const change of hunk.changes) {
      if (change.type === 'add') {
        const indent = change.indent || inferIndentation(change.content, contextOriginals);
        newLines.splice(offset, 0, indent + change.content);
        offset++;
      } else if (change.type === 'remove') {
        // First line at or after the cursor whose text matches once leading
        // whitespace is ignored. `trimStart` replaces the deprecated `trimLeft`.
        const index = newLines.findIndex(
          (line, i) => i >= offset && line.trimStart() === change.content
        );
        if (index !== -1) {
          newLines.splice(index, 1);
        }
      } else {
        // Context line: keep it and move past it.
        offset++;
      }
    }

    const modifiedContent = newLines.join('\n');
    await memfs.promises.writeFile('/file.txt', modifiedContent);

    await git.add({ ...repo, filepath: 'file.txt' });
    await git.commit({ ...repo, author, message: 'Apply changes' });

    const windowEnd = matchPosition + hunk.changes.length;
    const similarity = getDMPSimilarity(
      content.slice(matchPosition, windowEnd).join('\n'),
      newLines.slice(matchPosition, windowEnd).join('\n')
    );

    return {
      confidence: similarity,
      result: newLines,
      strategy: 'git'
    };
  } catch {
    // Best effort: any failure is reported as "strategy did not apply".
    return { confidence: 0, result: content, strategy: 'git' };
  } finally {
    vol.reset();
  }
}
|
||||
|
||||
/**
 * Applies a hunk using the available edit strategies (context, dmp, git).
 *
 * - When the search `confidence` is below the threshold and `debug` is off,
 *   no strategy is attempted.
 * - Normal mode runs the strategies in order and returns the first result
 *   whose confidence exceeds the threshold.
 * - Debug mode runs every strategy and returns the highest-confidence result
 *   above the threshold (the earliest strategy wins ties).
 *
 * @param hunk - Hunk to apply.
 * @param content - Current file content, one entry per line.
 * @param matchPosition - Line index where the hunk was located, or -1.
 * @param confidence - Confidence reported by the search step.
 * @param debug - Enables the run-everything mode and bypasses the confidence gate.
 * @returns The chosen result, or `{ confidence: 0, result: content, strategy: 'none' }`.
 */
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
  const MIN_CONFIDENCE = 0.9;
  const failure: EditResult = { confidence: 0, result: content, strategy: 'none' };

  // Gate: a weak match is not worth editing unless we are debugging.
  if (!debug && confidence < MIN_CONFIDENCE) {
    return failure;
  }

  // Thunks, in the order they should be tried.
  const attempts: Array<() => EditResult | Promise<EditResult>> = [
    () => applyContextMatching(hunk, content, matchPosition),
    () => applyDMP(hunk, content, matchPosition),
    () => applyGit(hunk, content, matchPosition)
  ];

  if (!debug) {
    for (const attempt of attempts) {
      const outcome = await attempt();
      if (outcome.confidence > MIN_CONFIDENCE) {
        return outcome;
      }
    }
    return failure;
  }

  // Debug mode: start every strategy, then keep the best qualifying outcome.
  const outcomes = await Promise.all(attempts.map(async attempt => attempt()));

  let best: EditResult | undefined;
  for (const outcome of outcomes) {
    if (!(outcome.confidence > MIN_CONFIDENCE)) {
      continue;
    }
    if (best === undefined || outcome.confidence > best.confidence) {
      best = outcome;
    }
  }

  return best !== undefined ? best : failure;
}
|
||||
181
src/core/diff/strategies/new-unified/index.ts
Normal file
181
src/core/diff/strategies/new-unified/index.ts
Normal file
@@ -0,0 +1,181 @@
|
||||
import { Diff, Hunk } from "./types"
|
||||
import { findBestMatch, prepareSearchString } from "./search-strategies"
|
||||
import { applyEdit } from "./edit-strategies"
|
||||
import { DiffResult, DiffStrategy } from "../../types"
|
||||
|
||||
/**
 * Diff strategy that applies unified-diff hunks without relying on line
 * numbers: each hunk is located in the file by searching for its
 * context/removed lines and is then applied through `applyEdit`.
 */
export class NewUnifiedDiffStrategy implements DiffStrategy {
  /**
   * Parses unified-diff text into hunks.
   *
   * Everything before the first `@@` line is ignored. Inside a hunk, lines
   * starting with a space, `+` or `-` become context/add/remove changes; any
   * other line (including an empty one) is skipped. Hunks that end up with no
   * changes are dropped, wherever they appear.
   *
   * @param diff - Unified diff text.
   * @returns The parsed hunks, in source order.
   */
  private parseUnifiedDiff(diff: string): Diff {
    const hunks: Hunk[] = []
    let currentHunk: Hunk | null = null

    for (const line of diff.split("\n")) {
      if (line.startsWith("@@")) {
        // Previously an empty hunk was kept here but dropped at end of input;
        // empty hunks are now discarded consistently.
        if (currentHunk && currentHunk.changes.length > 0) {
          hunks.push(currentHunk)
        }
        currentHunk = { changes: [] }
        continue
      }

      if (!currentHunk) {
        // Still in the preamble (file headers etc.) before the first hunk.
        continue
      }

      let type: "context" | "add" | "remove"
      switch (line.charAt(0)) {
        case " ":
          type = "context"
          break
        case "+":
          type = "add"
          break
        case "-":
          type = "remove"
          break
        default:
          continue
      }

      // Split the line (minus its diff marker) into indentation and text.
      const content = line.slice(1)
      const indentMatch = content.match(/^\s*/)
      const indent = indentMatch ? indentMatch[0] : ""

      currentHunk.changes.push({
        type,
        content: content.slice(indent.length),
        indent,
        originalLine: content,
      })
    }

    if (currentHunk && currentHunk.changes.length > 0) {
      hunks.push(currentHunk)
    }

    return { hunks }
  }

  /**
   * Returns the prompt text describing the `apply_diff` tool.
   *
   * The Python example keeps its real indentation (4-space steps): the text
   * itself tells the reader that indentation matters, so the sample must be
   * valid.
   *
   * @param cwd - Working directory interpolated into the `path` parameter description.
   */
  getToolDescription(cwd: string): string {
    return `## apply_diff
Description: Apply a unified diff to a file at the specified path. This tool is useful when you need to make specific modifications to a file based on a set of changes provided in unified diff format (diff -U0).

Make sure you include the first 2 lines with the file paths.
Don't include timestamps with the file paths.

Start each hunk of changes with a \`@@ ... @@\` line.
Don't include line numbers like \`diff -U0\` does.
The user's patch tool doesn't need them.

Indentation matters in the diffs!

Start a new hunk for each section of the file that needs changes.

Only output hunks that specify changes with \`+\` or \`-\` lines.
Skip any hunks that are entirely unchanging \` \` lines.

The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file!
Think carefully and make sure you include and mark all lines that need to be removed or changed as \`-\` lines.
Make sure you mark all new or modified lines with \`+\`.
Don't leave out any lines or the diff patch won't apply correctly.

Output hunks in whatever order makes the most sense.
Hunks don't need to be in any particular order.

The hunks do not need line numbers.

When editing a function, method, loop, etc use a hunk to replace the *entire* code block.
Delete the entire existing version with \`-\` lines and then add a new, updated version with \`+\` lines.
This will help you generate correct code and correct diffs.

To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location.

Parameters:
- path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd})
- diff: (required) The diff content in unified format to apply to the file.

For each file that needs to be changed, write out the changes similar to a unified diff like \`diff -U0\` would produce.


Example:
\`\`\`diff
--- mathweb/flask/app.py
+++ mathweb/flask/app.py
@@ ... @@
-class MathWeb:
+import sympy
+
+class MathWeb:
@@ ... @@
-def is_prime(x):
-    if x < 2:
-        return False
-    for i in range(2, int(math.sqrt(x)) + 1):
-        if x % i == 0:
-            return False
-    return True
@@ ... @@
-@app.route('/prime/<int:n>')
-def nth_prime(n):
-    count = 0
-    num = 1
-    while count < n:
-        num += 1
-        if is_prime(num):
-            count += 1
-    return str(num)
+@app.route('/prime/<int:n>')
+def nth_prime(n):
+    count = 0
+    num = 1
+    while count < n:
+        num += 1
+        if sympy.isprime(num):
+            count += 1
+    return str(num)
\`\`\`

Usage:
<apply_diff>
<path>File path here</path>
<diff>
Your diff here
</diff>
</apply_diff>`
  }

  /**
   * Applies every hunk of `diffContent` to `originalContent`, in order.
   *
   * Each hunk is located in the current (already partially edited) content and
   * applied via `applyEdit`. Processing stops at the first hunk whose edit
   * confidence does not exceed the threshold.
   *
   * @param originalContent - Full text of the file.
   * @param diffContent - Unified diff to apply.
   * @param startLine - Accepted for the strategy interface; not used here.
   * @param endLine - Accepted for the strategy interface; not used here.
   * @returns `{ success: true, content }` with the patched text, or
   *          `{ success: false, error }` naming the strategy of the failed edit.
   */
  async applyDiff(
    originalContent: string,
    diffContent: string,
    startLine?: number,
    endLine?: number
  ): Promise<DiffResult> {
    const MIN_CONFIDENCE = 0.9
    const { hunks } = this.parseUnifiedDiff(diffContent)
    let result = originalContent.split("\n")

    for (const hunk of hunks) {
      const contextStr = prepareSearchString(hunk.changes)
      const { index: matchPosition, confidence } = findBestMatch(contextStr, result)

      const editResult = await applyEdit(hunk, result, matchPosition, confidence)
      if (!(editResult.confidence > MIN_CONFIDENCE)) {
        return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` }
      }
      result = editResult.result
    }

    return { success: true, content: result.join("\n") }
  }
}
|
||||
131
src/core/diff/strategies/new-unified/search-strategies.ts
Normal file
131
src/core/diff/strategies/new-unified/search-strategies.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
import { compareTwoStrings } from 'string-similarity';
|
||||
import { closest } from 'fastest-levenshtein';
|
||||
import { diff_match_patch } from 'diff-match-patch';
|
||||
import { Change } from './types';
|
||||
|
||||
/** Outcome of one search strategy looking for a hunk's text in the file. */
export type SearchResult = {
  /** Line index where the match starts, or -1 when nothing was found. */
  index: number;

  /** Score assigned by the strategy; 0 when nothing was found. */
  confidence: number;

  /** Label of the strategy that produced this result ('exact', 'similarity', 'levenshtein' or 'none'). */
  strategy: string;
};
|
||||
|
||||
//TODO: this should be configurable
|
||||
// Threshold that a getDMPSimilarity score must reach for a search strategy to
// treat its candidate as validated (validated candidates get the higher confidence).
const MIN_CONFIDENCE = 0.95;
|
||||
|
||||
/**
 * Builds the text a hunk expects to find in the file: its context and removed
 * lines, in order, joined with newlines. Added lines are excluded because they
 * are not in the file yet.
 *
 * Lines are taken with their original indentation (`originalLine`, falling
 * back to `indent + content`). Using the whitespace-stripped `content` meant a
 * multi-line search string could never match indented code exactly.
 *
 * @param changes - Changes of a single hunk.
 * @returns The search text; the empty string when there are no context/remove lines.
 */
export function prepareSearchString(changes: Change[]): string {
  return changes
    .filter(c => c.type === 'context' || c.type === 'remove')
    .map(c => (c.originalLine !== undefined ? c.originalLine : c.indent + c.content))
    .join('\n');
}
|
||||
|
||||
/**
 * Scores how alike two texts are by delegating to `compareTwoStrings`.
 *
 * @param original - First text.
 * @param modified - Second text.
 * @returns The score returned by `compareTwoStrings`.
 */
export function evaluateSimilarity(original: string, modified: string): number {
  const score = compareTwoStrings(original, modified);
  return score;
}
|
||||
|
||||
/**
 * Validates a pair of texts with diff-match-patch: diffs `original` against
 * `modified`, turns the diff into patches, applies those patches back onto
 * `original`, and scores the patched text against `modified`.
 *
 * NOTE(review): the patches are derived from and re-applied to the same
 * `original`, so the patched text would normally be expected to equal
 * `modified`; this looks like it reports a near-perfect score for most
 * inputs — confirm that is the intent before relying on it as a similarity measure.
 *
 * @param original - Starting text.
 * @param modified - Target text.
 * @returns Score from `evaluateSimilarity` for (patched original, modified).
 */
export function getDMPSimilarity(original: string, modified: string): number {
  const engine = new diff_match_patch();

  const delta = engine.diff_main(original, modified);
  engine.diff_cleanupSemantic(delta);

  const [expectedText] = engine.patch_apply(engine.patch_make(original, delta), original);
  return evaluateSimilarity(expectedText, modified);
}
|
||||
|
||||
/**
 * Exact-match search strategy: looks for `searchStr` verbatim in the file text
 * from `startIndex` onwards.
 *
 * @param searchStr - Text to locate (may span several lines).
 * @param content - File content, one entry per line.
 * @param startIndex - First line to consider.
 * @returns The line index containing the start of the match, with confidence
 *          1.0 when the DMP validation passes and 0.9 otherwise; index -1 and
 *          confidence 0 when the text does not occur.
 */
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const haystack = content.slice(startIndex).join('\n');
  const charOffset = haystack.indexOf(searchStr);

  if (charOffset === -1) {
    return { index: -1, confidence: 0, strategy: 'exact' };
  }

  // Number of newlines before the hit = line offset within the searched slice.
  const matchLine = startIndex + haystack.slice(0, charOffset).split('\n').length - 1;
  const lineCount = searchStr.split('\n').length;

  const matchedContent = content.slice(matchLine, matchLine + lineCount).join('\n');
  const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE;

  return {
    index: matchLine,
    confidence: dmpValid ? 1.0 : 0.9,
    strategy: 'exact'
  };
}
|
||||
|
||||
/**
 * String-similarity search strategy: slides a window as tall as `searchStr`
 * over the file and keeps the window with the best adjusted score.
 *
 * A window is considered only when its raw score is at least 0.8 and beats the
 * best adjusted score so far; windows that fail DMP validation have their
 * score multiplied by 0.9 before being compared.
 *
 * @param searchStr - Text to locate (may span several lines).
 * @param content - File content, one entry per line.
 * @param startIndex - First window start to consider.
 * @returns Best window start and its adjusted score, or index -1 / confidence 0.
 */
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const minScore = 0.8;
  const windowSize = searchStr.split('\n').length;
  const lastStart = content.length - windowSize;

  let bestScore = 0;
  let bestIndex = -1;

  for (let start = startIndex; start <= lastStart; start++) {
    const windowStr = content.slice(start, start + windowSize).join('\n');
    const rawScore = compareTwoStrings(searchStr, windowStr);

    if (!(rawScore > bestScore && rawScore >= minScore)) {
      continue;
    }

    const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE;
    const adjustedScore = dmpValid ? rawScore : rawScore * 0.9;

    // The penalty may push the candidate back below the current best.
    if (adjustedScore > bestScore) {
      bestScore = adjustedScore;
      bestIndex = start;
    }
  }

  const found = bestIndex !== -1;
  return {
    index: bestIndex,
    confidence: found ? bestScore : 0,
    strategy: 'similarity'
  };
}
|
||||
|
||||
/**
 * Levenshtein search strategy: collects every window as tall as `searchStr`
 * and lets `closest` pick the candidate nearest to the search text.
 *
 * @param searchStr - Text to locate (may span several lines).
 * @param content - File content, one entry per line.
 * @param startIndex - First window start to consider.
 * @returns Start line of the chosen window with a fixed confidence (0.7 when
 *          the DMP validation passes, 0.6 otherwise); index -1 / confidence 0
 *          when there is no window to compare.
 */
export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const windowSize = searchStr.split('\n').length;
  const windowCount = Math.max(0, content.length - windowSize + 1 - startIndex);

  const candidates = Array.from({ length: windowCount }, (_, k) =>
    content.slice(startIndex + k, startIndex + k + windowSize).join('\n')
  );

  if (candidates.length === 0) {
    return { index: -1, confidence: 0, strategy: 'levenshtein' };
  }

  const closestMatch = closest(searchStr, candidates);
  const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE;

  return {
    // Position of the winning window, translated back to a file line index.
    index: startIndex + candidates.indexOf(closestMatch),
    confidence: dmpValid ? 0.7 : 0.6,
    strategy: 'levenshtein'
  };
}
|
||||
|
||||
/**
 * Runs every search strategy (exact, similarity, Levenshtein — in that order)
 * and returns the result with the highest confidence. On equal confidence the
 * earlier strategy wins; if no strategy scores above 0 the result is
 * `{ index: -1, confidence: 0, strategy: 'none' }`.
 *
 * @param searchStr - Text to locate (may span several lines).
 * @param content - File content, one entry per line.
 * @param startIndex - First line to consider.
 */
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const nothingFound: SearchResult = { index: -1, confidence: 0, strategy: 'none' };

  return [findExactMatch, findSimilarityMatch, findLevenshteinMatch].reduce<SearchResult>(
    (best, strategy) => {
      const candidate = strategy(searchStr, content, startIndex);
      return candidate.confidence > best.confidence ? candidate : best;
    },
    nothingFound
  );
}
|
||||
14
src/core/diff/strategies/new-unified/types.ts
Normal file
14
src/core/diff/strategies/new-unified/types.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
/** One line of a hunk. */
export type Change = {
  /** Whether the line is unchanged context, an addition, or a removal. */
  type: 'context' | 'add' | 'remove';

  /** Line text without the diff marker and without leading whitespace. */
  content: string;

  /** Leading whitespace that was split off from the line. */
  indent: string;

  /** Line text without the diff marker, indentation included (when recorded). */
  originalLine?: string;
};
|
||||
|
||||
/** A contiguous group of changes taken from one `@@` section of a diff. */
export type Hunk = {
  /** The hunk's lines, in the order they appear in the diff. */
  changes: Change[];
};
|
||||
|
||||
/** A parsed unified diff. */
export type Diff = {
  /** The diff's hunks, in the order they appear in the diff text. */
  hunks: Hunk[];
};
|
||||
Reference in New Issue
Block a user