mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 04:11:10 -05:00
Refactor edit strategies and confidence validation in unified diff processing
- Enhanced the applyContextMatching and applyDMP functions to improve handling of context and edit changes.
- Updated confidence validation logic to ensure stricter checks, now requiring a minimum confidence of 1 for successful edits.
- Refined the way changes are processed, including better tracking of removal and addition changes.
- Improved the validation of edit results by incorporating strategy-specific checks and logging for better debugging.
- Adjusted the applyEdit function to ensure strategies are applied in a more robust manner, with clearer handling of debug mode.
This commit is contained in:
@@ -36,35 +36,43 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
|
||||
const newResult = [...content.slice(0, matchPosition)];
|
||||
let sourceIndex = matchPosition;
|
||||
let previousIndent = '';
|
||||
|
||||
const hunkChanges = hunk.changes.filter(c => c.type !== 'context');
|
||||
let lastChangeWasRemove = false; // Track if last change was a remove
|
||||
|
||||
for (const change of hunk.changes) {
|
||||
|
||||
if (change.type === 'context') {
|
||||
newResult.push(change.originalLine || (change.indent + change.content));
|
||||
previousIndent = change.indent;
|
||||
sourceIndex++;
|
||||
if (!lastChangeWasRemove) { // Only increment if we didn't just remove a line
|
||||
sourceIndex++;
|
||||
}
|
||||
lastChangeWasRemove = false;
|
||||
} else if (change.type === 'add') {
|
||||
const indent = change.indent || inferIndentation(change.content,
|
||||
hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
|
||||
hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''),
|
||||
previousIndent
|
||||
);
|
||||
newResult.push(indent + change.content);
|
||||
previousIndent = indent;
|
||||
lastChangeWasRemove = false;
|
||||
} else if (change.type === 'remove') {
|
||||
sourceIndex++;
|
||||
lastChangeWasRemove = true;
|
||||
}
|
||||
}
|
||||
|
||||
newResult.push(...content.slice(sourceIndex));
|
||||
|
||||
// Validate the result
|
||||
// Calculate the window size based on all changes
|
||||
const windowSize = hunk.changes.length;
|
||||
|
||||
// Validate the result using the full window size
|
||||
const similarity = getDMPSimilarity(
|
||||
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'),
|
||||
newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n')
|
||||
content.slice(matchPosition, matchPosition + windowSize).join('\n'),
|
||||
newResult.slice(matchPosition, matchPosition + windowSize).join('\n')
|
||||
)
|
||||
|
||||
const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n'));
|
||||
const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context');
|
||||
|
||||
return {
|
||||
confidence: similarity * confidence,
|
||||
@@ -80,45 +88,45 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
|
||||
}
|
||||
|
||||
const dmp = new diff_match_patch();
|
||||
const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
|
||||
const editText = editRegion.join('\n');
|
||||
|
||||
// Build the target text sequentially like in applyContextMatching
|
||||
let targetText = '';
|
||||
let previousIndent = '';
|
||||
// Build BEFORE block (context + removals)
|
||||
const beforeLines = hunk.changes
|
||||
.filter(change => change.type === 'context' || change.type === 'remove')
|
||||
.map(change => change.originalLine || (change.indent + change.content));
|
||||
|
||||
for (const change of hunk.changes) {
|
||||
if (change.type === 'context') {
|
||||
targetText += (change.originalLine || (change.indent + change.content)) + '\n';
|
||||
previousIndent = change.indent;
|
||||
} else if (change.type === 'add') {
|
||||
const indent = change.indent || inferIndentation(change.content,
|
||||
hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
|
||||
previousIndent
|
||||
);
|
||||
targetText += indent + change.content + '\n';
|
||||
previousIndent = indent;
|
||||
}
|
||||
// Skip remove changes as they shouldn't appear in target
|
||||
}
|
||||
|
||||
// Trim the trailing newline
|
||||
targetText = targetText.replace(/\n$/, '');
|
||||
|
||||
const patch = dmp.patch_make(editText, targetText);
|
||||
const [patchedText] = dmp.patch_apply(patch, editText);
|
||||
// Build AFTER block (context + additions)
|
||||
const afterLines = hunk.changes
|
||||
.filter(change => change.type === 'context' || change.type === 'add')
|
||||
.map(change => change.originalLine || (change.indent + change.content));
|
||||
|
||||
// Construct result with edited portion
|
||||
// Convert to text
|
||||
const beforeText = beforeLines.join('\n');
|
||||
const afterText = afterLines.join('\n');
|
||||
|
||||
// Create the patch
|
||||
const patch = dmp.patch_make(beforeText, afterText);
|
||||
|
||||
// Get the target text from content
|
||||
const targetText = content.slice(matchPosition, matchPosition + beforeLines.length).join('\n');
|
||||
|
||||
// Apply the patch
|
||||
const [patchedText] = dmp.patch_apply(patch, targetText);
|
||||
|
||||
// Split patched text back into lines
|
||||
const patchedLines = patchedText.split('\n');
|
||||
|
||||
// Construct the final result
|
||||
const newResult = [
|
||||
...content.slice(0, matchPosition),
|
||||
...patchedText.split('\n'),
|
||||
...content.slice(matchPosition + hunk.changes.length)
|
||||
...patchedLines,
|
||||
...content.slice(matchPosition + beforeLines.length)
|
||||
];
|
||||
|
||||
const similarity = getDMPSimilarity(editText, patchedText)
|
||||
const confidence = validateEditResult(hunk, patchedText);
|
||||
|
||||
return {
|
||||
// Calculate confidence
|
||||
const similarity = getDMPSimilarity(beforeText, targetText);
|
||||
const confidence = validateEditResult(hunk, patchedText, 'dmp');
|
||||
|
||||
return {
|
||||
confidence: similarity * confidence,
|
||||
result: newResult,
|
||||
strategy: 'dmp'
|
||||
@@ -228,7 +236,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
|
||||
const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
|
||||
const osrSimilarity = getDMPSimilarity(editText, osrResult)
|
||||
|
||||
const confidence = validateEditResult(hunk, osrResult);
|
||||
const confidence = validateEditResult(hunk, osrResult, 'git-osr');
|
||||
|
||||
if (osrSimilarity * confidence > 0.9) {
|
||||
// Construct result with edited portion
|
||||
@@ -273,7 +281,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
|
||||
const srsoResult = (await memfs.promises.readFile('/file.txt')).toString();
|
||||
const srsoSimilarity = getDMPSimilarity(editText, srsoResult)
|
||||
|
||||
const confidence = validateEditResult(hunk, srsoResult);
|
||||
const confidence = validateEditResult(hunk, srsoResult, 'git-srso');
|
||||
|
||||
// Construct result with edited portion
|
||||
const newResult = [
|
||||
@@ -299,7 +307,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P
|
||||
}
|
||||
|
||||
// Main edit function that tries strategies sequentially
|
||||
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
|
||||
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
|
||||
|
||||
// Don't attempt any edits if confidence is too low and not in debug mode
|
||||
const MIN_CONFIDENCE = 0.9;
|
||||
@@ -310,12 +318,12 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
|
||||
|
||||
// Try each strategy in sequence until one succeeds
|
||||
const strategies = [
|
||||
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
|
||||
{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
|
||||
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
|
||||
{ name: 'git', apply: () => applyGit(hunk, content, matchPosition) }
|
||||
];
|
||||
|
||||
if (debug) {
|
||||
if (debug !== '') {
|
||||
// In debug mode, try all strategies and return the first success
|
||||
const results = await Promise.all(strategies.map(async strategy => {
|
||||
console.log(`Attempting edit with ${strategy.name} strategy...`);
|
||||
@@ -324,18 +332,19 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
|
||||
return result;
|
||||
}));
|
||||
|
||||
const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
|
||||
/*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
|
||||
if (successfulResults.length > 0) {
|
||||
const bestResult = successfulResults.reduce((best, current) =>
|
||||
current.confidence > best.confidence ? current : best
|
||||
);
|
||||
return bestResult;
|
||||
}
|
||||
}*/
|
||||
return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
|
||||
} else {
|
||||
// Normal mode - try strategies sequentially until one succeeds
|
||||
for (const strategy of strategies) {
|
||||
const result = await strategy.apply();
|
||||
if (result.confidence > MIN_CONFIDENCE) {
|
||||
if (result.confidence === 1) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ Your diff here
|
||||
startLine?: number,
|
||||
endLine?: number
|
||||
): Promise<DiffResult> {
|
||||
const MIN_CONFIDENCE = 0.9
|
||||
const MIN_CONFIDENCE = 1
|
||||
const parsedDiff = this.parseUnifiedDiff(diffContent)
|
||||
const originalLines = originalContent.split("\n")
|
||||
let result = [...originalLines]
|
||||
@@ -170,7 +170,7 @@ Your diff here
|
||||
const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
|
||||
|
||||
const editResult = await applyEdit(hunk, result, matchPosition, confidence)
|
||||
if (editResult.confidence > MIN_CONFIDENCE) {
|
||||
if (editResult.confidence >= MIN_CONFIDENCE) {
|
||||
result = editResult.result
|
||||
} else {
|
||||
return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` }
|
||||
|
||||
@@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95;
|
||||
// Helper function to prepare search string from context
|
||||
export function prepareSearchString(changes: Change[]): string {
|
||||
const lines = changes
|
||||
.filter(c => c.type === 'context' || c.type === 'remove')
|
||||
.map(c => c.content);
|
||||
.filter((c) => c.type === 'context' || c.type === 'remove')
|
||||
.map((c) => c.content);
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
@@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number {
|
||||
// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
|
||||
// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
|
||||
// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
|
||||
export function validateEditResult(hunk: Hunk, result: string): number {
|
||||
export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
|
||||
const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
|
||||
|
||||
|
||||
// Create skeleton of original content (context + removed lines)
|
||||
const originalSkeleton = hunkDeepCopy.changes
|
||||
.filter(change => change.type === 'context' || change.type === 'remove')
|
||||
.map(change => change.content)
|
||||
.filter((change) => change.type === 'context' || change.type === 'remove')
|
||||
.map((change) => change.content)
|
||||
.join('\n');
|
||||
|
||||
// Create skeleton of expected result (context + added lines)
|
||||
const expectedSkeleton = hunkDeepCopy.changes
|
||||
.filter(change => change.type === 'context' || change.type === 'add')
|
||||
.map(change => change.content)
|
||||
.filter((change) => change.type === 'context' || change.type === 'add')
|
||||
.map((change) => change.content)
|
||||
.join('\n');
|
||||
|
||||
// Compare with original content
|
||||
const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
|
||||
|
||||
// If result is too similar to original, it means changes weren't applied
|
||||
if (originalSimilarity > 0.9) {
|
||||
console.log('Result too similar to original content:', originalSimilarity);
|
||||
return 0.5; // Significant confidence reduction
|
||||
console.log('originalSimilarity ', strategy, originalSimilarity);
|
||||
// If original similarity is 1, it means changes weren't applied
|
||||
if (originalSimilarity > 0.97) {
|
||||
if (originalSimilarity === 1) {
|
||||
return 0.5; // Significant confidence reduction
|
||||
} else {
|
||||
return 0.8;
|
||||
}
|
||||
}
|
||||
|
||||
// Compare with expected result
|
||||
const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
|
||||
console.log('Original similarity:', originalSimilarity);
|
||||
console.log('Expected similarity:', expectedSimilarity);
|
||||
|
||||
console.log('originalSkeleton:', originalSkeleton);
|
||||
console.log('expectedSkeleton:', expectedSkeleton);
|
||||
console.log('result:', result);
|
||||
console.log('expectedSimilarity', strategy, expectedSimilarity);
|
||||
|
||||
|
||||
// Scale between 0.96 and 1.0 (4% impact) based on expected similarity
|
||||
const multiplier = expectedSimilarity < MIN_CONFIDENCE
|
||||
? 0.96 + (0.04 * expectedSimilarity)
|
||||
: 1;
|
||||
const multiplier =
|
||||
expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1;
|
||||
|
||||
return multiplier;
|
||||
}
|
||||
@@ -86,44 +85,59 @@ export function validateEditResult(hunk: Hunk, result: string): number {
|
||||
// Helper function to validate context lines against original content
|
||||
function validateContextLines(searchStr: string, content: string): number {
|
||||
// Extract just the context lines from the search string
|
||||
const contextLines = searchStr.split('\n')
|
||||
.filter(line => !line.startsWith('-')); // Exclude removed lines
|
||||
|
||||
const contextLines = searchStr
|
||||
.split('\n')
|
||||
.filter((line) => !line.startsWith('-')); // Exclude removed lines
|
||||
|
||||
// Compare context lines with content
|
||||
const similarity = evaluateSimilarity(contextLines.join('\n'), content);
|
||||
|
||||
|
||||
// Context lines must match very closely, or confidence drops significantly
|
||||
return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity;
|
||||
}
|
||||
|
||||
// Exact match strategy
|
||||
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
|
||||
export function findExactMatch(
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex: number = 0
|
||||
): SearchResult {
|
||||
const contentStr = content.slice(startIndex).join('\n');
|
||||
const searchLines = searchStr.split('\n');
|
||||
|
||||
|
||||
const exactMatch = contentStr.indexOf(searchStr);
|
||||
if (exactMatch !== -1) {
|
||||
const matchedContent = content.slice(
|
||||
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
|
||||
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
|
||||
).join('\n');
|
||||
|
||||
const matchedContent = content
|
||||
.slice(
|
||||
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
|
||||
startIndex +
|
||||
contentStr.slice(0, exactMatch).split('\n').length -
|
||||
1 +
|
||||
searchLines.length
|
||||
)
|
||||
.join('\n');
|
||||
|
||||
const similarity = getDMPSimilarity(searchStr, matchedContent);
|
||||
const contextSimilarity = validateContextLines(searchStr, matchedContent);
|
||||
const confidence = Math.min(similarity, contextSimilarity);
|
||||
|
||||
|
||||
return {
|
||||
index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
|
||||
index:
|
||||
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
|
||||
confidence,
|
||||
strategy: 'exact'
|
||||
strategy: 'exact',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
return { index: -1, confidence: 0, strategy: 'exact' };
|
||||
}
|
||||
|
||||
// String similarity strategy
|
||||
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
|
||||
export function findSimilarityMatch(
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex: number = 0
|
||||
): SearchResult {
|
||||
const searchLines = searchStr.split('\n');
|
||||
let bestScore = 0;
|
||||
let bestIndex = -1;
|
||||
@@ -136,7 +150,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
|
||||
const similarity = getDMPSimilarity(searchStr, windowStr);
|
||||
const contextSimilarity = validateContextLines(searchStr, windowStr);
|
||||
const adjustedScore = Math.min(similarity, contextSimilarity) * score;
|
||||
|
||||
|
||||
if (adjustedScore > bestScore) {
|
||||
bestScore = adjustedScore;
|
||||
bestIndex = i;
|
||||
@@ -144,33 +158,37 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
index: bestIndex,
|
||||
return {
|
||||
index: bestIndex,
|
||||
confidence: bestIndex !== -1 ? bestScore : 0,
|
||||
strategy: 'similarity'
|
||||
strategy: 'similarity',
|
||||
};
|
||||
}
|
||||
|
||||
// Levenshtein strategy
|
||||
export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
|
||||
export function findLevenshteinMatch(
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex: number = 0
|
||||
): SearchResult {
|
||||
const searchLines = searchStr.split('\n');
|
||||
const candidates = [];
|
||||
|
||||
|
||||
for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
|
||||
candidates.push(content.slice(i, i + searchLines.length).join('\n'));
|
||||
}
|
||||
|
||||
|
||||
if (candidates.length > 0) {
|
||||
const closestMatch = closest(searchStr, candidates);
|
||||
const index = startIndex + candidates.indexOf(closestMatch);
|
||||
const similarity = getDMPSimilarity(searchStr, closestMatch);
|
||||
const contextSimilarity = validateContextLines(searchStr, closestMatch);
|
||||
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
|
||||
|
||||
return {
|
||||
index,
|
||||
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
|
||||
|
||||
return {
|
||||
index,
|
||||
confidence,
|
||||
strategy: 'levenshtein'
|
||||
strategy: 'levenshtein',
|
||||
};
|
||||
}
|
||||
|
||||
@@ -178,21 +196,25 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
|
||||
}
|
||||
|
||||
// Main search function that tries all strategies
|
||||
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
|
||||
export function findBestMatch(
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex: number = 0
|
||||
): SearchResult {
|
||||
const strategies = [
|
||||
findExactMatch,
|
||||
findSimilarityMatch,
|
||||
findLevenshteinMatch
|
||||
findLevenshteinMatch,
|
||||
];
|
||||
|
||||
|
||||
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };
|
||||
|
||||
|
||||
for (const strategy of strategies) {
|
||||
const result = strategy(searchStr, content, startIndex);
|
||||
if (result.confidence > bestResult.confidence) {
|
||||
bestResult = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return bestResult;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user