Refactor edit strategies and confidence validation in unified diff processing

- Enhanced the applyContextMatching and applyDMP functions to improve handling of context and edit changes.
- Updated confidence validation logic to ensure stricter checks, now requiring a minimum confidence of 1 for successful edits.
- Refined the way changes are processed, including better tracking of removal and addition changes.
- Improved the validation of edit results by incorporating strategy-specific checks and logging for better debugging.
- Adjusted the applyEdit function to ensure strategies are applied in a more robust manner, with clearer handling of debug mode.
This commit is contained in:
Daniel Riccio
2025-01-08 17:13:46 -05:00
parent 995692c48e
commit 9591ae062a
3 changed files with 136 additions and 105 deletions

View File

@@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95;
// Helper function to prepare search string from context
export function prepareSearchString(changes: Change[]): string {
const lines = changes
.filter(c => c.type === 'context' || c.type === 'remove')
.map(c => c.content);
.filter((c) => c.type === 'context' || c.type === 'remove')
.map((c) => c.content);
return lines.join('\n');
}
@@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number {
// Example: If expected similarity is 0.8 and MIN_CONFIDENCE is 0.95,
// returns 0.992 (0.96 + 0.04 * 0.8), a multiplier that reduces confidence only slightly.
// If expected similarity >= MIN_CONFIDENCE, returns 1 (no reduction).
export function validateEditResult(hunk: Hunk, result: string): number {
export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk));
// Create skeleton of original content (context + removed lines)
const originalSkeleton = hunkDeepCopy.changes
.filter(change => change.type === 'context' || change.type === 'remove')
.map(change => change.content)
.filter((change) => change.type === 'context' || change.type === 'remove')
.map((change) => change.content)
.join('\n');
// Create skeleton of expected result (context + added lines)
const expectedSkeleton = hunkDeepCopy.changes
.filter(change => change.type === 'context' || change.type === 'add')
.map(change => change.content)
.filter((change) => change.type === 'context' || change.type === 'add')
.map((change) => change.content)
.join('\n');
// Compare with original content
const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
// If result is too similar to original, it means changes weren't applied
if (originalSimilarity > 0.9) {
console.log('Result too similar to original content:', originalSimilarity);
return 0.5; // Significant confidence reduction
console.log('originalSimilarity ', strategy, originalSimilarity);
// If original similarity is 1, it means changes weren't applied
if (originalSimilarity > 0.97) {
if (originalSimilarity === 1) {
return 0.5; // Significant confidence reduction
} else {
return 0.8;
}
}
// Compare with expected result
const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);
console.log('Original similarity:', originalSimilarity);
console.log('Expected similarity:', expectedSimilarity);
console.log('originalSkeleton:', originalSkeleton);
console.log('expectedSkeleton:', expectedSkeleton);
console.log('result:', result);
console.log('expectedSimilarity', strategy, expectedSimilarity);
// Scale between 0.96 and 1.0 (at most a 4% reduction) based on expected similarity
const multiplier = expectedSimilarity < MIN_CONFIDENCE
? 0.96 + (0.04 * expectedSimilarity)
: 1;
const multiplier =
expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1;
return multiplier;
}
@@ -86,44 +85,59 @@ export function validateEditResult(hunk: Hunk, result: string): number {
// Helper function to validate context lines against original content
function validateContextLines(searchStr: string, content: string): number {
// Extract just the context lines from the search string
const contextLines = searchStr.split('\n')
.filter(line => !line.startsWith('-')); // Exclude removed lines
const contextLines = searchStr
.split('\n')
.filter((line) => !line.startsWith('-')); // Exclude removed lines
// Compare context lines with content
const similarity = evaluateSimilarity(contextLines.join('\n'), content);
// Context lines must match very closely, or confidence drops significantly
return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity;
}
// Exact match strategy
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findExactMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const contentStr = content.slice(startIndex).join('\n');
const searchLines = searchStr.split('\n');
const exactMatch = contentStr.indexOf(searchStr);
if (exactMatch !== -1) {
const matchedContent = content.slice(
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
).join('\n');
const matchedContent = content
.slice(
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
startIndex +
contentStr.slice(0, exactMatch).split('\n').length -
1 +
searchLines.length
)
.join('\n');
const similarity = getDMPSimilarity(searchStr, matchedContent);
const contextSimilarity = validateContextLines(searchStr, matchedContent);
const confidence = Math.min(similarity, contextSimilarity);
return {
index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
index:
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
confidence,
strategy: 'exact'
strategy: 'exact',
};
}
return { index: -1, confidence: 0, strategy: 'exact' };
}
// String similarity strategy
export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findSimilarityMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const searchLines = searchStr.split('\n');
let bestScore = 0;
let bestIndex = -1;
@@ -136,7 +150,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
const similarity = getDMPSimilarity(searchStr, windowStr);
const contextSimilarity = validateContextLines(searchStr, windowStr);
const adjustedScore = Math.min(similarity, contextSimilarity) * score;
if (adjustedScore > bestScore) {
bestScore = adjustedScore;
bestIndex = i;
@@ -144,33 +158,37 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
}
}
return {
index: bestIndex,
return {
index: bestIndex,
confidence: bestIndex !== -1 ? bestScore : 0,
strategy: 'similarity'
strategy: 'similarity',
};
}
// Levenshtein strategy
export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findLevenshteinMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const searchLines = searchStr.split('\n');
const candidates = [];
for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
candidates.push(content.slice(i, i + searchLines.length).join('\n'));
}
if (candidates.length > 0) {
const closestMatch = closest(searchStr, candidates);
const index = startIndex + candidates.indexOf(closestMatch);
const similarity = getDMPSimilarity(searchStr, closestMatch);
const contextSimilarity = validateContextLines(searchStr, closestMatch);
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
return {
index,
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
return {
index,
confidence,
strategy: 'levenshtein'
strategy: 'levenshtein',
};
}
@@ -178,21 +196,25 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
}
// Main search function that tries all strategies
export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
export function findBestMatch(
searchStr: string,
content: string[],
startIndex: number = 0
): SearchResult {
const strategies = [
findExactMatch,
findSimilarityMatch,
findLevenshteinMatch
findLevenshteinMatch,
];
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };
for (const strategy of strategies) {
const result = strategy(searchStr, content, startIndex);
if (result.confidence > bestResult.confidence) {
bestResult = result;
}
}
return bestResult;
}
}