Improve strategies and confidence system

This commit is contained in:
Daniel Riccio
2025-01-08 12:28:31 -05:00
parent 594481643b
commit 995692c48e
4 changed files with 265 additions and 81 deletions

View File

@@ -1,8 +1,8 @@
import { diff_match_patch } from 'diff-match-patch'; import { diff_match_patch } from 'diff-match-patch';
import * as git from 'isomorphic-git'; import * as git from 'isomorphic-git';
import { fs as memfs, vol } from 'memfs'; import { fs as memfs, vol } from 'memfs';
import { Hunk } from './types'; import { Change, EditResult, Hunk } from './types';
import { getDMPSimilarity } from './search-strategies'; import { getDMPSimilarity, validateEditResult } from './search-strategies';
// Helper function to infer indentation // Helper function to infer indentation
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
@@ -27,12 +27,6 @@ function inferIndentation(line: string, contextLines: string[], previousIndent:
return previousIndent; return previousIndent;
} }
export type EditResult = {
confidence: number;
result: string[];
strategy: string;
};
// Context matching edit strategy // Context matching edit strategy
export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
if (matchPosition === -1) { if (matchPosition === -1) {
@@ -43,6 +37,8 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
let sourceIndex = matchPosition; let sourceIndex = matchPosition;
let previousIndent = ''; let previousIndent = '';
const hunkChanges = hunk.changes.filter(c => c.type !== 'context');
for (const change of hunk.changes) { for (const change of hunk.changes) {
if (change.type === 'context') { if (change.type === 'context') {
newResult.push(change.originalLine || (change.indent + change.content)); newResult.push(change.originalLine || (change.indent + change.content));
@@ -66,10 +62,12 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
const similarity = getDMPSimilarity( const similarity = getDMPSimilarity(
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'),
newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n')
); )
const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n'));
return { return {
confidence: similarity, confidence: similarity * confidence,
result: newResult, result: newResult,
strategy: 'context' strategy: 'context'
}; };
@@ -82,41 +80,53 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
} }
const dmp = new diff_match_patch(); const dmp = new diff_match_patch();
const currentText = content.join('\n'); const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
const contextLines = hunk.changes const editText = editRegion.join('\n');
.filter(c => c.type === 'context')
.map(c => c.content);
// Create a patch from the hunk with proper indentation // Build the target text sequentially like in applyContextMatching
const patch = dmp.patch_make( let targetText = '';
currentText, let previousIndent = '';
hunk.changes.reduce((acc, change) => {
if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content, contextLines);
return acc + indent + change.content + '\n';
}
if (change.type === 'remove') {
return acc.replace(change.content + '\n', '');
}
return acc + change.content + '\n';
}, '')
);
const [patchedText] = dmp.patch_apply(patch, currentText); for (const change of hunk.changes) {
const similarity = getDMPSimilarity( if (change.type === 'context') {
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), targetText += (change.originalLine || (change.indent + change.content)) + '\n';
patchedText previousIndent = change.indent;
); } else if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content,
hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''),
previousIndent
);
targetText += indent + change.content + '\n';
previousIndent = indent;
}
// Skip remove changes as they shouldn't appear in target
}
// Trim the trailing newline
targetText = targetText.replace(/\n$/, '');
const patch = dmp.patch_make(editText, targetText);
const [patchedText] = dmp.patch_apply(patch, editText);
// Construct result with edited portion
const newResult = [
...content.slice(0, matchPosition),
...patchedText.split('\n'),
...content.slice(matchPosition + hunk.changes.length)
];
const similarity = getDMPSimilarity(editText, patchedText)
const confidence = validateEditResult(hunk, patchedText);
return { return {
confidence: similarity, confidence: similarity * confidence,
result: patchedText.split('\n'), result: newResult,
strategy: 'dmp' strategy: 'dmp'
}; };
} }
// Git edit strategy // Git edit strategy with cherry-pick approach
export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> { async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> {
if (matchPosition === -1) { if (matchPosition === -1) {
return { confidence: 0, result: content, strategy: 'git' }; return { confidence: 0, result: content, strategy: 'git' };
} }
@@ -124,26 +134,55 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num
vol.reset(); vol.reset();
try { try {
// Initialize git repo
await git.init({ fs: memfs, dir: '/' }); await git.init({ fs: memfs, dir: '/' });
const originalContent = content.join('\n'); // Create original content - only use the edit region
await memfs.promises.writeFile('/file.txt', originalContent); const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
const editText = editRegion.join('\n');
await memfs.promises.writeFile('/file.txt', editText);
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({ await git.commit({
fs: memfs, fs: memfs,
dir: '/', dir: '/',
author: { name: 'Temp', email: 'temp@example.com' }, author: { name: 'Temp', email: 'temp@example.com' },
message: 'Initial commit' message: 'Original'
}); });
const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' }); // Create search content (content with removals)
await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' }); const searchLines = [...editRegion];
let offset = 0;
for (const change of hunk.changes) {
if (change.type === 'remove') {
const index = searchLines.findIndex(
(line, i) => i >= offset && line.trimLeft() === change.content
);
if (index !== -1) {
searchLines.splice(index, 1);
}
}
if (change.type !== 'add') {
offset++;
}
}
const lines = originalContent.split('\n'); // Create search branch and commit
const newLines = [...lines]; await git.branch({ fs: memfs, dir: '/', ref: 'search' });
let offset = matchPosition; await git.checkout({ fs: memfs, dir: '/', ref: 'search' });
await memfs.promises.writeFile('/file.txt', searchLines.join('\n'));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Search state'
});
const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Create replace content (with additions)
const replaceLines = [...searchLines];
offset = 0;
const contextLines = hunk.changes const contextLines = hunk.changes
.filter(c => c.type === 'context') .filter(c => c.type === 'context')
.map(c => c.content); .map(c => c.content);
@@ -151,42 +190,108 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num
for (const change of hunk.changes) { for (const change of hunk.changes) {
if (change.type === 'add') { if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content, contextLines); const indent = change.indent || inferIndentation(change.content, contextLines);
newLines.splice(offset, 0, indent + change.content); replaceLines.splice(offset, 0, indent + change.content);
offset++; offset++;
} else if (change.type === 'remove') { } else if (change.type !== 'remove') {
const index = newLines.findIndex(
(line, i) => i >= offset && line.trimLeft() === change.content
);
if (index !== -1) {
newLines.splice(index, 1);
}
} else {
offset++; offset++;
} }
} }
const modifiedContent = newLines.join('\n'); // Create replace branch and commit
await memfs.promises.writeFile('/file.txt', modifiedContent); await git.branch({ fs: memfs, dir: '/', ref: 'replace' });
await git.checkout({ fs: memfs, dir: '/', ref: 'replace' });
await memfs.promises.writeFile('/file.txt', replaceLines.join('\n'));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({ await git.commit({
fs: memfs, fs: memfs,
dir: '/', dir: '/',
author: { name: 'Temp', email: 'temp@example.com' }, author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply changes' message: 'Replace state'
}); });
const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
const similarity = getDMPSimilarity( // Try both strategies:
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), // 1. OSR: Cherry-pick replace onto original
newLines.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') // 2. SR-SO: Apply search->replace changes to search->original
);
return { // Strategy 1: OSR
confidence: similarity, await git.checkout({ fs: memfs, dir: '/', ref: originalHash });
result: newLines, try {
strategy: 'git' await git.merge({
}; fs: memfs,
dir: '/',
ours: originalHash,
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Cherry-pick OSR'
});
const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
const osrSimilarity = getDMPSimilarity(editText, osrResult)
const confidence = validateEditResult(hunk, osrResult);
if (osrSimilarity * confidence > 0.9) {
// Construct result with edited portion
const newResult = [
...content.slice(0, matchPosition),
...osrResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length)
];
return {
confidence: osrSimilarity,
result: newResult,
strategy: 'git-osr'
};
}
} catch (error) {
console.log('OSR strategy failed:', error);
}
// Strategy 2: SR-SO
await git.checkout({ fs: memfs, dir: '/', ref: searchHash });
try {
// First apply original changes
await git.merge({
fs: memfs,
dir: '/',
ours: searchHash,
theirs: originalHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply original changes'
});
// Then apply replace changes
await git.merge({
fs: memfs,
dir: '/',
ours: 'HEAD',
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply replace changes'
});
const srsoResult = (await memfs.promises.readFile('/file.txt')).toString();
const srsoSimilarity = getDMPSimilarity(editText, srsoResult)
const confidence = validateEditResult(hunk, srsoResult);
// Construct result with edited portion
const newResult = [
...content.slice(0, matchPosition),
...srsoResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length)
];
return {
confidence: srsoSimilarity * confidence,
result: newResult,
strategy: 'git-srso'
};
} catch (error) {
console.log('SR-SO strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git' };
}
} catch (error) { } catch (error) {
console.log('Git strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git' }; return { confidence: 0, result: content, strategy: 'git' };
} finally { } finally {
vol.reset(); vol.reset();
@@ -195,9 +300,11 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num
// Main edit function that tries strategies sequentially // Main edit function that tries strategies sequentially
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> { export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
// Don't attempt any edits if confidence is too low and not in debug mode // Don't attempt any edits if confidence is too low and not in debug mode
const MIN_CONFIDENCE = 0.9; const MIN_CONFIDENCE = 0.9;
if (confidence < MIN_CONFIDENCE && !debug) { if (confidence < MIN_CONFIDENCE) {
console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`);
return { confidence: 0, result: content, strategy: 'none' }; return { confidence: 0, result: content, strategy: 'none' };
} }
@@ -211,15 +318,18 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
if (debug) { if (debug) {
// In debug mode, try all strategies and return the first success // In debug mode, try all strategies and return the first success
const results = await Promise.all(strategies.map(async strategy => { const results = await Promise.all(strategies.map(async strategy => {
console.log(`Attempting edit with ${strategy.name} strategy...`);
const result = await strategy.apply(); const result = await strategy.apply();
console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`);
return result; return result;
})); }));
const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
if (successfulResults.length > 0) { if (successfulResults.length > 0) {
return successfulResults.reduce((best, current) => const bestResult = successfulResults.reduce((best, current) =>
current.confidence > best.confidence ? current : best current.confidence > best.confidence ? current : best
); );
return bestResult;
} }
} else { } else {
// Normal mode - try strategies sequentially until one succeeds // Normal mode - try strategies sequentially until one succeeds

View File

@@ -162,7 +162,8 @@ Your diff here
): Promise<DiffResult> { ): Promise<DiffResult> {
const MIN_CONFIDENCE = 0.9 const MIN_CONFIDENCE = 0.9
const parsedDiff = this.parseUnifiedDiff(diffContent) const parsedDiff = this.parseUnifiedDiff(diffContent)
let result = originalContent.split("\n") const originalLines = originalContent.split("\n")
let result = [...originalLines]
for (const hunk of parsedDiff.hunks) { for (const hunk of parsedDiff.hunks) {
const contextStr = prepareSearchString(hunk.changes) const contextStr = prepareSearchString(hunk.changes)

View File

@@ -1,7 +1,7 @@
import { compareTwoStrings } from 'string-similarity'; import { compareTwoStrings } from 'string-similarity';
import { closest } from 'fastest-levenshtein'; import { closest } from 'fastest-levenshtein';
import { diff_match_patch } from 'diff-match-patch'; import { diff_match_patch } from 'diff-match-patch';
import { Change } from './types'; import { Change, Hunk } from './types';
export type SearchResult = { export type SearchResult = {
index: number; index: number;
@@ -32,10 +32,70 @@ export function getDMPSimilarity(original: string, modified: string): number {
dmp.diff_cleanupSemantic(diffs); dmp.diff_cleanupSemantic(diffs);
const patches = dmp.patch_make(original, diffs); const patches = dmp.patch_make(original, diffs);
const [expectedText] = dmp.patch_apply(patches, original); const [expectedText] = dmp.patch_apply(patches, original);
const similarity = evaluateSimilarity(expectedText, modified); const similarity = evaluateSimilarity(expectedText, modified);
return similarity; return similarity;
} }
/**
 * Validates an applied edit against the hunk that produced it.
 *
 * Returns a confidence *multiplier* (not a reduction): callers multiply their
 * own similarity score by the returned value.
 *
 * Two "skeletons" are built from the contents of the hunk's changes:
 *  - original skeleton: context + removed lines
 *  - expected skeleton: context + added lines
 *
 * @param hunk   Hunk describing the intended edit. It is only read, never mutated.
 * @param result Text of the edited region (lines joined with '\n').
 * @returns
 *  - `0.5` when `result` is more than 0.9 similar to the original skeleton,
 *    i.e. the changes apparently were not applied;
 *  - `0.96 + 0.04 * expectedSimilarity` when similarity to the expected
 *    skeleton is below MIN_CONFIDENCE;
 *  - `1` when similarity to the expected skeleton is at or above MIN_CONFIDENCE.
 */
export function validateEditResult(hunk: Hunk, result: string): number {
  // filter/map/join never mutate the hunk, so no defensive deep copy is needed.
  const buildSkeleton = (kept: 'add' | 'remove'): string =>
    hunk.changes
      .filter(change => change.type === 'context' || change.type === kept)
      .map(change => change.content)
      .join('\n');

  // Skeleton of the original content (context + removed lines)
  const originalSkeleton = buildSkeleton('remove');

  // Skeleton of the expected result (context + added lines)
  const expectedSkeleton = buildSkeleton('add');

  // If the result still looks like the original, the changes weren't applied
  const originalSimilarity = evaluateSimilarity(originalSkeleton, result);
  if (originalSimilarity > 0.9) {
    console.log('Result too similar to original content:', originalSimilarity);
    return 0.5; // Significant confidence reduction
  }

  // Compare with expected result
  const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result);

  console.log('Original similarity:', originalSimilarity);
  console.log('Expected similarity:', expectedSimilarity);

  console.log('originalSkeleton:', originalSkeleton);
  console.log('expectedSkeleton:', expectedSkeleton);
  console.log('result:', result);

  // Below the threshold the multiplier is 0.96 + 0.04 * expectedSimilarity,
  // so the penalty is at most 4% (assuming evaluateSimilarity yields a value
  // in [0, 1] — TODO confirm); at or above the threshold there is no penalty.
  if (expectedSimilarity < MIN_CONFIDENCE) {
    return 0.96 + (0.04 * expectedSimilarity);
  }
  return 1;
}
/**
 * Scores how well the non-removed lines of a search string match a piece of
 * content.
 *
 * Lines of `searchStr` that start with '-' are dropped; the remaining lines
 * are re-joined with '\n' and compared against `content` via
 * evaluateSimilarity.
 *
 * @param searchStr Multi-line search string.
 * @param content   Text the remaining lines are compared against.
 * @returns The similarity itself when it is at least MIN_CONFIDENCE,
 *          otherwise the similarity scaled down by a factor of 0.3.
 */
function validateContextLines(searchStr: string, content: string): number {
  // Keep every line that is not marked as removed
  const keptLines: string[] = [];
  for (const line of searchStr.split('\n')) {
    if (line.startsWith('-')) {
      continue;
    }
    keptLines.push(line);
  }

  const score = evaluateSimilarity(keptLines.join('\n'), content);

  // A weak context match is penalised heavily rather than rejected outright
  if (score < MIN_CONFIDENCE) {
    return score * 0.3;
  }
  return score;
}
// Exact match strategy // Exact match strategy
export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
const contentStr = content.slice(startIndex).join('\n'); const contentStr = content.slice(startIndex).join('\n');
@@ -48,10 +108,13 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length
).join('\n'); ).join('\n');
const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE; const similarity = getDMPSimilarity(searchStr, matchedContent);
const contextSimilarity = validateContextLines(searchStr, matchedContent);
const confidence = Math.min(similarity, contextSimilarity);
return { return {
index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1,
confidence: dmpValid ? 1.0 : 0.9, confidence,
strategy: 'exact' strategy: 'exact'
}; };
} }
@@ -70,8 +133,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
const windowStr = content.slice(i, i + searchLines.length).join('\n'); const windowStr = content.slice(i, i + searchLines.length).join('\n');
const score = compareTwoStrings(searchStr, windowStr); const score = compareTwoStrings(searchStr, windowStr);
if (score > bestScore && score >= minScore) { if (score > bestScore && score >= minScore) {
const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE; const similarity = getDMPSimilarity(searchStr, windowStr);
const adjustedScore = dmpValid ? score : score * 0.9; const contextSimilarity = validateContextLines(searchStr, windowStr);
const adjustedScore = Math.min(similarity, contextSimilarity) * score;
if (adjustedScore > bestScore) { if (adjustedScore > bestScore) {
bestScore = adjustedScore; bestScore = adjustedScore;
@@ -99,10 +163,13 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
if (candidates.length > 0) { if (candidates.length > 0) {
const closestMatch = closest(searchStr, candidates); const closestMatch = closest(searchStr, candidates);
const index = startIndex + candidates.indexOf(closestMatch); const index = startIndex + candidates.indexOf(closestMatch);
const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE; const similarity = getDMPSimilarity(searchStr, closestMatch);
const contextSimilarity = validateContextLines(searchStr, closestMatch);
const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty
return { return {
index, index,
confidence: dmpValid ? 0.7 : 0.6, confidence,
strategy: 'levenshtein' strategy: 'levenshtein'
}; };
} }

View File

@@ -12,3 +12,9 @@ export type Hunk = {
export type Diff = { export type Diff = {
hunks: Hunk[]; hunks: Hunk[];
}; };
/**
 * Outcome of one attempt to apply a hunk to file content.
 */
export type EditResult = {
/** Score for the attempt; the edit strategies return 0 when no edit was made or the strategy failed. */
confidence: number;
/** Content lines after the attempt; the strategies return the unmodified input lines on failure. */
result: string[];
/** Label of the strategy that produced this result (e.g. 'context', 'dmp', 'git', 'git-osr', 'git-srso', 'none'). */
strategy: string;
};