refactor: implement Git fallback strategy in edit processing

- Introduced a new Git fallback strategy for handling edits when the search confidence is below the minimum threshold (0.9).
- Replaced the previous in-memory Git edit strategy (isomorphic-git + memfs) with a more robust approach that uses simple-git in a temporary directory, commits the original, search, and replace states, and cherry-picks the replace commit onto the original.
- Enhanced error handling and logging for better debugging during the edit process.
- Updated the main edit function to incorporate the Git fallback strategy, so that it is attempted both when the search confidence is too low for the regular strategies and after the other strategies fail.
- Improved overall structure and readability of the applyGitFallback function, streamlining the process of applying changes using Git.
This commit is contained in:
Daniel Riccio
2025-01-10 17:34:44 -05:00
parent 058431eaf1
commit 6d68edef3e

View File

@@ -1,8 +1,10 @@
import { diff_match_patch } from 'diff-match-patch'; import { diff_match_patch } from 'diff-match-patch';
import * as git from 'isomorphic-git'; import { EditResult, Hunk } from './types';
import { fs as memfs, vol } from 'memfs';
import { Change, EditResult, Hunk } from './types';
import { getDMPSimilarity, validateEditResult } from './search-strategies'; import { getDMPSimilarity, validateEditResult } from './search-strategies';
import * as path from 'path';
import simpleGit, { SimpleGit } from 'simple-git';
import * as tmp from 'tmp';
import * as fs from 'fs';
// Helper function to infer indentation // Helper function to infer indentation
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
@@ -130,212 +132,156 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
}; };
} }
// Git edit strategy with cherry-pick approach // Git fallback strategy that works with full content
async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> { async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> {
if (matchPosition === -1) { let tmpDir: tmp.DirResult | undefined;
return { confidence: 0, result: content, strategy: 'git' };
}
vol.reset();
try { try {
// Create temporary directory
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const git: SimpleGit = simpleGit(tmpDir.name);
// Initialize git repo // Initialize git repo
await git.init({ fs: memfs, dir: '/' }); await git.init();
await git.addConfig('user.name', 'Temp');
await git.addConfig('user.email', 'temp@example.com');
// Create original content - only use the edit region const filePath = path.join(tmpDir.name, 'file.txt');
const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
const editText = editRegion.join('\n');
await memfs.promises.writeFile('/file.txt', editText);
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Original'
});
const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Create search content (content with removals) // Build the search text (context + removals)
const searchLines = [...editRegion]; const searchLines = hunk.changes
let offset = 0; .filter(change => change.type === 'context' || change.type === 'remove')
for (const change of hunk.changes) { .map(change => change.originalLine || (change.indent + change.content));
if (change.type === 'remove') {
const index = searchLines.findIndex(
(line, i) => i >= offset && line.trimLeft() === change.content
);
if (index !== -1) {
searchLines.splice(index, 1);
}
}
if (change.type !== 'add') {
offset++;
}
}
// Create search branch and commit // Build the replace text (context + additions)
await git.branch({ fs: memfs, dir: '/', ref: 'search' }); const replaceLines = hunk.changes
await git.checkout({ fs: memfs, dir: '/', ref: 'search' }); .filter(change => change.type === 'context' || change.type === 'add')
await memfs.promises.writeFile('/file.txt', searchLines.join('\n')); .map(change => change.originalLine || (change.indent + change.content));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Search state'
});
const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Create replace content (with additions) const searchText = searchLines.join('\n');
const replaceLines = [...searchLines]; const replaceText = replaceLines.join('\n');
offset = 0; const originalText = content.join('\n');
const contextLines = hunk.changes
.filter(c => c.type === 'context')
.map(c => c.content);
for (const change of hunk.changes) { // Strategy 1: O->S->R, cherry-pick R onto O
if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content, contextLines);
replaceLines.splice(offset, 0, indent + change.content);
offset++;
} else if (change.type !== 'remove') {
offset++;
}
}
// Create replace branch and commit
await git.branch({ fs: memfs, dir: '/', ref: 'replace' });
await git.checkout({ fs: memfs, dir: '/', ref: 'replace' });
await memfs.promises.writeFile('/file.txt', replaceLines.join('\n'));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Replace state'
});
const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Try both strategies:
// 1. OSR: Cherry-pick replace onto original
// 2. SR-SO: Apply search->replace changes to search->original
// Strategy 1: OSR
await git.checkout({ fs: memfs, dir: '/', ref: originalHash });
try { try {
await git.merge({ // Original commit - use full file content
fs: memfs, fs.writeFileSync(filePath, originalText);
dir: '/', await git.add('file.txt');
ours: originalHash, const originalCommit = await git.commit('original');
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Cherry-pick OSR'
});
const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
const osrSimilarity = getDMPSimilarity(editText, osrResult)
const confidence = validateEditResult(hunk, osrResult, 'git-osr'); // Search commit - just the search text
fs.writeFileSync(filePath, searchText);
await git.add('file.txt');
await git.commit('search');
if (osrSimilarity * confidence > 0.9) { // Replace commit - just the replace text
// Construct result with edited portion fs.writeFileSync(filePath, replaceText);
const newResult = [ await git.add('file.txt');
...content.slice(0, matchPosition), const replaceCommit = await git.commit('replace');
...osrResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length) // Go back to original and cherry-pick
]; await git.checkout(originalCommit.commit);
try {
await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]);
// Read result
const newText = fs.readFileSync(filePath, 'utf-8');
const newLines = newText.split('\n');
return { return {
confidence: osrSimilarity, confidence: 1,
result: newResult, result: newLines,
strategy: 'git-osr' strategy: 'git-fallback'
}; };
} catch (cherryPickError) {
console.log('Strategy 1 failed with merge conflict');
} }
} catch (error) { } catch (error) {
console.log('OSR strategy failed:', error); console.log('Strategy 1 failed:', error);
} }
// Strategy 2: SR-SO // Strategy 2: S->R, S->O, cherry-pick R onto O
await git.checkout({ fs: memfs, dir: '/', ref: searchHash });
try { try {
// First apply original changes // Reset repo
await git.merge({ await git.init();
fs: memfs, await git.addConfig('user.name', 'Temp');
dir: '/', await git.addConfig('user.email', 'temp@example.com');
ours: searchHash,
theirs: originalHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply original changes'
});
// Then apply replace changes // Search commit - just the search text
await git.merge({ fs.writeFileSync(filePath, searchText);
fs: memfs, await git.add('file.txt');
dir: '/', const searchCommit = await git.commit('search');
ours: 'HEAD',
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply replace changes'
});
const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); // Replace commit - just the replace text
const srsoSimilarity = getDMPSimilarity(editText, srsoResult) fs.writeFileSync(filePath, replaceText);
await git.add('file.txt');
const replaceCommit = await git.commit('replace');
const confidence = validateEditResult(hunk, srsoResult, 'git-srso'); // Go back to search and create original with full file content
await git.checkout(searchCommit.commit);
fs.writeFileSync(filePath, originalText);
await git.add('file.txt');
await git.commit('original');
// Construct result with edited portion try {
const newResult = [ // Cherry-pick replace onto original
...content.slice(0, matchPosition), await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]);
...srsoResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length) // Read result
]; const newText = fs.readFileSync(filePath, 'utf-8');
return { const newLines = newText.split('\n');
confidence: srsoSimilarity * confidence, return {
result: newResult, confidence: 1,
strategy: 'git-srso' result: newLines,
}; strategy: 'git-fallback'
};
} catch (cherryPickError) {
console.log('Strategy 2 failed with merge conflict');
}
} catch (error) { } catch (error) {
console.log('SR-SO strategy failed:', error); console.log('Strategy 2 failed:', error);
return { confidence: 0, result: content, strategy: 'git' };
} }
// If both strategies fail, return no confidence
console.log('Git fallback failed');
return { confidence: 0, result: content, strategy: 'git-fallback' };
} catch (error) { } catch (error) {
console.log('Git strategy failed:', error); console.log('Git fallback strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git' }; return { confidence: 0, result: content, strategy: 'git-fallback' };
} finally { } finally {
vol.reset(); // Clean up temporary directory
if (tmpDir) {
tmpDir.removeCallback();
}
} }
} }
// Main edit function that tries strategies sequentially // Main edit function that tries strategies sequentially
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> { export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
// Don't attempt regular edits if confidence is too low
// Don't attempt any edits if confidence is too low and not in debug mode
const MIN_CONFIDENCE = 0.9; const MIN_CONFIDENCE = 0.9;
if (confidence < MIN_CONFIDENCE) { if (confidence < MIN_CONFIDENCE && debug === '') {
console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`); console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`);
return { confidence: 0, result: content, strategy: 'none' }; return applyGitFallback(hunk, content);
} }
// Try each strategy in sequence until one succeeds // Try each strategy in sequence until one succeeds
const strategies = [ const strategies = [
{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
{ name: 'git', apply: () => applyGit(hunk, content, matchPosition) } { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
]; ];
if (debug !== '') { if (debug !== '') {
// In debug mode, try all strategies and return the first success // In debug mode, try all strategies including git fallback
const results = await Promise.all(strategies.map(async strategy => { const results = await Promise.all([
console.log(`Attempting edit with ${strategy.name} strategy...`); ...strategies.map(async strategy => {
const result = await strategy.apply(); console.log(`Attempting edit with ${strategy.name} strategy...`);
console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); const result = await strategy.apply();
return result; console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`);
})); return result;
})
]);
/*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
if (successfulResults.length > 0) {
const bestResult = successfulResults.reduce((best, current) =>
current.confidence > best.confidence ? current : best
);
return bestResult;
}*/
return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
} else { } else {
// Normal mode - try strategies sequentially until one succeeds // Normal mode - try strategies sequentially until one succeeds
@@ -345,8 +291,13 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
return result; return result;
} }
} }
// If all strategies fail, try git fallback
const result = await applyGitFallback(hunk, content);
if(result.confidence === 1) {
return result;
}
} }
// If all strategies fail, return failure
return { confidence: 0, result: content, strategy: 'none' }; return { confidence: 0, result: content, strategy: 'none' };
} }