refactor: implement Git fallback strategy in edit processing

- Introduced a new Git fallback strategy for handling edits when the search confidence is below the minimum threshold (0.9).
- Replaced the previous in-memory Git edit strategy (isomorphic-git on memfs) with a more robust approach that uses a temporary on-disk repository (simple-git in a tmp directory) and separate commits for the original, search, and replace states.
- Enhanced error handling and logging for better debugging during the edit process.
- Updated the main edit function to incorporate the Git fallback strategy, ensuring it is attempted when other strategies fail.
- Improved overall structure and readability of the applyGitFallback function, streamlining the process of applying changes using Git.
This commit is contained in:
Daniel Riccio
2025-01-10 17:34:44 -05:00
parent 058431eaf1
commit 6d68edef3e

View File

@@ -1,8 +1,10 @@
import { diff_match_patch } from 'diff-match-patch';
import * as git from 'isomorphic-git';
import { fs as memfs, vol } from 'memfs';
import { Change, EditResult, Hunk } from './types';
import { EditResult, Hunk } from './types';
import { getDMPSimilarity, validateEditResult } from './search-strategies';
import * as path from 'path';
import simpleGit, { SimpleGit } from 'simple-git';
import * as tmp from 'tmp';
import * as fs from 'fs';
// Helper function to infer indentation
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
@@ -130,212 +132,156 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
};
}
// Git edit strategy with cherry-pick approach
async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> {
if (matchPosition === -1) {
return { confidence: 0, result: content, strategy: 'git' };
}
vol.reset();
// Git fallback strategy that works with full content
async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> {
let tmpDir: tmp.DirResult | undefined;
try {
// Create temporary directory
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const git: SimpleGit = simpleGit(tmpDir.name);
// Initialize git repo
await git.init({ fs: memfs, dir: '/' });
await git.init();
await git.addConfig('user.name', 'Temp');
await git.addConfig('user.email', 'temp@example.com');
// Create original content - only use the edit region
const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length);
const editText = editRegion.join('\n');
await memfs.promises.writeFile('/file.txt', editText);
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Original'
});
const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
const filePath = path.join(tmpDir.name, 'file.txt');
// Create search content (content with removals)
const searchLines = [...editRegion];
let offset = 0;
for (const change of hunk.changes) {
if (change.type === 'remove') {
const index = searchLines.findIndex(
(line, i) => i >= offset && line.trimLeft() === change.content
);
if (index !== -1) {
searchLines.splice(index, 1);
}
}
if (change.type !== 'add') {
offset++;
}
}
// Build the search text (context + removals)
const searchLines = hunk.changes
.filter(change => change.type === 'context' || change.type === 'remove')
.map(change => change.originalLine || (change.indent + change.content));
// Create search branch and commit
await git.branch({ fs: memfs, dir: '/', ref: 'search' });
await git.checkout({ fs: memfs, dir: '/', ref: 'search' });
await memfs.promises.writeFile('/file.txt', searchLines.join('\n'));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Search state'
});
const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Build the replace text (context + additions)
const replaceLines = hunk.changes
.filter(change => change.type === 'context' || change.type === 'add')
.map(change => change.originalLine || (change.indent + change.content));
// Create replace content (with additions)
const replaceLines = [...searchLines];
offset = 0;
const contextLines = hunk.changes
.filter(c => c.type === 'context')
.map(c => c.content);
const searchText = searchLines.join('\n');
const replaceText = replaceLines.join('\n');
const originalText = content.join('\n');
for (const change of hunk.changes) {
if (change.type === 'add') {
const indent = change.indent || inferIndentation(change.content, contextLines);
replaceLines.splice(offset, 0, indent + change.content);
offset++;
} else if (change.type !== 'remove') {
offset++;
}
}
// Create replace branch and commit
await git.branch({ fs: memfs, dir: '/', ref: 'replace' });
await git.checkout({ fs: memfs, dir: '/', ref: 'replace' });
await memfs.promises.writeFile('/file.txt', replaceLines.join('\n'));
await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
await git.commit({
fs: memfs,
dir: '/',
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Replace state'
});
const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' });
// Try both strategies:
// 1. OSR: Cherry-pick replace onto original
// 2. SR-SO: Apply search->replace changes to search->original
// Strategy 1: OSR
await git.checkout({ fs: memfs, dir: '/', ref: originalHash });
// Strategy 1: O->S->R, cherry-pick R onto O
try {
await git.merge({
fs: memfs,
dir: '/',
ours: originalHash,
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Cherry-pick OSR'
});
const osrResult = (await memfs.promises.readFile('/file.txt')).toString();
const osrSimilarity = getDMPSimilarity(editText, osrResult)
// Original commit - use full file content
fs.writeFileSync(filePath, originalText);
await git.add('file.txt');
const originalCommit = await git.commit('original');
const confidence = validateEditResult(hunk, osrResult, 'git-osr');
// Search commit - just the search text
fs.writeFileSync(filePath, searchText);
await git.add('file.txt');
await git.commit('search');
if (osrSimilarity * confidence > 0.9) {
// Construct result with edited portion
const newResult = [
...content.slice(0, matchPosition),
...osrResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length)
];
return {
confidence: osrSimilarity,
result: newResult,
strategy: 'git-osr'
};
}
} catch (error) {
console.log('OSR strategy failed:', error);
}
// Replace commit - just the replace text
fs.writeFileSync(filePath, replaceText);
await git.add('file.txt');
const replaceCommit = await git.commit('replace');
// Strategy 2: SR-SO
await git.checkout({ fs: memfs, dir: '/', ref: searchHash });
// Go back to original and cherry-pick
await git.checkout(originalCommit.commit);
try {
// First apply original changes
await git.merge({
fs: memfs,
dir: '/',
ours: searchHash,
theirs: originalHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply original changes'
});
await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]);
// Then apply replace changes
await git.merge({
fs: memfs,
dir: '/',
ours: 'HEAD',
theirs: replaceHash,
author: { name: 'Temp', email: 'temp@example.com' },
message: 'Apply replace changes'
});
const srsoResult = (await memfs.promises.readFile('/file.txt')).toString();
const srsoSimilarity = getDMPSimilarity(editText, srsoResult)
const confidence = validateEditResult(hunk, srsoResult, 'git-srso');
// Construct result with edited portion
const newResult = [
...content.slice(0, matchPosition),
...srsoResult.split('\n'),
...content.slice(matchPosition + hunk.changes.length)
];
// Read result
const newText = fs.readFileSync(filePath, 'utf-8');
const newLines = newText.split('\n');
return {
confidence: srsoSimilarity * confidence,
result: newResult,
strategy: 'git-srso'
confidence: 1,
result: newLines,
strategy: 'git-fallback'
};
} catch (error) {
console.log('SR-SO strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git' };
} catch (cherryPickError) {
console.log('Strategy 1 failed with merge conflict');
}
} catch (error) {
console.log('Git strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git' };
console.log('Strategy 1 failed:', error);
}
// Strategy 2: S->R, S->O, cherry-pick R onto O
try {
// Reset repo
await git.init();
await git.addConfig('user.name', 'Temp');
await git.addConfig('user.email', 'temp@example.com');
// Search commit - just the search text
fs.writeFileSync(filePath, searchText);
await git.add('file.txt');
const searchCommit = await git.commit('search');
// Replace commit - just the replace text
fs.writeFileSync(filePath, replaceText);
await git.add('file.txt');
const replaceCommit = await git.commit('replace');
// Go back to search and create original with full file content
await git.checkout(searchCommit.commit);
fs.writeFileSync(filePath, originalText);
await git.add('file.txt');
await git.commit('original');
try {
// Cherry-pick replace onto original
await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]);
// Read result
const newText = fs.readFileSync(filePath, 'utf-8');
const newLines = newText.split('\n');
return {
confidence: 1,
result: newLines,
strategy: 'git-fallback'
};
} catch (cherryPickError) {
console.log('Strategy 2 failed with merge conflict');
}
} catch (error) {
console.log('Strategy 2 failed:', error);
}
// If both strategies fail, return no confidence
console.log('Git fallback failed');
return { confidence: 0, result: content, strategy: 'git-fallback' };
} catch (error) {
console.log('Git fallback strategy failed:', error);
return { confidence: 0, result: content, strategy: 'git-fallback' };
} finally {
vol.reset();
// Clean up temporary directory
if (tmpDir) {
tmpDir.removeCallback();
}
}
}
// Main edit function that tries strategies sequentially
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise<EditResult> {
// Don't attempt any edits if confidence is too low and not in debug mode
// Don't attempt regular edits if confidence is too low
const MIN_CONFIDENCE = 0.9;
if (confidence < MIN_CONFIDENCE) {
console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`);
return { confidence: 0, result: content, strategy: 'none' };
if (confidence < MIN_CONFIDENCE && debug === '') {
console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`);
return applyGitFallback(hunk, content);
}
// Try each strategy in sequence until one succeeds
const strategies = [
{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
{ name: 'git', apply: () => applyGit(hunk, content, matchPosition) }
{ name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
];
if (debug !== '') {
// In debug mode, try all strategies and return the first success
const results = await Promise.all(strategies.map(async strategy => {
// In debug mode, try all strategies including git fallback
const results = await Promise.all([
...strategies.map(async strategy => {
console.log(`Attempting edit with ${strategy.name} strategy...`);
const result = await strategy.apply();
console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`);
return result;
}));
})
]);
/*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE);
if (successfulResults.length > 0) {
const bestResult = successfulResults.reduce((best, current) =>
current.confidence > best.confidence ? current : best
);
return bestResult;
}*/
return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' };
} else {
// Normal mode - try strategies sequentially until one succeeds
@@ -345,8 +291,13 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu
return result;
}
}
// If all strategies fail, try git fallback
const result = await applyGitFallback(hunk, content);
if(result.confidence === 1) {
return result;
}
}
// If all strategies fail, return failure
return { confidence: 0, result: content, strategy: 'none' };
}