diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 0cbd5c0..36c9fa7 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -1,8 +1,8 @@ import { diff_match_patch } from 'diff-match-patch'; import * as git from 'isomorphic-git'; import { fs as memfs, vol } from 'memfs'; -import { Hunk } from './types'; -import { getDMPSimilarity } from './search-strategies'; +import { Change, EditResult, Hunk } from './types'; +import { getDMPSimilarity, validateEditResult } from './search-strategies'; // Helper function to infer indentation function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { @@ -27,12 +27,6 @@ function inferIndentation(line: string, contextLines: string[], previousIndent: return previousIndent; } -export type EditResult = { - confidence: number; - result: string[]; - strategy: string; -}; - // Context matching edit strategy export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { if (matchPosition === -1) { @@ -43,6 +37,8 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio let sourceIndex = matchPosition; let previousIndent = ''; + const hunkChanges = hunk.changes.filter(c => c.type !== 'context'); + for (const change of hunk.changes) { if (change.type === 'context') { newResult.push(change.originalLine || (change.indent + change.content)); @@ -66,10 +62,12 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio const similarity = getDMPSimilarity( content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') - ); - + ) + + const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n')); + return { - confidence: similarity, + confidence: similarity * confidence, result: newResult, strategy: 'context' }; @@ -82,41 +80,53 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): } const dmp = new diff_match_patch(); - const currentText = content.join('\n'); - const contextLines = hunk.changes - .filter(c => c.type === 'context') - .map(c => c.content); + const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); + const editText = editRegion.join('\n'); + + // Build the target text sequentially like in applyContextMatching + let targetText = ''; + let previousIndent = ''; + + for (const change of hunk.changes) { + if (change.type === 'context') { + targetText += (change.originalLine || (change.indent + change.content)) + '\n'; + previousIndent = change.indent; + } else if (change.type === 'add') { + const indent = change.indent || inferIndentation(change.content, + hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), + previousIndent + ); + targetText += indent + change.content + '\n'; + previousIndent = indent; + } + // Skip remove changes as they shouldn't appear in target + } - // Create a patch from the hunk with proper indentation - const patch = dmp.patch_make( - currentText, - hunk.changes.reduce((acc, change) => { - if (change.type === 'add') { - const indent = change.indent || inferIndentation(change.content, contextLines); - return acc + indent + change.content + '\n'; - } - if (change.type === 'remove') { - return acc.replace(change.content + '\n', ''); - } - return acc + change.content + '\n'; - }, '') - ); + // Trim the trailing newline + targetText = targetText.replace(/\n$/, ''); - const [patchedText] = dmp.patch_apply(patch, currentText); - const similarity = getDMPSimilarity( - content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), - patchedText - ); + const patch = dmp.patch_make(editText, targetText); + const [patchedText] = dmp.patch_apply(patch, editText); + + // Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...patchedText.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + + const similarity = getDMPSimilarity(editText, patchedText) + const confidence = validateEditResult(hunk, patchedText); return { - confidence: similarity, - result: patchedText.split('\n'), + confidence: similarity * confidence, + result: newResult, strategy: 'dmp' }; } -// Git edit strategy -export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { +// Git edit strategy with cherry-pick approach +async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'git' }; } @@ -124,26 +134,55 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num vol.reset(); try { + // Initialize git repo await git.init({ fs: memfs, dir: '/' }); - const originalContent = content.join('\n'); - await memfs.promises.writeFile('/file.txt', originalContent); - + // Create original content - only use the edit region + const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); + const editText = editRegion.join('\n'); + await memfs.promises.writeFile('/file.txt', editText); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.commit({ fs: memfs, dir: '/', author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Initial commit' + message: 'Original' }); + const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); - await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' }); - await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' }); - - const lines = originalContent.split('\n'); - const newLines = [...lines]; - let offset = matchPosition; + // Create search content (content with removals) + const searchLines = [...editRegion]; + let offset = 0; + for (const change of hunk.changes) { + if (change.type === 'remove') { + const index = searchLines.findIndex( + (line, i) => i >= offset && line.trimLeft() === change.content + ); + if (index !== -1) { + searchLines.splice(index, 1); + } + } + if (change.type !== 'add') { + offset++; + } + } + + // Create search branch and commit + await git.branch({ fs: memfs, dir: '/', ref: 'search' }); + await git.checkout({ fs: memfs, dir: '/', ref: 'search' }); + await memfs.promises.writeFile('/file.txt', searchLines.join('\n')); + await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); + await git.commit({ + fs: memfs, + dir: '/', + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Search state' + }); + const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); + // Create replace content (with additions) + const replaceLines = [...searchLines]; + offset = 0; const contextLines = hunk.changes .filter(c => c.type === 'context') .map(c => c.content); @@ -151,42 +190,108 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num for (const change of hunk.changes) { if (change.type === 'add') { const indent = change.indent || inferIndentation(change.content, contextLines); - newLines.splice(offset, 0, indent + change.content); + replaceLines.splice(offset, 0, indent + change.content); offset++; - } else if (change.type === 'remove') { - const index = newLines.findIndex( - (line, i) => i >= offset && line.trimLeft() === change.content - ); - if (index !== -1) { - newLines.splice(index, 1); - } - } else { + } else if (change.type !== 'remove') { offset++; } } - const modifiedContent = newLines.join('\n'); - await memfs.promises.writeFile('/file.txt', modifiedContent); - + // Create replace branch and commit + await git.branch({ fs: memfs, dir: '/', ref: 'replace' }); + await git.checkout({ fs: memfs, dir: '/', ref: 'replace' }); + await memfs.promises.writeFile('/file.txt', replaceLines.join('\n')); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.commit({ fs: memfs, dir: '/', author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Apply changes' + message: 'Replace state' }); + const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); - const similarity = getDMPSimilarity( - content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), - newLines.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') - ); + // Try both strategies: + // 1. OSR: Cherry-pick replace onto original + // 2. SR-SO: Apply search->replace changes to search->original - return { - confidence: similarity, - result: newLines, - strategy: 'git' - }; + // Strategy 1: OSR + await git.checkout({ fs: memfs, dir: '/', ref: originalHash }); + try { + await git.merge({ + fs: memfs, + dir: '/', + ours: originalHash, + theirs: replaceHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Cherry-pick OSR' + }); + const osrResult = (await memfs.promises.readFile('/file.txt')).toString(); + const osrSimilarity = getDMPSimilarity(editText, osrResult) + + const confidence = validateEditResult(hunk, osrResult); + + if (osrSimilarity * confidence > 0.9) { + // Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...osrResult.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + return { + confidence: osrSimilarity, + result: newResult, + strategy: 'git-osr' + }; + } + } catch (error) { + console.log('OSR strategy failed:', error); + } + + // Strategy 2: SR-SO + await git.checkout({ fs: memfs, dir: '/', ref: searchHash }); + try { + // First apply original changes + await git.merge({ + fs: memfs, + dir: '/', + ours: searchHash, + theirs: originalHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Apply original changes' + }); + + // Then apply replace changes + await git.merge({ + fs: memfs, + dir: '/', + ours: 'HEAD', + theirs: replaceHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Apply replace changes' + }); + + const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); + const srsoSimilarity = getDMPSimilarity(editText, srsoResult) + + const confidence = validateEditResult(hunk, srsoResult); + + // Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...srsoResult.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + return { + confidence: srsoSimilarity * confidence, + result: newResult, + strategy: 'git-srso' + }; + } catch (error) { + console.log('SR-SO strategy failed:', error); + return { confidence: 0, result: content, strategy: 'git' }; + } } catch (error) { + console.log('Git strategy failed:', error); return { confidence: 0, result: content, strategy: 'git' }; } finally { vol.reset(); @@ -195,9 +300,11 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num // Main edit function that tries strategies sequentially export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise { + // Don't attempt any edits if confidence is too low and not in debug mode const MIN_CONFIDENCE = 0.9; - if (confidence < MIN_CONFIDENCE && !debug) { + if (confidence < MIN_CONFIDENCE) { + console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`); return { confidence: 0, result: content, strategy: 'none' }; } @@ -211,15 +318,18 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu if (debug) { // In debug mode, try all strategies and return the first success const results = await Promise.all(strategies.map(async strategy => { + console.log(`Attempting edit with ${strategy.name} strategy...`); const result = await strategy.apply(); + console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); return result; })); const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); if (successfulResults.length > 0) { - return successfulResults.reduce((best, current) => + const bestResult = successfulResults.reduce((best, current) => current.confidence > best.confidence ? current : best ); + return bestResult; } } else { // Normal mode - try strategies sequentially until one succeeds diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 891fb96..a3658aa 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -162,7 +162,8 @@ Your diff here ): Promise { const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) - let result = originalContent.split("\n") + const originalLines = originalContent.split("\n") + let result = [...originalLines] for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 2ad8120..5c7a55d 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -1,7 +1,7 @@ import { compareTwoStrings } from 'string-similarity'; import { closest } from 'fastest-levenshtein'; import { diff_match_patch } from 'diff-match-patch'; -import { Change } from './types'; +import { Change, Hunk } from './types'; export type SearchResult = { index: number; @@ -32,10 +32,70 @@ export function getDMPSimilarity(original: string, modified: string): number { dmp.diff_cleanupSemantic(diffs); const patches = dmp.patch_make(original, diffs); const [expectedText] = dmp.patch_apply(patches, original); + const similarity = evaluateSimilarity(expectedText, modified); return similarity; } +// Helper function to validate edit results using hunk information +// Returns a confidence reduction value between 0 and 1 +// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, +// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. +// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). +export function validateEditResult(hunk: Hunk, result: string): number { + const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)); + + // Create skeleton of original content (context + removed lines) + const originalSkeleton = hunkDeepCopy.changes + .filter(change => change.type === 'context' || change.type === 'remove') + .map(change => change.content) + .join('\n'); + + // Create skeleton of expected result (context + added lines) + const expectedSkeleton = hunkDeepCopy.changes + .filter(change => change.type === 'context' || change.type === 'add') + .map(change => change.content) + .join('\n'); + + // Compare with original content + const originalSimilarity = evaluateSimilarity(originalSkeleton, result); + + // If result is too similar to original, it means changes weren't applied + if (originalSimilarity > 0.9) { + console.log('Result too similar to original content:', originalSimilarity); + return 0.5; // Significant confidence reduction + } + + // Compare with expected result + const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result); + console.log('Original similarity:', originalSimilarity); + console.log('Expected similarity:', expectedSimilarity); + + console.log('originalSkeleton:', originalSkeleton); + console.log('expectedSkeleton:', expectedSkeleton); + console.log('result:', result); + + // Scale between 0.98 and 1.0 (4% impact) based on expected similarity + const multiplier = expectedSimilarity < MIN_CONFIDENCE + ? 0.96 + (0.04 * expectedSimilarity) + : 1; + + return multiplier; +} + +// Helper function to validate context lines against original content +function validateContextLines(searchStr: string, content: string): number { + // Extract just the context lines from the search string + const contextLines = searchStr.split('\n') + .filter(line => !line.startsWith('-')); // Exclude removed lines + + // Compare context lines with content + const similarity = evaluateSimilarity(contextLines.join('\n'), content); + + // Context lines must match very closely, or confidence drops significantly + return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity; +} + // Exact match strategy export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { const contentStr = content.slice(startIndex).join('\n'); @@ -48,10 +108,13 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length ).join('\n'); - const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE; + const similarity = getDMPSimilarity(searchStr, matchedContent); + const contextSimilarity = validateContextLines(searchStr, matchedContent); + const confidence = Math.min(similarity, contextSimilarity); + return { index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, - confidence: dmpValid ? 1.0 : 0.9, + confidence, strategy: 'exact' }; } @@ -70,8 +133,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI const windowStr = content.slice(i, i + searchLines.length).join('\n'); const score = compareTwoStrings(searchStr, windowStr); if (score > bestScore && score >= minScore) { - const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE; - const adjustedScore = dmpValid ? score : score * 0.9; + const similarity = getDMPSimilarity(searchStr, windowStr); + const contextSimilarity = validateContextLines(searchStr, windowStr); + const adjustedScore = Math.min(similarity, contextSimilarity) * score; if (adjustedScore > bestScore) { bestScore = adjustedScore; @@ -99,10 +163,13 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start if (candidates.length > 0) { const closestMatch = closest(searchStr, candidates); const index = startIndex + candidates.indexOf(closestMatch); - const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE; + const similarity = getDMPSimilarity(searchStr, closestMatch); + const contextSimilarity = validateContextLines(searchStr, closestMatch); + const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty + return { index, - confidence: dmpValid ? 0.7 : 0.6, + confidence, strategy: 'levenshtein' }; } diff --git a/src/core/diff/strategies/new-unified/types.ts b/src/core/diff/strategies/new-unified/types.ts index 6135d0a..a734f6e 100644 --- a/src/core/diff/strategies/new-unified/types.ts +++ b/src/core/diff/strategies/new-unified/types.ts @@ -11,4 +11,10 @@ export type Hunk = { export type Diff = { hunks: Hunk[]; -}; \ No newline at end of file +}; + +export type EditResult = { + confidence: number; + result: string[]; + strategy: string; +}; \ No newline at end of file