diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts
new file mode 100644
index 0000000..0cbd5c0
--- /dev/null
+++ b/src/core/diff/strategies/new-unified/edit-strategies.ts
@@ -0,0 +1,270 @@
+import { diff_match_patch } from 'diff-match-patch';
+import * as git from 'isomorphic-git';
+import { fs as memfs, vol } from 'memfs';
+import { Hunk } from './types';
+import { getDMPSimilarity } from './search-strategies';
+
+/**
+ * Picks the indentation for an added line that may carry none of its own.
+ *
+ * @param line - Text of the added line.
+ * @param contextLines - Context lines of the same hunk, in hunk order.
+ * @param previousIndent - Fallback used when no context line is indented.
+ * @returns The line's own leading whitespace if it has any, otherwise the
+ *   leading whitespace of the first indented context line, otherwise
+ *   `previousIndent`.
+ */
+function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
+  const ownIndent = line.match(/^(\s+)/);
+  if (ownIndent) {
+    return ownIndent[1];
+  }
+
+  // `line` has no leading whitespace past this point, so it can never be
+  // deeper than an indented context line: the first one found decides.
+  for (const contextLine of contextLines) {
+    const contextIndent = contextLine.match(/^(\s+)/);
+    if (contextIndent) {
+      return contextIndent[1];
+    }
+  }
+
+  return previousIndent;
+}
+
+/** Outcome of one edit strategy. */
+export type EditResult = {
+  /** Score reported by the strategy; 0 means the hunk was not applied. */
+  confidence: number;
+  /** File content, one entry per line, after the strategy ran. */
+  result: string[];
+  /** Name of the strategy that produced this result. */
+  strategy: string;
+};
+
+/**
+ * Applies a hunk by rebuilding the file around `matchPosition`: context lines
+ * are re-emitted, added lines are inserted and removed lines are skipped.
+ *
+ * @param hunk - Parsed hunk to apply.
+ * @param content - Current file content, one entry per line.
+ * @param matchPosition - Index in `content` where the hunk starts, or -1.
+ * @returns The edited lines with a DMP-based confidence, or the untouched
+ *   content with confidence 0 when `matchPosition` is -1.
+ */
+export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
+  if (matchPosition === -1) {
+    return { confidence: 0, result: content, strategy: 'context' };
+  }
+
+  // Loop-invariant: the context lines that indentation is inferred from.
+  const contextSource = hunk.changes
+    .filter(c => c.type === 'context')
+    .map(c => c.originalLine || '');
+  const newResult = content.slice(0, matchPosition);
+  let sourceIndex = matchPosition;
+  let previousIndent = '';
+
+  for (const change of hunk.changes) {
+    if (change.type === 'context') {
+      newResult.push(change.originalLine || (change.indent + change.content));
+      previousIndent = change.indent;
+      sourceIndex++;
+    } else if (change.type === 'add') {
+      const indent = change.indent || inferIndentation(change.content, contextSource, previousIndent);
+      newResult.push(indent + change.content);
+      previousIndent = indent;
+    } else if (change.type === 'remove') {
+      sourceIndex++;
+    }
+  }
+
+  newResult.push(...content.slice(sourceIndex));
+
+  // Compare the touched window before and after the edit.
+  const windowEnd = matchPosition + hunk.changes.length;
+  const similarity = getDMPSimilarity(
+    content.slice(matchPosition, windowEnd).join('\n'),
+    newResult.slice(matchPosition, windowEnd).join('\n')
+  );
+
+  return { confidence: similarity, result: newResult, strategy: 'context' };
+}
+
+/**
+ * Applies a hunk with diff-match-patch by patching the joined file text.
+ *
+ * NOTE(review): the patch is computed from the whole file text to a text built
+ * from the hunk lines alone, and the confidence compares a window of `content`
+ * with the whole patched text - confirm this is intended before relying on
+ * this strategy.
+ *
+ * @param hunk - Parsed hunk to apply.
+ * @param content - Current file content, one entry per line.
+ * @param matchPosition - Index in `content` where the hunk starts, or -1.
+ * @returns The patched lines with a DMP-based confidence, or the untouched
+ *   content with confidence 0 when `matchPosition` is -1.
+ */
+export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
+  if (matchPosition === -1) {
+    return { confidence: 0, result: content, strategy: 'dmp' };
+  }
+
+  const dmp = new diff_match_patch();
+  const currentText = content.join('\n');
+  const contextLines = hunk.changes
+    .filter(c => c.type === 'context')
+    .map(c => c.content);
+
+  // Build the target text from the hunk, indenting added lines.
+  let targetText = '';
+  for (const change of hunk.changes) {
+    if (change.type === 'add') {
+      const indent = change.indent || inferIndentation(change.content, contextLines);
+      targetText += indent + change.content + '\n';
+    } else if (change.type === 'remove') {
+      targetText = targetText.replace(change.content + '\n', '');
+    } else {
+      targetText += change.content + '\n';
+    }
+  }
+
+  const patch = dmp.patch_make(currentText, targetText);
+  const [patchedText] = dmp.patch_apply(patch, currentText);
+  const similarity = getDMPSimilarity(
+    content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'),
+    patchedText
+  );
+
+  return { confidence: similarity, result: patchedText.split('\n'), strategy: 'dmp' };
+}
+
+/**
+ * Applies a hunk by splicing lines, committing the before and after states
+ * to a throwaway in-memory git repository along the way.
+ *
+ * The shared memfs volume is reset before and after every call, so
+ * overlapping calls would interfere with each other.
+ *
+ * @param hunk - Parsed hunk to apply.
+ * @param content - Current file content, one entry per line.
+ * @param matchPosition - Index in `content` where the hunk starts, or -1.
+ * @returns The edited lines with a DMP-based confidence; the untouched content
+ *   with confidence 0 when `matchPosition` is -1 or a git/fs call throws.
+ */
+export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> {
+  if (matchPosition === -1) {
+    return { confidence: 0, result: content, strategy: 'git' };
+  }
+
+  const author = { name: 'Temp', email: 'temp@example.com' };
+  vol.reset();
+
+  try {
+    await git.init({ fs: memfs, dir: '/' });
+
+    const originalContent = content.join('\n');
+    await memfs.promises.writeFile('/file.txt', originalContent);
+
+    await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
+    await git.commit({ fs: memfs, dir: '/', author, message: 'Initial commit' });
+
+    await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' });
+    await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' });
+
+    const newLines = originalContent.split('\n');
+    let offset = matchPosition;
+
+    const contextLines = hunk.changes
+      .filter(c => c.type === 'context')
+      .map(c => c.content);
+
+    for (const change of hunk.changes) {
+      if (change.type === 'add') {
+        const indent = change.indent || inferIndentation(change.content, contextLines);
+        newLines.splice(offset, 0, indent + change.content);
+        offset++;
+      } else if (change.type === 'remove') {
+        // Drop the first line at or after the cursor whose text matches
+        // once leading whitespace is ignored.
+        const index = newLines.findIndex(
+          (line, i) => i >= offset && line.trimStart() === change.content
+        );
+        if (index !== -1) {
+          newLines.splice(index, 1);
+        }
+      } else {
+        offset++;
+      }
+    }
+
+    await memfs.promises.writeFile('/file.txt', newLines.join('\n'));
+
+    await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
+    await git.commit({ fs: memfs, dir: '/', author, message: 'Apply changes' });
+
+    const windowEnd = matchPosition + hunk.changes.length;
+    const similarity = getDMPSimilarity(
+      content.slice(matchPosition, windowEnd).join('\n'),
+      newLines.slice(matchPosition, windowEnd).join('\n')
+    );
+
+    return { confidence: similarity, result: newLines, strategy: 'git' };
+  } catch {
+    // Best effort: any git/fs failure means this strategy did not apply.
+    return { confidence: 0, result: content, strategy: 'git' };
+  } finally {
+    vol.reset();
+  }
+}
+
+/**
+ * Applies a hunk with the first strategy whose result clears the bar.
+ *
+ * @param hunk - Parsed hunk to apply.
+ * @param content - Current file content, one entry per line.
+ * @param matchPosition - Index in `content` where the hunk starts, or -1.
+ * @param confidence - Confidence of the search that produced `matchPosition`.
+ * @param debug - When true, skips the search-confidence gate, runs every
+ *   strategy and returns the highest-scoring passing result.
+ * @returns The chosen result, or the untouched content with confidence 0 and
+ *   strategy 'none' when nothing qualifies.
+ */
+export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
+  const MIN_CONFIDENCE = 0.9;
+  const failure: EditResult = { confidence: 0, result: content, strategy: 'none' };
+
+  // Outside debug mode a weak search match is not worth attempting.
+  if (confidence < MIN_CONFIDENCE && !debug) {
+    return failure;
+  }
+
+  // Wrapped in thunks so normal mode can stop at the first passing strategy.
+  const strategies: Array<() => EditResult | Promise<EditResult>> = [
+    () => applyContextMatching(hunk, content, matchPosition),
+    () => applyDMP(hunk, content, matchPosition),
+    () => applyGit(hunk, content, matchPosition)
+  ];
+
+  if (debug) {
+    // Debug mode: run every strategy and keep the best passing result.
+    const results = await Promise.all(strategies.map(async apply => apply()));
+    const passing = results.filter(result => result.confidence > MIN_CONFIDENCE);
+    if (passing.length > 0) {
+      return passing.reduce((best, current) =>
+        current.confidence > best.confidence ? current : best
+      );
+    }
+    return failure;
+  }
+
+  // Normal mode: the first passing strategy wins.
+  for (const apply of strategies) {
+    const result = await apply();
+    if (result.confidence > MIN_CONFIDENCE) {
+      return result;
+    }
+  }
+
+  return failure;
+}
diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts
new file mode 100644
index 0000000..891fb96
--- /dev/null
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -0,0 +1,194 @@
+import { Diff, Hunk } from "./types"
+import { findBestMatch, prepareSearchString } from "./search-strategies"
+import { applyEdit } from "./edit-strategies"
+import { DiffResult, DiffStrategy } from "../../types"
+
+/**
+ * Diff strategy that applies unified-diff hunks written without line numbers,
+ * locating each hunk in the file through its context and removed lines.
+ */
+export class NewUnifiedDiffStrategy implements DiffStrategy {
+  /**
+   * Parses unified-diff text into hunks.
+   *
+   * Everything before the first `@@` line is ignored, every `@@` line starts
+   * a new hunk, and lines that do not begin with " ", "+" or "-" are skipped.
+   * Hunks that end up without any change are dropped.
+   *
+   * @param diff - Unified diff text.
+   * @returns The parsed hunks, in diff order.
+   */
+  private parseUnifiedDiff(diff: string): Diff {
+    const lines = diff.split("\n")
+    const hunks: Hunk[] = []
+    let currentHunk: Hunk | null = null
+
+    // A hunk is only kept once it holds at least one change.
+    const flush = (hunk: Hunk | null): void => {
+      if (hunk && hunk.changes.length > 0) {
+        hunks.push(hunk)
+      }
+    }
+
+    let i = 0
+    while (i < lines.length && !lines[i].startsWith("@@")) {
+      i++
+    }
+
+    for (; i < lines.length; i++) {
+      const line = lines[i]
+
+      if (line.startsWith("@@")) {
+        flush(currentHunk)
+        currentHunk = { changes: [] }
+        continue
+      }
+
+      const marker = line.charAt(0)
+      if (!currentHunk || (marker !== " " && marker !== "+" && marker !== "-")) {
+        continue
+      }
+
+      // Split the payload into its leading whitespace and the rest.
+      const originalLine = line.slice(1)
+      const indent = originalLine.match(/^\s*/)?.[0] ?? ""
+      const content = originalLine.slice(indent.length)
+      const type = marker === " " ? "context" : marker === "+" ? "add" : "remove"
+
+      currentHunk.changes.push({ type, content, indent, originalLine })
+    }
+
+    flush(currentHunk)
+    return { hunks }
+  }
+
+  /**
+   * Builds the prompt text that describes the `apply_diff` tool.
+   *
+   * @param cwd - Working directory that `path` is documented as relative to.
+   * @returns The tool description.
+   */
+  getToolDescription(cwd: string): string {
+    return `## apply_diff
+Description: Apply a unified diff to a file at the specified path. This tool is useful when you need to make specific modifications to a file based on a set of changes provided in unified diff format (diff -U0).
+
+Make sure you include the first 2 lines with the file paths.
+Don't include timestamps with the file paths.
+
+Start each hunk of changes with a \`@@ ... @@\` line.
+Don't include line numbers like \`diff -U0\` does.
+The user's patch tool doesn't need them.
+
+Indentation matters in the diffs!
+
+Start a new hunk for each section of the file that needs changes.
+
+Only output hunks that specify changes with \`+\` or \`-\` lines.
+Skip any hunks that are entirely unchanging \` \` lines.
+
+The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file!
+Think carefully and make sure you include and mark all lines that need to be removed or changed as \`-\` lines.
+Make sure you mark all new or modified lines with \`+\`.
+Don't leave out any lines or the diff patch won't apply correctly.
+
+Output hunks in whatever order makes the most sense.
+Hunks don't need to be in any particular order.
+
+The hunks do not need line numbers.
+
+When editing a function, method, loop, etc use a hunk to replace the *entire* code block.
+Delete the entire existing version with \`-\` lines and then add a new, updated version with \`+\` lines.
+This will help you generate correct code and correct diffs.
+
+To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location.
+
+Parameters:
+- path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd})
+- diff: (required) The diff content in unified format to apply to the file.
+
+For each file that needs to be changed, write out the changes similar to a unified diff like \`diff -U0\` would produce.
+
+
+Example:
+\`\`\`diff
+--- mathweb/flask/app.py
++++ mathweb/flask/app.py
+@@ ... @@
+-class MathWeb:
++import sympy
++
++class MathWeb:
+@@ ... @@
+-def is_prime(x):
+-    if x < 2:
+-        return False
+-    for i in range(2, int(math.sqrt(x)) + 1):
+-        if x % i == 0:
+-            return False
+-    return True
+@@ ... @@
+-@app.route('/prime/<int:n>')
+-def nth_prime(n):
+-    count = 0
+-    num = 1
+-    while count < n:
+-        num += 1
+-        if is_prime(num):
+-            count += 1
+-    return str(num)
++@app.route('/prime/<int:n>')
++def nth_prime(n):
++    count = 0
++    num = 1
++    while count < n:
++        num += 1
++        if sympy.isprime(num):
++            count += 1
++    return str(num)
+\`\`\`
+
+Usage:
+<apply_diff>
+<path>File path here</path>
+<diff>
+Your diff here
+</diff>
+</apply_diff>
+`
+  }
+
+  /**
+   * Applies every hunk of `diffContent` to `originalContent`, in diff order.
+   *
+   * @param originalContent - Current file text.
+   * @param diffContent - Unified diff text to apply.
+   * @param startLine - Not used by this strategy.
+   * @param endLine - Not used by this strategy.
+   * @returns The patched text, or a failure as soon as one hunk cannot be
+   *   applied with enough confidence.
+   */
+  async applyDiff(
+    originalContent: string,
+    diffContent: string,
+    startLine?: number,
+    endLine?: number
+  ): Promise<DiffResult> {
+    const MIN_CONFIDENCE = 0.9
+    const parsedDiff = this.parseUnifiedDiff(diffContent)
+    let result = originalContent.split("\n")
+
+    for (const hunk of parsedDiff.hunks) {
+      const contextStr = prepareSearchString(hunk.changes)
+      const { index: matchPosition, confidence } = findBestMatch(contextStr, result)
+
+      const editResult = await applyEdit(hunk, result, matchPosition, confidence)
+      if (editResult.confidence > MIN_CONFIDENCE) {
+        result = editResult.result
+      } else {
+        return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` }
+      }
+    }
+
+    return { success: true, content: result.join("\n") }
+  }
+}
diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts
new file mode 100644
index 0000000..2ad8120
--- /dev/null
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -0,0 +1,172 @@
+import { compareTwoStrings } from 'string-similarity';
+import { closest } from 'fastest-levenshtein';
+import { diff_match_patch } from 'diff-match-patch';
+import { Change } from './types';
+
+/** Outcome of one search strategy. */
+export type SearchResult = {
+  /** Index of the first matched line in the searched content, or -1. */
+  index: number;
+  /** Score of the match; 0 when nothing was found. */
+  confidence: number;
+  /** Name of the strategy that produced this result. */
+  strategy: string;
+};
+
+//TODO: this should be configurable
+const MIN_CONFIDENCE = 0.95;
+
+/**
+ * Builds the text a hunk expects to find in the file.
+ *
+ * @param changes - Changes of one hunk.
+ * @returns The `content` of every context and removed line, joined by "\n".
+ */
+export function prepareSearchString(changes: Change[]): string {
+  return changes
+    .filter(c => c.type === 'context' || c.type === 'remove')
+    .map(c => c.content)
+    .join('\n');
+}
+
+/**
+ * Scores two texts with string-similarity's `compareTwoStrings`.
+ *
+ * @param original - First text.
+ * @param modified - Second text.
+ * @returns The score returned by `compareTwoStrings`.
+ */
+export function evaluateSimilarity(original: string, modified: string): number {
+  return compareTwoStrings(original, modified);
+}
+
+/**
+ * Diffs `original` against `modified` with diff-match-patch, applies the
+ * resulting patch back onto `original` and scores the outcome against
+ * `modified`.
+ *
+ * @param original - Text the patch is computed from and applied to.
+ * @param modified - Text the patch is expected to produce.
+ * @returns Similarity between the patched text and `modified`.
+ */
+export function getDMPSimilarity(original: string, modified: string): number {
+  const dmp = new diff_match_patch();
+  const diffs = dmp.diff_main(original, modified);
+  dmp.diff_cleanupSemantic(diffs);
+  const patches = dmp.patch_make(original, diffs);
+  const [expectedText] = dmp.patch_apply(patches, original);
+  return evaluateSimilarity(expectedText, modified);
+}
+
+/**
+ * Looks for `searchStr` verbatim inside the joined content.
+ *
+ * @param searchStr - Text to find, lines joined by "\n".
+ * @param content - File content, one entry per line.
+ * @param startIndex - First line of `content` to search from.
+ * @returns The line index of the first occurrence with confidence 1.0, or 0.9
+ *   when the DMP check on the matched lines falls short; index -1 and
+ *   confidence 0 when `searchStr` does not occur.
+ */
+export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+  const contentStr = content.slice(startIndex).join('\n');
+  const exactMatch = contentStr.indexOf(searchStr);
+  if (exactMatch === -1) {
+    return { index: -1, confidence: 0, strategy: 'exact' };
+  }
+
+  // The matched line is found by counting the newlines before the hit.
+  const index = startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1;
+  const lineCount = searchStr.split('\n').length;
+  const matchedContent = content.slice(index, index + lineCount).join('\n');
+
+  const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE;
+  return { index, confidence: dmpValid ? 1.0 : 0.9, strategy: 'exact' };
+}
+
+/**
+ * Slides a window of the search length over the content and keeps the window
+ * that `compareTwoStrings` scores highest, provided it reaches 0.8.
+ *
+ * @param searchStr - Text to find, lines joined by "\n".
+ * @param content - File content, one entry per line.
+ * @param startIndex - First line of `content` to search from.
+ * @returns The best window's line index and adjusted score, or index -1 and
+ *   confidence 0 when no window qualifies.
+ */
+export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+  const windowSize = searchStr.split('\n').length;
+  const minScore = 0.8;
+  let bestScore = 0;
+  let bestIndex = -1;
+
+  for (let i = startIndex; i + windowSize <= content.length; i++) {
+    const windowStr = content.slice(i, i + windowSize).join('\n');
+    const score = compareTwoStrings(searchStr, windowStr);
+    if (score < minScore || score <= bestScore) {
+      continue;
+    }
+
+    // Windows that fall short of the DMP check stay eligible at 90% score.
+    const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE;
+    const adjustedScore = dmpValid ? score : score * 0.9;
+    if (adjustedScore > bestScore) {
+      bestScore = adjustedScore;
+      bestIndex = i;
+    }
+  }
+
+  return { index: bestIndex, confidence: bestScore, strategy: 'similarity' };
+}
+
+/**
+ * Picks the window of the search length with the smallest Levenshtein distance
+ * to `searchStr`, using fastest-levenshtein's `closest`.
+ *
+ * @param searchStr - Text to find, lines joined by "\n".
+ * @param content - File content, one entry per line.
+ * @param startIndex - First line of `content` to search from.
+ * @returns The closest window's line index with confidence 0.7, or 0.6 when
+ *   the DMP check falls short; index -1 and confidence 0 when the content
+ *   is shorter than the search text.
+ */
+export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+  const windowSize = searchStr.split('\n').length;
+  const candidates: string[] = [];
+
+  for (let i = startIndex; i + windowSize <= content.length; i++) {
+    candidates.push(content.slice(i, i + windowSize).join('\n'));
+  }
+
+  if (candidates.length === 0) {
+    return { index: -1, confidence: 0, strategy: 'levenshtein' };
+  }
+
+  const closestMatch = closest(searchStr, candidates);
+  // `indexOf` resolves identical windows to the earliest one.
+  const index = startIndex + candidates.indexOf(closestMatch);
+  const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE;
+  return { index, confidence: dmpValid ? 0.7 : 0.6, strategy: 'levenshtein' };
+}
+
+/**
+ * Runs every search strategy and keeps the most confident result.
+ *
+ * @param searchStr - Text to find, lines joined by "\n".
+ * @param content - File content, one entry per line.
+ * @param startIndex - First line of `content` to search from.
+ * @returns The highest-confidence result; ties keep the earlier strategy.
+ */
+export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+  const strategies = [findExactMatch, findSimilarityMatch, findLevenshteinMatch];
+  let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' };
+
+  for (const strategy of strategies) {
+    const result = strategy(searchStr, content, startIndex);
+    if (result.confidence > bestResult.confidence) {
+      bestResult = result;
+    }
+  }
+
+  return bestResult;
+}
\ No newline at end of file
diff --git a/src/core/diff/strategies/new-unified/types.ts b/src/core/diff/strategies/new-unified/types.ts
new file mode 100644
index 0000000..6135d0a
--- /dev/null
+++ b/src/core/diff/strategies/new-unified/types.ts
@@ -0,0 +1,21 @@
+/** One line of a hunk. */
+export type Change = {
+  /** Whether the line is unchanged context, an addition or a removal. */
+  type: 'context' | 'add' | 'remove';
+  /** Line text without its leading whitespace. */
+  content: string;
+  /** Leading whitespace of the line. */
+  indent: string;
+  /** Line text as written in the diff, minus the diff marker. */
+  originalLine?: string;
+};
+
+/** A contiguous group of changes. */
+export type Hunk = {
+  changes: Change[];
+};
+
+/** A parsed unified diff. */
+export type Diff = {
+  hunks: Hunk[];
+};
\ No newline at end of file