Add New Unified Diff Strategy Implementation

- Introduced a new unified diff strategy with support for context matching, DMP, and Git-based edits.
- Implemented helper functions for parsing unified diffs and evaluating similarity.
- Added types for changes, hunks, and diffs to enhance type safety.
- Created a main edit function that applies strategies sequentially based on confidence levels.
- Included detailed descriptions and usage examples for the new strategy.
2026-02-05 12:05:16 -05:00 · 2025-01-07 19:01:12 -05:00
parent 2857dd4996
commit 594481643b
4 changed files with 562 additions and 0 deletions
--- a/src/core/diff/strategies/new-unified/edit-strategies.ts
+++ b/src/core/diff/strategies/new-unified/edit-strategies.ts
@@ -0,0 +1,236 @@
 import { diff_match_patch } from 'diff-match-patch';
 import * as git from 'isomorphic-git';
 import { fs as memfs, vol } from 'memfs';
 import { Hunk } from './types';
 import { getDMPSimilarity } from './search-strategies';
 /**
  * Chooses the leading whitespace to use for a line that is about to be inserted.
  *
  * Resolution order:
  *  1. the line's own leading whitespace, when it has any;
  *  2. otherwise the leading whitespace of the first entry of `contextLines` that starts with whitespace;
  *  3. otherwise `previousIndent`.
  *
  * @param line - The line being inserted.
  * @param contextLines - Neighbouring lines to borrow indentation from.
  * @param previousIndent - Fallback used when neither the line nor any context line is indented.
  * @returns The whitespace prefix chosen for `line`.
  */
 function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string {
  const ownIndent = line.match(/^(\s+)/);
  if (ownIndent) {
    return ownIndent[1];
  }
  // From here on `line` has no leading whitespace, so its depth is 0 and can never exceed the depth
  // of an indented context line; the first indented context line therefore always decides.
  for (const contextLine of contextLines) {
    const contextIndent = contextLine.match(/^(\s+)/);
    if (contextIndent) {
      return contextIndent[1];
    }
  }
  return previousIndent;
 }
 /** Outcome of running one edit strategy against a hunk. */
 export type EditResult = {
  /** Score reported by the strategy; the strategies in this file return 0 when they did not apply the hunk. */
  confidence: number;
  /** Content lines after the edit, or the caller's original lines when nothing was applied. */
  result: string[];
  /** Name of the strategy that produced this result ('context', 'dmp', 'git' or 'none' in this file). */
  strategy: string;
 };
 /**
  * Context-matching edit strategy: rebuilds the file by walking the hunk from `matchPosition`.
  *
  * Context lines are emitted as written in the hunk and consume one source line, added lines are
  * emitted without consuming anything, and removed lines only consume a source line. Whatever
  * follows the consumed region is appended unchanged.
  *
  * @param hunk - The hunk to apply.
  * @param content - Current file content, one entry per line.
  * @param matchPosition - Index in `content` where the hunk starts, or -1 when no match was found.
  * @returns The rewritten lines plus a confidence obtained from `getDMPSimilarity` over the edited
  *          window; confidence 0 and the untouched `content` when `matchPosition` is -1.
  */
 export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
  const strategy = 'context';
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy };
  }

  // Context lines as they appear in the hunk; used to guess indentation for unindented additions.
  const contextOriginals = hunk.changes
    .filter(entry => entry.type === 'context')
    .map(entry => entry.originalLine || '');

  const output = content.slice(0, matchPosition);
  let consumed = 0; // number of source lines covered by context/remove entries so far
  let lastIndent = '';

  for (const entry of hunk.changes) {
    switch (entry.type) {
      case 'context':
        output.push(entry.originalLine || (entry.indent + entry.content));
        lastIndent = entry.indent;
        consumed += 1;
        break;
      case 'add': {
        const indent = entry.indent || inferIndentation(entry.content, contextOriginals, lastIndent);
        output.push(indent + entry.content);
        lastIndent = indent;
        break;
      }
      case 'remove':
        consumed += 1;
        break;
    }
  }
  output.push(...content.slice(matchPosition + consumed));

  // Score the edit by comparing the same-sized window before and after.
  const windowEnd = matchPosition + hunk.changes.length;
  const before = content.slice(matchPosition, windowEnd).join('\n');
  const after = output.slice(matchPosition, windowEnd).join('\n');

  return { confidence: getDMPSimilarity(before, after), result: output, strategy };
 }
 /**
  * DMP edit strategy: turns the hunk into a diff-match-patch patch and applies it to the matched
  * region of the file.
  *
  * The patch is built from the text the hunk expects to find (context + removed lines) to the text
  * it wants to leave behind (context + added lines). It is applied only to the window of `content`
  * starting at `matchPosition`, and the patched window is spliced back between the untouched lines
  * before and after it, so the rest of the file is preserved.
  *
  * @param hunk - The hunk to apply.
  * @param content - Current file content, one entry per line.
  * @param matchPosition - Index in `content` where the hunk starts, or -1 when no match was found.
  * @returns The rewritten lines plus a confidence from `getDMPSimilarity`; confidence 0 and the
  *          untouched `content` when `matchPosition` is -1 or any patch could not be applied.
  */
 export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy: 'dmp' };
  }
  const dmp = new diff_match_patch();
  const contextLines = hunk.changes
    .filter(c => c.type === 'context')
    .map(c => c.content);

  // expectedLines: what the hunk claims is currently in the file (context + removals).
  // replacementLines: what should be there afterwards (context + additions).
  const expectedLines: string[] = [];
  const replacementLines: string[] = [];
  for (const change of hunk.changes) {
    if (change.type === 'add') {
      const indent = change.indent || inferIndentation(change.content, contextLines);
      replacementLines.push(indent + change.content);
      continue;
    }
    // Use the full line (indentation included) so context keeps its original indentation.
    const sourceLine = change.originalLine ?? (change.indent + change.content);
    expectedLines.push(sourceLine);
    if (change.type === 'context') {
      replacementLines.push(sourceLine);
    }
  }

  const patches = dmp.patch_make(expectedLines.join('\n'), replacementLines.join('\n'));

  // Patch only the matched window; patching a text made of the hunk alone would discard
  // every line of the file that the hunk does not mention.
  const regionEnd = matchPosition + expectedLines.length;
  const regionText = content.slice(matchPosition, regionEnd).join('\n');
  const [patchedRegion, applied] = dmp.patch_apply(patches, regionText);
  if (applied.includes(false)) {
    return { confidence: 0, result: content, strategy: 'dmp' };
  }

  // ''.split('\n') yields [''], which would leave a stray blank line when the hunk removes everything.
  const patchedLines = replacementLines.length === 0 && patchedRegion === ''
    ? []
    : patchedRegion.split('\n');

  const similarity = getDMPSimilarity(regionText, patchedRegion);
  return {
    confidence: similarity,
    result: [...content.slice(0, matchPosition), ...patchedLines, ...content.slice(regionEnd)],
    strategy: 'dmp'
  };
 }
 /**
  * Git edit strategy: applies the hunk line by line while recording the before/after states as two
  * commits in an in-memory (memfs) repository.
  *
  * The original content is committed, a `patch-branch` is created and checked out, the hunk's
  * changes are applied to a copy of the lines, and the modified content is committed. The returned
  * lines are the in-memory edited copy; nothing is read back from the repository.
  *
  * The shared memfs volume is reset before the work starts and again in `finally`.
  *
  * @param hunk - The hunk to apply.
  * @param content - Current file content, one entry per line.
  * @param matchPosition - Index in `content` where the hunk starts, or -1 when no match was found.
  * @returns The rewritten lines plus a confidence from `getDMPSimilarity`; confidence 0 and the
  *          untouched `content` when `matchPosition` is -1, when a line marked for removal cannot
  *          be found, or when any repository/file operation throws.
  */
 export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise<EditResult> {
  if (matchPosition === -1) {
    return { confidence: 0, result: content, strategy: 'git' };
  }
  vol.reset();
  try {
    await git.init({ fs: memfs, dir: '/' });
    const originalContent = content.join('\n');
    await memfs.promises.writeFile('/file.txt', originalContent);
    await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
    await git.commit({
      fs: memfs,
      dir: '/',
      author: { name: 'Temp', email: 'temp@example.com' },
      message: 'Initial commit'
    });
    await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' });
    await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' });
    const newLines = originalContent.split('\n');
    let offset = matchPosition;
    const contextLines = hunk.changes
      .filter(c => c.type === 'context')
      .map(c => c.content);
    for (const change of hunk.changes) {
      if (change.type === 'add') {
        const indent = change.indent || inferIndentation(change.content, contextLines);
        newLines.splice(offset, 0, indent + change.content);
        offset++;
      } else if (change.type === 'remove') {
        // Look for the removed line at or after the current position, ignoring leading whitespace.
        const index = newLines.findIndex(
          (line, i) => i >= offset && line.trimStart() === change.content
        );
        if (index === -1) {
          // The line to delete is not there: applying the remaining changes would leave a
          // half-applied hunk, so report failure instead of a confident partial edit.
          return { confidence: 0, result: content, strategy: 'git' };
        }
        newLines.splice(index, 1);
      } else {
        offset++;
      }
    }
    const modifiedContent = newLines.join('\n');
    await memfs.promises.writeFile('/file.txt', modifiedContent);
    await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' });
    await git.commit({
      fs: memfs,
      dir: '/',
      author: { name: 'Temp', email: 'temp@example.com' },
      message: 'Apply changes'
    });
    const similarity = getDMPSimilarity(
      content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'),
      newLines.slice(matchPosition, matchPosition + hunk.changes.length).join('\n')
    );
    return {
      confidence: similarity,
      result: newLines,
      strategy: 'git'
    };
  } catch {
    // Best effort: any failure in the in-memory repository is reported as "strategy did not apply".
    return { confidence: 0, result: content, strategy: 'git' };
  } finally {
    vol.reset();
  }
 }
 /**
  * Entry point for applying a hunk: runs the context, DMP and Git strategies and returns the
  * result of one that clears the confidence threshold (strictly greater than 0.9).
  *
  * - Outside debug mode nothing is attempted when the incoming match `confidence` is below the
  *   threshold; otherwise the strategies run one after another and the first passing result wins.
  * - In debug mode all strategies are started via `Promise.all` and the passing result with the
  *   highest confidence wins (the earlier strategy wins ties).
  *
  * @param hunk - The hunk to apply.
  * @param content - Current file content, one entry per line.
  * @param matchPosition - Index in `content` where the hunk was located.
  * @param confidence - Confidence of the match that produced `matchPosition`.
  * @param debug - When true, skip the up-front confidence gate and compare all strategies.
  * @returns The winning strategy's result, or `{ confidence: 0, result: content, strategy: 'none' }`.
  */
 export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise<EditResult> {
  const MIN_CONFIDENCE = 0.9;
  const failure: EditResult = { confidence: 0, result: content, strategy: 'none' };

  if (!debug && confidence < MIN_CONFIDENCE) {
    return failure;
  }

  // Ordered list of attempts; each thunk defers the work until it is actually needed.
  const attempts: Array<() => EditResult | Promise<EditResult>> = [
    () => applyContextMatching(hunk, content, matchPosition),
    () => applyDMP(hunk, content, matchPosition),
    () => applyGit(hunk, content, matchPosition)
  ];

  if (!debug) {
    for (const attempt of attempts) {
      const outcome = await attempt();
      if (outcome.confidence > MIN_CONFIDENCE) {
        return outcome;
      }
    }
    return failure;
  }

  const outcomes = await Promise.all(attempts.map(async attempt => attempt()));
  let best: EditResult | undefined;
  for (const outcome of outcomes) {
    if (outcome.confidence > MIN_CONFIDENCE && (best === undefined || outcome.confidence > best.confidence)) {
      best = outcome;
    }
  }
  return best ?? failure;
 }
--- a/src/core/diff/strategies/new-unified/index.ts
+++ b/src/core/diff/strategies/new-unified/index.ts
@@ -0,0 +1,181 @@
 import { Diff, Hunk } from "./types"
 import { findBestMatch, prepareSearchString } from "./search-strategies"
 import { applyEdit } from "./edit-strategies"
 import { DiffResult, DiffStrategy } from "../../types"
 /**
  * Diff strategy that applies unified-diff hunks by locating each hunk with `findBestMatch`
  * and rewriting the matched region with `applyEdit`.
  */
 export class NewUnifiedDiffStrategy implements DiffStrategy {
 	/**
 	 * Splits unified-diff text into hunks.
 	 *
 	 * Everything before the first line starting with "@@" is ignored. Each "@@" line starts a new
 	 * hunk; lines starting with " ", "+" or "-" become context/add/remove changes and every other
 	 * line is skipped. For each change the marker is dropped, the leading whitespace is stored in
 	 * `indent`, the remainder in `content`, and the marker-less line in `originalLine`.
 	 * A final hunk without changes is dropped; empty hunks followed by another header are kept.
 	 */
 	private parseUnifiedDiff(diff: string): Diff {
 		const hunks: Hunk[] = []
 		const rows = diff.split("\n")
 		const firstHeader = rows.findIndex((row) => row.startsWith("@@"))
 		if (firstHeader === -1) {
 			return { hunks }
 		}
 		let open: Hunk | null = null
 		for (const row of rows.slice(firstHeader)) {
 			if (row.startsWith("@@")) {
 				if (open) {
 					hunks.push(open)
 				}
 				open = { changes: [] }
 				continue
 			}
 			if (!open) {
 				continue
 			}
 			let kind: "context" | "add" | "remove"
 			switch (row.charAt(0)) {
 				case " ":
 					kind = "context"
 					break
 				case "+":
 					kind = "add"
 					break
 				case "-":
 					kind = "remove"
 					break
 				default:
 					// Not a diff line (e.g. empty or unknown marker): skip it.
 					continue
 			}
 			const body = row.slice(1) // line without the diff marker
 			const leading = body.match(/^(\s*)/)
 			const indent = leading ? leading[0] : ""
 			open.changes.push({
 				type: kind,
 				content: body.slice(indent.length),
 				indent,
 				originalLine: body,
 			})
 		}
 		if (open && open.changes.length > 0) {
 			hunks.push(open)
 		}
 		return { hunks }
 	}
 	/**
 	 * Returns the usage text for the `apply_diff` tool.
 	 *
 	 * @param cwd - Working directory interpolated into the `path` parameter description.
 	 */
 	getToolDescription(cwd: string): string {
 		return `## apply_diff
 Description: Apply a unified diff to a file at the specified path. This tool is useful when you need to make specific modifications to a file based on a set of changes provided in unified diff format (diff -U0).
 Make sure you include the first 2 lines with the file paths.
 Don't include timestamps with the file paths.
 Start each hunk of changes with a \`@@ ... @@\` line.
 Don't include line numbers like \`diff -U0\` does.
 The user's patch tool doesn't need them.
 Indentation matters in the diffs!
 Start a new hunk for each section of the file that needs changes.
 Only output hunks that specify changes with \`+\` or \`-\` lines.
 Skip any hunks that are entirely unchanging \` \` lines.
 The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file!
 Think carefully and make sure you include and mark all lines that need to be removed or changed as \`-\` lines.
 Make sure you mark all new or modified lines with \`+\`.
 Don't leave out any lines or the diff patch won't apply correctly.
 Output hunks in whatever order makes the most sense.
 Hunks don't need to be in any particular order.
 The hunks do not need line numbers.
 When editing a function, method, loop, etc use a hunk to replace the *entire* code block.
 Delete the entire existing version with \`-\` lines and then add a new, updated version with \`+\` lines.
 This will help you generate correct code and correct diffs.
 To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location.
 Parameters:
 - path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd})
 - diff: (required) The diff content in unified format to apply to the file.
 For each file that needs to be changed, write out the changes similar to a unified diff like \`diff -U0\` would produce.
 Example:
 \`\`\`diff
 --- mathweb/flask/app.py
 +++ mathweb/flask/app.py
@@ ... @@
 -class MathWeb:
 +import sympy
 +
 +class MathWeb:
@@ ... @@
 -def is_prime(x):
 -    if x < 2:
 -        return False
 -    for i in range(2, int(math.sqrt(x)) + 1):
 -        if x % i == 0:
 -            return False
 -    return True
@@ ... @@
 -@app.route('/prime/<int:n>')
 -def nth_prime(n):
 -    count = 0
 -    num = 1
 -    while count < n:
 -        num += 1
 -        if is_prime(num):
 -            count += 1
 -    return str(num)
 +@app.route('/prime/<int:n>')
 +def nth_prime(n):
 +    count = 0
 +    num = 1
 +    while count < n:
 +        num += 1
 +        if sympy.isprime(num):
 +            count += 1
 +    return str(num)
 \`\`\`
 Usage:
 <apply_diff>
 <path>File path here</path>
 <diff>
 Your diff here
 </diff>
 </apply_diff>`
 	}
 	/**
 	 * Applies every hunk of `diffContent` to `originalContent`, in order.
 	 *
 	 * Each hunk is located in the current (already edited) lines and then applied; the first hunk
 	 * whose edit does not exceed a confidence of 0.9 aborts the whole operation.
 	 *
 	 * @param originalContent - File text to patch.
 	 * @param diffContent - Unified diff text.
 	 * @param startLine - Not used by this implementation.
 	 * @param endLine - Not used by this implementation.
 	 * @returns `{ success: true, content }` with the patched text, or `{ success: false, error }`
 	 *          naming the strategy reported by the failed edit.
 	 */
 	async applyDiff(
 		originalContent: string,
 		diffContent: string,
 		startLine?: number,
 		endLine?: number
 	): Promise<DiffResult> {
 		const MIN_CONFIDENCE = 0.9
 		const { hunks } = this.parseUnifiedDiff(diffContent)
 		let lines = originalContent.split("\n")
 		for (const hunk of hunks) {
 			const located = findBestMatch(prepareSearchString(hunk.changes), lines)
 			const outcome = await applyEdit(hunk, lines, located.index, located.confidence)
 			if (!(outcome.confidence > MIN_CONFIDENCE)) {
 				return { success: false, error: `Failed to apply edit using ${outcome.strategy} strategy` }
 			}
 			lines = outcome.result
 		}
 		return { success: true, content: lines.join("\n") }
 	}
 }
--- a/src/core/diff/strategies/new-unified/search-strategies.ts
+++ b/src/core/diff/strategies/new-unified/search-strategies.ts
@@ -0,0 +1,131 @@
 import { compareTwoStrings } from 'string-similarity';
 import { closest } from 'fastest-levenshtein';
 import { diff_match_patch } from 'diff-match-patch';
 import { Change } from './types';
 /** Outcome of one search strategy looking for a block of lines inside the file content. */
 export type SearchResult = {
  /** Line index in the content where the match starts, or -1 when nothing was found. */
  index: number;
  /** Score assigned by the strategy; 0 when nothing was found. */
  confidence: number;
  /** Name of the strategy that produced this result ('exact', 'similarity', 'levenshtein' or 'none' in this file). */
  strategy: string;
 };
 // Threshold that getDMPSimilarity must reach for the finders in this file to treat a
 // candidate match as validated.
 //TODO: this should be configurable
 const MIN_CONFIDENCE = 0.95;
 /**
  * Builds the text to search for from a hunk's changes: the `content` of every context and
  * removed line (i.e. the lines expected to exist in the file), joined with newlines.
  * Added lines are left out.
  */
 export function prepareSearchString(changes: Change[]): string {
  const expected: string[] = [];
  for (const change of changes) {
    if (change.type === 'remove' || change.type === 'context') {
      expected.push(change.content);
    }
  }
  return expected.join('\n');
 }
 /**
  * Scores how alike two texts are by delegating to `compareTwoStrings` from `string-similarity`.
  *
  * @param original - First text.
  * @param modified - Second text.
  * @returns The score returned by `compareTwoStrings(original, modified)`.
  */
 export function evaluateSimilarity(original: string, modified: string): number {
  return compareTwoStrings(original, modified);
 }
 /**
  * Validates a pair of texts with diff-match-patch: diffs `original` against `modified`, turns
  * the (semantically cleaned-up) diff into patches, applies those patches to `original`, and
  * scores the patched text against `modified` with `evaluateSimilarity`.
  *
  * @param original - Text the patches are computed from and applied to.
  * @param modified - Text the patched result is compared with.
  * @returns The similarity between the patched text and `modified`.
  */
 export function getDMPSimilarity(original: string, modified: string): number {
  const engine = new diff_match_patch();
  const delta = engine.diff_main(original, modified);
  engine.diff_cleanupSemantic(delta);
  const [roundTripped] = engine.patch_apply(engine.patch_make(original, delta), original);
  return evaluateSimilarity(roundTripped, modified);
 }
 /**
  * Exact match strategy: looks for `searchStr` as a substring of the content (joined with
  * newlines) from `startIndex` onwards.
  *
  * The character offset of the hit is converted to a line index by counting the newlines before
  * it. The lines at that position are then checked with `getDMPSimilarity`; confidence is 1.0
  * when the check reaches `MIN_CONFIDENCE` and 0.9 otherwise.
  *
  * @returns The line index of the hit, or `{ index: -1, confidence: 0 }` when there is none.
  */
 export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const haystack = content.slice(startIndex).join('\n');
  const charOffset = haystack.indexOf(searchStr);
  if (charOffset === -1) {
    return { index: -1, confidence: 0, strategy: 'exact' };
  }

  // Number of line breaks before the hit == line offset inside the sliced content.
  const lineIndex = startIndex + haystack.slice(0, charOffset).split('\n').length - 1;
  const lineCount = searchStr.split('\n').length;
  const matchedContent = content.slice(lineIndex, lineIndex + lineCount).join('\n');
  const validated = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE;

  return {
    index: lineIndex,
    confidence: validated ? 1.0 : 0.9,
    strategy: 'exact'
  };
 }
 /**
  * String similarity strategy: slides a window with as many lines as `searchStr` over the
  * content and keeps the window with the best adjusted `compareTwoStrings` score.
  *
  * Windows scoring below 0.8, or not above the current best, are skipped. A window that fails
  * the `getDMPSimilarity` check against `MIN_CONFIDENCE` has its score multiplied by 0.9 before
  * it is compared with the current best.
  *
  * @returns The best window's start index and adjusted score, or `{ index: -1, confidence: 0 }`.
  */
 export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const windowSize = searchStr.split('\n').length;
  const minScore = 0.8;
  let topScore = 0;
  let topIndex = -1;

  for (let start = startIndex; start < content.length - windowSize + 1; start++) {
    const candidate = content.slice(start, start + windowSize).join('\n');
    const rawScore = compareTwoStrings(searchStr, candidate);
    if (!(rawScore > topScore && rawScore >= minScore)) {
      continue;
    }
    const validated = getDMPSimilarity(searchStr, candidate) >= MIN_CONFIDENCE;
    const effectiveScore = validated ? rawScore : rawScore * 0.9;
    if (effectiveScore > topScore) {
      topScore = effectiveScore;
      topIndex = start;
    }
  }

  const confidence = topIndex !== -1 ? topScore : 0;
  return { index: topIndex, confidence, strategy: 'similarity' };
 }
 /**
  * Levenshtein strategy: collects every window of the content that has as many lines as
  * `searchStr` and picks the one `closest` (from `fastest-levenshtein`) reports as nearest.
  *
  * The confidence is a fixed 0.7 when the chosen window passes the `getDMPSimilarity` check
  * against `MIN_CONFIDENCE`, and 0.6 otherwise.
  *
  * @returns The chosen window's start index, or `{ index: -1, confidence: 0 }` when the content
  *          is too short to hold a single window.
  */
 export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const windowSize = searchStr.split('\n').length;
  const windows: string[] = [];
  for (let start = startIndex; start + windowSize <= content.length; start++) {
    windows.push(content.slice(start, start + windowSize).join('\n'));
  }

  if (windows.length === 0) {
    return { index: -1, confidence: 0, strategy: 'levenshtein' };
  }

  const nearest = closest(searchStr, windows);
  const validated = getDMPSimilarity(searchStr, nearest) >= MIN_CONFIDENCE;
  return {
    index: startIndex + windows.indexOf(nearest),
    confidence: validated ? 0.7 : 0.6,
    strategy: 'levenshtein'
  };
 }
 /**
  * Runs the exact, similarity and Levenshtein finders in that order and returns the result with
  * the highest confidence. On ties the earlier finder wins; when no finder reports a confidence
  * above 0 the result is `{ index: -1, confidence: 0, strategy: 'none' }`.
  */
 export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
  const finders = [findExactMatch, findSimilarityMatch, findLevenshteinMatch];
  return finders.reduce<SearchResult>(
    (best, finder) => {
      const candidate = finder(searchStr, content, startIndex);
      return candidate.confidence > best.confidence ? candidate : best;
    },
    { index: -1, confidence: 0, strategy: 'none' }
  );
 }
--- a/src/core/diff/strategies/new-unified/types.ts
+++ b/src/core/diff/strategies/new-unified/types.ts
@@ -0,0 +1,14 @@
 /** One line of a hunk. */
 export type Change = {
  /** Whether the line is unchanged context, an addition, or a removal. */
  type: 'context' | 'add' | 'remove';
  /** Line text; the unified-diff parser in index.ts stores it without the diff marker and leading whitespace. */
  content: string;
  /** Leading whitespace of the line (the parser in index.ts splits it off `content`). */
  indent: string;
  /** The line with only the diff marker removed, when available. */
  originalLine?: string;
 };
 /** A group of consecutive changes from one section of a diff. */
 export type Hunk = {
  /** The hunk's lines, in the order they appear in the diff. */
  changes: Change[];
 };
 /** A parsed diff: the list of hunks it contains. */
 export type Diff = {
  /** Hunks in the order they appear in the diff text. */
  hunks: Hunk[];
 };