Merge pull request #111 from RooVetGit/fuzzy_matching

Support fuzzy matching for apply_diff
2026-02-05 03:55:23 -05:00 · 2024-12-14 14:19:58 -05:00
parent 85acffdfa7 f3b77c9c62
commit a3d10dec8e
6 changed files with 146 additions and 297 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Roo Cline Changelog
 ## [2.2.6]
 -   Add a fuzzy match tolerance when applying diffs
 ## [2.2.5]
 -   Allow MCP servers to be enabled/disabled
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "roo-cline",
-  "version": "2.2.5",
+  "version": "2.2.6",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "roo-cline",
-      "version": "2.2.5",
+      "version": "2.2.6",
      "dependencies": {
        "@anthropic-ai/bedrock-sdk": "^0.10.2",
        "@anthropic-ai/sdk": "^0.26.0",
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
  "displayName": "Roo Cline",
  "description": "A fork of Cline, an autonomous coding agent, with some added experimental configuration and automation features.",
  "publisher": "RooVeterinaryInc",
-  "version": "2.2.5",
+  "version": "2.2.6",
  "icon": "assets/icons/rocket.png",
  "galleryBanner": {
    "color": "#617A91",
--- a/src/core/diff/DiffStrategy.ts
+++ b/src/core/diff/DiffStrategy.ts
@@ -7,9 +7,9 @@ import { SearchReplaceDiffStrategy } from './strategies/search-replace'
 * @returns The appropriate diff strategy for the model
 */
 export function getDiffStrategy(model: string): DiffStrategy {
-    // For now, return SearchReplaceDiffStrategy for all models
+    // For now, return SearchReplaceDiffStrategy for all models (with a fuzzy threshold of 0.9)
    // This architecture allows for future optimizations based on model capabilities
-    return new SearchReplaceDiffStrategy()
+    return new SearchReplaceDiffStrategy(0.9)
 }
 export type { DiffStrategy }
--- a/src/core/diff/strategies/tests/search-replace.test.ts
+++ b/src/core/diff/strategies/tests/search-replace.test.ts
@@ -1,18 +1,15 @@
 import { SearchReplaceDiffStrategy } from '../search-replace'
 describe('SearchReplaceDiffStrategy', () => {
    describe('exact matching', () => {
        let strategy: SearchReplaceDiffStrategy
        beforeEach(() => {
-        strategy = new SearchReplaceDiffStrategy()
+            strategy = new SearchReplaceDiffStrategy() // Default 1.0 threshold for exact matching
        })
    describe('applyDiff', () => {
        it('should replace matching content', () => {
-            const originalContent = `function hello() {
+            const originalContent = 'function hello() {\n    console.log("hello")\n}\n'
    console.log("hello")
 }
 `
            const diffContent = `test.ts
 <<<<<<< SEARCH
 function hello() {
@@ -25,19 +22,11 @@ function hello() {
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            expect(result).toBe(`function hello() {
+            expect(result).toBe('function hello() {\n    console.log("hello world")\n}\n')
    console.log("hello world")
 }
 `)
        })
        it('should match content with different surrounding whitespace', () => {
-            const originalContent = `
+            const originalContent = '\nfunction example() {\n    return 42;\n}\n\n'
 function example() {
    return 42;
 }
 `
            const diffContent = `test.ts
 <<<<<<< SEARCH
 function example() {
@@ -50,19 +39,11 @@ function example() {
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            expect(result).toBe(`
+            expect(result).toBe('\nfunction example() {\n    return 43;\n}\n\n')
 function example() {
    return 43;
 }
 `)
        })
        it('should match content with different indentation in search block', () => {
-            const originalContent = `    function test() {
+            const originalContent = '    function test() {\n        return true;\n    }\n'
        return true;
    }
 `
            const diffContent = `test.ts
 <<<<<<< SEARCH
 function test() {
@@ -75,10 +56,7 @@ function test() {
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            expect(result).toBe(`    function test() {
+            expect(result).toBe('    function test() {\n        return false;\n    }\n')
        return false;
    }
 `)
        })
        it('should handle tab-based indentation', () => {
@@ -174,10 +152,7 @@ function test() {
        })
        it('should return false if search content does not match', () => {
-            const originalContent = `function hello() {
+            const originalContent = 'function hello() {\n    console.log("hello")\n}\n'
    console.log("hello")
 }
 `
            const diffContent = `test.ts
 <<<<<<< SEARCH
 function hello() {
@@ -194,28 +169,15 @@ function hello() {
        })
        it('should return false if diff format is invalid', () => {
-            const originalContent = `function hello() {
+            const originalContent = 'function hello() {\n    console.log("hello")\n}\n'
-    console.log("hello")
+            const diffContent = `test.ts\nInvalid diff format`
 }
 `
            const diffContent = `test.ts
 Invalid diff format`
            const result = strategy.applyDiff(originalContent, diffContent)
            expect(result).toBe(false)
        })
        it('should handle multiple lines with proper indentation', () => {
-            const originalContent = `class Example {
+            const originalContent = 'class Example {\n    constructor() {\n        this.value = 0\n    }\n\n    getValue() {\n        return this.value\n    }\n}\n'
    constructor() {
        this.value = 0
    }
    getValue() {
        return this.value
    }
 }
 `
            const diffContent = `test.ts
 <<<<<<< SEARCH
    getValue() {
@@ -230,18 +192,7 @@ Invalid diff format`
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            expect(result).toBe(`class Example {
+            expect(result).toBe('class Example {\n    constructor() {\n        this.value = 0\n    }\n\n    getValue() {\n        // Add logging\n        console.log("Getting value")\n        return this.value\n    }\n}\n')
    constructor() {
        this.value = 0
    }
    getValue() {
        // Add logging
        console.log("Getting value")
        return this.value
    }
 }
 `)
        })
        it('should preserve whitespace exactly in the output', () => {
@@ -262,7 +213,7 @@ Invalid diff format`
        })
        it('should preserve indentation when adding new lines after existing content', () => {
-            const originalContent = `				onScroll={() => updateHighlights()}`
+            const originalContent = '				onScroll={() => updateHighlights()}'
            const diffContent = `test.ts
 <<<<<<< SEARCH
 				onScroll={() => updateHighlights()}
@@ -275,230 +226,78 @@ Invalid diff format`
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            expect(result).toBe(`				onScroll={() => updateHighlights()}
+            expect(result).toBe('				onScroll={() => updateHighlights()}\n				onDragOver={(e) => {\n					e.preventDefault()\n					e.stopPropagation()\n				}}')
-				onDragOver={(e) => {
+        })
 					e.preventDefault()
 					e.stopPropagation()
 				}}`)
    })
-        it('should handle complex refactoring with multiple functions', () => {
+    describe('fuzzy matching', () => {
-            const originalContent = `export async function extractTextFromFile(filePath: string): Promise<string> {
+        let strategy: SearchReplaceDiffStrategy
 	try {
 		await fs.access(filePath)
 	} catch (error) {
 		throw new Error(\`File not found: \${filePath}\`)
 	}
 	const fileExtension = path.extname(filePath).toLowerCase()
 	switch (fileExtension) {
 		case ".pdf":
 			return extractTextFromPDF(filePath)
 		case ".docx":
 			return extractTextFromDOCX(filePath)
 		case ".ipynb":
 			return extractTextFromIPYNB(filePath)
 		default:
 			const isBinary = await isBinaryFile(filePath).catch(() => false)
 			if (!isBinary) {
 				return addLineNumbers(await fs.readFile(filePath, "utf8"))
 			} else {
 				throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
 			}
 	}
 }
-export function addLineNumbers(content: string): string {
+        beforeEach(() => {
-	const lines = content.split('\\n')
+            strategy = new SearchReplaceDiffStrategy(0.9) // 90% similarity threshold
-	const maxLineNumberWidth = String(lines.length).length
+        })
 	return lines
 		.map((line, index) => {
 			const lineNumber = String(index + 1).padStart(maxLineNumberWidth, ' ')
 			return \`\${lineNumber} | \${line}\`
 		}).join('\\n')
 }`
        it('should match content with small differences (>90% similar)', () => {
            const originalContent = 'function getData() {\n    const results = fetchData();\n    return results.filter(Boolean);\n}\n'
            const diffContent = `test.ts
 <<<<<<< SEARCH
-export async function extractTextFromFile(filePath: string): Promise<string> {
+function getData() {
-	try {
+    const result = fetchData();
-		await fs.access(filePath)
+    return results.filter(Boolean);
 	} catch (error) {
 		throw new Error(\`File not found: \${filePath}\`)
 	}
 	const fileExtension = path.extname(filePath).toLowerCase()
 	switch (fileExtension) {
 		case ".pdf":
 			return extractTextFromPDF(filePath)
 		case ".docx":
 			return extractTextFromDOCX(filePath)
 		case ".ipynb":
 			return extractTextFromIPYNB(filePath)
 		default:
 			const isBinary = await isBinaryFile(filePath).catch(() => false)
 			if (!isBinary) {
 				return addLineNumbers(await fs.readFile(filePath, "utf8"))
 			} else {
 				throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
 			}
 	}
 }
 export function addLineNumbers(content: string): string {
 	const lines = content.split('\\n')
 	const maxLineNumberWidth = String(lines.length).length
 	return lines
 		.map((line, index) => {
 			const lineNumber = String(index + 1).padStart(maxLineNumberWidth, ' ')
 			return \`\${lineNumber} | \${line}\`
 		}).join('\\n')
 }
 =======
-function extractLineRange(content: string, startLine?: number, endLine?: number): string {
+function getData() {
-	const lines = content.split('\\n')
+    const data = fetchData();
-	const start = startLine ? Math.max(1, startLine) : 1
+    return data.filter(Boolean);
 	const end = endLine ? Math.min(lines.length, endLine) : lines.length
 	if (start > end || start > lines.length) {
 		throw new Error(\`Invalid line range: start=\${start}, end=\${end}, total lines=\${lines.length}\`)
 	}
 	return lines.slice(start - 1, end).join('\\n')
 }
 export async function extractTextFromFile(filePath: string, startLine?: number, endLine?: number): Promise<string> {
 	try {
 		await fs.access(filePath)
 	} catch (error) {
 		throw new Error(\`File not found: \${filePath}\`)
 	}
 	const fileExtension = path.extname(filePath).toLowerCase()
 	let content: string
 	switch (fileExtension) {
 		case ".pdf": {
 			const dataBuffer = await fs.readFile(filePath)
 			const data = await pdf(dataBuffer)
 			content = extractLineRange(data.text, startLine, endLine)
 			break
 		}
 		case ".docx": {
 			const result = await mammoth.extractRawText({ path: filePath })
 			content = extractLineRange(result.value, startLine, endLine)
 			break
 		}
 		case ".ipynb": {
 			const data = await fs.readFile(filePath, "utf8")
 			const notebook = JSON.parse(data)
 			let extractedText = ""
 			for (const cell of notebook.cells) {
 				if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
 					extractedText += cell.source.join("\\n") + "\\n"
 				}
 			}
 			content = extractLineRange(extractedText, startLine, endLine)
 			break
 		}
 		default: {
 			const isBinary = await isBinaryFile(filePath).catch(() => false)
 			if (!isBinary) {
 				const fileContent = await fs.readFile(filePath, "utf8")
 				content = extractLineRange(fileContent, startLine, endLine)
 			} else {
 				throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
 			}
 		}
 	}
 	return addLineNumbers(content, startLine)
 }
 export function addLineNumbers(content: string, startLine: number = 1): string {
 	const lines = content.split('\\n')
 	const maxLineNumberWidth = String(startLine + lines.length - 1).length
 	return lines
 		.map((line, index) => {
 			const lineNumber = String(startLine + index).padStart(maxLineNumberWidth, ' ')
 			return \`\${lineNumber} | \${line}\`
 		}).join('\\n')
 }
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
-            const expected = `function extractLineRange(content: string, startLine?: number, endLine?: number): string {
+            expect(result).toBe('function getData() {\n    const data = fetchData();\n    return data.filter(Boolean);\n}\n')
-	const lines = content.split('\\n')
+        })
 	const start = startLine ? Math.max(1, startLine) : 1
 	const end = endLine ? Math.min(lines.length, endLine) : lines.length
-	if (start > end || start > lines.length) {
+        it('should not match when content is too different (<90% similar)', () => {
-		throw new Error(\`Invalid line range: start=\${start}, end=\${end}, total lines=\${lines.length}\`)
+            const originalContent = 'function processUsers(data) {\n    return data.map(user => user.name);\n}\n'
-	}
+            const diffContent = `test.ts
-	
+<<<<<<< SEARCH
-	return lines.slice(start - 1, end).join('\\n')
+function handleItems(items) {
    return items.map(item => item.username);
 }
-
+=======
-export async function extractTextFromFile(filePath: string, startLine?: number, endLine?: number): Promise<string> {
+function processData(data) {
-	try {
+    return data.map(d => d.value);
 		await fs.access(filePath)
 	} catch (error) {
 		throw new Error(\`File not found: \${filePath}\`)
 	}
 	const fileExtension = path.extname(filePath).toLowerCase()
 	let content: string
 	switch (fileExtension) {
 		case ".pdf": {
 			const dataBuffer = await fs.readFile(filePath)
 			const data = await pdf(dataBuffer)
 			content = extractLineRange(data.text, startLine, endLine)
 			break
 		}
 		case ".docx": {
 			const result = await mammoth.extractRawText({ path: filePath })
 			content = extractLineRange(result.value, startLine, endLine)
 			break
 		}
 		case ".ipynb": {
 			const data = await fs.readFile(filePath, "utf8")
 			const notebook = JSON.parse(data)
 			let extractedText = ""
 			for (const cell of notebook.cells) {
 				if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
 					extractedText += cell.source.join("\\n") + "\\n"
 				}
 			}
 			content = extractLineRange(extractedText, startLine, endLine)
 			break
 		}
 		default: {
 			const isBinary = await isBinaryFile(filePath).catch(() => false)
 			if (!isBinary) {
 				const fileContent = await fs.readFile(filePath, "utf8")
 				content = extractLineRange(fileContent, startLine, endLine)
 			} else {
 				throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
 			}
 		}
 	}
 	return addLineNumbers(content, startLine)
 }
 >>>>>>> REPLACE`
-export function addLineNumbers(content: string, startLine: number = 1): string {
+            const result = strategy.applyDiff(originalContent, diffContent)
-	const lines = content.split('\\n')
+            expect(result).toBe(false)
-	const maxLineNumberWidth = String(startLine + lines.length - 1).length
+        })
-	return lines
+
-		.map((line, index) => {
+        it('should match content with extra whitespace', () => {
-			const lineNumber = String(startLine + index).padStart(maxLineNumberWidth, ' ')
+            const originalContent = 'function sum(a, b) {\n    return a + b;\n}'
-			return \`\${lineNumber} | \${line}\`
+            const diffContent = `test.ts
-		}).join('\\n')
+<<<<<<< SEARCH
-}`
+function   sum(a,   b)    {
-            expect(result).toBe(expected)
+    return    a + b;
 }
 =======
 function sum(a, b) {
    return a + b + 1;
 }
 >>>>>>> REPLACE`
            const result = strategy.applyDiff(originalContent, diffContent)
            expect(result).toBe('function sum(a, b) {\n    return a + b + 1;\n}')
        })
    })
    describe('getToolDescription', () => {
        let strategy: SearchReplaceDiffStrategy
        beforeEach(() => {
            strategy = new SearchReplaceDiffStrategy()
        })
        it('should include the current working directory', () => {
            const cwd = '/test/dir'
            const description = strategy.getToolDescription(cwd)
@@ -515,4 +314,3 @@ export function addLineNumbers(content: string, startLine: number = 1): string {
        })
    })
 })
--- a/src/core/diff/strategies/search-replace.ts
+++ b/src/core/diff/strategies/search-replace.ts
@@ -1,6 +1,59 @@
 import { DiffStrategy } from "../types"
 function levenshteinDistance(a: string, b: string): number {
    const matrix: number[][] = [];
    // Initialize matrix
    for (let i = 0; i <= a.length; i++) {
        matrix[i] = [i];
    }
    for (let j = 0; j <= b.length; j++) {
        matrix[0][j] = j;
    }
    // Fill matrix
    for (let i = 1; i <= a.length; i++) {
        for (let j = 1; j <= b.length; j++) {
            if (a[i-1] === b[j-1]) {
                matrix[i][j] = matrix[i-1][j-1];
            } else {
                matrix[i][j] = Math.min(
                    matrix[i-1][j-1] + 1, // substitution
                    matrix[i][j-1] + 1,   // insertion
                    matrix[i-1][j] + 1    // deletion
                );
            }
        }
    }
    return matrix[a.length][b.length];
 }
 function getSimilarity(original: string, search: string): number {
    // Normalize strings by removing extra whitespace but preserve case
    const normalizeStr = (str: string) => str.replace(/\s+/g, ' ').trim();
    const normalizedOriginal = normalizeStr(original);
    const normalizedSearch = normalizeStr(search);
    if (normalizedOriginal === normalizedSearch) { return 1; }
    // Calculate Levenshtein distance
    const distance = levenshteinDistance(normalizedOriginal, normalizedSearch);
    // Calculate similarity ratio (0 to 1, where 1 is exact match)
    const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
    return 1 - (distance / maxLength);
 }
 export class SearchReplaceDiffStrategy implements DiffStrategy {
    private fuzzyThreshold: number;
    constructor(fuzzyThreshold?: number) {
        // Default to exact matching (1.0) unless fuzzy threshold specified
        this.fuzzyThreshold = fuzzyThreshold ?? 1.0;
    }
    getToolDescription(cwd: string): string {
        return `## apply_diff
 Description: Request to replace existing code using search and replace blocks.
@@ -78,30 +131,24 @@ Your search/replace content here
        const replaceLines = replaceContent.split(/\r?\n/);
        const originalLines = originalContent.split(/\r?\n/);
-        // Find the search content in the original
+        // Find the search content in the original using fuzzy matching
        let matchIndex = -1;
        let bestMatchScore = 0;
        for (let i = 0; i <= originalLines.length - searchLines.length; i++) {
-            let found = true;
+            // Join the lines and calculate overall similarity
            const originalChunk = originalLines.slice(i, i + searchLines.length).join('\n');
            const searchChunk = searchLines.join('\n');
-            for (let j = 0; j < searchLines.length; j++) {
+            const similarity = getSimilarity(originalChunk, searchChunk);
-                const originalLine = originalLines[i + j];
+            if (similarity > bestMatchScore) {
-                const searchLine = searchLines[j];
+                bestMatchScore = similarity;
                // Compare lines after removing leading/trailing whitespace
                if (originalLine.trim() !== searchLine.trim()) {
                    found = false;
                    break;
                }
            }
            if (found) {
                matchIndex = i;
                break;
            }
        }
-        if (matchIndex === -1) {
+        // Require similarity to meet threshold
        if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
            return false;
        }
@@ -121,7 +168,7 @@ Your search/replace content here
        });
        // Apply the replacement while preserving exact indentation
-        const indentedReplace = replaceLines.map((line, i) => {
+        const indentedReplaceLines = replaceLines.map((line, i) => {
            // Get the corresponding original and search indentations
            const originalIndent = originalIndents[Math.min(i, originalIndents.length - 1)];
            const searchIndent = searchIndents[Math.min(i, searchIndents.length - 1)];
@@ -162,6 +209,6 @@ Your search/replace content here
        const beforeMatch = originalLines.slice(0, matchIndex);
        const afterMatch = originalLines.slice(matchIndex + searchLines.length);
-        return [...beforeMatch, ...indentedReplace, ...afterMatch].join(lineEnding);
+        return [...beforeMatch, ...indentedReplaceLines, ...afterMatch].join(lineEnding);
    }
 }