Merge pull request #111 from RooVetGit/fuzzy_matching

Support fuzzy matching for apply_diff
This commit is contained in:
Matt Rubens
2024-12-14 14:19:58 -05:00
committed by GitHub
6 changed files with 146 additions and 297 deletions

View File

@@ -1,5 +1,9 @@
# Roo Cline Changelog # Roo Cline Changelog
## [2.2.6]
- Add a fuzzy match tolerance when applying diffs
## [2.2.5] ## [2.2.5]
- Allow MCP servers to be enabled/disabled - Allow MCP servers to be enabled/disabled

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "roo-cline", "name": "roo-cline",
"version": "2.2.5", "version": "2.2.6",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "roo-cline", "name": "roo-cline",
"version": "2.2.5", "version": "2.2.6",
"dependencies": { "dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.10.2", "@anthropic-ai/bedrock-sdk": "^0.10.2",
"@anthropic-ai/sdk": "^0.26.0", "@anthropic-ai/sdk": "^0.26.0",

View File

@@ -3,7 +3,7 @@
"displayName": "Roo Cline", "displayName": "Roo Cline",
"description": "A fork of Cline, an autonomous coding agent, with some added experimental configuration and automation features.", "description": "A fork of Cline, an autonomous coding agent, with some added experimental configuration and automation features.",
"publisher": "RooVeterinaryInc", "publisher": "RooVeterinaryInc",
"version": "2.2.5", "version": "2.2.6",
"icon": "assets/icons/rocket.png", "icon": "assets/icons/rocket.png",
"galleryBanner": { "galleryBanner": {
"color": "#617A91", "color": "#617A91",

View File

@@ -7,9 +7,9 @@ import { SearchReplaceDiffStrategy } from './strategies/search-replace'
* @returns The appropriate diff strategy for the model * @returns The appropriate diff strategy for the model
*/ */
export function getDiffStrategy(model: string): DiffStrategy { export function getDiffStrategy(model: string): DiffStrategy {
// For now, return SearchReplaceDiffStrategy for all models // For now, return SearchReplaceDiffStrategy for all models (with a fuzzy threshold of 0.9)
// This architecture allows for future optimizations based on model capabilities // This architecture allows for future optimizations based on model capabilities
return new SearchReplaceDiffStrategy() return new SearchReplaceDiffStrategy(0.9)
} }
export type { DiffStrategy } export type { DiffStrategy }

View File

@@ -1,18 +1,15 @@
import { SearchReplaceDiffStrategy } from '../search-replace' import { SearchReplaceDiffStrategy } from '../search-replace'
describe('SearchReplaceDiffStrategy', () => { describe('SearchReplaceDiffStrategy', () => {
describe('exact matching', () => {
let strategy: SearchReplaceDiffStrategy let strategy: SearchReplaceDiffStrategy
beforeEach(() => { beforeEach(() => {
strategy = new SearchReplaceDiffStrategy() strategy = new SearchReplaceDiffStrategy() // Default 1.0 threshold for exact matching
}) })
describe('applyDiff', () => {
it('should replace matching content', () => { it('should replace matching content', () => {
const originalContent = `function hello() { const originalContent = 'function hello() {\n console.log("hello")\n}\n'
console.log("hello")
}
`
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
function hello() { function hello() {
@@ -25,19 +22,11 @@ function hello() {
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(`function hello() { expect(result).toBe('function hello() {\n console.log("hello world")\n}\n')
console.log("hello world")
}
`)
}) })
it('should match content with different surrounding whitespace', () => { it('should match content with different surrounding whitespace', () => {
const originalContent = ` const originalContent = '\nfunction example() {\n return 42;\n}\n\n'
function example() {
return 42;
}
`
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
function example() { function example() {
@@ -50,19 +39,11 @@ function example() {
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(` expect(result).toBe('\nfunction example() {\n return 43;\n}\n\n')
function example() {
return 43;
}
`)
}) })
it('should match content with different indentation in search block', () => { it('should match content with different indentation in search block', () => {
const originalContent = ` function test() { const originalContent = ' function test() {\n return true;\n }\n'
return true;
}
`
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
function test() { function test() {
@@ -75,10 +56,7 @@ function test() {
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(` function test() { expect(result).toBe(' function test() {\n return false;\n }\n')
return false;
}
`)
}) })
it('should handle tab-based indentation', () => { it('should handle tab-based indentation', () => {
@@ -174,10 +152,7 @@ function test() {
}) })
it('should return false if search content does not match', () => { it('should return false if search content does not match', () => {
const originalContent = `function hello() { const originalContent = 'function hello() {\n console.log("hello")\n}\n'
console.log("hello")
}
`
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
function hello() { function hello() {
@@ -194,28 +169,15 @@ function hello() {
}) })
it('should return false if diff format is invalid', () => { it('should return false if diff format is invalid', () => {
const originalContent = `function hello() { const originalContent = 'function hello() {\n console.log("hello")\n}\n'
console.log("hello") const diffContent = `test.ts\nInvalid diff format`
}
`
const diffContent = `test.ts
Invalid diff format`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(false) expect(result).toBe(false)
}) })
it('should handle multiple lines with proper indentation', () => { it('should handle multiple lines with proper indentation', () => {
const originalContent = `class Example { const originalContent = 'class Example {\n constructor() {\n this.value = 0\n }\n\n getValue() {\n return this.value\n }\n}\n'
constructor() {
this.value = 0
}
getValue() {
return this.value
}
}
`
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
getValue() { getValue() {
@@ -230,18 +192,7 @@ Invalid diff format`
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(`class Example { expect(result).toBe('class Example {\n constructor() {\n this.value = 0\n }\n\n getValue() {\n // Add logging\n console.log("Getting value")\n return this.value\n }\n}\n')
constructor() {
this.value = 0
}
getValue() {
// Add logging
console.log("Getting value")
return this.value
}
}
`)
}) })
it('should preserve whitespace exactly in the output', () => { it('should preserve whitespace exactly in the output', () => {
@@ -262,7 +213,7 @@ Invalid diff format`
}) })
it('should preserve indentation when adding new lines after existing content', () => { it('should preserve indentation when adding new lines after existing content', () => {
const originalContent = ` onScroll={() => updateHighlights()}` const originalContent = ' onScroll={() => updateHighlights()}'
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
onScroll={() => updateHighlights()} onScroll={() => updateHighlights()}
@@ -275,230 +226,78 @@ Invalid diff format`
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe(` onScroll={() => updateHighlights()} expect(result).toBe(' onScroll={() => updateHighlights()}\n onDragOver={(e) => {\n e.preventDefault()\n e.stopPropagation()\n }}')
onDragOver={(e) => { })
e.preventDefault()
e.stopPropagation()
}}`)
}) })
it('should handle complex refactoring with multiple functions', () => { describe('fuzzy matching', () => {
const originalContent = `export async function extractTextFromFile(filePath: string): Promise<string> { let strategy: SearchReplaceDiffStrategy
try {
await fs.access(filePath)
} catch (error) {
throw new Error(\`File not found: \${filePath}\`)
}
const fileExtension = path.extname(filePath).toLowerCase()
switch (fileExtension) {
case ".pdf":
return extractTextFromPDF(filePath)
case ".docx":
return extractTextFromDOCX(filePath)
case ".ipynb":
return extractTextFromIPYNB(filePath)
default:
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
return addLineNumbers(await fs.readFile(filePath, "utf8"))
} else {
throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
}
}
}
export function addLineNumbers(content: string): string { beforeEach(() => {
const lines = content.split('\\n') strategy = new SearchReplaceDiffStrategy(0.9) // 90% similarity threshold
const maxLineNumberWidth = String(lines.length).length })
return lines
.map((line, index) => {
const lineNumber = String(index + 1).padStart(maxLineNumberWidth, ' ')
return \`\${lineNumber} | \${line}\`
}).join('\\n')
}`
it('should match content with small differences (>90% similar)', () => {
const originalContent = 'function getData() {\n const results = fetchData();\n return results.filter(Boolean);\n}\n'
const diffContent = `test.ts const diffContent = `test.ts
<<<<<<< SEARCH <<<<<<< SEARCH
export async function extractTextFromFile(filePath: string): Promise<string> { function getData() {
try { const result = fetchData();
await fs.access(filePath) return results.filter(Boolean);
} catch (error) {
throw new Error(\`File not found: \${filePath}\`)
}
const fileExtension = path.extname(filePath).toLowerCase()
switch (fileExtension) {
case ".pdf":
return extractTextFromPDF(filePath)
case ".docx":
return extractTextFromDOCX(filePath)
case ".ipynb":
return extractTextFromIPYNB(filePath)
default:
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
return addLineNumbers(await fs.readFile(filePath, "utf8"))
} else {
throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
}
}
}
export function addLineNumbers(content: string): string {
const lines = content.split('\\n')
const maxLineNumberWidth = String(lines.length).length
return lines
.map((line, index) => {
const lineNumber = String(index + 1).padStart(maxLineNumberWidth, ' ')
return \`\${lineNumber} | \${line}\`
}).join('\\n')
} }
======= =======
function extractLineRange(content: string, startLine?: number, endLine?: number): string { function getData() {
const lines = content.split('\\n') const data = fetchData();
const start = startLine ? Math.max(1, startLine) : 1 return data.filter(Boolean);
const end = endLine ? Math.min(lines.length, endLine) : lines.length
if (start > end || start > lines.length) {
throw new Error(\`Invalid line range: start=\${start}, end=\${end}, total lines=\${lines.length}\`)
}
return lines.slice(start - 1, end).join('\\n')
}
export async function extractTextFromFile(filePath: string, startLine?: number, endLine?: number): Promise<string> {
try {
await fs.access(filePath)
} catch (error) {
throw new Error(\`File not found: \${filePath}\`)
}
const fileExtension = path.extname(filePath).toLowerCase()
let content: string
switch (fileExtension) {
case ".pdf": {
const dataBuffer = await fs.readFile(filePath)
const data = await pdf(dataBuffer)
content = extractLineRange(data.text, startLine, endLine)
break
}
case ".docx": {
const result = await mammoth.extractRawText({ path: filePath })
content = extractLineRange(result.value, startLine, endLine)
break
}
case ".ipynb": {
const data = await fs.readFile(filePath, "utf8")
const notebook = JSON.parse(data)
let extractedText = ""
for (const cell of notebook.cells) {
if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
extractedText += cell.source.join("\\n") + "\\n"
}
}
content = extractLineRange(extractedText, startLine, endLine)
break
}
default: {
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
const fileContent = await fs.readFile(filePath, "utf8")
content = extractLineRange(fileContent, startLine, endLine)
} else {
throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
}
}
}
return addLineNumbers(content, startLine)
}
export function addLineNumbers(content: string, startLine: number = 1): string {
const lines = content.split('\\n')
const maxLineNumberWidth = String(startLine + lines.length - 1).length
return lines
.map((line, index) => {
const lineNumber = String(startLine + index).padStart(maxLineNumberWidth, ' ')
return \`\${lineNumber} | \${line}\`
}).join('\\n')
} }
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
const expected = `function extractLineRange(content: string, startLine?: number, endLine?: number): string { expect(result).toBe('function getData() {\n const data = fetchData();\n return data.filter(Boolean);\n}\n')
const lines = content.split('\\n') })
const start = startLine ? Math.max(1, startLine) : 1
const end = endLine ? Math.min(lines.length, endLine) : lines.length
if (start > end || start > lines.length) { it('should not match when content is too different (<90% similar)', () => {
throw new Error(\`Invalid line range: start=\${start}, end=\${end}, total lines=\${lines.length}\`) const originalContent = 'function processUsers(data) {\n return data.map(user => user.name);\n}\n'
} const diffContent = `test.ts
<<<<<<< SEARCH
return lines.slice(start - 1, end).join('\\n') function handleItems(items) {
return items.map(item => item.username);
} }
=======
export async function extractTextFromFile(filePath: string, startLine?: number, endLine?: number): Promise<string> { function processData(data) {
try { return data.map(d => d.value);
await fs.access(filePath)
} catch (error) {
throw new Error(\`File not found: \${filePath}\`)
}
const fileExtension = path.extname(filePath).toLowerCase()
let content: string
switch (fileExtension) {
case ".pdf": {
const dataBuffer = await fs.readFile(filePath)
const data = await pdf(dataBuffer)
content = extractLineRange(data.text, startLine, endLine)
break
}
case ".docx": {
const result = await mammoth.extractRawText({ path: filePath })
content = extractLineRange(result.value, startLine, endLine)
break
}
case ".ipynb": {
const data = await fs.readFile(filePath, "utf8")
const notebook = JSON.parse(data)
let extractedText = ""
for (const cell of notebook.cells) {
if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
extractedText += cell.source.join("\\n") + "\\n"
}
}
content = extractLineRange(extractedText, startLine, endLine)
break
}
default: {
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
const fileContent = await fs.readFile(filePath, "utf8")
content = extractLineRange(fileContent, startLine, endLine)
} else {
throw new Error(\`Cannot read text for file type: \${fileExtension}\`)
}
}
}
return addLineNumbers(content, startLine)
} }
>>>>>>> REPLACE`
export function addLineNumbers(content: string, startLine: number = 1): string { const result = strategy.applyDiff(originalContent, diffContent)
const lines = content.split('\\n') expect(result).toBe(false)
const maxLineNumberWidth = String(startLine + lines.length - 1).length })
return lines
.map((line, index) => { it('should match content with extra whitespace', () => {
const lineNumber = String(startLine + index).padStart(maxLineNumberWidth, ' ') const originalContent = 'function sum(a, b) {\n return a + b;\n}'
return \`\${lineNumber} | \${line}\` const diffContent = `test.ts
}).join('\\n') <<<<<<< SEARCH
}` function sum(a, b) {
expect(result).toBe(expected) return a + b;
}
=======
function sum(a, b) {
return a + b + 1;
}
>>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent)
expect(result).toBe('function sum(a, b) {\n return a + b + 1;\n}')
}) })
}) })
describe('getToolDescription', () => { describe('getToolDescription', () => {
let strategy: SearchReplaceDiffStrategy
beforeEach(() => {
strategy = new SearchReplaceDiffStrategy()
})
it('should include the current working directory', () => { it('should include the current working directory', () => {
const cwd = '/test/dir' const cwd = '/test/dir'
const description = strategy.getToolDescription(cwd) const description = strategy.getToolDescription(cwd)
@@ -515,4 +314,3 @@ export function addLineNumbers(content: string, startLine: number = 1): string {
}) })
}) })
}) })

View File

@@ -1,6 +1,59 @@
import { DiffStrategy } from "../types" import { DiffStrategy } from "../types"
function levenshteinDistance(a: string, b: string): number {
const matrix: number[][] = [];
// Initialize matrix
for (let i = 0; i <= a.length; i++) {
matrix[i] = [i];
}
for (let j = 0; j <= b.length; j++) {
matrix[0][j] = j;
}
// Fill matrix
for (let i = 1; i <= a.length; i++) {
for (let j = 1; j <= b.length; j++) {
if (a[i-1] === b[j-1]) {
matrix[i][j] = matrix[i-1][j-1];
} else {
matrix[i][j] = Math.min(
matrix[i-1][j-1] + 1, // substitution
matrix[i][j-1] + 1, // insertion
matrix[i-1][j] + 1 // deletion
);
}
}
}
return matrix[a.length][b.length];
}
function getSimilarity(original: string, search: string): number {
// Normalize strings by removing extra whitespace but preserve case
const normalizeStr = (str: string) => str.replace(/\s+/g, ' ').trim();
const normalizedOriginal = normalizeStr(original);
const normalizedSearch = normalizeStr(search);
if (normalizedOriginal === normalizedSearch) { return 1; }
// Calculate Levenshtein distance
const distance = levenshteinDistance(normalizedOriginal, normalizedSearch);
// Calculate similarity ratio (0 to 1, where 1 is exact match)
const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
return 1 - (distance / maxLength);
}
export class SearchReplaceDiffStrategy implements DiffStrategy { export class SearchReplaceDiffStrategy implements DiffStrategy {
private fuzzyThreshold: number;
constructor(fuzzyThreshold?: number) {
// Default to exact matching (1.0) unless fuzzy threshold specified
this.fuzzyThreshold = fuzzyThreshold ?? 1.0;
}
getToolDescription(cwd: string): string { getToolDescription(cwd: string): string {
return `## apply_diff return `## apply_diff
Description: Request to replace existing code using search and replace blocks. Description: Request to replace existing code using search and replace blocks.
@@ -78,30 +131,24 @@ Your search/replace content here
const replaceLines = replaceContent.split(/\r?\n/); const replaceLines = replaceContent.split(/\r?\n/);
const originalLines = originalContent.split(/\r?\n/); const originalLines = originalContent.split(/\r?\n/);
// Find the search content in the original // Find the search content in the original using fuzzy matching
let matchIndex = -1; let matchIndex = -1;
let bestMatchScore = 0;
for (let i = 0; i <= originalLines.length - searchLines.length; i++) { for (let i = 0; i <= originalLines.length - searchLines.length; i++) {
let found = true; // Join the lines and calculate overall similarity
const originalChunk = originalLines.slice(i, i + searchLines.length).join('\n');
const searchChunk = searchLines.join('\n');
for (let j = 0; j < searchLines.length; j++) { const similarity = getSimilarity(originalChunk, searchChunk);
const originalLine = originalLines[i + j]; if (similarity > bestMatchScore) {
const searchLine = searchLines[j]; bestMatchScore = similarity;
// Compare lines after removing leading/trailing whitespace
if (originalLine.trim() !== searchLine.trim()) {
found = false;
break;
}
}
if (found) {
matchIndex = i; matchIndex = i;
break;
} }
} }
if (matchIndex === -1) { // Require similarity to meet threshold
if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
return false; return false;
} }
@@ -121,7 +168,7 @@ Your search/replace content here
}); });
// Apply the replacement while preserving exact indentation // Apply the replacement while preserving exact indentation
const indentedReplace = replaceLines.map((line, i) => { const indentedReplaceLines = replaceLines.map((line, i) => {
// Get the corresponding original and search indentations // Get the corresponding original and search indentations
const originalIndent = originalIndents[Math.min(i, originalIndents.length - 1)]; const originalIndent = originalIndents[Math.min(i, originalIndents.length - 1)];
const searchIndent = searchIndents[Math.min(i, searchIndents.length - 1)]; const searchIndent = searchIndents[Math.min(i, searchIndents.length - 1)];
@@ -162,6 +209,6 @@ Your search/replace content here
const beforeMatch = originalLines.slice(0, matchIndex); const beforeMatch = originalLines.slice(0, matchIndex);
const afterMatch = originalLines.slice(matchIndex + searchLines.length); const afterMatch = originalLines.slice(matchIndex + searchLines.length);
return [...beforeMatch, ...indentedReplace, ...afterMatch].join(lineEnding); return [...beforeMatch, ...indentedReplaceLines, ...afterMatch].join(lineEnding);
} }
} }