Do fuzzy match search from the middle out and expand window to 20 lines

This commit is contained in:
Matt Rubens
2024-12-16 23:09:48 -05:00
parent 06202be46f
commit eb56cac16b
2 changed files with 126 additions and 44 deletions

View File

@@ -5,7 +5,7 @@ describe('SearchReplaceDiffStrategy', () => {
let strategy: SearchReplaceDiffStrategy let strategy: SearchReplaceDiffStrategy
beforeEach(() => { beforeEach(() => {
strategy = new SearchReplaceDiffStrategy() // Default 1.0 threshold for exact matching strategy = new SearchReplaceDiffStrategy(1.0, 5) // Default 1.0 threshold for exact matching, 5 line buffer for tests
}) })
it('should replace matching content', () => { it('should replace matching content', () => {
@@ -562,6 +562,63 @@ this.init();
}`); }`);
} }
}); });
it('should find matches from middle out', () => {
const originalContent = `
function one() {
return "target";
}
function two() {
return "target";
}
function three() {
return "target";
}
function four() {
return "target";
}
function five() {
return "target";
}`.trim()
const diffContent = `test.ts
<<<<<<< SEARCH
return "target";
=======
return "updated";
>>>>>>> REPLACE`
// Search around the middle (function three)
// Even though all functions contain the target text,
// it should match the one closest to line 9 first
const result = strategy.applyDiff(originalContent, diffContent, 9, 9)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`function one() {
return "target";
}
function two() {
return "target";
}
function three() {
return "updated";
}
function four() {
return "target";
}
function five() {
return "target";
}`)
}
})
}) })
describe('line number stripping', () => { describe('line number stripping', () => {
@@ -915,7 +972,7 @@ function test() {
} }
}) })
it('should insert at the start of the file if no start_line is provided for insertion', () => { it('should error if no start_line is provided for insertion', () => {
const originalContent = `function test() { const originalContent = `function test() {
return true; return true;
}` }`
@@ -926,22 +983,15 @@ console.log("test");
>>>>>>> REPLACE` >>>>>>> REPLACE`
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true) expect(result.success).toBe(false)
if (result.success) {
expect(result.content).toBe(`console.log("test");
function test() {
return true;
}`)
}
}) })
}) })
}) })
describe('fuzzy matching', () => { describe('fuzzy matching', () => {
let strategy: SearchReplaceDiffStrategy let strategy: SearchReplaceDiffStrategy
beforeEach(() => { beforeEach(() => {
strategy = new SearchReplaceDiffStrategy(0.9) // 90% similarity threshold strategy = new SearchReplaceDiffStrategy(0.9, 5) // 90% similarity threshold, 5 line buffer for tests
}) })
it('should match content with small differences (>90% similar)', () => { it('should match content with small differences (>90% similar)', () => {
@@ -959,6 +1009,8 @@ function getData() {
} }
>>>>>>> REPLACE` >>>>>>> REPLACE`
strategy = new SearchReplaceDiffStrategy(0.9, 5) // Use 5 line buffer for tests
const result = strategy.applyDiff(originalContent, diffContent) const result = strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true) expect(result.success).toBe(true)
if (result.success) { if (result.success) {
@@ -1008,7 +1060,7 @@ function sum(a, b) {
let strategy: SearchReplaceDiffStrategy let strategy: SearchReplaceDiffStrategy
beforeEach(() => { beforeEach(() => {
strategy = new SearchReplaceDiffStrategy() strategy = new SearchReplaceDiffStrategy(0.9, 5)
}) })
it('should find and replace within specified line range', () => { it('should find and replace within specified line range', () => {

View File

@@ -1,7 +1,7 @@
import { DiffStrategy, DiffResult } from "../types" import { DiffStrategy, DiffResult } from "../types"
import { addLineNumbers } from "../../../integrations/misc/extract-text" import { addLineNumbers } from "../../../integrations/misc/extract-text"
const BUFFER_LINES = 5; // Number of extra context lines to show before and after matches const BUFFER_LINES = 20; // Number of extra context lines to show before and after matches
function levenshteinDistance(a: string, b: string): number { function levenshteinDistance(a: string, b: string): number {
const matrix: number[][] = []; const matrix: number[][] = [];
@@ -55,10 +55,12 @@ function getSimilarity(original: string, search: string): number {
export class SearchReplaceDiffStrategy implements DiffStrategy { export class SearchReplaceDiffStrategy implements DiffStrategy {
private fuzzyThreshold: number; private fuzzyThreshold: number;
private bufferLines: number;
constructor(fuzzyThreshold?: number) { constructor(fuzzyThreshold?: number, bufferLines?: number) {
// Default to exact matching (1.0) unless fuzzy threshold specified // Default to exact matching (1.0) unless fuzzy threshold specified
this.fuzzyThreshold = fuzzyThreshold ?? 1.0; this.fuzzyThreshold = fuzzyThreshold ?? 1.0;
this.bufferLines = bufferLines ?? BUFFER_LINES;
} }
getToolDescription(cwd: string): string { getToolDescription(cwd: string): string {
@@ -205,12 +207,34 @@ Result:
const searchLines = searchContent === '' ? [] : searchContent.split(/\r?\n/); const searchLines = searchContent === '' ? [] : searchContent.split(/\r?\n/);
const replaceLines = replaceContent === '' ? [] : replaceContent.split(/\r?\n/); const replaceLines = replaceContent === '' ? [] : replaceContent.split(/\r?\n/);
const originalLines = originalContent.split(/\r?\n/); const originalLines = originalContent.split(/\r?\n/);
// Validate that empty search requires start line
if (searchLines.length === 0 && !startLine) {
return {
success: false,
error: `Empty search content requires start_line to be specified\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, specify the line number where content should be inserted`
};
}
// Validate that empty search requires same start and end line
if (searchLines.length === 0 && startLine && endLine && startLine !== endLine) {
return {
success: false,
error: `Empty search content requires start_line and end_line to be the same (got ${startLine}-${endLine})\n\nDebug Info:\n- Empty search content is only valid for insertions at a specific line\n- For insertions, use the same line number for both start_line and end_line`
};
}
// First try exact line range if provided // Initialize search variables
let matchIndex = -1; let matchIndex = -1;
let bestMatchScore = 0; let bestMatchScore = 0;
let bestMatchContent = ""; let bestMatchContent = "";
const searchChunk = searchLines.join('\n');
// Determine search bounds
let searchStartIndex = 0;
let searchEndIndex = originalLines.length;
// Validate and handle line range if provided
if (startLine && endLine) { if (startLine && endLine) {
// Convert to 0-based index // Convert to 0-based index
const exactStartIndex = startLine - 1; const exactStartIndex = startLine - 1;
@@ -223,44 +247,50 @@ Result:
}; };
} }
// Check exact range first // Try exact match first
const originalChunk = originalLines.slice(exactStartIndex, exactEndIndex + 1).join('\n'); const originalChunk = originalLines.slice(exactStartIndex, exactEndIndex + 1).join('\n');
const searchChunk = searchLines.join('\n');
const similarity = getSimilarity(originalChunk, searchChunk); const similarity = getSimilarity(originalChunk, searchChunk);
if (similarity >= this.fuzzyThreshold) { if (similarity >= this.fuzzyThreshold) {
matchIndex = exactStartIndex; matchIndex = exactStartIndex;
bestMatchScore = similarity; bestMatchScore = similarity;
bestMatchContent = originalChunk; bestMatchContent = originalChunk;
} else {
// Set bounds for buffered search
searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1));
searchEndIndex = Math.min(originalLines.length, endLine + this.bufferLines);
} }
} }
// If no match found in exact range, try expanded range // If no match found yet, try middle-out search within bounds
if (matchIndex === -1) { if (matchIndex === -1) {
let searchStartIndex = 0; const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2);
let searchEndIndex = originalLines.length; let leftIndex = midPoint;
let rightIndex = midPoint + 1;
if (startLine || endLine) { // Search outward from the middle within bounds
// Convert to 0-based index and add buffer while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
if (startLine) { // Check left side if still in range
searchStartIndex = Math.max(0, startLine - (BUFFER_LINES + 1)); if (leftIndex >= searchStartIndex) {
const originalChunk = originalLines.slice(leftIndex, leftIndex + searchLines.length).join('\n');
const similarity = getSimilarity(originalChunk, searchChunk);
if (similarity > bestMatchScore) {
bestMatchScore = similarity;
matchIndex = leftIndex;
bestMatchContent = originalChunk;
}
leftIndex--;
} }
if (endLine) {
searchEndIndex = Math.min(originalLines.length, endLine + BUFFER_LINES);
}
}
// Find the search content in the expanded range using fuzzy matching // Check right side if still in range
for (let i = searchStartIndex; i <= searchEndIndex - searchLines.length; i++) { if (rightIndex <= searchEndIndex - searchLines.length) {
// Join the lines and calculate overall similarity const originalChunk = originalLines.slice(rightIndex, rightIndex + searchLines.length).join('\n');
const originalChunk = originalLines.slice(i, i + searchLines.length).join('\n'); const similarity = getSimilarity(originalChunk, searchChunk);
const searchChunk = searchLines.join('\n'); if (similarity > bestMatchScore) {
bestMatchScore = similarity;
const similarity = getSimilarity(originalChunk, searchChunk); matchIndex = rightIndex;
if (similarity > bestMatchScore) { bestMatchContent = originalChunk;
bestMatchScore = similarity; }
matchIndex = i; rightIndex++;
bestMatchContent = originalChunk;
} }
} }
} }
@@ -271,10 +301,10 @@ Result:
const originalContentSection = startLine !== undefined && endLine !== undefined const originalContentSection = startLine !== undefined && endLine !== undefined
? `\n\nOriginal Content:\n${addLineNumbers( ? `\n\nOriginal Content:\n${addLineNumbers(
originalLines.slice( originalLines.slice(
Math.max(0, startLine - 1 - BUFFER_LINES), Math.max(0, startLine - 1 - this.bufferLines),
Math.min(originalLines.length, endLine + BUFFER_LINES) Math.min(originalLines.length, endLine + this.bufferLines)
).join('\n'), ).join('\n'),
Math.max(1, startLine - BUFFER_LINES) Math.max(1, startLine - this.bufferLines)
)}` )}`
: `\n\nOriginal Content:\n${addLineNumbers(originalLines.join('\n'))}`; : `\n\nOriginal Content:\n${addLineNumbers(originalLines.join('\n'))}`;