From e4c23fb61dd413fe13e8e6b1dbbae3c371b11c04 Mon Sep 17 00:00:00 2001 From: Matt Rubens Date: Sun, 15 Dec 2024 15:19:58 -0500 Subject: [PATCH 01/14] Prioritize exact line matches in search/replace --- .../__tests__/search-replace.test.ts | 89 +++++++++++++++++++ src/core/diff/strategies/search-replace.ts | 69 ++++++++------ 2 files changed, 133 insertions(+), 25 deletions(-) diff --git a/src/core/diff/strategies/__tests__/search-replace.test.ts b/src/core/diff/strategies/__tests__/search-replace.test.ts index ee2174a..8aa50ee 100644 --- a/src/core/diff/strategies/__tests__/search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/search-replace.test.ts @@ -610,6 +610,95 @@ function two() { function three() { return 3; +}`) + }) + + it('should prioritize exact line match over expanded search', () => { + const originalContent = ` +function one() { + return 1; +} + +function process() { + return "old"; +} + +function process() { + return "old"; +} + +function two() { + return 2; +}` + const diffContent = `test.ts +<<<<<<< SEARCH +function process() { + return "old"; +} +======= +function process() { + return "new"; +} +>>>>>>> REPLACE` + + // Should match the second instance exactly at lines 10-12 + // even though the first instance at 6-8 is within the expanded search range + const result = strategy.applyDiff(originalContent, diffContent, 10, 12) + expect(result).toBe(` +function one() { + return 1; +} + +function process() { + return "old"; +} + +function process() { + return "new"; +} + +function two() { + return 2; +}`) + }) + + it('should fall back to expanded search only if exact match fails', () => { + const originalContent = ` +function one() { + return 1; +} + +function process() { + return "target"; +} + +function two() { + return 2; +}`.trim() + const diffContent = `test.ts +<<<<<<< SEARCH +function process() { + return "target"; +} +======= +function process() { + return "updated"; +} +>>>>>>> REPLACE` + + // Specify wrong line numbers (3-5), but content exists at 6-8 + // Should still find and replace it since it's within the expanded range + const result = strategy.applyDiff(originalContent, diffContent, 3, 5) + expect(result).toBe(`function one() { + return 1; +} + +function process() { + return "updated"; +} + +function two() { + return 2; }`) }) }) diff --git a/src/core/diff/strategies/search-replace.ts b/src/core/diff/strategies/search-replace.ts index ee0272e..3d524b1 100644 --- a/src/core/diff/strategies/search-replace.ts +++ b/src/core/diff/strategies/search-replace.ts @@ -132,41 +132,60 @@ Your search/replace content here const replaceLines = replaceContent.split(/\r?\n/); const originalLines = originalContent.split(/\r?\n/); - // Determine search range based on provided line numbers - let searchStartIndex = 0; - let searchEndIndex = originalLines.length; - - if (startLine !== undefined || endLine !== undefined) { - // Convert to 0-based index and add buffer - if (startLine !== undefined) { - searchStartIndex = Math.max(0, startLine - 6); - } - if (endLine !== undefined) { - searchEndIndex = Math.min(originalLines.length, endLine + 5); - } - } - - // Find the search content in the original using fuzzy matching + // First try exact line range if provided let matchIndex = -1; let bestMatchScore = 0; - for (let i = searchStartIndex; i <= searchEndIndex - searchLines.length; i++) { - // Join the lines and calculate overall similarity - const originalChunk = originalLines.slice(i, i + searchLines.length).join('\n'); + if (startLine !== undefined && endLine !== undefined) { + // Convert to 0-based index + const exactStartIndex = startLine - 1; + const exactEndIndex = endLine - 1; + + // Check exact range first + const originalChunk = originalLines.slice(exactStartIndex, exactEndIndex + 1).join('\n'); const searchChunk = searchLines.join('\n'); const similarity = getSimilarity(originalChunk, searchChunk); - if (similarity > bestMatchScore) { + if (similarity >= this.fuzzyThreshold) { + matchIndex = exactStartIndex; bestMatchScore = similarity; - matchIndex = i; } } - + + // If no match found in exact range, try expanded range + if (matchIndex === -1) { + let searchStartIndex = 0; + let searchEndIndex = originalLines.length; + + if (startLine !== undefined || endLine !== undefined) { + // Convert to 0-based index and add buffer + if (startLine !== undefined) { + searchStartIndex = Math.max(0, startLine - 6); + } + if (endLine !== undefined) { + searchEndIndex = Math.min(originalLines.length, endLine + 5); + } + } + + // Find the search content in the expanded range using fuzzy matching + for (let i = searchStartIndex; i <= searchEndIndex - searchLines.length; i++) { + // Join the lines and calculate overall similarity + const originalChunk = originalLines.slice(i, i + searchLines.length).join('\n'); + const searchChunk = searchLines.join('\n'); + + const similarity = getSimilarity(originalChunk, searchChunk); + if (similarity > bestMatchScore) { + bestMatchScore = similarity; + matchIndex = i; + } + } + } + // Require similarity to meet threshold if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) { return false; } - + // Get the matched lines from the original content const matchedLines = originalLines.slice(matchIndex, matchIndex + searchLines.length); @@ -175,13 +194,13 @@ Your search/replace content here const match = line.match(/^[\t ]*/); return match ? match[0] : ''; }); - + // Get the exact indentation of each line in the search block const searchIndents = searchLines.map(line => { const match = line.match(/^[\t ]*/); return match ? match[0] : ''; }); - + // Apply the replacement while preserving exact indentation const indentedReplaceLines = replaceLines.map((line, i) => { // Get the matched line's exact indentation @@ -198,7 +217,7 @@ Your search/replace content here // Apply the matched indentation plus any relative indentation return matchedIndent + relativeIndent + line.trim(); }); - + // Construct the final content const beforeMatch = originalLines.slice(0, matchIndex); const afterMatch = originalLines.slice(matchIndex + searchLines.length); From 468d317f2f2d625f4a4427bfba35c3e0c05eccde Mon Sep 17 00:00:00 2001 From: Matt Rubens Date: Sun, 15 Dec 2024 15:28:24 -0500 Subject: [PATCH 02/14] More indentation tests --- .../__tests__/search-replace.test.ts | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) diff --git a/src/core/diff/strategies/__tests__/search-replace.test.ts b/src/core/diff/strategies/__tests__/search-replace.test.ts index 8aa50ee..cdfa137 100644 --- a/src/core/diff/strategies/__tests__/search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/search-replace.test.ts @@ -228,6 +228,196 @@ function hello() { const result = strategy.applyDiff(originalContent, diffContent) expect(result).toBe(' onScroll={() => updateHighlights()}\n onDragOver={(e) => {\n e.preventDefault()\n e.stopPropagation()\n }}') }) + + it('should handle varying indentation levels correctly', () => { + const originalContent = ` +class Example { + constructor() { + this.value = 0; + if (true) { + this.init(); + } + } +}`.trim(); + + const diffContent = `test.ts +<<<<<<< SEARCH + class Example { + constructor() { + this.value = 0; + if (true) { + this.init(); + } + } + } +======= + class Example { + constructor() { + this.value = 1; + if (true) { + this.init(); + this.setup(); + this.validate(); + } + } + } +>>>>>>> REPLACE`.trim(); + + const result = strategy.applyDiff(originalContent, diffContent); + expect(result).toBe(` +class Example { + constructor() { + this.value = 1; + if (true) { + this.init(); + this.setup(); + this.validate(); + } + } +}`.trim()); + }); + + it('should handle mixed indentation styles in the same file', () => { + const originalContent = `class Example { + constructor() { + this.value = 0; + if (true) { + this.init(); + } + } +}`.trim(); + const diffContent = `test.ts +<<<<<<< SEARCH + constructor() { + this.value = 0; + if (true) { + this.init(); + } + } +======= + constructor() { + this.value = 1; + if (true) { + this.init(); + this.validate(); + } + } +>>>>>>> REPLACE`; + + const result = strategy.applyDiff(originalContent, diffContent); + expect(result).toBe(`class Example { + constructor() { + this.value = 1; + if (true) { + this.init(); + this.validate(); + } + } +}`); + }); + + it('should handle Python-style significant whitespace', () => { + const originalContent = `def example(): + if condition: + do_something() + for item in items: + process(item) + return True`.trim(); + const diffContent = `test.ts +<<<<<<< SEARCH + if condition: + do_something() + for item in items: + process(item) +======= + if condition: + do_something() + while items: + item = items.pop() + process(item) +>>>>>>> REPLACE`; + + const result = strategy.applyDiff(originalContent, diffContent); + expect(result).toBe(`def example(): + if condition: + do_something() + while items: + item = items.pop() + process(item) + return True`); + }); + + it('should preserve empty lines with indentation', () => { + const originalContent = `function test() { + const x = 1; + + if (x) { + return true; + } +}`.trim(); + const diffContent = `test.ts +<<<<<<< SEARCH + const x = 1; + + if (x) { +======= + const x = 1; + + // Check x + if (x) { +>>>>>>> REPLACE`; + + const result = strategy.applyDiff(originalContent, diffContent); + expect(result).toBe(`function test() { + const x = 1; + + // Check x + if (x) { + return true; + } +}`); + }); + + it('should handle indentation when replacing entire blocks', () => { + const originalContent = `class Test { + method() { + if (true) { + console.log("test"); + } + } +}`.trim(); + const diffContent = `test.ts +<<<<<<< SEARCH + method() { + if (true) { + console.log("test"); + } + } +======= + method() { + try { + if (true) { + console.log("test"); + } + } catch (e) { + console.error(e); + } + } +>>>>>>> REPLACE`; + + const result = strategy.applyDiff(originalContent, diffContent); + expect(result).toBe(`class Test { + method() { + try { + if (true) { + console.log("test"); + } + } catch (e) { + console.error(e); + } + } +}`); + }); }) describe('fuzzy matching', () => { From 8159c51b03235928b76a2a96550eae3f974a49a6 Mon Sep 17 00:00:00 2001 From: Matt Rubens Date: Sun, 15 Dec 2024 15:41:10 -0500 Subject: [PATCH 03/14] Update omission format and keywords --- .../editor/__tests__/detect-omission.test.ts | 66 +++++++++++++++++++ src/integrations/editor/detect-omission.ts | 3 +- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/integrations/editor/__tests__/detect-omission.test.ts diff --git a/src/integrations/editor/__tests__/detect-omission.test.ts b/src/integrations/editor/__tests__/detect-omission.test.ts new file mode 100644 index 0000000..4740b1f --- /dev/null +++ b/src/integrations/editor/__tests__/detect-omission.test.ts @@ -0,0 +1,66 @@ +import { detectCodeOmission } from '../detect-omission' + +describe('detectCodeOmission', () => { + const originalContent = `function example() { + // Some code + const x = 1; + const y = 2; + return x + y; +}` + + it('should detect square bracket line range omission', () => { + const newContent = `[Previous content from line 1-305 remains exactly the same] +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should detect single-line comment omission', () => { + const newContent = `// Lines 1-50 remain unchanged +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should detect multi-line comment omission', () => { + const newContent = `/* Previous content remains the same */ +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should detect HTML-style comment omission', () => { + const newContent = ` +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should detect JSX-style comment omission', () => { + const newContent = `{/* Rest of the code remains the same */} +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should detect Python-style comment omission', () => { + const newContent = `# Previous content remains unchanged +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(true) + }) + + it('should not detect regular comments without omission keywords', () => { + const newContent = `// Adding new functionality +const z = 3;` + expect(detectCodeOmission(originalContent, newContent)).toBe(false) + }) + + it('should not detect when comment is part of original content', () => { + const originalWithComment = `// Content remains unchanged +${originalContent}` + const newContent = `// Content remains unchanged +const z = 3;` + expect(detectCodeOmission(originalWithComment, newContent)).toBe(false) + }) + + it('should not detect code that happens to contain omission keywords', () => { + const newContent = `const remains = 'some value'; +const unchanged = true;` + expect(detectCodeOmission(originalContent, newContent)).toBe(false) + }) +}) \ No newline at end of file diff --git a/src/integrations/editor/detect-omission.ts b/src/integrations/editor/detect-omission.ts index 565ebd3..5cb0f8e 100644 --- a/src/integrations/editor/detect-omission.ts +++ b/src/integrations/editor/detect-omission.ts @@ -7,7 +7,7 @@ export function detectCodeOmission(originalFileContent: string, newFileContent: string): boolean { const originalLines = originalFileContent.split("\n") const newLines = newFileContent.split("\n") - const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "..."] + const omissionKeywords = ["remain", "remains", "unchanged", "rest", "previous", "existing", "content", "same", "..."] const commentPatterns = [ /^\s*\/\//, // Single-line comment for most languages @@ -15,6 +15,7 @@ export function detectCodeOmission(originalFileContent: string, newFileContent: /^\s*\/\*/, // Multi-line comment opening /^\s*{\s*\/\*/, // JSX comment opening /^\s*