mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 12:21:13 -05:00
refactor: enhance anchor-based search strategy in Levenshtein match
- Improved the `identifyAnchors` function to return the first and last non-empty lines of the search string.
- Updated the `findAnchorMatch` function to validate anchor uniqueness and context more effectively.
- Removed unused complexity calculations and streamlined the anchor validation process.
This commit is contained in:
@@ -271,8 +271,9 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
|
|||||||
const similarity = getDMPSimilarity(searchStr, closestMatch)
|
const similarity = getDMPSimilarity(searchStr, closestMatch)
|
||||||
const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold)
|
const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold)
|
||||||
const confidence = Math.min(similarity, contextSimilarity)
|
const confidence = Math.min(similarity, contextSimilarity)
|
||||||
|
console.log(searchStr, closestMatch, index, confidence)
|
||||||
return {
|
return {
|
||||||
index,
|
index: confidence === 0 ? -1 : index,
|
||||||
confidence: index !== -1 ? confidence : 0,
|
confidence: index !== -1 ? confidence : 0,
|
||||||
strategy: "levenshtein",
|
strategy: "levenshtein",
|
||||||
}
|
}
|
||||||
@@ -281,92 +282,91 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
|
|||||||
return { index: -1, confidence: 0, strategy: "levenshtein" }
|
return { index: -1, confidence: 0, strategy: "levenshtein" }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to identify anchor lines based on uniqueness and complexity
|
// Helper function to identify anchor lines
|
||||||
function identifyAnchors(searchStr: string, content: string[]): { line: string; index: number; weight: number }[] {
|
function identifyAnchors(searchStr: string): { first: string | null; last: string | null } {
|
||||||
const searchLines = searchStr.split("\n")
|
const searchLines = searchStr.split("\n");
|
||||||
const contentStr = content.join("\n")
|
let first: string | null = null;
|
||||||
const anchors: { line: string; index: number; weight: number }[] = []
|
let last: string | null = null;
|
||||||
|
|
||||||
for (let i = 0; i < searchLines.length; i++) {
|
// Find the first non-empty line
|
||||||
const line = searchLines[i]
|
for (const line of searchLines) {
|
||||||
if (!line.trim()) {continue} // Skip empty lines
|
if (line.trim()) {
|
||||||
|
first = line;
|
||||||
// Calculate line complexity (more special chars = more unique)
|
break;
|
||||||
const specialChars = (line.match(/[^a-zA-Z0-9\s]/g) || []).length
|
|
||||||
const complexity = specialChars / line.length
|
|
||||||
|
|
||||||
// Count occurrences in content
|
|
||||||
const regex = new RegExp(line.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g")
|
|
||||||
const matches = contentStr.match(regex)
|
|
||||||
const occurrences = matches ? matches.length : 0
|
|
||||||
|
|
||||||
// Calculate uniqueness weight
|
|
||||||
const uniquenessWeight = occurrences <= 1 ? 1 : 1 / occurrences
|
|
||||||
const weight = uniquenessWeight * (0.7 + 0.3 * complexity)
|
|
||||||
|
|
||||||
if (weight > 0.5) {
|
|
||||||
// Only consider lines with high enough weight
|
|
||||||
anchors.push({ line, index: i, weight })
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by weight descending
|
// Find the last non-empty line
|
||||||
return anchors.sort((a, b) => b.weight - a.weight)
|
for (let i = searchLines.length - 1; i >= 0; i--) {
|
||||||
|
if (searchLines[i].trim()) {
|
||||||
|
last = searchLines[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to validate anchor positions
|
return { first, last };
|
||||||
function validateAnchorPositions(
|
|
||||||
anchors: { line: string; index: number }[],
|
|
||||||
content: string[],
|
|
||||||
searchLines: string[]
|
|
||||||
): number {
|
|
||||||
for (const anchor of anchors) {
|
|
||||||
const anchorIndex = content.findIndex((line) => line === anchor.line)
|
|
||||||
if (anchorIndex !== -1) {
|
|
||||||
// Check if surrounding context matches
|
|
||||||
const contextBefore = searchLines.slice(Math.max(0, anchor.index - 2), anchor.index).join("\n")
|
|
||||||
const contextAfter = searchLines.slice(anchor.index + 1, anchor.index + 3).join("\n")
|
|
||||||
const contentBefore = content.slice(Math.max(0, anchorIndex - 2), anchorIndex).join("\n")
|
|
||||||
const contentAfter = content.slice(anchorIndex + 1, anchorIndex + 3).join("\n")
|
|
||||||
|
|
||||||
const beforeSimilarity = evaluateSimilarity(contextBefore, contentBefore)
|
|
||||||
const afterSimilarity = evaluateSimilarity(contextAfter, contentAfter)
|
|
||||||
|
|
||||||
if (beforeSimilarity > 0.8 && afterSimilarity > 0.8) {
|
|
||||||
return anchorIndex - anchor.index
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Anchor-based search strategy
|
// Anchor-based search strategy
|
||||||
export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
|
export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
|
||||||
const searchLines = searchStr.split("\n")
|
const searchLines = searchStr.split("\n");
|
||||||
const anchors = identifyAnchors(searchStr, content.slice(startIndex))
|
const { first, last } = identifyAnchors(searchStr);
|
||||||
|
|
||||||
if (anchors.length === 0) {
|
if (!first || !last) {
|
||||||
return { index: -1, confidence: 0, strategy: "anchor" }
|
return { index: -1, confidence: 0, strategy: "anchor" };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to validate position using top anchors
|
let firstIndex = -1;
|
||||||
const offset = validateAnchorPositions(anchors.slice(0, 3), content.slice(startIndex), searchLines)
|
let lastIndex = -1;
|
||||||
|
|
||||||
if (offset !== -1) {
|
// Check if the first anchor is unique
|
||||||
const matchPosition = startIndex + offset
|
let firstOccurrences = 0;
|
||||||
const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n")
|
for (const contentLine of content) {
|
||||||
const similarity = getDMPSimilarity(searchStr, matchedContent)
|
if (contentLine === first) {
|
||||||
const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
|
firstOccurrences++;
|
||||||
const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstOccurrences !== 1) {
|
||||||
|
return { index: -1, confidence: 0, strategy: "anchor" };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the first anchor
|
||||||
|
for (let i = startIndex; i < content.length; i++) {
|
||||||
|
if (content[i] === first) {
|
||||||
|
firstIndex = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the last anchor
|
||||||
|
for (let i = content.length - 1; i >= startIndex; i--) {
|
||||||
|
if (content[i] === last) {
|
||||||
|
lastIndex = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstIndex === -1 || lastIndex === -1 || lastIndex <= firstIndex) {
|
||||||
|
return { index: -1, confidence: 0, strategy: "anchor" };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate the context
|
||||||
|
const expectedContext = searchLines.slice(searchLines.indexOf(first) + 1, searchLines.indexOf(last)).join("\n");
|
||||||
|
const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n");
|
||||||
|
const contextSimilarity = evaluateSimilarity(expectedContext, actualContext);
|
||||||
|
|
||||||
|
if (contextSimilarity < getAdaptiveThreshold(content.length, confidenceThreshold)) {
|
||||||
|
return { index: -1, confidence: 0, strategy: "anchor" };
|
||||||
|
}
|
||||||
|
|
||||||
|
const confidence = 1;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
index: matchPosition,
|
index: firstIndex,
|
||||||
confidence: Math.min(1, confidence), // Cap at 1
|
confidence: confidence,
|
||||||
strategy: "anchor",
|
strategy: "anchor",
|
||||||
}
|
};
|
||||||
}
|
|
||||||
|
|
||||||
return { index: -1, confidence: 0, strategy: "anchor" }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main search function that tries all strategies
|
// Main search function that tries all strategies
|
||||||
|
|||||||
Reference in New Issue
Block a user