From f001aad3069a5d0c545d11e21d15af33ac6d3a38 Mon Sep 17 00:00:00 2001 From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:09:40 -0400 Subject: [PATCH] Fix globbyLevelByLevel and increase suggestions file limit --- src/integrations/WorkspaceTracker.ts | 2 +- src/parse-source-code/index.ts | 44 ++++++++++++++++++---------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/integrations/WorkspaceTracker.ts b/src/integrations/WorkspaceTracker.ts index 9ae8199..7820564 100644 --- a/src/integrations/WorkspaceTracker.ts +++ b/src/integrations/WorkspaceTracker.ts @@ -21,7 +21,7 @@ class WorkspaceTracker { if (!cwd) { return } - const [files, _] = await listFiles(cwd, true, 500) + const [files, _] = await listFiles(cwd, true, 1_000) files.forEach((file) => this.filePaths.add(this.normalizeFilePath(file))) this.workspaceDidUpdate() } diff --git a/src/parse-source-code/index.ts b/src/parse-source-code/index.ts index 4a595c7..1437dbd 100644 --- a/src/parse-source-code/index.ts +++ b/src/parse-source-code/index.ts @@ -100,26 +100,40 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb return [files, files.length >= limit] } -// globby doesnt natively support top down level by level globbing, so we implement it ourselves +/* +Breadth-first traversal of directory structure level by level up to a limit: + - Queue-based approach ensures proper breadth-first traversal + - Processes directory patterns level by level + - Captures a representative sample of the directory structure up to the limit + - Minimizes risk of missing deeply nested files + +- Notes: + - Relies on globby to mark directories with / + - Potential for loops if symbolic links reference back to parent (we could use followSymlinks: false but that may not be ideal for some projects and it's pointless if they're not using symlinks wrong) + - Timeout mechanism prevents infinite loops +*/ async function globbyLevelByLevel(limit: number, options?: Options) { - let results: string[] = [] + let results: Set = new Set() + let queue: string[] = ["*"] + const globbingProcess = async () => { - let currentLevel = 0 - while (results.length < limit) { - const pattern = currentLevel === 0 ? "*" : `${"*/".repeat(currentLevel)}*` + while (queue.length > 0 && results.size < limit) { + const pattern = queue.shift()! const filesAtLevel = await globby(pattern, options) - if (filesAtLevel.length === 0) { - break + + for (const file of filesAtLevel) { + if (results.size >= limit) { + break + } + results.add(file) + if (file.endsWith("/")) { + queue.push(`${file}*`) + } } - results.push(...filesAtLevel) - if (results.length >= limit) { - results = results.slice(0, limit) - break - } - currentLevel++ } - return results + return Array.from(results).slice(0, limit) } + // Timeout after 10 seconds and return partial results const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error("Globbing timeout")), 10_000) @@ -128,7 +142,7 @@ async function globbyLevelByLevel(limit: number, options?: Options) { return await Promise.race([globbingProcess(), timeoutPromise]) } catch (error) { console.warn("Globbing timed out, returning partial results") - return results + return Array.from(results) } }