diff --git a/src/core/ClaudeDev.ts b/src/core/ClaudeDev.ts
index a5a55ee..c974ebc 100644
--- a/src/core/ClaudeDev.ts
+++ b/src/core/ClaudeDev.ts
@@ -11,7 +11,8 @@ import { serializeError } from "serialize-error"
 import * as vscode from "vscode"
 import { ApiHandler, buildApiHandler } from "../api"
 import { TerminalManager } from "../integrations/TerminalManager"
-import { listFiles, parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
+import { parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
+import { listFiles } from "../services/glob/list-files"
 import { ClaudeDevProvider } from "./webviews/ClaudeDevProvider"
 import { ApiConfiguration } from "../shared/api"
 import { ClaudeRequestResult } from "../shared/ClaudeRequestResult"
diff --git a/src/integrations/WorkspaceTracker.ts b/src/integrations/WorkspaceTracker.ts
index 2e6baf8..b3782ff 100644
--- a/src/integrations/WorkspaceTracker.ts
+++ b/src/integrations/WorkspaceTracker.ts
@@ -1,6 +1,6 @@
 import * as vscode from "vscode"
 import * as path from "path"
-import { listFiles } from "../services/tree-sitter/index"
+import { listFiles } from "../services/glob/list-files"
 import { ClaudeDevProvider } from "../core/webviews/ClaudeDevProvider"
 
 const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0)
diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts
new file mode 100644
index 0000000..1f8b532
--- /dev/null
+++ b/src/services/glob/list-files.ts
@@ -0,0 +1,97 @@
+import { globby, Options } from "globby"
+import os from "os"
+import * as path from "path"
+import { arePathsEqual } from "../../utils/path-helpers"
+
+export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
+	const absolutePath = path.resolve(dirPath)
+	// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
+	const root = process.platform === "win32" ? path.parse(absolutePath).root : "/"
+	const isRoot = arePathsEqual(absolutePath, root)
+	if (isRoot) {
+		return [[root], false]
+	}
+	const homeDir = os.homedir()
+	const isHomeDir = arePathsEqual(absolutePath, homeDir)
+	if (isHomeDir) {
+		return [[homeDir], false]
+	}
+
+	const dirsToIgnore = [
+		"node_modules",
+		"__pycache__",
+		"env",
+		"venv",
+		"target/dependency",
+		"build/dependencies",
+		"dist",
+		"out",
+		"bundle",
+		"vendor",
+		"tmp",
+		"temp",
+		"deps",
+		"pkg",
+		"Pods",
+		".*", // '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents. This way we are at least aware of the existence of hidden directories.
+	].map((dir) => `**/${dir}/**`)
+
+	const options = {
+		cwd: dirPath,
+		dot: true, // do not ignore hidden files/directories
+		absolute: true,
+		markDirectories: true, // Append a / on any directories matched (/ is used on windows as well, so dont use path.sep)
+		gitignore: recursive, // globby ignores any files that are gitignored
+		ignore: recursive ? dirsToIgnore : undefined, // just in case there is no gitignore, we ignore sensible defaults
+		onlyFiles: false, // true by default, false means it will list directories on their own too
+	}
+	// * globs all files in one dir, ** globs files in nested directories
+	const files = recursive ? await globbyLevelByLevel(limit, options) : (await globby("*", options)).slice(0, limit)
+	return [files, files.length >= limit]
+}
+
+/*
+Breadth-first traversal of directory structure level by level up to a limit:
+   - Queue-based approach ensures proper breadth-first traversal
+   - Processes directory patterns level by level
+   - Captures a representative sample of the directory structure up to the limit
+   - Minimizes risk of missing deeply nested files
+
+- Notes:
+   - Relies on globby to mark directories with /
+   - Potential for loops if symbolic links reference back to parent (we could use followSymlinks: false but that may not be ideal for some projects and it's pointless if they're not using symlinks wrong)
+   - Timeout mechanism prevents infinite loops
+*/
+async function globbyLevelByLevel(limit: number, options?: Options) {
+	let results: Set<string> = new Set()
+	let queue: string[] = ["*"]
+
+	const globbingProcess = async () => {
+		while (queue.length > 0 && results.size < limit) {
+			const pattern = queue.shift()!
+			const filesAtLevel = await globby(pattern, options)
+
+			for (const file of filesAtLevel) {
+				if (results.size >= limit) {
+					break
+				}
+				results.add(file)
+				if (file.endsWith("/")) {
+					queue.push(`${file}*`)
+				}
+			}
+		}
+		return Array.from(results).slice(0, limit)
+	}
+
+	// Timeout after 10 seconds and return partial results
+	const timeoutPromise = new Promise<string[]>((_, reject) => {
+		setTimeout(() => reject(new Error("Globbing timeout")), 10_000)
+	})
+	try {
+		return await Promise.race([globbingProcess(), timeoutPromise])
+	} catch (error) {
+		console.warn("Globbing timed out, returning partial results")
+		return Array.from(results)
+	}
+}
diff --git a/src/services/tree-sitter/index.ts b/src/services/tree-sitter/index.ts
index 036746a..2792817 100644
--- a/src/services/tree-sitter/index.ts
+++ b/src/services/tree-sitter/index.ts
@@ -1,9 +1,7 @@
 import * as fs from "fs/promises"
-import { globby, Options } from "globby"
-import os from "os"
 import * as path from "path"
+import { listFiles } from "../glob/list-files"
 import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
-import { arePathsEqual } from "../../utils/path-helpers"
 
 // TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
 export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Promise<string> {
@@ -54,99 +52,6 @@ export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Pr
 	return result ? result : "No source code definitions found."
 }
 
-export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
-	const absolutePath = path.resolve(dirPath)
-	// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
-	const root = process.platform === "win32" ? path.parse(absolutePath).root : "/"
-	const isRoot = arePathsEqual(absolutePath, root)
-	if (isRoot) {
-		return [[root], false]
-	}
-	const homeDir = os.homedir()
-	const isHomeDir = arePathsEqual(absolutePath, homeDir)
-	if (isHomeDir) {
-		return [[homeDir], false]
-	}
-
-	const dirsToIgnore = [
-		"node_modules",
-		"__pycache__",
-		"env",
-		"venv",
-		"target/dependency",
-		"build/dependencies",
-		"dist",
-		"out",
-		"bundle",
-		"vendor",
-		"tmp",
-		"temp",
-		"deps",
-		"pkg",
-		"Pods",
-		".*", // '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents. This way we are at least aware of the existence of hidden directories.
-	].map((dir) => `**/${dir}/**`)
-
-	const options = {
-		cwd: dirPath,
-		dot: true, // do not ignore hidden files/directories
-		absolute: true,
-		markDirectories: true, // Append a / on any directories matched (/ is used on windows as well, so dont use path.sep)
-		gitignore: recursive, // globby ignores any files that are gitignored
-		ignore: recursive ? dirsToIgnore : undefined, // just in case there is no gitignore, we ignore sensible defaults
-		onlyFiles: false, // true by default, false means it will list directories on their own too
-	}
-	// * globs all files in one dir, ** globs files in nested directories
-	const files = recursive ? await globbyLevelByLevel(limit, options) : (await globby("*", options)).slice(0, limit)
-	return [files, files.length >= limit]
-}
-
-/*
-Breadth-first traversal of directory structure level by level up to a limit:
-   - Queue-based approach ensures proper breadth-first traversal
-   - Processes directory patterns level by level
-   - Captures a representative sample of the directory structure up to the limit
-   - Minimizes risk of missing deeply nested files
-
-- Notes:
-   - Relies on globby to mark directories with /
-   - Potential for loops if symbolic links reference back to parent (we could use followSymlinks: false but that may not be ideal for some projects and it's pointless if they're not using symlinks wrong)
-   - Timeout mechanism prevents infinite loops
-*/
-async function globbyLevelByLevel(limit: number, options?: Options) {
-	let results: Set<string> = new Set()
-	let queue: string[] = ["*"]
-
-	const globbingProcess = async () => {
-		while (queue.length > 0 && results.size < limit) {
-			const pattern = queue.shift()!
-			const filesAtLevel = await globby(pattern, options)
-
-			for (const file of filesAtLevel) {
-				if (results.size >= limit) {
-					break
-				}
-				results.add(file)
-				if (file.endsWith("/")) {
-					queue.push(`${file}*`)
-				}
-			}
-		}
-		return Array.from(results).slice(0, limit)
-	}
-
-	// Timeout after 10 seconds and return partial results
-	const timeoutPromise = new Promise<string[]>((_, reject) => {
-		setTimeout(() => reject(new Error("Globbing timeout")), 10_000)
-	})
-	try {
-		return await Promise.race([globbingProcess(), timeoutPromise])
-	} catch (error) {
-		console.warn("Globbing timed out, returning partial results")
-		return Array.from(results)
-	}
-}
-
 function separateFiles(allFiles: string[]): { filesToParse: string[]; remainingFiles: string[] } {
 	const extensions = [
 		"js",