mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 04:11:10 -05:00
Refactor glob
This commit is contained in:
@@ -11,7 +11,8 @@ import { serializeError } from "serialize-error"
|
||||
import * as vscode from "vscode"
|
||||
import { ApiHandler, buildApiHandler } from "../api"
|
||||
import { TerminalManager } from "../integrations/TerminalManager"
|
||||
import { listFiles, parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
|
||||
import { parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
|
||||
import { listFiles } from "../services/glob/list-files"
|
||||
import { ClaudeDevProvider } from "./webviews/ClaudeDevProvider"
|
||||
import { ApiConfiguration } from "../shared/api"
|
||||
import { ClaudeRequestResult } from "../shared/ClaudeRequestResult"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import * as vscode from "vscode"
|
||||
import * as path from "path"
|
||||
import { listFiles } from "../services/tree-sitter/index"
|
||||
import { listFiles } from "../services/glob/list-files"
|
||||
import { ClaudeDevProvider } from "../core/webviews/ClaudeDevProvider"
|
||||
|
||||
const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0)
|
||||
|
||||
97
src/services/glob/list-files.ts
Normal file
97
src/services/glob/list-files.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import { globby, Options } from "globby"
|
||||
import os from "os"
|
||||
import * as path from "path"
|
||||
import { arePathsEqual } from "../../utils/path-helpers"
|
||||
|
||||
/**
 * Lists the entries of a directory, optionally descending into subdirectories.
 *
 * The filesystem root and the user's home directory are never enumerated: for
 * those two inputs the directory itself is returned as the only entry.
 *
 * @param dirPath   Directory to list; also used as globby's `cwd`.
 * @param recursive When true, walks subdirectories level by level, honours .gitignore
 *                  and skips a set of common dependency/build directories.
 * @param limit     Maximum number of entries to return.
 * @returns A tuple of the collected paths and a flag that is true when the
 *          number of collected paths reached `limit`.
 */
export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
	const resolvedDir = path.resolve(dirPath)

	// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
	const fsRoot = process.platform === "win32" ? path.parse(resolvedDir).root : "/"
	if (arePathsEqual(resolvedDir, fsRoot)) {
		return [[fsRoot], false]
	}

	const userHome = os.homedir()
	if (arePathsEqual(resolvedDir, userHome)) {
		return [[userHome], false]
	}

	// Directory names whose contents are skipped during a recursive listing.
	const ignoredDirNames = [
		"node_modules",
		"__pycache__",
		"env",
		"venv",
		"target/dependency",
		"build/dependencies",
		"dist",
		"out",
		"bundle",
		"vendor",
		"tmp",
		"temp",
		"deps",
		"pkg",
		"Pods",
		// '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents.
		// This way we are at least aware of the existence of hidden directories.
		".*",
	]
	const ignoreGlobs = ignoredDirNames.map((name) => `**/${name}/**`)

	const globOptions = {
		cwd: dirPath,
		dot: true, // do not ignore hidden files/directories
		absolute: true,
		markDirectories: true, // append a / to matched directories (/ is used on windows as well, so don't use path.sep)
		gitignore: recursive, // globby ignores any files that are gitignored
		ignore: recursive ? ignoreGlobs : undefined, // sensible defaults in case there is no gitignore
		onlyFiles: false, // true by default; false lists directories on their own too
	}

	// "*" globs the entries of a single directory; the recursive walk expands it one level at a time.
	let files: string[]
	if (recursive) {
		files = await globbyLevelByLevel(limit, globOptions)
	} else {
		const topLevelEntries = await globby("*", globOptions)
		files = topLevelEntries.slice(0, limit)
	}

	return [files, files.length >= limit]
}
|
||||
|
||||
/**
 * Breadth-first traversal of the directory structure, level by level, up to `limit` entries.
 *
 * - A queue of glob patterns drives the traversal: every directory found at one
 *   level enqueues a `<dir>*` pattern that is globbed on a later iteration.
 * - Captures a representative sample of the directory structure up to the limit.
 * - Minimizes the risk of missing deeply nested files.
 *
 * Notes:
 * - Relies on globby marking directories with a trailing "/".
 * - Loops are possible if symbolic links reference back to a parent (we could use
 *   followSymlinks: false, but that may not be ideal for some projects); the
 *   timeout below bounds that case.
 * - On timeout, or if globby throws, the entries collected so far are returned
 *   instead of propagating the error.
 *
 * @param limit   Maximum number of entries to collect.
 * @param options Options forwarded unchanged to every globby call.
 * @returns The collected paths in discovery order.
 */
async function globbyLevelByLevel(limit: number, options?: Options): Promise<string[]> {
	const GLOBBING_TIMEOUT_MS = 10_000

	const results: Set<string> = new Set()
	const queue: string[] = ["*"]
	let cancelled = false // set once the race is lost so the background walk stops
	let timedOut = false // distinguishes a timeout from a globby failure in the log

	// Matched paths are reused as glob patterns, so characters that globs treat as
	// syntax (groups, character classes, braces) must be escaped or directories
	// such as "(auth)" or "[id]" would not match themselves.
	const escapeGlobSyntax = (matchedPath: string) => matchedPath.replace(/[()[\]{}]/g, "\\$&")

	const globbingProcess = async () => {
		while (!cancelled && queue.length > 0 && results.size < limit) {
			const pattern = queue.shift()!
			const filesAtLevel = await globby(pattern, options)
			if (cancelled) {
				break
			}

			for (const file of filesAtLevel) {
				if (results.size >= limit) {
					break
				}
				results.add(file)
				if (file.endsWith("/")) {
					queue.push(`${escapeGlobSyntax(file)}*`)
				}
			}
		}
		return Array.from(results).slice(0, limit)
	}

	// Timeout after 10 seconds and return partial results
	let timer: ReturnType<typeof setTimeout> | undefined
	const timeoutPromise = new Promise<string[]>((_, reject) => {
		timer = setTimeout(() => {
			timedOut = true
			reject(new Error("Globbing timeout"))
		}, GLOBBING_TIMEOUT_MS)
	})

	try {
		return await Promise.race([globbingProcess(), timeoutPromise])
	} catch (error) {
		cancelled = true
		if (timedOut) {
			console.warn("Globbing timed out, returning partial results")
		} else {
			console.warn("Globbing failed, returning partial results", error)
		}
		return Array.from(results)
	} finally {
		// Always release the timer so it does not linger after the walk completes.
		clearTimeout(timer)
	}
}
|
||||
@@ -1,9 +1,7 @@
|
||||
import * as fs from "fs/promises"
|
||||
import { globby, Options } from "globby"
|
||||
import os from "os"
|
||||
import * as path from "path"
|
||||
import { listFiles } from "../glob/list-files"
|
||||
import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
|
||||
import { arePathsEqual } from "../../utils/path-helpers"
|
||||
|
||||
// TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
|
||||
export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Promise<string> {
|
||||
@@ -54,99 +52,6 @@ export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Pr
|
||||
return result ? result : "No source code definitions found."
|
||||
}
|
||||
|
||||
/**
 * Lists the entries of a directory, optionally descending into subdirectories.
 *
 * The filesystem root and the user's home directory are never enumerated: for
 * those two inputs the directory itself is returned as the only entry.
 *
 * @param dirPath   Directory to list; also used as globby's `cwd`.
 * @param recursive When true, walks subdirectories level by level, honours .gitignore
 *                  and skips a set of common dependency/build directories.
 * @param limit     Maximum number of entries to return.
 * @returns A tuple of the collected paths and a flag that is true when the
 *          number of collected paths reached `limit`.
 */
export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
	const resolvedDir = path.resolve(dirPath)

	// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
	const fsRoot = process.platform === "win32" ? path.parse(resolvedDir).root : "/"
	if (arePathsEqual(resolvedDir, fsRoot)) {
		return [[fsRoot], false]
	}

	const userHome = os.homedir()
	if (arePathsEqual(resolvedDir, userHome)) {
		return [[userHome], false]
	}

	// Directory names whose contents are skipped during a recursive listing.
	const ignoredDirNames = [
		"node_modules",
		"__pycache__",
		"env",
		"venv",
		"target/dependency",
		"build/dependencies",
		"dist",
		"out",
		"bundle",
		"vendor",
		"tmp",
		"temp",
		"deps",
		"pkg",
		"Pods",
		// '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents.
		// This way we are at least aware of the existence of hidden directories.
		".*",
	]
	const ignoreGlobs = ignoredDirNames.map((name) => `**/${name}/**`)

	const globOptions = {
		cwd: dirPath,
		dot: true, // do not ignore hidden files/directories
		absolute: true,
		markDirectories: true, // append a / to matched directories (/ is used on windows as well, so don't use path.sep)
		gitignore: recursive, // globby ignores any files that are gitignored
		ignore: recursive ? ignoreGlobs : undefined, // sensible defaults in case there is no gitignore
		onlyFiles: false, // true by default; false lists directories on their own too
	}

	// "*" globs the entries of a single directory; the recursive walk expands it one level at a time.
	let files: string[]
	if (recursive) {
		files = await globbyLevelByLevel(limit, globOptions)
	} else {
		const topLevelEntries = await globby("*", globOptions)
		files = topLevelEntries.slice(0, limit)
	}

	return [files, files.length >= limit]
}
|
||||
|
||||
/**
 * Breadth-first traversal of the directory structure, level by level, up to `limit` entries.
 *
 * - A queue of glob patterns drives the traversal: every directory found at one
 *   level enqueues a `<dir>*` pattern that is globbed on a later iteration.
 * - Captures a representative sample of the directory structure up to the limit.
 * - Minimizes the risk of missing deeply nested files.
 *
 * Notes:
 * - Relies on globby marking directories with a trailing "/".
 * - Loops are possible if symbolic links reference back to a parent (we could use
 *   followSymlinks: false, but that may not be ideal for some projects); the
 *   timeout below bounds that case.
 * - On timeout, or if globby throws, the entries collected so far are returned
 *   instead of propagating the error.
 *
 * @param limit   Maximum number of entries to collect.
 * @param options Options forwarded unchanged to every globby call.
 * @returns The collected paths in discovery order.
 */
async function globbyLevelByLevel(limit: number, options?: Options): Promise<string[]> {
	const GLOBBING_TIMEOUT_MS = 10_000

	const results: Set<string> = new Set()
	const queue: string[] = ["*"]
	let cancelled = false // set once the race is lost so the background walk stops
	let timedOut = false // distinguishes a timeout from a globby failure in the log

	// Matched paths are reused as glob patterns, so characters that globs treat as
	// syntax (groups, character classes, braces) must be escaped or directories
	// such as "(auth)" or "[id]" would not match themselves.
	const escapeGlobSyntax = (matchedPath: string) => matchedPath.replace(/[()[\]{}]/g, "\\$&")

	const globbingProcess = async () => {
		while (!cancelled && queue.length > 0 && results.size < limit) {
			const pattern = queue.shift()!
			const filesAtLevel = await globby(pattern, options)
			if (cancelled) {
				break
			}

			for (const file of filesAtLevel) {
				if (results.size >= limit) {
					break
				}
				results.add(file)
				if (file.endsWith("/")) {
					queue.push(`${escapeGlobSyntax(file)}*`)
				}
			}
		}
		return Array.from(results).slice(0, limit)
	}

	// Timeout after 10 seconds and return partial results
	let timer: ReturnType<typeof setTimeout> | undefined
	const timeoutPromise = new Promise<string[]>((_, reject) => {
		timer = setTimeout(() => {
			timedOut = true
			reject(new Error("Globbing timeout"))
		}, GLOBBING_TIMEOUT_MS)
	})

	try {
		return await Promise.race([globbingProcess(), timeoutPromise])
	} catch (error) {
		cancelled = true
		if (timedOut) {
			console.warn("Globbing timed out, returning partial results")
		} else {
			console.warn("Globbing failed, returning partial results", error)
		}
		return Array.from(results)
	} finally {
		// Always release the timer so it does not linger after the walk completes.
		clearTimeout(timer)
	}
}
|
||||
|
||||
function separateFiles(allFiles: string[]): { filesToParse: string[]; remainingFiles: string[] } {
|
||||
const extensions = [
|
||||
"js",
|
||||
|
||||
Reference in New Issue
Block a user