mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 20:31:37 -05:00
Refactor glob
This commit is contained in:
@@ -11,7 +11,8 @@ import { serializeError } from "serialize-error"
|
|||||||
import * as vscode from "vscode"
|
import * as vscode from "vscode"
|
||||||
import { ApiHandler, buildApiHandler } from "../api"
|
import { ApiHandler, buildApiHandler } from "../api"
|
||||||
import { TerminalManager } from "../integrations/TerminalManager"
|
import { TerminalManager } from "../integrations/TerminalManager"
|
||||||
import { listFiles, parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
|
import { parseSourceCodeForDefinitionsTopLevel } from "../services/tree-sitter"
|
||||||
|
import { listFiles } from "../services/glob/list-files"
|
||||||
import { ClaudeDevProvider } from "./webviews/ClaudeDevProvider"
|
import { ClaudeDevProvider } from "./webviews/ClaudeDevProvider"
|
||||||
import { ApiConfiguration } from "../shared/api"
|
import { ApiConfiguration } from "../shared/api"
|
||||||
import { ClaudeRequestResult } from "../shared/ClaudeRequestResult"
|
import { ClaudeRequestResult } from "../shared/ClaudeRequestResult"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import * as vscode from "vscode"
|
import * as vscode from "vscode"
|
||||||
import * as path from "path"
|
import * as path from "path"
|
||||||
import { listFiles } from "../services/tree-sitter/index"
|
import { listFiles } from "../services/glob/list-files"
|
||||||
import { ClaudeDevProvider } from "../core/webviews/ClaudeDevProvider"
|
import { ClaudeDevProvider } from "../core/webviews/ClaudeDevProvider"
|
||||||
|
|
||||||
const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0)
|
const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0)
|
||||||
|
|||||||
97
src/services/glob/list-files.ts
Normal file
97
src/services/glob/list-files.ts
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
import { globby, Options } from "globby"
|
||||||
|
import os from "os"
|
||||||
|
import * as path from "path"
|
||||||
|
import { arePathsEqual } from "../../utils/path-helpers"
|
||||||
|
|
||||||
|
/**
 * Lists the entries found under `dirPath`, capped at `limit` results.
 *
 * @param dirPath - Directory to list; also handed to globby as its `cwd`.
 * @param recursive - When true, nested directories are walked via `globbyLevelByLevel`
 *   with gitignore handling and a default ignore list; when false only the `*` pattern is globbed.
 * @param limit - Maximum number of entries to return.
 * @returns A tuple of the collected entries and a flag that is true when the number of
 *   returned entries is greater than or equal to `limit`.
 */
export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
	const resolvedDir = path.resolve(dirPath)

	// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
	const fsRoot = process.platform === "win32" ? path.parse(resolvedDir).root : "/"
	if (arePathsEqual(resolvedDir, fsRoot)) {
		// Only the root path itself is reported, never its contents.
		return [[fsRoot], false]
	}

	const userHome = os.homedir()
	if (arePathsEqual(resolvedDir, userHome)) {
		// Same treatment for the home directory.
		return [[userHome], false]
	}

	const ignoredDirNames = [
		"node_modules",
		"__pycache__",
		"env",
		"venv",
		"target/dependency",
		"build/dependencies",
		"dist",
		"out",
		"bundle",
		"vendor",
		"tmp",
		"temp",
		"deps",
		"pkg",
		"Pods",
		// '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents.
		// This way we are at least aware of the existence of hidden directories.
		".*",
	]
	const ignoreGlobs = ignoredDirNames.map((name) => `**/${name}/**`)

	const globOptions = {
		cwd: dirPath,
		// do not ignore hidden files/directories
		dot: true,
		absolute: true,
		// Append a / on any directories matched (/ is used on windows as well, so dont use path.sep)
		markDirectories: true,
		// globby ignores any files that are gitignored
		gitignore: recursive,
		// just in case there is no gitignore, we ignore sensible defaults
		ignore: recursive ? ignoreGlobs : undefined,
		// true by default, false means it will list directories on their own too
		onlyFiles: false,
	}

	// * globs all files in one dir, ** globs files in nested directories
	let entries: string[]
	if (recursive) {
		entries = await globbyLevelByLevel(limit, globOptions)
	} else {
		const topLevel = await globby("*", globOptions)
		entries = topLevel.slice(0, limit)
	}

	return [entries, entries.length >= limit]
}
|
||||||
|
|
||||||
|
/**
 * Breadth-first traversal of a directory structure, level by level, up to a limit.
 *
 * - A queue of glob patterns starts with `*`; every matched directory enqueues `<dir>*`,
 *   so shallower levels are always processed before deeper ones.
 * - Captures a representative sample of the directory structure up to the limit.
 *
 * Notes:
 * - Relies on globby to mark directories with a trailing `/` (the `markDirectories` option).
 * - Symbolic links that point back to a parent can make the traversal loop
 *   (`followSymlinks: false` would avoid that but may not be ideal for some projects).
 *   The timeout is what bounds such a loop.
 * - NOTE(review): matched directory paths are reused verbatim as glob patterns, so a directory
 *   name containing glob metacharacters is presumably interpreted as a pattern — confirm and
 *   escape if that matters.
 *
 * @param limit - Maximum number of entries to collect.
 * @param options - Options forwarded unchanged to every `globby` call.
 * @param timeoutMs - How long to wait before giving up and returning what was collected so far.
 * @returns The collected entries (at most `limit`). On timeout or on a globby failure the
 *   partial results gathered so far are returned instead of throwing.
 */
async function globbyLevelByLevel(limit: number, options?: Options, timeoutMs: number = 10_000): Promise<string[]> {
	const results = new Set<string>()
	const queue: string[] = ["*"]
	// Set by the timer so the background loop stops instead of globbing on after we returned.
	let timedOut = false

	const globbingProcess = async (): Promise<string[]> => {
		while (queue.length > 0 && results.size < limit && !timedOut) {
			const pattern = queue.shift()
			if (pattern === undefined) {
				break
			}
			const filesAtLevel = await globby(pattern, options)
			if (timedOut) {
				// The caller already received a snapshot; do not keep mutating shared state.
				break
			}

			for (const file of filesAtLevel) {
				if (results.size >= limit) {
					break
				}
				results.add(file)
				if (file.endsWith("/")) {
					// Directory: schedule its direct children for the next level.
					queue.push(`${file}*`)
				}
			}
		}
		return Array.from(results).slice(0, limit)
	}

	// Time out and return partial results rather than hanging on huge or cyclic trees.
	let timer: ReturnType<typeof setTimeout> | undefined
	const timeoutPromise = new Promise<string[]>((_, reject) => {
		timer = setTimeout(() => {
			timedOut = true
			reject(new Error("Globbing timeout"))
		}, timeoutMs)
	})

	try {
		return await Promise.race([globbingProcess(), timeoutPromise])
	} catch (error) {
		// Best effort either way, but report what actually happened.
		if (timedOut) {
			console.warn("Globbing timed out, returning partial results")
		} else {
			console.warn("Globbing failed, returning partial results", error)
		}
		return Array.from(results)
	} finally {
		// Always disarm the timer so it does not linger after globbing has finished.
		clearTimeout(timer)
	}
}
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
import * as fs from "fs/promises"
|
import * as fs from "fs/promises"
|
||||||
import { globby, Options } from "globby"
|
|
||||||
import os from "os"
|
|
||||||
import * as path from "path"
|
import * as path from "path"
|
||||||
|
import { listFiles } from "../glob/list-files"
|
||||||
import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
|
import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
|
||||||
import { arePathsEqual } from "../../utils/path-helpers"
|
|
||||||
|
|
||||||
// TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
|
// TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
|
||||||
export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Promise<string> {
|
export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Promise<string> {
|
||||||
@@ -54,99 +52,6 @@ export async function parseSourceCodeForDefinitionsTopLevel(dirPath: string): Pr
|
|||||||
return result ? result : "No source code definitions found."
|
return result ? result : "No source code definitions found."
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> {
|
|
||||||
const absolutePath = path.resolve(dirPath)
|
|
||||||
// Do not allow listing files in root or home directory, which Claude tends to want to do when the user's prompt is vague.
|
|
||||||
const root = process.platform === "win32" ? path.parse(absolutePath).root : "/"
|
|
||||||
const isRoot = arePathsEqual(absolutePath, root)
|
|
||||||
if (isRoot) {
|
|
||||||
return [[root], false]
|
|
||||||
}
|
|
||||||
const homeDir = os.homedir()
|
|
||||||
const isHomeDir = arePathsEqual(absolutePath, homeDir)
|
|
||||||
if (isHomeDir) {
|
|
||||||
return [[homeDir], false]
|
|
||||||
}
|
|
||||||
|
|
||||||
const dirsToIgnore = [
|
|
||||||
"node_modules",
|
|
||||||
"__pycache__",
|
|
||||||
"env",
|
|
||||||
"venv",
|
|
||||||
"target/dependency",
|
|
||||||
"build/dependencies",
|
|
||||||
"dist",
|
|
||||||
"out",
|
|
||||||
"bundle",
|
|
||||||
"vendor",
|
|
||||||
"tmp",
|
|
||||||
"temp",
|
|
||||||
"deps",
|
|
||||||
"pkg",
|
|
||||||
"Pods",
|
|
||||||
".*", // '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents. This way we are at least aware of the existence of hidden directories.
|
|
||||||
].map((dir) => `**/${dir}/**`)
|
|
||||||
|
|
||||||
const options = {
|
|
||||||
cwd: dirPath,
|
|
||||||
dot: true, // do not ignore hidden files/directories
|
|
||||||
absolute: true,
|
|
||||||
markDirectories: true, // Append a / on any directories matched (/ is used on windows as well, so dont use path.sep)
|
|
||||||
gitignore: recursive, // globby ignores any files that are gitignored
|
|
||||||
ignore: recursive ? dirsToIgnore : undefined, // just in case there is no gitignore, we ignore sensible defaults
|
|
||||||
onlyFiles: false, // true by default, false means it will list directories on their own too
|
|
||||||
}
|
|
||||||
// * globs all files in one dir, ** globs files in nested directories
|
|
||||||
const files = recursive ? await globbyLevelByLevel(limit, options) : (await globby("*", options)).slice(0, limit)
|
|
||||||
return [files, files.length >= limit]
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Breadth-first traversal of directory structure level by level up to a limit:
|
|
||||||
- Queue-based approach ensures proper breadth-first traversal
|
|
||||||
- Processes directory patterns level by level
|
|
||||||
- Captures a representative sample of the directory structure up to the limit
|
|
||||||
- Minimizes risk of missing deeply nested files
|
|
||||||
|
|
||||||
- Notes:
|
|
||||||
- Relies on globby to mark directories with /
|
|
||||||
- Potential for loops if symbolic links reference back to parent (we could use followSymlinks: false but that may not be ideal for some projects and it's pointless if they're not using symlinks wrong)
|
|
||||||
- Timeout mechanism prevents infinite loops
|
|
||||||
*/
|
|
||||||
async function globbyLevelByLevel(limit: number, options?: Options) {
|
|
||||||
let results: Set<string> = new Set()
|
|
||||||
let queue: string[] = ["*"]
|
|
||||||
|
|
||||||
const globbingProcess = async () => {
|
|
||||||
while (queue.length > 0 && results.size < limit) {
|
|
||||||
const pattern = queue.shift()!
|
|
||||||
const filesAtLevel = await globby(pattern, options)
|
|
||||||
|
|
||||||
for (const file of filesAtLevel) {
|
|
||||||
if (results.size >= limit) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
results.add(file)
|
|
||||||
if (file.endsWith("/")) {
|
|
||||||
queue.push(`${file}*`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Array.from(results).slice(0, limit)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Timeout after 10 seconds and return partial results
|
|
||||||
const timeoutPromise = new Promise<string[]>((_, reject) => {
|
|
||||||
setTimeout(() => reject(new Error("Globbing timeout")), 10_000)
|
|
||||||
})
|
|
||||||
try {
|
|
||||||
return await Promise.race([globbingProcess(), timeoutPromise])
|
|
||||||
} catch (error) {
|
|
||||||
console.warn("Globbing timed out, returning partial results")
|
|
||||||
return Array.from(results)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function separateFiles(allFiles: string[]): { filesToParse: string[]; remainingFiles: string[] } {
|
function separateFiles(allFiles: string[]): { filesToParse: string[]; remainingFiles: string[] } {
|
||||||
const extensions = [
|
const extensions = [
|
||||||
"js",
|
"js",
|
||||||
|
|||||||
Reference in New Issue
Block a user