mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 12:21:13 -05:00
Optimize language parser loading by only loading once for all files
This commit is contained in:
@@ -1,22 +1,7 @@
|
||||
import * as path from "path"
|
||||
import { globby } from "globby"
|
||||
import * as fs from "fs/promises"
|
||||
import Parser from "web-tree-sitter"
|
||||
|
||||
import {
|
||||
javascriptQuery,
|
||||
typescriptQuery,
|
||||
pythonQuery,
|
||||
rustQuery,
|
||||
goQuery,
|
||||
cppQuery,
|
||||
cQuery,
|
||||
csharpQuery,
|
||||
rubyQuery,
|
||||
javaQuery,
|
||||
phpQuery,
|
||||
swiftQuery,
|
||||
} from "./tree-sitter-queries/tags"
|
||||
import { globby } from "globby"
|
||||
import * as path from "path"
|
||||
import { LanguageParser, loadAllLanguages } from "./languageParser"
|
||||
|
||||
async function analyzeProject(dirPath: string): Promise<string> {
|
||||
let result = ""
|
||||
@@ -27,11 +12,14 @@ async function analyzeProject(dirPath: string): Promise<string> {
|
||||
// Separate files to parse and remaining files
|
||||
const { filesToParse, remainingFiles } = separateFiles(allFiles)
|
||||
|
||||
// Load only the necessary language parsers
|
||||
const languageParsers = await loadAllLanguages(filesToParse)
|
||||
|
||||
// Parse specific files and generate result
|
||||
result += "Files parsed with ASTs:\n"
|
||||
for (const file of filesToParse) {
|
||||
result += `File: ${file}\n`
|
||||
const ast = await parseFile(file)
|
||||
const ast = await parseFile(file, languageParsers)
|
||||
result += `AST: ${JSON.stringify(ast, null, 2)}\n\n`
|
||||
}
|
||||
|
||||
@@ -121,84 +109,13 @@ This approach allows us to focus on the most relevant parts of the code (defined
|
||||
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
|
||||
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
|
||||
*/
|
||||
async function parseFile(filePath: string): Promise<string> {
|
||||
async function parseFile(filePath: string, languageParsers: LanguageParser): Promise<string> {
|
||||
const fileContent = await fs.readFile(filePath, "utf8")
|
||||
const ext = path.extname(filePath).toLowerCase().slice(1)
|
||||
await Parser.init()
|
||||
const parser = new Parser()
|
||||
let query: Parser.Query
|
||||
|
||||
switch (ext) {
|
||||
case "js":
|
||||
case "jsx":
|
||||
const JavaScript = await loadLanguage("javascript")
|
||||
parser.setLanguage(JavaScript)
|
||||
query = JavaScript.query(javascriptQuery)
|
||||
break
|
||||
case "ts":
|
||||
const TypeScript = await loadLanguage("typescript")
|
||||
parser.setLanguage(TypeScript)
|
||||
query = TypeScript.query(typescriptQuery)
|
||||
break
|
||||
case "tsx":
|
||||
const Tsx = await loadLanguage("tsx")
|
||||
parser.setLanguage(Tsx)
|
||||
query = Tsx.query(typescriptQuery)
|
||||
break
|
||||
case "py":
|
||||
const Python = await loadLanguage("python")
|
||||
parser.setLanguage(Python)
|
||||
query = Python.query(pythonQuery)
|
||||
break
|
||||
case "rs":
|
||||
const Rust = await loadLanguage("rust")
|
||||
parser.setLanguage(Rust)
|
||||
query = Rust.query(rustQuery)
|
||||
break
|
||||
case "go":
|
||||
const Go = await loadLanguage("go")
|
||||
parser.setLanguage(Go)
|
||||
query = Go.query(goQuery)
|
||||
break
|
||||
case "cpp":
|
||||
case "hpp":
|
||||
const Cpp = await loadLanguage("cpp")
|
||||
parser.setLanguage(Cpp)
|
||||
query = Cpp.query(cppQuery)
|
||||
break
|
||||
case "c":
|
||||
case "h":
|
||||
const C = await loadLanguage("c")
|
||||
parser.setLanguage(C)
|
||||
query = C.query(cQuery)
|
||||
break
|
||||
case "cs":
|
||||
const CSharp = await loadLanguage("c_sharp")
|
||||
parser.setLanguage(CSharp)
|
||||
query = CSharp.query(csharpQuery)
|
||||
break
|
||||
case "rb":
|
||||
const Ruby = await loadLanguage("ruby")
|
||||
parser.setLanguage(Ruby)
|
||||
query = Ruby.query(rubyQuery)
|
||||
break
|
||||
case "java":
|
||||
const Java = await loadLanguage("java")
|
||||
parser.setLanguage(Java)
|
||||
query = Java.query(javaQuery)
|
||||
break
|
||||
case "php":
|
||||
const PHP = await loadLanguage("php")
|
||||
parser.setLanguage(PHP)
|
||||
query = PHP.query(phpQuery)
|
||||
break
|
||||
case "swift":
|
||||
const Swift = await loadLanguage("swift")
|
||||
parser.setLanguage(Swift)
|
||||
query = Swift.query(swiftQuery)
|
||||
break
|
||||
default:
|
||||
return `Unsupported file type: ${filePath}`
|
||||
const { parser, query } = languageParsers[ext] || {}
|
||||
if (!parser || !query) {
|
||||
return `Unsupported file type: ${filePath}`
|
||||
}
|
||||
|
||||
let formattedOutput = `${filePath}:\n|----\n`
|
||||
@@ -247,8 +164,4 @@ async function parseFile(filePath: string): Promise<string> {
|
||||
return formattedOutput
|
||||
}
|
||||
|
||||
/**
 * Loads the prebuilt tree-sitter grammar for a single language.
 *
 * The grammar is read from `tree-sitter-<langName>.wasm`, resolved against the
 * directory of this module (`__dirname`).
 *
 * @param langName - Grammar name as it appears in the WASM file name (e.g. "javascript", "c_sharp").
 * @returns The language object produced by `Parser.Language.load`.
 */
async function loadLanguage(langName: string) {
	const wasmFileName = `tree-sitter-${langName}.wasm`
	const wasmPath = path.join(__dirname, wasmFileName)
	const language = await Parser.Language.load(wasmPath)
	return language
}
|
||||
|
||||
export { analyzeProject }
|
||||
|
||||
145
src/AnalyzeProject/languageParser.ts
Normal file
145
src/AnalyzeProject/languageParser.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
import * as path from "path"
|
||||
import Parser from "web-tree-sitter"
|
||||
import {
|
||||
javascriptQuery,
|
||||
typescriptQuery,
|
||||
pythonQuery,
|
||||
rustQuery,
|
||||
goQuery,
|
||||
cppQuery,
|
||||
cQuery,
|
||||
csharpQuery,
|
||||
rubyQuery,
|
||||
javaQuery,
|
||||
phpQuery,
|
||||
swiftQuery,
|
||||
} from "./tree-sitter-queries/tags"
|
||||
|
||||
/**
 * Lookup table from a file extension to the tree-sitter parser and compiled
 * query prepared for that extension.
 *
 * Keys are the lowercase extension without the leading dot (e.g. "ts", "py"),
 * matching how `loadAllLanguages` populates the table.
 */
export interface LanguageParser {
	[extension: string]: { parser: Parser; query: Parser.Query }
}
|
||||
|
||||
/**
 * Loads the prebuilt tree-sitter grammar for a single language.
 *
 * The grammar is read from `tree-sitter-<langName>.wasm`, resolved against the
 * directory of this module (`__dirname`).
 *
 * @param langName - Grammar name as it appears in the WASM file name (e.g. "javascript", "c_sharp").
 * @returns The language object produced by `Parser.Language.load`.
 */
async function loadLanguage(langName: string) {
	const wasmFileName = `tree-sitter-${langName}.wasm`
	const wasmPath = path.join(__dirname, wasmFileName)
	const language = await Parser.Language.load(wasmPath)
	return language
}
|
||||
|
||||
/** Grammar name and query source used for a single file extension. */
interface LanguageSpec {
	/** Grammar name, as used in the `tree-sitter-<langName>.wasm` file name. */
	langName: string
	/** Query source that is compiled against that grammar. */
	querySource: string
}

/**
 * Supported file extensions (lowercase, no leading dot) and the grammar/query
 * pair each one uses.
 *
 * This is a Map rather than an object literal so that lookups only ever match
 * the entries listed here and never inherited members such as "constructor".
 */
const EXTENSION_SPECS: ReadonlyMap<string, LanguageSpec> = new Map<string, LanguageSpec>([
	["js", { langName: "javascript", querySource: javascriptQuery }],
	["jsx", { langName: "javascript", querySource: javascriptQuery }],
	["ts", { langName: "typescript", querySource: typescriptQuery }],
	// tsx has its own grammar but shares the TypeScript query.
	["tsx", { langName: "tsx", querySource: typescriptQuery }],
	["py", { langName: "python", querySource: pythonQuery }],
	["rs", { langName: "rust", querySource: rustQuery }],
	["go", { langName: "go", querySource: goQuery }],
	["cpp", { langName: "cpp", querySource: cppQuery }],
	["hpp", { langName: "cpp", querySource: cppQuery }],
	["c", { langName: "c", querySource: cQuery }],
	["h", { langName: "c", querySource: cQuery }],
	["cs", { langName: "c_sharp", querySource: csharpQuery }],
	["rb", { langName: "ruby", querySource: rubyQuery }],
	["java", { langName: "java", querySource: javaQuery }],
	["php", { langName: "php", querySource: phpQuery }],
	["swift", { langName: "swift", querySource: swiftQuery }],
])

/**
 * Loads the tree-sitter parsers needed for a set of files.
 *
 * Using node bindings for tree-sitter is problematic in vscode extensions
 * because of incompatibility with electron. Going the .wasm route has the
 * advantage of not having to build for multiple architectures.
 *
 * We use web-tree-sitter and tree-sitter-wasms, which provides auto-updating
 * prebuilt WASM binaries for tree-sitter's language parsers.
 *
 * This function loads WASM modules for relevant language parsers based on input files:
 * 1. Extracts unique file extensions
 * 2. Maps extensions to language names
 * 3. Loads corresponding WASM files (containing grammar rules)
 * 4. Uses WASM modules to initialize tree-sitter parsers
 *
 * This approach optimizes performance by loading only necessary parsers once
 * for all relevant files.
 *
 * Sources:
 * - https://github.com/tree-sitter/node-tree-sitter/issues/169
 * - https://github.com/tree-sitter/node-tree-sitter/issues/168
 * - https://github.com/Gregoor/tree-sitter-wasms/blob/main/README.md
 *
 * @param filesToParse - Paths of the files that will be parsed; only their extensions are inspected.
 * @returns A table keyed by lowercase extension (no leading dot) holding a parser and a
 *   compiled query for every supported extension found in `filesToParse`. Extensions that
 *   are not supported are omitted from the table.
 */
export async function loadAllLanguages(filesToParse: string[]): Promise<LanguageParser> {
	await Parser.init()

	const extensionsToLoad = new Set(filesToParse.map((file) => path.extname(file).toLowerCase().slice(1)))

	// Grammars already loaded during this call, keyed by grammar name, so that
	// extensions sharing a grammar (js/jsx, cpp/hpp, c/h) load its WASM file once.
	const languages = new Map<string, Parser.Language>()
	const parsers: LanguageParser = {}

	for (const ext of extensionsToLoad) {
		const spec = EXTENSION_SPECS.get(ext)
		if (!spec) {
			// Not a supported extension (includes files with no extension at all).
			continue
		}

		let lang = languages.get(spec.langName)
		if (!lang) {
			lang = await loadLanguage(spec.langName)
			languages.set(spec.langName, lang)
		}

		const parser = new Parser()
		parser.setLanguage(lang)

		parsers[ext] = { parser, query: lang.query(spec.querySource) }
	}

	return parsers
}
|
||||
Reference in New Issue
Block a user