mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 12:21:13 -05:00
Optimize language parser loading by only loading once for all files
This commit is contained in:
@@ -1,22 +1,7 @@
|
|||||||
import * as path from "path"
|
|
||||||
import { globby } from "globby"
|
|
||||||
import * as fs from "fs/promises"
|
import * as fs from "fs/promises"
|
||||||
import Parser from "web-tree-sitter"
|
import { globby } from "globby"
|
||||||
|
import * as path from "path"
|
||||||
import {
|
import { LanguageParser, loadAllLanguages } from "./languageParser"
|
||||||
javascriptQuery,
|
|
||||||
typescriptQuery,
|
|
||||||
pythonQuery,
|
|
||||||
rustQuery,
|
|
||||||
goQuery,
|
|
||||||
cppQuery,
|
|
||||||
cQuery,
|
|
||||||
csharpQuery,
|
|
||||||
rubyQuery,
|
|
||||||
javaQuery,
|
|
||||||
phpQuery,
|
|
||||||
swiftQuery,
|
|
||||||
} from "./tree-sitter-queries/tags"
|
|
||||||
|
|
||||||
async function analyzeProject(dirPath: string): Promise<string> {
|
async function analyzeProject(dirPath: string): Promise<string> {
|
||||||
let result = ""
|
let result = ""
|
||||||
@@ -27,11 +12,14 @@ async function analyzeProject(dirPath: string): Promise<string> {
|
|||||||
// Separate files to parse and remaining files
|
// Separate files to parse and remaining files
|
||||||
const { filesToParse, remainingFiles } = separateFiles(allFiles)
|
const { filesToParse, remainingFiles } = separateFiles(allFiles)
|
||||||
|
|
||||||
|
// Load only the necessary language parsers
|
||||||
|
const languageParsers = await loadAllLanguages(filesToParse)
|
||||||
|
|
||||||
// Parse specific files and generate result
|
// Parse specific files and generate result
|
||||||
result += "Files parsed with ASTs:\n"
|
result += "Files parsed with ASTs:\n"
|
||||||
for (const file of filesToParse) {
|
for (const file of filesToParse) {
|
||||||
result += `File: ${file}\n`
|
result += `File: ${file}\n`
|
||||||
const ast = await parseFile(file)
|
const ast = await parseFile(file, languageParsers)
|
||||||
result += `AST: ${JSON.stringify(ast, null, 2)}\n\n`
|
result += `AST: ${JSON.stringify(ast, null, 2)}\n\n`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,84 +109,13 @@ This approach allows us to focus on the most relevant parts of the code (defined
|
|||||||
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
|
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
|
||||||
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
|
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
|
||||||
*/
|
*/
|
||||||
async function parseFile(filePath: string): Promise<string> {
|
async function parseFile(filePath: string, languageParsers: LanguageParser): Promise<string> {
|
||||||
const fileContent = await fs.readFile(filePath, "utf8")
|
const fileContent = await fs.readFile(filePath, "utf8")
|
||||||
const ext = path.extname(filePath).toLowerCase().slice(1)
|
const ext = path.extname(filePath).toLowerCase().slice(1)
|
||||||
await Parser.init()
|
|
||||||
const parser = new Parser()
|
|
||||||
let query: Parser.Query
|
|
||||||
|
|
||||||
switch (ext) {
|
const { parser, query } = languageParsers[ext] || {}
|
||||||
case "js":
|
if (!parser || !query) {
|
||||||
case "jsx":
|
return `Unsupported file type: ${filePath}`
|
||||||
const JavaScript = await loadLanguage("javascript")
|
|
||||||
parser.setLanguage(JavaScript)
|
|
||||||
query = JavaScript.query(javascriptQuery)
|
|
||||||
break
|
|
||||||
case "ts":
|
|
||||||
const TypeScript = await loadLanguage("typescript")
|
|
||||||
parser.setLanguage(TypeScript)
|
|
||||||
query = TypeScript.query(typescriptQuery)
|
|
||||||
break
|
|
||||||
case "tsx":
|
|
||||||
const Tsx = await loadLanguage("tsx")
|
|
||||||
parser.setLanguage(Tsx)
|
|
||||||
query = Tsx.query(typescriptQuery)
|
|
||||||
break
|
|
||||||
case "py":
|
|
||||||
const Python = await loadLanguage("python")
|
|
||||||
parser.setLanguage(Python)
|
|
||||||
query = Python.query(pythonQuery)
|
|
||||||
break
|
|
||||||
case "rs":
|
|
||||||
const Rust = await loadLanguage("rust")
|
|
||||||
parser.setLanguage(Rust)
|
|
||||||
query = Rust.query(rustQuery)
|
|
||||||
break
|
|
||||||
case "go":
|
|
||||||
const Go = await loadLanguage("go")
|
|
||||||
parser.setLanguage(Go)
|
|
||||||
query = Go.query(goQuery)
|
|
||||||
break
|
|
||||||
case "cpp":
|
|
||||||
case "hpp":
|
|
||||||
const Cpp = await loadLanguage("cpp")
|
|
||||||
parser.setLanguage(Cpp)
|
|
||||||
query = Cpp.query(cppQuery)
|
|
||||||
break
|
|
||||||
case "c":
|
|
||||||
case "h":
|
|
||||||
const C = await loadLanguage("c")
|
|
||||||
parser.setLanguage(C)
|
|
||||||
query = C.query(cQuery)
|
|
||||||
break
|
|
||||||
case "cs":
|
|
||||||
const CSharp = await loadLanguage("c_sharp")
|
|
||||||
parser.setLanguage(CSharp)
|
|
||||||
query = CSharp.query(csharpQuery)
|
|
||||||
break
|
|
||||||
case "rb":
|
|
||||||
const Ruby = await loadLanguage("ruby")
|
|
||||||
parser.setLanguage(Ruby)
|
|
||||||
query = Ruby.query(rubyQuery)
|
|
||||||
break
|
|
||||||
case "java":
|
|
||||||
const Java = await loadLanguage("java")
|
|
||||||
parser.setLanguage(Java)
|
|
||||||
query = Java.query(javaQuery)
|
|
||||||
break
|
|
||||||
case "php":
|
|
||||||
const PHP = await loadLanguage("php")
|
|
||||||
parser.setLanguage(PHP)
|
|
||||||
query = PHP.query(phpQuery)
|
|
||||||
break
|
|
||||||
case "swift":
|
|
||||||
const Swift = await loadLanguage("swift")
|
|
||||||
parser.setLanguage(Swift)
|
|
||||||
query = Swift.query(swiftQuery)
|
|
||||||
break
|
|
||||||
default:
|
|
||||||
return `Unsupported file type: ${filePath}`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let formattedOutput = `${filePath}:\n|----\n`
|
let formattedOutput = `${filePath}:\n|----\n`
|
||||||
@@ -247,8 +164,4 @@ async function parseFile(filePath: string): Promise<string> {
|
|||||||
return formattedOutput
|
return formattedOutput
|
||||||
}
|
}
|
||||||
|
|
||||||
async function loadLanguage(langName: string) {
|
|
||||||
return await Parser.Language.load(path.join(__dirname, `tree-sitter-${langName}.wasm`))
|
|
||||||
}
|
|
||||||
|
|
||||||
export { analyzeProject }
|
export { analyzeProject }
|
||||||
|
|||||||
145
src/AnalyzeProject/languageParser.ts
Normal file
145
src/AnalyzeProject/languageParser.ts
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
import * as path from "path"
|
||||||
|
import Parser from "web-tree-sitter"
|
||||||
|
import {
|
||||||
|
javascriptQuery,
|
||||||
|
typescriptQuery,
|
||||||
|
pythonQuery,
|
||||||
|
rustQuery,
|
||||||
|
goQuery,
|
||||||
|
cppQuery,
|
||||||
|
cQuery,
|
||||||
|
csharpQuery,
|
||||||
|
rubyQuery,
|
||||||
|
javaQuery,
|
||||||
|
phpQuery,
|
||||||
|
swiftQuery,
|
||||||
|
} from "./tree-sitter-queries/tags"
|
||||||
|
|
||||||
|
/**
 * Lookup table of ready-to-use tree-sitter parsers.
 *
 * Keys are file extensions, lowercased and without the leading dot
 * (e.g. "ts", "py"). Each entry pairs a parser with the query that is
 * run against the trees that parser produces.
 */
export interface LanguageParser {
	[extension: string]: {
		/** Parser instance that already has its language set. */
		parser: Parser
		/** Query compiled against the same language as `parser`. */
		query: Parser.Query
	}
}
|
||||||
|
|
||||||
|
/**
 * Loads a tree-sitter grammar from its WASM file.
 *
 * The file is expected to sit next to this module and to be named
 * `tree-sitter-<langName>.wasm`.
 *
 * @param langName Grammar name used in the WASM file name (e.g. "javascript", "c_sharp").
 * @returns The loaded `Parser.Language`; the promise rejects if the load fails.
 */
async function loadLanguage(langName: string) {
	const wasmFile = `tree-sitter-${langName}.wasm`
	const wasmPath = path.join(__dirname, wasmFile)
	return await Parser.Language.load(wasmPath)
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Using node bindings for tree-sitter is problematic in vscode extensions
|
||||||
|
because of incompatibility with electron. Going the .wasm route has the
|
||||||
|
advantage of not having to build for multiple architectures.
|
||||||
|
|
||||||
|
We use web-tree-sitter and tree-sitter-wasms which provides auto-updating prebuilt WASM binaries for tree-sitter's language parsers.
|
||||||
|
|
||||||
|
This function loads WASM modules for relevant language parsers based on input files:
|
||||||
|
1. Extracts unique file extensions
|
||||||
|
2. Maps extensions to language names
|
||||||
|
3. Loads corresponding WASM files (containing grammar rules)
|
||||||
|
4. Uses WASM modules to initialize tree-sitter parsers
|
||||||
|
|
||||||
|
This approach optimizes performance by loading only necessary parsers once for all relevant files.
|
||||||
|
|
||||||
|
Sources:
|
||||||
|
- https://github.com/tree-sitter/node-tree-sitter/issues/169
|
||||||
|
- https://github.com/tree-sitter/node-tree-sitter/issues/168
|
||||||
|
- https://github.com/Gregoor/tree-sitter-wasms/blob/main/README.md
|
||||||
|
*/
|
||||||
|
/**
 * Builds a parser and a tag query for every supported file extension that
 * occurs in `filesToParse`.
 *
 * Each grammar (WASM module) is loaded at most once, even when several
 * extensions share it (e.g. "js" and "jsx"). Extensions that are not in the
 * table below are silently skipped, so callers should treat a missing entry
 * in the result as "unsupported file type".
 *
 * @param filesToParse Paths of the files that are going to be parsed; only their extensions are inspected.
 * @returns A map from lowercase extension (without the dot) to `{ parser, query }`.
 * @throws Propagates any rejection from `Parser.init()`, from loading a grammar, or from compiling a query.
 */
export async function loadAllLanguages(filesToParse: string[]): Promise<LanguageParser> {
	await Parser.init()

	// Single source of truth: extension -> grammar name + tag query.
	// A Map is used on purpose: unlike `ext in someObject`, Map lookups never
	// match inherited Object.prototype members, so a file such as
	// "foo.constructor" or "foo.__proto__" is treated as unsupported instead of
	// triggering a bogus grammar load that rejects and aborts the whole run.
	const supportedExtensions = new Map<string, { langName: string; queryText: string }>([
		["js", { langName: "javascript", queryText: javascriptQuery }],
		["jsx", { langName: "javascript", queryText: javascriptQuery }],
		["ts", { langName: "typescript", queryText: typescriptQuery }],
		["tsx", { langName: "tsx", queryText: typescriptQuery }],
		["py", { langName: "python", queryText: pythonQuery }],
		["rs", { langName: "rust", queryText: rustQuery }],
		["go", { langName: "go", queryText: goQuery }],
		["cpp", { langName: "cpp", queryText: cppQuery }],
		["hpp", { langName: "cpp", queryText: cppQuery }],
		["c", { langName: "c", queryText: cQuery }],
		["h", { langName: "c", queryText: cQuery }],
		["cs", { langName: "c_sharp", queryText: csharpQuery }],
		["rb", { langName: "ruby", queryText: rubyQuery }],
		["java", { langName: "java", queryText: javaQuery }],
		["php", { langName: "php", queryText: phpQuery }],
		["swift", { langName: "swift", queryText: swiftQuery }],
	])

	// Deduplicate up front so each extension is handled exactly once.
	const extensionsToLoad = new Set(filesToParse.map((file) => path.extname(file).toLowerCase().slice(1)))

	// Grammars already loaded during this call, keyed by grammar name.
	const loadedLanguages = new Map<string, Parser.Language>()
	const parsers: LanguageParser = {}

	for (const ext of extensionsToLoad) {
		const spec = supportedExtensions.get(ext)
		if (!spec) {
			continue
		}

		let lang = loadedLanguages.get(spec.langName)
		if (!lang) {
			lang = await loadLanguage(spec.langName)
			loadedLanguages.set(spec.langName, lang)
		}

		// Each extension gets its own parser instance, as before.
		const parser = new Parser()
		parser.setLanguage(lang)

		parsers[ext] = { parser, query: lang.query(spec.queryText) }
	}

	return parsers
}
|
||||||
Reference in New Issue
Block a user