Update language queries to only capture definitions; modify parsing logic to output definition names

This commit is contained in:
Saoud Rizwan
2024-07-27 11:44:02 -04:00
parent bd3089f8aa
commit 35a6ecbca2
14 changed files with 153 additions and 204 deletions

View File

@@ -15,19 +15,28 @@ async function analyzeProject(dirPath: string): Promise<string> {
// Load only the necessary language parsers
const languageParsers = await loadRequiredLanguageParsers(filesToParse)
// Parse specific files and generate result
result += "Files parsed with ASTs:\n"
// Parse specific files we have language parsers for
const filesWithoutDefinitions: string[] = []
for (const file of filesToParse) {
result += `File: ${file}\n`
const ast = await parseFile(file, languageParsers)
result += `AST: ${JSON.stringify(ast, null, 2)}\n\n`
const definitions = await parseFile(file, languageParsers)
if (definitions) {
if (!result) {
result += "# Source code definitions:\n\n"
}
result += `${path.relative(dirPath, file)}\n${definitions}\n`
} else {
filesWithoutDefinitions.push(file)
}
}
// List remaining files
result += "Remaining files (not parsed):\n"
remainingFiles.forEach((file) => {
result += `${file}\n`
})
// List remaining files' paths
result += "# Unparsed files:\n\n"
filesWithoutDefinitions
.concat(remainingFiles)
.sort()
.forEach((file) => {
result += `${path.relative(dirPath, file)}\n`
})
return result
}
@@ -100,7 +109,8 @@ Parsing files using tree-sitter
1. Parse the file content into an AST (Abstract Syntax Tree) using the appropriate language grammar (set of rules that define how the components of a language like keywords, expressions, and statements can be combined to create valid programs).
2. Create a query using a language-specific query string, and run it against the AST's root node to capture specific syntax elements.
- We use tag queries to identify named entities in a program, and then use a syntax capture to label the entity and its name. A notable example of this is GitHub's search-based code navigation.
3. Sort the captures by their position in the file, and format the output by iterating through the captures, e.g. adding "|----\n" for gaps between captured sections.
- Our custom tag queries are based on tree-sitter's default tag queries, but modified to only capture definitions.
3. Sort the captures by their position in the file, output the name of each definition, and format the result, e.g. by adding "|----\n" for gaps between captured sections.
This approach allows us to focus on the most relevant parts of the code (defined by our language-specific queries) and provides a concise yet informative view of the file's structure and key elements.
@@ -109,7 +119,7 @@ This approach allows us to focus on the most relevant parts of the code (defined
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
*/
async function parseFile(filePath: string, languageParsers: LanguageParser): Promise<string> {
async function parseFile(filePath: string, languageParsers: LanguageParser): Promise<string | undefined> {
const fileContent = await fs.readFile(filePath, "utf8")
const ext = path.extname(filePath).toLowerCase().slice(1)
@@ -118,7 +128,7 @@ async function parseFile(filePath: string, languageParsers: LanguageParser): Pro
return `Unsupported file type: ${filePath}`
}
let formattedOutput = `${filePath}:\n|----\n`
let formattedOutput = ""
try {
// Parse the file content into an Abstract Syntax Tree (AST), a tree-like representation of the code
@@ -138,30 +148,39 @@ async function parseFile(filePath: string, languageParsers: LanguageParser): Pro
let lastLine = -1
captures.forEach((capture) => {
const { node } = capture
const { node, name } = capture
// Get the start and end lines of the current AST node
const startLine = node.startPosition.row
const endLine = node.endPosition.row
// Once we've retrieved the nodes we care about through the language query, we filter for lines with definition names only.
// Captures where name.startsWith("name.reference.") are references (refs); they can be used for ranking purposes, but we don't need them for the output
// previously we did `name.startsWith("name.definition.")` but this was too strict and excluded some relevant definitions
// Add separator if there's a gap between captures
if (lastLine !== -1 && startLine > lastLine + 1) {
formattedOutput += "|----\n"
}
// Add the captured lines
for (let i = startLine; i <= endLine; i++) {
formattedOutput += `${lines[i]}\n`
// Only add the first line of the definition
// The query captures include both the definition name and the definition implementation, but we only want the name (I found discrepancies in the naming structure across languages, e.g. javascript names would be 'name' and typescript names would be 'name.definition')
if (name.includes("name") && lines[startLine]) {
formattedOutput += `${lines[startLine]}\n`
}
// Adds all the captured lines
// for (let i = startLine; i <= endLine; i++) {
// formattedOutput += `│${lines[i]}\n`
// }
//}
lastLine = endLine
})
} catch (error) {
formattedOutput += `Error parsing file: ${error}\n`
console.log(`Error parsing file: ${error}\n`)
}
formattedOutput += "|----\n"
return formattedOutput
if (formattedOutput.length > 0) {
return `|----\n${formattedOutput}|----\n`
}
return undefined
}
export { analyzeProject }

View File

@@ -45,6 +45,8 @@ Sources:
- https://github.com/tree-sitter/node-tree-sitter/issues/169
- https://github.com/tree-sitter/node-tree-sitter/issues/168
- https://github.com/Gregoor/tree-sitter-wasms/blob/main/README.md
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/query-test.js
*/
export async function loadRequiredLanguageParsers(filesToParse: string[]): Promise<LanguageParser> {
await Parser.init()

View File

@@ -1,48 +1,23 @@
/*
- class declarations
- interface declarations
- method declarations
- namespace declarations
*/
export default `
(class_declaration
name: (identifier) @name.definition.class
) @definition.class
(class_declaration
bases: (base_list (_) @name.reference.class)
) @reference.class
) @definition.class
(interface_declaration
name: (identifier) @name.definition.interface
) @definition.interface
(interface_declaration
bases: (base_list (_) @name.reference.interface)
) @reference.interface
) @definition.interface
(method_declaration
name: (identifier) @name.definition.method
) @definition.method
(object_creation_expression
type: (identifier) @name.reference.class
) @reference.class
(type_parameter_constraints_clause
target: (identifier) @name.reference.class
) @reference.class
(type_constraint
type: (identifier) @name.reference.class
) @reference.class
(variable_declaration
type: (identifier) @name.reference.class
) @reference.class
(invocation_expression
function:
(member_access_expression
name: (identifier) @name.reference.send
)
) @reference.send
) @definition.method
(namespace_declaration
name: (identifier) @name.definition.module
) @definition.module
`
`

View File

@@ -1,3 +1,10 @@
/*
- struct declarations
- union declarations
- function declarations
- typedef declarations
- enum declarations
*/
export default `
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
@@ -8,4 +15,4 @@ export default `
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
`
`

View File

@@ -1,3 +1,12 @@
/*
- struct declarations
- union declarations
- function declarations
- method declarations (with namespace scope)
- typedef declarations
- enum declarations
- class declarations
*/
export default `
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
@@ -14,4 +23,4 @@ export default `
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
(class_specifier name: (type_identifier) @name.definition.class) @definition.class
`
`

View File

@@ -1,3 +1,9 @@
/*
- function declarations (with associated comments)
- method declarations (with associated comments)
- type specifications
- type references
*/
export default `
(
(comment)* @doc
@@ -17,16 +23,8 @@ export default `
(#set-adjacent! @doc @definition.method)
)
(call_expression
function: [
(identifier) @name.reference.call
(parenthesized_expression (identifier) @name.reference.call)
(selector_expression field: (field_identifier) @name.reference.call)
(parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call))
]) @reference.call
(type_spec
name: (type_identifier) @name.definition.type) @definition.type
(type_identifier) @name.reference.type @reference.type
`
`

View File

@@ -1,3 +1,9 @@
/*
- class declarations
- method declarations
- interface declarations
- superclass references
*/
export default `
(class_declaration
name: (identifier) @name.definition.class) @definition.class
@@ -5,18 +11,8 @@ export default `
(method_declaration
name: (identifier) @name.definition.method) @definition.method
(method_invocation
name: (identifier) @name.reference.call
arguments: (argument_list) @reference.call)
(interface_declaration
name: (identifier) @name.definition.interface) @definition.interface
(type_list
(type_identifier) @name.reference.implementation) @reference.implementation
(object_creation_expression
type: (type_identifier) @name.reference.class) @reference.class
(superclass (type_identifier) @name.reference.class) @reference.class
`

View File

@@ -1,10 +1,17 @@
/*
- class definitions
- method definitions
- named function declarations
- arrow functions and function expressions assigned to variables
- exported constants
*/
export default `
(
(comment)* @doc
.
(method_definition
name: (property_identifier) @name.definition.method) @definition.method
(#not-eq? @name.definition.method "constructor")
name: (property_identifier) @name) @definition.method
(#not-eq? @name "constructor")
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.method)
)
@@ -14,9 +21,9 @@ export default `
.
[
(class
name: (_) @name.definition.class)
name: (_) @name)
(class_declaration
name: (_) @name.definition.class)
name: (_) @name)
] @definition.class
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.class)
@@ -26,14 +33,10 @@ export default `
(comment)* @doc
.
[
(function
name: (identifier) @name.definition.function)
(function_declaration
name: (identifier) @name.definition.function)
(generator_function
name: (identifier) @name.definition.function)
name: (identifier) @name)
(generator_function_declaration
name: (identifier) @name.definition.function)
name: (identifier) @name)
] @definition.function
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
@@ -44,8 +47,8 @@ export default `
.
(lexical_declaration
(variable_declarator
name: (identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function)
name: (identifier) @name
value: [(arrow_function) (function_expression)]) @definition.function)
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
)
@@ -55,36 +58,20 @@ export default `
.
(variable_declaration
(variable_declarator
name: (identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function)
name: (identifier) @name
value: [(arrow_function) (function_expression)]) @definition.function)
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
(#select-adjacent! @doc @definition.function)
)
(assignment_expression
left: [
(identifier) @name.definition.function
(member_expression
property: (property_identifier) @name.definition.function)
]
right: [(arrow_function) (function)]
) @definition.function
(pair
key: (property_identifier) @name.definition.function
value: [(arrow_function) (function)]) @definition.function
(
(call_expression
function: (identifier) @name.reference.call) @reference.call
(#not-match? @name.reference.call "^(require)$")
)
(call_expression
function: (member_expression
property: (property_identifier) @name.reference.call)
arguments: (_) @reference.call)
(new_expression
constructor: (_) @name.reference.class) @reference.class
(export_statement value: (assignment_expression left: (identifier) @name right: ([
(number)
(string)
(identifier)
(undefined)
(null)
(new_expression)
(binary_expression)
(call_expression)
]))) @definition.constant
`

View File

@@ -1,3 +1,8 @@
/*
- class declarations
- function definitions
- method declarations
*/
export default `
(class_declaration
name: (name) @name.definition.class) @definition.class
@@ -7,22 +12,4 @@ export default `
(method_declaration
name: (name) @name.definition.function) @definition.function
(object_creation_expression
[
(qualified_name (name) @name.reference.class)
(variable_name (name) @name.reference.class)
]) @reference.class
(function_call_expression
function: [
(qualified_name (name) @name.reference.call)
(variable_name (name)) @name.reference.call
]) @reference.call
(scoped_call_expression
name: (name) @name.reference.call) @reference.call
(member_call_expression
name: (name) @name.reference.call) @reference.call
`

View File

@@ -1,14 +1,11 @@
/*
- class definitions
- function definitions
*/
export default `
(class_definition
name: (identifier) @name.definition.class) @definition.class
(function_definition
name: (identifier) @name.definition.function) @definition.function
(call
function: [
(identifier) @name.reference.call
(attribute
attribute: (identifier) @name.reference.call)
]) @reference.call
`

View File

@@ -1,6 +1,9 @@
/*
- method definitions (including singleton methods and aliases, with associated comments)
- class definitions (including singleton classes, with associated comments)
- module definitions
*/
export default `
; Method definitions
(
(comment)* @doc
.
@@ -17,11 +20,6 @@ export default `
(alias
name: (_) @name.definition.method) @definition.method
(setter
(identifier) @ignore)
; Class definitions
(
(comment)* @doc
.
@@ -43,8 +41,6 @@ export default `
(#select-adjacent! @doc @definition.class)
)
; Module definitions
(
(module
name: [
@@ -53,14 +49,4 @@ export default `
name: (_) @name.definition.module)
]) @definition.module
)
; Calls
(call method: (identifier) @name.reference.call) @reference.call
(
[(identifier) (constant)] @name.reference.call @reference.call
(#is-not? local)
(#not-match? @name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$")
)
`

View File

@@ -1,6 +1,15 @@
/*
- struct definitions
- enum definitions
- union definitions
- type aliases
- method definitions
- function definitions
- trait definitions
- module definitions
- macro definitions
*/
export default `
; ADT definitions
(struct_item
name: (type_identifier) @name.definition.class) @definition.class
@@ -10,53 +19,22 @@ export default `
(union_item
name: (type_identifier) @name.definition.class) @definition.class
; type aliases
(type_item
name: (type_identifier) @name.definition.class) @definition.class
; method definitions
(declaration_list
(function_item
name: (identifier) @name.definition.method)) @definition.method
; function definitions
(function_item
name: (identifier) @name.definition.function) @definition.function
; trait definitions
(trait_item
name: (type_identifier) @name.definition.interface) @definition.interface
; module definitions
(mod_item
name: (identifier) @name.definition.module) @definition.module
; macro definitions
(macro_definition
name: (identifier) @name.definition.macro) @definition.macro
; references
(call_expression
function: (identifier) @name.reference.call) @reference.call
(call_expression
function: (field_expression
field: (field_identifier) @name.reference.call)) @reference.call
(macro_invocation
macro: (identifier) @name.reference.call) @reference.call
; implementations
(impl_item
trait: (type_identifier) @name.reference.implementation) @reference.implementation
(impl_item
type: (type_identifier) @name.reference.implementation
!trait) @reference.implementation
`

View File

@@ -1,3 +1,10 @@
/*
- class declarations
- protocol declarations
- method declarations (including initializers and deinitializers)
- property declarations
- function declarations
*/
export default `
(class_declaration
name: (type_identifier) @name) @definition.class

View File

@@ -1,3 +1,13 @@
/*
- function signatures and declarations
- method signatures and definitions
- abstract method signatures
- class declarations (including abstract classes)
- module declarations
- interface declarations
- type alias declarations
- enum declarations
*/
export default `
(function_signature
name: (identifier) @name.definition.function) @definition.function
@@ -17,12 +27,6 @@ export default `
(interface_declaration
name: (type_identifier) @name.definition.interface) @definition.interface
(type_annotation
(type_identifier) @name.reference.type) @reference.type
(new_expression
constructor: (identifier) @name.reference.class) @reference.class
(function_declaration
name: (identifier) @name.definition.function) @definition.function
@@ -32,9 +36,6 @@ export default `
(class_declaration
name: (type_identifier) @name.definition.class) @definition.class
(interface_declaration
name: (type_identifier) @name.definition.class) @definition.class
(type_alias_declaration
name: (type_identifier) @name.definition.type) @definition.type