From 35a6ecbca234f2d661f38ab321d477b93bc4f94c Mon Sep 17 00:00:00 2001 From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com> Date: Sat, 27 Jul 2024 11:44:02 -0400 Subject: [PATCH] Update language queries to only capture definitions; modify parsing logic to output definition names --- src/AnalyzeProject/index.ts | 63 +++++++++++------ src/AnalyzeProject/languageParser.ts | 2 + .../tree-sitter-queries/tags/c-sharp.ts | 45 +++---------- .../tree-sitter-queries/tags/c.ts | 9 ++- .../tree-sitter-queries/tags/cpp.ts | 11 ++- .../tree-sitter-queries/tags/go.ts | 16 ++--- .../tree-sitter-queries/tags/java.ts | 16 ++--- .../tree-sitter-queries/tags/javascript.ts | 67 ++++++++----------- .../tree-sitter-queries/tags/php.ts | 23 ++----- .../tree-sitter-queries/tags/python.ts | 11 ++- .../tree-sitter-queries/tags/ruby.ts | 24 ++----- .../tree-sitter-queries/tags/rust.ts | 44 +++--------- .../tree-sitter-queries/tags/swift.ts | 7 ++ .../tree-sitter-queries/tags/typescript.ts | 19 +++--- 14 files changed, 153 insertions(+), 204 deletions(-) diff --git a/src/AnalyzeProject/index.ts b/src/AnalyzeProject/index.ts index 2140526..1a42691 100644 --- a/src/AnalyzeProject/index.ts +++ b/src/AnalyzeProject/index.ts @@ -15,19 +15,28 @@ async function analyzeProject(dirPath: string): Promise { // Load only the necessary language parsers const languageParsers = await loadRequiredLanguageParsers(filesToParse) - // Parse specific files and generate result - result += "Files parsed with ASTs:\n" + // Parse specific files we have language parsers for + const filesWithoutDefinitions: string[] = [] for (const file of filesToParse) { - result += `File: ${file}\n` - const ast = await parseFile(file, languageParsers) - result += `AST: ${JSON.stringify(ast, null, 2)}\n\n` + const definitions = await parseFile(file, languageParsers) + if (definitions) { + if (!result) { + result += "# Source code definitions:\n\n" + } + result += `${path.relative(dirPath, file)}\n${definitions}\n` + } else 
{ + filesWithoutDefinitions.push(file) + } } - // List remaining files - result += "Remaining files (not parsed):\n" - remainingFiles.forEach((file) => { - result += `${file}\n` - }) + // List remaining files' paths + result += "# Unparsed files:\n\n" + filesWithoutDefinitions + .concat(remainingFiles) + .sort() + .forEach((file) => { + result += `${path.relative(dirPath, file)}\n` + }) return result } @@ -100,7 +109,8 @@ Parsing files using tree-sitter 1. Parse the file content into an AST (Abstract Syntax Tree) using the appropriate language grammar (set of rules that define how the components of a language like keywords, expressions, and statements can be combined to create valid programs). 2. Create a query using a language-specific query string, and run it against the AST's root node to capture specific syntax elements. - We use tag queries to identify named entities in a program, and then use a syntax capture to label the entity and its name. A notable example of this is GitHub's search-based code navigation. -3. Sort the captures by their position in the file, and format the output by iterating through the captures by i.e. adding "|----\n" for gaps between captured sections. + - Our custom tag queries are based on tree-sitter's default tag queries, but modified to only capture definitions. +3. Sort the captures by their position in the file, output the name of the definition, and format the output by e.g. adding "|----\n" for gaps between captured sections. This approach allows us to focus on the most relevant parts of the code (defined by our language-specific queries) and provides a concise yet informative view of the file's structure and key elements. 
@@ -109,7 +119,7 @@ This approach allows us to focus on the most relevant parts of the code (defined - https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js - https://tree-sitter.github.io/tree-sitter/code-navigation-systems */ -async function parseFile(filePath: string, languageParsers: LanguageParser): Promise { +async function parseFile(filePath: string, languageParsers: LanguageParser): Promise { const fileContent = await fs.readFile(filePath, "utf8") const ext = path.extname(filePath).toLowerCase().slice(1) @@ -118,7 +128,7 @@ async function parseFile(filePath: string, languageParsers: LanguageParser): Pro return `Unsupported file type: ${filePath}` } - let formattedOutput = `${filePath}:\n|----\n` + let formattedOutput = "" try { // Parse the file content into an Abstract Syntax Tree (AST), a tree-like representation of the code @@ -138,30 +148,39 @@ async function parseFile(filePath: string, languageParsers: LanguageParser): Pro let lastLine = -1 captures.forEach((capture) => { - const { node } = capture + const { node, name } = capture // Get the start and end lines of the current AST node const startLine = node.startPosition.row const endLine = node.endPosition.row + // Once we've retrieved the nodes we care about through the language query, we filter for lines with definition names only. 
+ // name.startsWith("name.reference.") > refs can be used for ranking purposes, but we don't need them for the output + // previously we did `name.startsWith("name.definition.")` but this was too strict and excluded some relevant definitions // Add separator if there's a gap between captures if (lastLine !== -1 && startLine > lastLine + 1) { formattedOutput += "|----\n" } - - // Add the captured lines - for (let i = startLine; i <= endLine; i++) { - formattedOutput += `│${lines[i]}\n` + // Only add the first line of the definition + // query captures include the definition name and the definition implementation, but we only want the name (I found discrepancies in the naming structure for various languages, e.g. javascript names would be 'name' and typescript names would be 'name.definition') + if (name.includes("name") && lines[startLine]) { + formattedOutput += `│${lines[startLine]}\n` } + // Adds all the captured lines + // for (let i = startLine; i <= endLine; i++) { + // formattedOutput += `│${lines[i]}\n` + // } + //} lastLine = endLine }) } catch (error) { - formattedOutput += `Error parsing file: ${error}\n` + console.log(`Error parsing file: ${error}\n`) } - formattedOutput += "|----\n" - - return formattedOutput + if (formattedOutput.length > 0) { + return `|----\n${formattedOutput}|----\n` + } + return undefined } export { analyzeProject } diff --git a/src/AnalyzeProject/languageParser.ts b/src/AnalyzeProject/languageParser.ts index 983f0e3..e33a2dc 100644 --- a/src/AnalyzeProject/languageParser.ts +++ b/src/AnalyzeProject/languageParser.ts @@ -45,6 +45,8 @@ Sources: - https://github.com/tree-sitter/node-tree-sitter/issues/169 - https://github.com/tree-sitter/node-tree-sitter/issues/168 - https://github.com/Gregoor/tree-sitter-wasms/blob/main/README.md +- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md +- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/query-test.js */ export async function 
loadRequiredLanguageParsers(filesToParse: string[]): Promise { await Parser.init() diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/c-sharp.ts b/src/AnalyzeProject/tree-sitter-queries/tags/c-sharp.ts index 87de895..aeff5f7 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/c-sharp.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/c-sharp.ts @@ -1,48 +1,23 @@ +/* +- class declarations +- interface declarations +- method declarations +- namespace declarations +*/ export default ` (class_declaration name: (identifier) @name.definition.class - ) @definition.class - -(class_declaration - bases: (base_list (_) @name.reference.class) - ) @reference.class +) @definition.class (interface_declaration name: (identifier) @name.definition.interface - ) @definition.interface - -(interface_declaration - bases: (base_list (_) @name.reference.interface) - ) @reference.interface +) @definition.interface (method_declaration name: (identifier) @name.definition.method - ) @definition.method - -(object_creation_expression - type: (identifier) @name.reference.class - ) @reference.class - -(type_parameter_constraints_clause - target: (identifier) @name.reference.class - ) @reference.class - -(type_constraint - type: (identifier) @name.reference.class - ) @reference.class - -(variable_declaration - type: (identifier) @name.reference.class - ) @reference.class - -(invocation_expression - function: - (member_access_expression - name: (identifier) @name.reference.send - ) -) @reference.send +) @definition.method (namespace_declaration name: (identifier) @name.definition.module ) @definition.module -` \ No newline at end of file +` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/c.ts b/src/AnalyzeProject/tree-sitter-queries/tags/c.ts index 8e435ea..1f62896 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/c.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/c.ts @@ -1,3 +1,10 @@ +/* +- struct declarations +- union declarations +- function declarations +- 
typedef declarations +- enum declarations +*/ export default ` (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class @@ -8,4 +15,4 @@ export default ` (type_definition declarator: (type_identifier) @name.definition.type) @definition.type (enum_specifier name: (type_identifier) @name.definition.type) @definition.type -` \ No newline at end of file +` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/cpp.ts b/src/AnalyzeProject/tree-sitter-queries/tags/cpp.ts index 05ba34a..b0c4099 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/cpp.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/cpp.ts @@ -1,3 +1,12 @@ +/* +- struct declarations +- union declarations +- function declarations +- method declarations (with namespace scope) +- typedef declarations +- enum declarations +- class declarations +*/ export default ` (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class @@ -14,4 +23,4 @@ export default ` (enum_specifier name: (type_identifier) @name.definition.type) @definition.type (class_specifier name: (type_identifier) @name.definition.class) @definition.class -` \ No newline at end of file +` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/go.ts b/src/AnalyzeProject/tree-sitter-queries/tags/go.ts index f3d6364..89a789c 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/go.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/go.ts @@ -1,3 +1,9 @@ +/* +- function declarations (with associated comments) +- method declarations (with associated comments) +- type specifications +- type references +*/ export default ` ( (comment)* @doc @@ -17,16 +23,8 @@ export default ` (#set-adjacent! 
@doc @definition.method) ) -(call_expression - function: [ - (identifier) @name.reference.call - (parenthesized_expression (identifier) @name.reference.call) - (selector_expression field: (field_identifier) @name.reference.call) - (parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call)) - ]) @reference.call - (type_spec name: (type_identifier) @name.definition.type) @definition.type (type_identifier) @name.reference.type @reference.type -` \ No newline at end of file +` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/java.ts b/src/AnalyzeProject/tree-sitter-queries/tags/java.ts index 27d0b17..cf33d77 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/java.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/java.ts @@ -1,3 +1,9 @@ +/* +- class declarations +- method declarations +- interface declarations +- superclass references +*/ export default ` (class_declaration name: (identifier) @name.definition.class) @definition.class @@ -5,18 +11,8 @@ export default ` (method_declaration name: (identifier) @name.definition.method) @definition.method -(method_invocation - name: (identifier) @name.reference.call - arguments: (argument_list) @reference.call) - (interface_declaration name: (identifier) @name.definition.interface) @definition.interface -(type_list - (type_identifier) @name.reference.implementation) @reference.implementation - -(object_creation_expression - type: (type_identifier) @name.reference.class) @reference.class - (superclass (type_identifier) @name.reference.class) @reference.class ` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/javascript.ts b/src/AnalyzeProject/tree-sitter-queries/tags/javascript.ts index 94d5d02..d2bbbdb 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/javascript.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/javascript.ts @@ -1,10 +1,17 @@ +/* +- class definitions +- method definitions +- named function declarations +- arrow functions and function 
expressions assigned to variables +- exported constants +*/ export default ` ( (comment)* @doc . (method_definition - name: (property_identifier) @name.definition.method) @definition.method - (#not-eq? @name.definition.method "constructor") + name: (property_identifier) @name) @definition.method + (#not-eq? @name "constructor") (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") (#select-adjacent! @doc @definition.method) ) @@ -14,9 +21,9 @@ export default ` . [ (class - name: (_) @name.definition.class) + name: (_) @name) (class_declaration - name: (_) @name.definition.class) + name: (_) @name) ] @definition.class (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") (#select-adjacent! @doc @definition.class) @@ -26,14 +33,10 @@ export default ` (comment)* @doc . [ - (function - name: (identifier) @name.definition.function) (function_declaration - name: (identifier) @name.definition.function) - (generator_function - name: (identifier) @name.definition.function) + name: (identifier) @name) (generator_function_declaration - name: (identifier) @name.definition.function) + name: (identifier) @name) ] @definition.function (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") (#select-adjacent! @doc @definition.function) @@ -44,8 +47,8 @@ export default ` . (lexical_declaration (variable_declarator - name: (identifier) @name.definition.function - value: [(arrow_function) (function)]) @definition.function) + name: (identifier) @name + value: [(arrow_function) (function_expression)]) @definition.function) (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") (#select-adjacent! @doc @definition.function) ) @@ -55,36 +58,20 @@ export default ` . (variable_declaration (variable_declarator - name: (identifier) @name.definition.function - value: [(arrow_function) (function)]) @definition.function) + name: (identifier) @name + value: [(arrow_function) (function_expression)]) @definition.function) (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") (#select-adjacent! 
@doc @definition.function) ) -(assignment_expression - left: [ - (identifier) @name.definition.function - (member_expression - property: (property_identifier) @name.definition.function) - ] - right: [(arrow_function) (function)] -) @definition.function - -(pair - key: (property_identifier) @name.definition.function - value: [(arrow_function) (function)]) @definition.function - -( - (call_expression - function: (identifier) @name.reference.call) @reference.call - (#not-match? @name.reference.call "^(require)$") -) - -(call_expression - function: (member_expression - property: (property_identifier) @name.reference.call) - arguments: (_) @reference.call) - -(new_expression - constructor: (_) @name.reference.class) @reference.class +(export_statement value: (assignment_expression left: (identifier) @name right: ([ + (number) + (string) + (identifier) + (undefined) + (null) + (new_expression) + (binary_expression) + (call_expression) +]))) @definition.constant ` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/php.ts b/src/AnalyzeProject/tree-sitter-queries/tags/php.ts index c216906..7f32836 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/php.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/php.ts @@ -1,3 +1,8 @@ +/* +- class declarations +- function definitions +- method declarations +*/ export default ` (class_declaration name: (name) @name.definition.class) @definition.class @@ -7,22 +12,4 @@ export default ` (method_declaration name: (name) @name.definition.function) @definition.function - -(object_creation_expression - [ - (qualified_name (name) @name.reference.class) - (variable_name (name) @name.reference.class) - ]) @reference.class - -(function_call_expression - function: [ - (qualified_name (name) @name.reference.call) - (variable_name (name)) @name.reference.call - ]) @reference.call - -(scoped_call_expression - name: (name) @name.reference.call) @reference.call - -(member_call_expression - name: (name) @name.reference.call) 
@reference.call ` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/python.ts b/src/AnalyzeProject/tree-sitter-queries/tags/python.ts index ac90dfa..df1e055 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/python.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/python.ts @@ -1,14 +1,11 @@ +/* +- class definitions +- function definitions +*/ export default ` (class_definition name: (identifier) @name.definition.class) @definition.class (function_definition name: (identifier) @name.definition.function) @definition.function - -(call - function: [ - (identifier) @name.reference.call - (attribute - attribute: (identifier) @name.reference.call) - ]) @reference.call ` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/ruby.ts b/src/AnalyzeProject/tree-sitter-queries/tags/ruby.ts index 40e30db..9c70968 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/ruby.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/ruby.ts @@ -1,6 +1,9 @@ +/* +- method definitions (including singleton methods and aliases, with associated comments) +- class definitions (including singleton classes, with associated comments) +- module definitions +*/ export default ` -; Method definitions - ( (comment)* @doc . @@ -17,11 +20,6 @@ export default ` (alias name: (_) @name.definition.method) @definition.method -(setter - (identifier) @ignore) - -; Class definitions - ( (comment)* @doc . @@ -43,8 +41,6 @@ export default ` (#select-adjacent! @doc @definition.class) ) -; Module definitions - ( (module name: [ @@ -53,14 +49,4 @@ export default ` name: (_) @name.definition.module) ]) @definition.module ) - -; Calls - -(call method: (identifier) @name.reference.call) @reference.call - -( - [(identifier) (constant)] @name.reference.call @reference.call - (#is-not? local) - (#not-match? 
@name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$") -) ` diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/rust.ts b/src/AnalyzeProject/tree-sitter-queries/tags/rust.ts index 7cfd292..558d9bb 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/rust.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/rust.ts @@ -1,6 +1,15 @@ +/* +- struct definitions +- enum definitions +- union definitions +- type aliases +- method definitions +- function definitions +- trait definitions +- module definitions +- macro definitions +*/ export default ` -; ADT definitions - (struct_item name: (type_identifier) @name.definition.class) @definition.class @@ -10,53 +19,22 @@ export default ` (union_item name: (type_identifier) @name.definition.class) @definition.class -; type aliases - (type_item name: (type_identifier) @name.definition.class) @definition.class -; method definitions - (declaration_list (function_item name: (identifier) @name.definition.method)) @definition.method -; function definitions - (function_item name: (identifier) @name.definition.function) @definition.function -; trait definitions (trait_item name: (type_identifier) @name.definition.interface) @definition.interface -; module definitions (mod_item name: (identifier) @name.definition.module) @definition.module -; macro definitions - (macro_definition name: (identifier) @name.definition.macro) @definition.macro - -; references - -(call_expression - function: (identifier) @name.reference.call) @reference.call - -(call_expression - function: (field_expression - field: (field_identifier) @name.reference.call)) @reference.call - -(macro_invocation - macro: (identifier) @name.reference.call) @reference.call - -; implementations - -(impl_item - trait: (type_identifier) @name.reference.implementation) @reference.implementation - -(impl_item - type: (type_identifier) @name.reference.implementation - !trait) @reference.implementation ` diff --git 
a/src/AnalyzeProject/tree-sitter-queries/tags/swift.ts b/src/AnalyzeProject/tree-sitter-queries/tags/swift.ts index b0820de..7cfd80e 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/swift.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/swift.ts @@ -1,3 +1,10 @@ +/* +- class declarations +- protocol declarations +- method declarations (including initializers and deinitializers) +- property declarations +- function declarations +*/ export default ` (class_declaration name: (type_identifier) @name) @definition.class diff --git a/src/AnalyzeProject/tree-sitter-queries/tags/typescript.ts b/src/AnalyzeProject/tree-sitter-queries/tags/typescript.ts index 272982a..09e928d 100644 --- a/src/AnalyzeProject/tree-sitter-queries/tags/typescript.ts +++ b/src/AnalyzeProject/tree-sitter-queries/tags/typescript.ts @@ -1,3 +1,13 @@ +/* +- function signatures and declarations +- method signatures and definitions +- abstract method signatures +- class declarations (including abstract classes) +- module declarations +- interface declarations +- type alias declarations +- enum declarations +*/ export default ` (function_signature name: (identifier) @name.definition.function) @definition.function @@ -17,12 +27,6 @@ export default ` (interface_declaration name: (type_identifier) @name.definition.interface) @definition.interface -(type_annotation - (type_identifier) @name.reference.type) @reference.type - -(new_expression - constructor: (identifier) @name.reference.class) @reference.class - (function_declaration name: (identifier) @name.definition.function) @definition.function @@ -32,9 +36,6 @@ export default ` (class_declaration name: (type_identifier) @name.definition.class) @definition.class -(interface_declaration - name: (type_identifier) @name.definition.class) @definition.class - (type_alias_declaration name: (type_identifier) @name.definition.type) @definition.type