Skip to main content

reflex/parsers/
mod.rs

1//! Tree-sitter parsers for extracting symbols from source code
2//!
3//! This module provides language-specific parsers that extract symbols
4//! (functions, classes, structs, etc.) from source code using Tree-sitter.
5//!
6//! Each language submodule exposes a `parse` function that takes a file path
7//! and its source code and returns the extracted symbols as `Result<Vec<SearchResult>>`.
8
9pub mod c;
10pub mod cpp;
11pub mod csharp;
12pub mod go;
13pub mod java;
14pub mod kotlin;
15pub mod php;
16pub mod python;
17pub mod ruby;
18pub mod rust;
19pub mod svelte;
20pub mod tsconfig;
21pub mod typescript;
22pub mod vue;
23// pub mod swift;  // Temporarily disabled - tree-sitter-swift 0.7.2 grammar node types diverged from this parser's queries
24pub mod zig;
25
26use crate::models::{Language, SearchResult};
27use anyhow::{Result, anyhow};
28
/// Stateless dispatcher that routes a language to its parser.
///
/// The type carries no data; it only serves as a namespace for the
/// associated functions defined on it.
pub struct ParserFactory;
31
32/// Extracted import/dependency information (before file ID resolution)
33#[derive(Debug, Clone)]
34pub struct ImportInfo {
35    /// Import path as written in source code
36    pub imported_path: String,
37    /// Type classification hint (internal/external/stdlib)
38    pub import_type: crate::models::ImportType,
39    /// Line number where import appears
40    pub line_number: usize,
41    /// Imported symbols (for selective imports like `from x import a, b`)
42    pub imported_symbols: Option<Vec<String>>,
43}
44
/// One export/re-export statement, recorded so barrel exports can be tracked.
#[derive(Clone, Debug)]
pub struct ExportInfo {
    /// Name of the exported symbol; `None` for a wildcard `export * from`.
    pub exported_symbol: Option<String>,
    /// Path the symbol is re-exported from.
    pub source_path: String,
    /// Line on which the export statement appears.
    pub line_number: usize,
}
55
56/// Trait for extracting dependencies from source code
57///
58/// Each language parser can implement this trait to extract import/include
59/// statements from source files.
60pub trait DependencyExtractor {
61    /// Extract all imports/dependencies from source code
62    ///
63    /// Returns a list of ImportInfo records (before file ID resolution).
64    /// The indexer will resolve these to file IDs and store in the database.
65    ///
66    /// # Arguments
67    ///
68    /// * `source` - Source code content
69    ///
70    /// # Returns
71    ///
72    /// Vector of ImportInfo records, or an error if parsing fails
73    fn extract_dependencies(source: &str) -> Result<Vec<ImportInfo>>;
74}
75
76impl ParserFactory {
77    /// Get the tree-sitter grammar for a language
78    ///
79    /// This is the single source of truth for tree-sitter language grammars.
80    /// Used by both symbol parsers and AST query matching.
81    ///
82    /// Returns an error for:
83    /// - Vue/Svelte (use line-based parsing instead of tree-sitter)
84    /// - Swift (parser queries are out of date with tree-sitter-swift 0.7.x grammar)
85    /// - Unknown languages
86    pub fn get_language_grammar(language: Language) -> Result<tree_sitter::Language> {
87        match language {
88            Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()),
89            Language::Python => Ok(tree_sitter_python::LANGUAGE.into()),
90            Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
91            Language::JavaScript => Ok(tree_sitter_typescript::LANGUAGE_TSX.into()),
92            Language::Go => Ok(tree_sitter_go::LANGUAGE.into()),
93            Language::Java => Ok(tree_sitter_java::LANGUAGE.into()),
94            Language::C => Ok(tree_sitter_c::LANGUAGE.into()),
95            Language::Cpp => Ok(tree_sitter_cpp::LANGUAGE.into()),
96            Language::CSharp => Ok(tree_sitter_c_sharp::LANGUAGE.into()),
97            Language::PHP => Ok(tree_sitter_php::LANGUAGE_PHP.into()),
98            Language::Ruby => Ok(tree_sitter_ruby::LANGUAGE.into()),
99            Language::Kotlin => Ok(tree_sitter_kotlin_ng::LANGUAGE.into()),
100            Language::Zig => Ok(tree_sitter_zig::LANGUAGE.into()),
101            Language::Swift => Err(anyhow!(
102                "Swift support temporarily disabled (parser queries out of date with tree-sitter-swift 0.7.x grammar)"
103            )),
104            Language::Vue => Err(anyhow!(
105                "Vue uses line-based parsing, not tree-sitter (tree-sitter-vue incompatible with tree-sitter 0.24+)"
106            )),
107            Language::Svelte => Err(anyhow!(
108                "Svelte uses line-based parsing, not tree-sitter (tree-sitter-svelte incompatible with tree-sitter 0.24+)"
109            )),
110            Language::Unknown => Err(anyhow!("Unknown language")),
111        }
112    }
113
114    /// Get language keywords that should trigger "list all symbols" behavior
115    ///
116    /// When a user searches for a keyword (like "class", "function") with --symbols,
117    /// we interpret it as "list all symbols of that type" rather than looking for
118    /// a symbol literally named "class" or "function".
119    ///
120    /// Returns an empty slice for languages without common keywords or unsupported languages.
121    pub fn get_keywords(language: Language) -> &'static [&'static str] {
122        match language {
123            Language::Rust => &[
124                "fn", "struct", "enum", "trait", "impl", "mod", "const", "static", "type", "macro",
125            ],
126            Language::PHP => &["class", "function", "trait", "interface", "enum"],
127            Language::Python => &["class", "def", "async"],
128            Language::TypeScript | Language::JavaScript => &[
129                "class",
130                "function",
131                "interface",
132                "type",
133                "enum",
134                "const",
135                "let",
136                "var",
137            ],
138            Language::Go => &["func", "struct", "interface", "type", "const", "var"],
139            Language::Java => &["class", "interface", "enum", "@interface"],
140            Language::C => &["struct", "enum", "union", "typedef"],
141            Language::Cpp => &[
142                "class",
143                "struct",
144                "enum",
145                "union",
146                "typedef",
147                "namespace",
148                "template",
149            ],
150            Language::CSharp => &[
151                "class",
152                "struct",
153                "interface",
154                "enum",
155                "delegate",
156                "record",
157                "namespace",
158            ],
159            Language::Ruby => &["class", "module", "def"],
160            Language::Kotlin => &["class", "fun", "interface", "object", "enum", "annotation"],
161            Language::Zig => &["fn", "struct", "enum", "const", "var", "type"],
162            Language::Swift => &["class", "struct", "enum", "protocol", "func", "var", "let"],
163            Language::Vue | Language::Svelte => &["function", "const", "let", "var"],
164            Language::Unknown => &[],
165        }
166    }
167
168    /// Get all keywords across all supported languages
169    ///
170    /// Returns a deduplicated union of keywords from all languages.
171    /// Used for keyword detection when --lang is not specified.
172    ///
173    /// When a user searches for a keyword with --symbols or --kind,
174    /// we enable keyword mode regardless of language filter.
175    pub fn get_all_keywords() -> &'static [&'static str] {
176        &[
177            // Functions
178            "fn",
179            "function",
180            "def",
181            "func",
182            // Classes and types
183            "class",
184            "struct",
185            "enum",
186            "interface",
187            "trait",
188            "type",
189            "record",
190            // Modules and namespaces
191            "mod",
192            "module",
193            "namespace",
194            // Variables and constants
195            "const",
196            "static",
197            "let",
198            "var",
199            // Other constructs
200            "impl",
201            "async",
202            "object",
203            "annotation",
204            "protocol",
205            "union",
206            "typedef",
207            "delegate",
208            "template",
209            // Java annotations
210            "@interface",
211        ]
212    }
213
214    /// Parse a file and extract symbols based on its language
215    pub fn parse(path: &str, source: &str, language: Language) -> Result<Vec<SearchResult>> {
216        match language {
217            Language::Rust => rust::parse(path, source),
218            Language::TypeScript => typescript::parse(path, source, language),
219            Language::JavaScript => typescript::parse(path, source, language),
220            Language::Vue => vue::parse(path, source),
221            Language::Svelte => svelte::parse(path, source),
222            Language::Python => python::parse(path, source),
223            Language::Go => go::parse(path, source),
224            Language::Java => java::parse(path, source),
225            Language::PHP => php::parse(path, source),
226            Language::C => c::parse(path, source),
227            Language::Cpp => cpp::parse(path, source),
228            Language::CSharp => csharp::parse(path, source),
229            Language::Ruby => ruby::parse(path, source),
230            Language::Kotlin => kotlin::parse(path, source),
231            Language::Swift => {
232                log::warn!(
233                    "Swift support temporarily disabled (parser queries out of date with tree-sitter-swift 0.7.x grammar): {}",
234                    path
235                );
236                Ok(vec![])
237            }
238            Language::Zig => zig::parse(path, source),
239            Language::Unknown => {
240                log::warn!("Unknown language for file: {}", path);
241                Ok(vec![])
242            }
243        }
244    }
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the factory type can be named and constructed.
    #[test]
    fn test_parser_factory() {
        // Passing here only shows that the module compiles.
        let _ = ParserFactory;
    }
}
256}