Skip to main content

arbor_core/languages/
mod.rs

1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod dart;
10mod go;
11mod java;
12mod python;
13mod rust;
14mod typescript;
15
16use crate::fallback_parser::is_fallback_supported_extension;
17use crate::node::CodeNode;
18
19/// Trait for language-specific parsing logic.
20///
21/// Each language needs to implement this to handle its unique AST
22/// structure and idioms. The trait provides the Tree-sitter language
23/// and the extraction logic.
24pub trait LanguageParser: Send + Sync {
25    /// Returns the Tree-sitter language for this parser.
26    fn language(&self) -> tree_sitter::Language;
27
28    /// File extensions this parser handles.
29    fn extensions(&self) -> &[&str];
30
31    /// Extracts CodeNodes from a parsed Tree-sitter tree.
32    ///
33    /// This is where the magic happens. Each language traverses
34    /// its AST differently to find functions, classes, etc.
35    fn extract_nodes(
36        &self,
37        tree: &tree_sitter::Tree,
38        source: &str,
39        file_path: &str,
40    ) -> Vec<CodeNode>;
41}
42
43/// Gets a parser for the given file extension.
44///
45/// Returns None if we don't support this extension.
46pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
47    match extension.to_lowercase().as_str() {
48        // TypeScript and JavaScript
49        "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
50        "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
51
52        // Rust
53        "rs" => Some(Box::new(rust::RustParser)),
54
55        // Python
56        "py" | "pyi" => Some(Box::new(python::PythonParser)),
57
58        // Go
59        "go" => Some(Box::new(go::GoParser)),
60
61        // Java
62        "java" => Some(Box::new(java::JavaParser)),
63
64        // C
65        "c" | "h" => Some(Box::new(c::CParser)),
66
67        // C++
68        "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
69
70        // Dart
71        "dart" => Some(Box::new(dart::DartParser)),
72
73        _ => None,
74    }
75}
76
/// Returns every file extension that is considered supported.
///
/// The list is static and grouped by language. Extensions are given
/// in lowercase and without a leading dot. The trailing groups are
/// languages served by the fallback parser.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts", "tsx", "mts", "cts",
        // JavaScript
        "js", "jsx", "mjs", "cjs",
        // Rust
        "rs",
        // Python
        "py", "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c", "h",
        // C++
        "cpp", "hpp", "cc", "hh", "cxx", "hxx",
        // Dart
        "dart",
        // Kotlin (fallback parser)
        "kt", "kts",
        // Swift (fallback parser)
        "swift",
        // Ruby (fallback parser)
        "rb",
        // PHP (fallback parser)
        "php", "phtml",
        // Shell (fallback parser)
        "sh", "bash", "zsh",
    ];

    EXTENSIONS
}
96
97/// Checks if a file extension is supported.
98pub fn is_supported(extension: &str) -> bool {
99    get_parser(extension).is_some() || is_fallback_supported_extension(extension)
100}