arbor_core/languages/
mod.rs

1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod dart;
10mod go;
11mod java;
12mod python;
13mod rust;
14mod typescript;
15
16use crate::node::CodeNode;
17
18/// Trait for language-specific parsing logic.
19///
20/// Each language needs to implement this to handle its unique AST
21/// structure and idioms. The trait provides the Tree-sitter language
22/// and the extraction logic.
23pub trait LanguageParser: Send + Sync {
24    /// Returns the Tree-sitter language for this parser.
25    fn language(&self) -> tree_sitter::Language;
26
27    /// File extensions this parser handles.
28    fn extensions(&self) -> &[&str];
29
30    /// Extracts CodeNodes from a parsed Tree-sitter tree.
31    ///
32    /// This is where the magic happens. Each language traverses
33    /// its AST differently to find functions, classes, etc.
34    fn extract_nodes(
35        &self,
36        tree: &tree_sitter::Tree,
37        source: &str,
38        file_path: &str,
39    ) -> Vec<CodeNode>;
40}
41
42/// Gets a parser for the given file extension.
43///
44/// Returns None if we don't support this extension.
45pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
46    match extension.to_lowercase().as_str() {
47        // TypeScript and JavaScript
48        "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
49        "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
50
51        // Rust
52        "rs" => Some(Box::new(rust::RustParser)),
53
54        // Python
55        "py" | "pyi" => Some(Box::new(python::PythonParser)),
56
57        // Go
58        "go" => Some(Box::new(go::GoParser)),
59
60        // Java
61        "java" => Some(Box::new(java::JavaParser)),
62
63        // C
64        "c" | "h" => Some(Box::new(c::CParser)),
65
66        // C++
67        "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
68
69        // Dart
70        "dart" => Some(Box::new(dart::DartParser)),
71
72        _ => None,
73    }
74}
75
/// Returns every file extension that has a parser.
///
/// Entries are lowercase, carry no leading dot, and are grouped by
/// language. Keep this list in sync with the match arms in `get_parser`.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts", "tsx", "mts", "cts",
        // JavaScript
        "js", "jsx", "mjs", "cjs",
        // Rust
        "rs",
        // Python
        "py", "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c", "h",
        // C++
        "cpp", "hpp", "cc", "hh", "cxx", "hxx",
        // Dart
        "dart",
    ];

    EXTENSIONS
}
90
91/// Checks if a file extension is supported.
92pub fn is_supported(extension: &str) -> bool {
93    get_parser(extension).is_some()
94}