Skip to main content

arbor_core/languages/
mod.rs

1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod dart;
10mod go;
11mod java;
12mod python;
13mod rust;
14mod typescript;
15
16use crate::fallback_parser::is_fallback_supported_extension;
17use crate::node::CodeNode;
18
19/// Trait for language-specific parsing logic.
20///
21/// Each language needs to implement this to handle its unique AST
22/// structure and idioms. The trait provides the Tree-sitter language
23/// and the extraction logic.
24pub trait LanguageParser: Send + Sync {
25    /// Returns the Tree-sitter language for this parser.
26    fn language(&self) -> tree_sitter::Language;
27
28    /// File extensions this parser handles.
29    fn extensions(&self) -> &[&str];
30
31    /// Extracts CodeNodes from a parsed Tree-sitter tree.
32    ///
33    /// This is where the magic happens. Each language traverses
34    /// its AST differently to find functions, classes, etc.
35    fn extract_nodes(
36        &self,
37        tree: &tree_sitter::Tree,
38        source: &str,
39        file_path: &str,
40    ) -> Vec<CodeNode>;
41}
42
43/// Gets a parser for the given file extension.
44///
45/// Returns None if we don't support this extension.
46pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
47    match extension.to_lowercase().as_str() {
48        // TypeScript and JavaScript
49        "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
50        "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
51
52        // Rust
53        "rs" => Some(Box::new(rust::RustParser)),
54
55        // Python
56        "py" | "pyi" => Some(Box::new(python::PythonParser)),
57
58        // Go
59        "go" => Some(Box::new(go::GoParser)),
60
61        // Java
62        "java" => Some(Box::new(java::JavaParser)),
63
64        // C
65        "c" | "h" => Some(Box::new(c::CParser)),
66
67        // C++
68        "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
69
70        // Dart
71        "dart" => Some(Box::new(dart::DartParser)),
72
73        _ => None,
74    }
75}
76
/// Returns every file extension advertised as supported.
///
/// The first groups mirror the extensions matched in `get_parser`; the
/// trailing groups are the ones annotated as handled by the fallback parser.
/// The order of the entries is fixed.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts",
        "tsx",
        "mts",
        "cts",
        // JavaScript
        "js",
        "jsx",
        "mjs",
        "cjs",
        // Rust
        "rs",
        // Python
        "py",
        "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c",
        "h",
        // C++
        "cpp",
        "hpp",
        "cc",
        "hh",
        "cxx",
        "hxx",
        // Dart
        "dart",
        // Kotlin (fallback parser)
        "kt",
        "kts",
        // Swift (fallback parser)
        "swift",
        // Ruby (fallback parser)
        "rb",
        // PHP (fallback parser)
        "php",
        "phtml",
        // Shell (fallback parser)
        "sh",
        "bash",
        "zsh",
    ];

    EXTENSIONS
}
96
/// Returns the names of all supported language families.
///
/// Intended for API metadata. The order of the entries is fixed.
pub fn supported_language_names() -> &'static [&'static str] {
    const LANGUAGE_NAMES: &[&str] = &[
        "typescript",
        "javascript",
        "rust",
        "python",
        "go",
        "java",
        "c",
        "cpp",
        // NOTE(review): no C# extension appears in `get_parser` or
        // `supported_extensions` in this file; confirm whether the fallback
        // parser covers C# or whether this entry is stale.
        "csharp",
        "dart",
        "kotlin",
        "swift",
        "ruby",
        "php",
        "shell",
    ];

    LANGUAGE_NAMES
}
117
118/// Checks if a file extension is supported.
119pub fn is_supported(extension: &str) -> bool {
120    get_parser(extension).is_some() || is_fallback_supported_extension(extension)
121}