Skip to main content

arbor_core/languages/
mod.rs

1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod csharp;
10mod dart;
11mod go;
12mod java;
13mod python;
14mod rust;
15mod typescript;
16
17use crate::fallback_parser::is_fallback_supported_extension;
18use crate::node::CodeNode;
19
20/// Trait for language-specific parsing logic.
21///
22/// Each language needs to implement this to handle its unique AST
23/// structure and idioms. The trait provides the Tree-sitter language
24/// and the extraction logic.
25pub trait LanguageParser: Send + Sync {
26    /// Returns the Tree-sitter language for this parser.
27    fn language(&self) -> tree_sitter::Language;
28
29    /// File extensions this parser handles.
30    fn extensions(&self) -> &[&str];
31
32    /// Extracts CodeNodes from a parsed Tree-sitter tree.
33    ///
34    /// This is where the magic happens. Each language traverses
35    /// its AST differently to find functions, classes, etc.
36    fn extract_nodes(
37        &self,
38        tree: &tree_sitter::Tree,
39        source: &str,
40        file_path: &str,
41    ) -> Vec<CodeNode>;
42}
43
44/// Gets a parser for the given file extension.
45///
46/// Returns None if we don't support this extension.
47pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
48    match extension.to_lowercase().as_str() {
49        // TypeScript and JavaScript
50        "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
51        "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
52
53        // Rust
54        "rs" => Some(Box::new(rust::RustParser)),
55
56        // Python
57        "py" | "pyi" => Some(Box::new(python::PythonParser)),
58
59        // Go
60        "go" => Some(Box::new(go::GoParser)),
61
62        // Java
63        "java" => Some(Box::new(java::JavaParser)),
64
65        // C
66        "c" | "h" => Some(Box::new(c::CParser)),
67
68        // C++
69        "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
70
71        // C#
72        "cs" => Some(Box::new(csharp::CSharpParser)),
73
74        // Dart
75        "dart" => Some(Box::new(dart::DartParser)),
76
77        _ => None,
78    }
79}
80
/// Returns every file extension advertised as supported.
///
/// The slice is a fixed, hand-maintained list grouped by language.
/// Entries are bare extension names without a leading dot.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts", "tsx", "mts", "cts",
        // JavaScript
        "js", "jsx", "mjs", "cjs",
        // Rust
        "rs",
        // Python
        "py", "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c", "h",
        // C++
        "cpp", "hpp", "cc", "hh", "cxx", "hxx",
        // C#
        "cs",
        // Dart
        "dart",
        // Kotlin (fallback parser)
        "kt", "kts",
        // Swift (fallback parser)
        "swift",
        // Ruby (fallback parser)
        "rb",
        // PHP (fallback parser)
        "php", "phtml",
        // Shell (fallback parser)
        "sh", "bash", "zsh",
    ];

    EXTENSIONS
}
101
/// Returns the names of all supported language families.
///
/// Intended for API metadata. The slice is a fixed list of lowercase
/// identifiers.
pub fn supported_language_names() -> &'static [&'static str] {
    const LANGUAGE_NAMES: &[&str] = &[
        "typescript", "javascript", "rust", "python", "go",
        "java", "c", "cpp", "csharp", "dart",
        "kotlin", "swift", "ruby", "php", "shell",
    ];

    LANGUAGE_NAMES
}
122
123/// Checks if a file extension is supported.
124pub fn is_supported(extension: &str) -> bool {
125    get_parser(extension).is_some() || is_fallback_supported_extension(extension)
126}