arbor_core/languages/mod.rs
1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod dart;
10mod go;
11mod java;
12mod python;
13mod rust;
14mod typescript;
15
16use crate::fallback_parser::is_fallback_supported_extension;
17use crate::node::CodeNode;
18
19/// Trait for language-specific parsing logic.
20///
21/// Each language needs to implement this to handle its unique AST
22/// structure and idioms. The trait provides the Tree-sitter language
23/// and the extraction logic.
24pub trait LanguageParser: Send + Sync {
25 /// Returns the Tree-sitter language for this parser.
26 fn language(&self) -> tree_sitter::Language;
27
28 /// File extensions this parser handles.
29 fn extensions(&self) -> &[&str];
30
31 /// Extracts CodeNodes from a parsed Tree-sitter tree.
32 ///
33 /// This is where the magic happens. Each language traverses
34 /// its AST differently to find functions, classes, etc.
35 fn extract_nodes(
36 &self,
37 tree: &tree_sitter::Tree,
38 source: &str,
39 file_path: &str,
40 ) -> Vec<CodeNode>;
41}
42
43/// Gets a parser for the given file extension.
44///
45/// Returns None if we don't support this extension.
46pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
47 match extension.to_lowercase().as_str() {
48 // TypeScript and JavaScript
49 "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
50 "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
51
52 // Rust
53 "rs" => Some(Box::new(rust::RustParser)),
54
55 // Python
56 "py" | "pyi" => Some(Box::new(python::PythonParser)),
57
58 // Go
59 "go" => Some(Box::new(go::GoParser)),
60
61 // Java
62 "java" => Some(Box::new(java::JavaParser)),
63
64 // C
65 "c" | "h" => Some(Box::new(c::CParser)),
66
67 // C++
68 "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
69
70 // Dart
71 "dart" => Some(Box::new(dart::DartParser)),
72
73 _ => None,
74 }
75}
76
/// Returns every file extension reported as supported.
///
/// The list is a fixed, ordered table. Its trailing entries are marked
/// as handled by the fallback parser.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts", "tsx", "mts", "cts",
        // JavaScript
        "js", "jsx", "mjs", "cjs",
        // Rust
        "rs",
        // Python
        "py", "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c", "h",
        // C++
        "cpp", "hpp", "cc", "hh", "cxx", "hxx",
        // Dart
        "dart",
        // Kotlin (fallback parser)
        "kt", "kts",
        // Swift (fallback parser)
        "swift",
        // Ruby (fallback parser)
        "rb",
        // PHP (fallback parser)
        "php", "phtml",
        // Shell (fallback parser)
        "sh", "bash", "zsh",
    ];

    EXTENSIONS
}
96
97/// Checks if a file extension is supported.
98pub fn is_supported(extension: &str) -> bool {
99 get_parser(extension).is_some() || is_fallback_supported_extension(extension)
100}