arbor_core/languages/mod.rs
1//! Language parsers module.
2//!
3//! Each supported language has its own submodule that implements
4//! the LanguageParser trait. This keeps language-specific quirks
5//! isolated and makes it straightforward to add new languages.
6
7mod c;
8mod cpp;
9mod dart;
10mod go;
11mod java;
12mod python;
13mod rust;
14mod typescript;
15
16use crate::node::CodeNode;
17
18/// Trait for language-specific parsing logic.
19///
20/// Each language needs to implement this to handle its unique AST
21/// structure and idioms. The trait provides the Tree-sitter language
22/// and the extraction logic.
23pub trait LanguageParser: Send + Sync {
24 /// Returns the Tree-sitter language for this parser.
25 fn language(&self) -> tree_sitter::Language;
26
27 /// File extensions this parser handles.
28 fn extensions(&self) -> &[&str];
29
30 /// Extracts CodeNodes from a parsed Tree-sitter tree.
31 ///
32 /// This is where the magic happens. Each language traverses
33 /// its AST differently to find functions, classes, etc.
34 fn extract_nodes(
35 &self,
36 tree: &tree_sitter::Tree,
37 source: &str,
38 file_path: &str,
39 ) -> Vec<CodeNode>;
40}
41
42/// Gets a parser for the given file extension.
43///
44/// Returns None if we don't support this extension.
45pub fn get_parser(extension: &str) -> Option<Box<dyn LanguageParser>> {
46 match extension.to_lowercase().as_str() {
47 // TypeScript and JavaScript
48 "ts" | "tsx" | "mts" | "cts" => Some(Box::new(typescript::TypeScriptParser)),
49 "js" | "jsx" | "mjs" | "cjs" => Some(Box::new(typescript::TypeScriptParser)),
50
51 // Rust
52 "rs" => Some(Box::new(rust::RustParser)),
53
54 // Python
55 "py" | "pyi" => Some(Box::new(python::PythonParser)),
56
57 // Go
58 "go" => Some(Box::new(go::GoParser)),
59
60 // Java
61 "java" => Some(Box::new(java::JavaParser)),
62
63 // C
64 "c" | "h" => Some(Box::new(c::CParser)),
65
66 // C++
67 "cpp" | "hpp" | "cc" | "hh" | "cxx" | "hxx" => Some(Box::new(cpp::CppParser)),
68
69 // Dart
70 "dart" => Some(Box::new(dart::DartParser)),
71
72 _ => None,
73 }
74}
75
/// Returns every file extension that has a registered parser.
///
/// Entries are lowercase, carry no leading dot, and are grouped by
/// language. The list mirrors the extensions matched in `get_parser`.
pub fn supported_extensions() -> &'static [&'static str] {
    const EXTENSIONS: &[&str] = &[
        // TypeScript
        "ts", "tsx", "mts", "cts",
        // JavaScript
        "js", "jsx", "mjs", "cjs",
        // Rust
        "rs",
        // Python
        "py", "pyi",
        // Go
        "go",
        // Java
        "java",
        // C
        "c", "h",
        // C++
        "cpp", "hpp", "cc", "hh", "cxx", "hxx",
        // Dart
        "dart",
    ];

    EXTENSIONS
}
90
91/// Checks if a file extension is supported.
92pub fn is_supported(extension: &str) -> bool {
93 get_parser(extension).is_some()
94}