Skip to main content

ripvec_core/
languages.rs

//! Language registry mapping file extensions to tree-sitter grammars.
//!
//! Each supported language has a grammar and a tree-sitter query that
//! extracts function, class, and method definitions. Queries are compiled
//! once and cached, so repeated lookups for the same extension are cheap.
6
7use std::sync::{Arc, OnceLock};
8
9use tree_sitter::{Language, Query};
10
11/// Configuration for a supported source language.
12///
13/// Wrapped in [`Arc`] so it can be shared across threads and returned
14/// from the cache without cloning the compiled [`Query`].
15pub struct LangConfig {
16    /// The tree-sitter Language grammar.
17    pub language: Language,
18    /// Query that extracts semantic chunks (`@def` captures with `@name`).
19    pub query: Query,
20}
21
22/// Look up the language configuration for a file extension.
23///
24/// Compiled queries are cached per extension so repeated calls are free.
25/// Returns `None` for unsupported extensions.
26#[must_use]
27pub fn config_for_extension(ext: &str) -> Option<Arc<LangConfig>> {
28    // Cache of compiled configs, keyed by canonical extension.
29    static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<LangConfig>>> =
30        OnceLock::new();
31
32    let cache = CACHE.get_or_init(|| {
33        let mut m = std::collections::HashMap::new();
34        // Pre-compile all supported extensions
35        for &ext in &[
36            "rs", "py", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc", "cxx", "hpp",
37        ] {
38            if let Some(cfg) = compile_config(ext) {
39                m.insert(ext, Arc::new(cfg));
40            }
41        }
42        m
43    });
44
45    cache.get(ext).cloned()
46}
47
48/// Compile a [`LangConfig`] for the given extension (uncached).
49fn compile_config(ext: &str) -> Option<LangConfig> {
50    let (lang, query_str): (Language, &str) = match ext {
51        // Rust: standalone functions, structs, and methods INSIDE impl/trait blocks.
52        // impl_item and trait_item are NOT captured as wholes — we extract their
53        // individual function_item children for method-level granularity.
54        "rs" => (
55            tree_sitter_rust::LANGUAGE.into(),
56            concat!(
57                "(function_item name: (identifier) @name) @def\n",
58                "(struct_item name: (type_identifier) @name) @def\n",
59                "(enum_item name: (type_identifier) @name) @def\n",
60                "(type_item name: (type_identifier) @name) @def",
61            ),
62        ),
63        // Python: top-level functions AND methods inside classes (function_definition
64        // matches at any nesting depth, so methods are captured individually).
65        "py" => (
66            tree_sitter_python::LANGUAGE.into(),
67            concat!(
68                "(function_definition name: (identifier) @name) @def\n",
69                "(class_definition name: (identifier) @name body: (block) @def)",
70            ),
71        ),
72        // JS: functions, methods, and arrow functions assigned to variables.
73        "js" | "jsx" => (
74            tree_sitter_javascript::LANGUAGE.into(),
75            concat!(
76                "(function_declaration name: (identifier) @name) @def\n",
77                "(method_definition name: (property_identifier) @name) @def\n",
78                "(class_declaration name: (identifier) @name) @def",
79            ),
80        ),
81        "ts" => (
82            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
83            concat!(
84                "(function_declaration name: (identifier) @name) @def\n",
85                "(method_definition name: (property_identifier) @name) @def\n",
86                "(class_declaration name: (type_identifier) @name) @def\n",
87                "(interface_declaration name: (type_identifier) @name) @def",
88            ),
89        ),
90        "tsx" => (
91            tree_sitter_typescript::LANGUAGE_TSX.into(),
92            concat!(
93                "(function_declaration name: (identifier) @name) @def\n",
94                "(method_definition name: (property_identifier) @name) @def\n",
95                "(class_declaration name: (type_identifier) @name) @def\n",
96                "(interface_declaration name: (type_identifier) @name) @def",
97            ),
98        ),
99        "go" => (
100            tree_sitter_go::LANGUAGE.into(),
101            concat!(
102                "(function_declaration name: (identifier) @name) @def\n",
103                "(method_declaration name: (field_identifier) @name) @def",
104            ),
105        ),
106        // Java: methods are already captured individually (method_declaration
107        // matches inside class bodies). Keep class for the signature/fields.
108        "java" => (
109            tree_sitter_java::LANGUAGE.into(),
110            concat!(
111                "(method_declaration name: (identifier) @name) @def\n",
112                "(class_declaration name: (identifier) @name) @def\n",
113                "(interface_declaration name: (identifier) @name) @def",
114            ),
115        ),
116        "c" | "h" => (
117            tree_sitter_c::LANGUAGE.into(),
118            "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def",
119        ),
120        // C++: functions at any level, plus class signatures.
121        "cpp" | "cc" | "cxx" | "hpp" => (
122            tree_sitter_cpp::LANGUAGE.into(),
123            concat!(
124                "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
125                "(class_specifier name: (type_identifier) @name) @def",
126            ),
127        ),
128        _ => return None,
129    };
130    let query = Query::new(&lang, query_str).ok()?;
131    Some(LangConfig {
132        language: lang,
133        query,
134    })
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// The Rust extension has a registered config.
    #[test]
    fn rust_extension_resolves() {
        let cfg = config_for_extension("rs");
        assert!(cfg.is_some());
    }

    /// The Python extension has a registered config.
    #[test]
    fn python_extension_resolves() {
        let cfg = config_for_extension("py");
        assert!(cfg.is_some());
    }

    /// An extension outside the registry yields `None`.
    #[test]
    fn unknown_extension_returns_none() {
        let cfg = config_for_extension("xyz");
        assert!(cfg.is_none());
    }

    /// Every extension the registry claims to support resolves to a config,
    /// which also proves each language's query compiles against its grammar.
    #[test]
    fn all_supported_extensions() {
        let exts = [
            "rs", "py", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc", "cxx", "hpp",
        ];
        for ext in exts {
            let resolved = config_for_extension(ext).is_some();
            assert!(resolved, "failed for {ext}");
        }
    }
}
161        for ext in &exts {
162            assert!(config_for_extension(ext).is_some(), "failed for {ext}");
163        }
164    }
165}