Skip to main content

aptu_coder_core/languages/
mod.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
//! Language-specific handlers and query definitions for tree-sitter parsing.
//!
//! Provides query strings and extraction handlers for supported languages.
//! Language support is controlled by Cargo `lang-*` features (by default all
//! available language handlers are enabled): Rust, Go, Java, JavaScript, Kotlin,
//! Python, TypeScript, TSX, Fortran, C/C++, C#, Markdown, CSS, and YAML. HTML is
//! a reserved feature stub without a grammar; JSON, TOML, and Astro are handled
//! by the regex-based fallback.
9
10#[cfg(feature = "lang-cpp")]
11pub mod cpp;
12#[cfg(feature = "lang-csharp")]
13pub mod csharp;
14#[cfg(feature = "lang-css")]
15pub mod css;
16#[cfg(feature = "lang-fortran")]
17pub mod fortran;
18#[cfg(feature = "lang-go")]
19pub mod go;
20#[cfg(feature = "lang-html")]
21pub mod html;
22#[cfg(feature = "lang-java")]
23pub mod java;
24#[cfg(feature = "lang-javascript")]
25pub mod javascript;
26#[cfg(feature = "lang-kotlin")]
27pub mod kotlin;
28#[cfg(feature = "lang-markdown")]
29pub mod markdown;
30#[cfg(feature = "lang-python")]
31pub mod python;
32pub mod regex_fallback;
33#[cfg(feature = "lang-rust")]
34pub mod rust;
35#[cfg(any(feature = "lang-typescript", feature = "lang-tsx"))]
36pub mod typescript;
37#[cfg(feature = "lang-yaml")]
38pub mod yaml;
39
40use tree_sitter::{Language, Node};
41
42/// Extract the source text for a node with a bounds check.
43///
44/// Returns `None` if the node's byte range falls outside `source`.
45#[must_use]
46pub fn get_node_text(node: &Node, source: &str) -> Option<String> {
47    let end = node.end_byte();
48    if end <= source.len() {
49        Some(source[node.start_byte()..end].to_string())
50    } else {
51        None
52    }
53}
54
/// Handler to extract a function name from a node.
///
/// Stored in [`LanguageInfo::extract_function_name`]. Receives a tree-sitter
/// node and two string slices, and yields the name as an owned `String` when
/// one is found.
// NOTE(review): the two `&str` arguments are presumably the source text and a
// node-kind/capture hint -- confirm against the per-language implementations.
pub type ExtractFunctionNameHandler = fn(&Node, &str, &str) -> Option<String>;

/// Handler to find the method name for a receiver type.
///
/// Stored in [`LanguageInfo::find_method_for_receiver`]. Receives a tree-sitter
/// node, a string slice, and an optional `usize`.
// NOTE(review): the `&str` is presumably the source text; the meaning of the
// `Option<usize>` argument is not visible in this module -- confirm with callers.
pub type FindMethodForReceiverHandler = fn(&Node, &str, Option<usize>) -> Option<String>;

/// Handler to find the receiver type for a method.
///
/// Stored in [`LanguageInfo::find_receiver_type`]. Receives a tree-sitter node
/// and a string slice (presumably the source text -- TODO confirm).
pub type FindReceiverTypeHandler = fn(&Node, &str) -> Option<String>;

/// Handler to extract inheritance information from a class node.
///
/// Stored in [`LanguageInfo::extract_inheritance`]. Receives a tree-sitter node
/// and a string slice (presumably the source text -- TODO confirm) and returns
/// a list of strings.
pub type ExtractInheritanceHandler = fn(&Node, &str) -> Vec<String>;
66
/// Information about a supported language for code analysis.
///
/// Bundles the tree-sitter grammar with the query strings and optional
/// extraction handlers exported by the matching language module. Instances are
/// built by [`get_language_info`]; optional members are `None` for languages
/// that do not provide them.
pub struct LanguageInfo {
    /// Language identifier; `get_language_info` sets it to the key it was
    /// looked up with (e.g. `"rust"`, `"c"`, `"cpp"`).
    pub name: &'static str,
    /// Tree-sitter grammar for this language.
    pub language: Language,
    /// Query string taken from the language module's `ELEMENT_QUERY`.
    pub element_query: &'static str,
    /// Query string taken from the language module's `CALL_QUERY`.
    pub call_query: &'static str,
    /// Query string from the module's `REFERENCE_QUERY`, if it defines one.
    pub reference_query: Option<&'static str>,
    /// Query string from the module's `IMPORT_QUERY`, if it defines one.
    pub import_query: Option<&'static str>,
    /// Query string from the module's `IMPL_QUERY`; only set for Rust here.
    pub impl_query: Option<&'static str>,
    /// Query string from the module's `IMPL_TRAIT_QUERY`; only set for Rust here.
    pub impl_trait_query: Option<&'static str>,
    /// Query string from the module's `DEFUSE_QUERY`, if it defines one.
    pub defuse_query: Option<&'static str>,
    /// Optional language-specific function-name extractor.
    pub extract_function_name: Option<ExtractFunctionNameHandler>,
    /// Optional language-specific lookup of a method for a receiver.
    pub find_method_for_receiver: Option<FindMethodForReceiverHandler>,
    /// Optional language-specific lookup of a method's receiver type.
    pub find_receiver_type: Option<FindReceiverTypeHandler>,
    /// Optional language-specific inheritance extractor.
    pub extract_inheritance: Option<ExtractInheritanceHandler>,
}
83
84/// Get language information by language name.
85#[allow(clippy::too_many_lines)] // exhaustive match over all supported languages; splitting harms readability
86pub fn get_language_info(lang_name: &str) -> Option<LanguageInfo> {
87    match lang_name {
88        #[cfg(feature = "lang-rust")]
89        "rust" => Some(LanguageInfo {
90            name: "rust",
91            language: tree_sitter_rust::LANGUAGE.into(),
92            element_query: rust::ELEMENT_QUERY,
93            call_query: rust::CALL_QUERY,
94            reference_query: Some(rust::REFERENCE_QUERY),
95            import_query: Some(rust::IMPORT_QUERY),
96            impl_query: Some(rust::IMPL_QUERY),
97            impl_trait_query: Some(rust::IMPL_TRAIT_QUERY),
98            defuse_query: Some(rust::DEFUSE_QUERY),
99            extract_function_name: Some(rust::extract_function_name),
100            find_method_for_receiver: Some(rust::find_method_for_receiver),
101            find_receiver_type: Some(rust::find_receiver_type),
102            extract_inheritance: Some(rust::extract_inheritance),
103        }),
104        #[cfg(feature = "lang-python")]
105        "python" => Some(LanguageInfo {
106            name: "python",
107            language: tree_sitter_python::LANGUAGE.into(),
108            element_query: python::ELEMENT_QUERY,
109            call_query: python::CALL_QUERY,
110            reference_query: Some(python::REFERENCE_QUERY),
111            import_query: Some(python::IMPORT_QUERY),
112            impl_query: None,
113            impl_trait_query: None,
114            defuse_query: Some(python::DEFUSE_QUERY),
115            extract_function_name: None,
116            find_method_for_receiver: None,
117            find_receiver_type: None,
118            extract_inheritance: Some(python::extract_inheritance),
119        }),
120        #[cfg(feature = "lang-typescript")]
121        "typescript" => Some(LanguageInfo {
122            name: "typescript",
123            language: tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
124            element_query: typescript::ELEMENT_QUERY,
125            call_query: typescript::CALL_QUERY,
126            reference_query: Some(typescript::REFERENCE_QUERY),
127            import_query: Some(typescript::IMPORT_QUERY),
128            impl_query: None,
129            impl_trait_query: None,
130            defuse_query: Some(typescript::DEFUSE_QUERY),
131            extract_function_name: None,
132            find_method_for_receiver: None,
133            find_receiver_type: None,
134            extract_inheritance: Some(typescript::extract_inheritance),
135        }),
136        #[cfg(feature = "lang-tsx")]
137        "tsx" => Some(LanguageInfo {
138            name: "tsx",
139            language: tree_sitter_typescript::LANGUAGE_TSX.into(),
140            element_query: typescript::ELEMENT_QUERY,
141            call_query: typescript::CALL_QUERY,
142            reference_query: Some(typescript::REFERENCE_QUERY),
143            import_query: Some(typescript::IMPORT_QUERY),
144            impl_query: None,
145            impl_trait_query: None,
146            defuse_query: Some(typescript::DEFUSE_QUERY),
147            extract_function_name: None,
148            find_method_for_receiver: None,
149            find_receiver_type: None,
150            extract_inheritance: Some(typescript::extract_inheritance),
151        }),
152        #[cfg(feature = "lang-go")]
153        "go" => Some(LanguageInfo {
154            name: "go",
155            language: tree_sitter_go::LANGUAGE.into(),
156            element_query: go::ELEMENT_QUERY,
157            call_query: go::CALL_QUERY,
158            reference_query: Some(go::REFERENCE_QUERY),
159            import_query: Some(go::IMPORT_QUERY),
160            impl_query: None,
161            impl_trait_query: None,
162            defuse_query: Some(go::DEFUSE_QUERY),
163            extract_function_name: Some(go::extract_function_name),
164            find_method_for_receiver: Some(go::find_method_for_receiver),
165            find_receiver_type: Some(go::find_receiver_type),
166            extract_inheritance: Some(go::extract_inheritance),
167        }),
168        #[cfg(feature = "lang-cpp")]
169        "c" | "cpp" => Some(LanguageInfo {
170            name: if lang_name == "c" { "c" } else { "cpp" },
171            language: tree_sitter_cpp::LANGUAGE.into(),
172            element_query: cpp::ELEMENT_QUERY,
173            call_query: cpp::CALL_QUERY,
174            reference_query: Some(cpp::REFERENCE_QUERY),
175            import_query: Some(cpp::IMPORT_QUERY),
176            impl_query: None,
177            impl_trait_query: None,
178            defuse_query: Some(cpp::DEFUSE_QUERY),
179            extract_function_name: Some(cpp::extract_function_name),
180            find_method_for_receiver: Some(cpp::find_method_for_receiver),
181            find_receiver_type: None,
182            extract_inheritance: Some(cpp::extract_inheritance),
183        }),
184        #[cfg(feature = "lang-java")]
185        "java" => Some(LanguageInfo {
186            name: "java",
187            language: tree_sitter_java::LANGUAGE.into(),
188            element_query: java::ELEMENT_QUERY,
189            call_query: java::CALL_QUERY,
190            reference_query: Some(java::REFERENCE_QUERY),
191            import_query: Some(java::IMPORT_QUERY),
192            impl_query: None,
193            impl_trait_query: None,
194            defuse_query: Some(java::DEFUSE_QUERY),
195            extract_function_name: Some(java::extract_function_name),
196            find_method_for_receiver: Some(java::find_method_for_receiver),
197            find_receiver_type: Some(java::find_receiver_type),
198            extract_inheritance: Some(java::extract_inheritance),
199        }),
200        #[cfg(feature = "lang-kotlin")]
201        "kotlin" => Some(LanguageInfo {
202            name: "kotlin",
203            language: tree_sitter_kotlin_ng::LANGUAGE.into(),
204            element_query: kotlin::ELEMENT_QUERY,
205            call_query: kotlin::CALL_QUERY,
206            reference_query: Some(kotlin::REFERENCE_QUERY),
207            import_query: Some(kotlin::IMPORT_QUERY),
208            impl_query: None,
209            impl_trait_query: None,
210            defuse_query: Some(kotlin::DEFUSE_QUERY),
211            extract_function_name: Some(kotlin::extract_function_name),
212            find_method_for_receiver: Some(kotlin::find_method_for_receiver),
213            find_receiver_type: Some(kotlin::find_receiver_type),
214            extract_inheritance: Some(kotlin::extract_inheritance),
215        }),
216        #[cfg(feature = "lang-fortran")]
217        "fortran" => Some(LanguageInfo {
218            name: "fortran",
219            language: tree_sitter_fortran::LANGUAGE.into(),
220            element_query: fortran::ELEMENT_QUERY,
221            call_query: fortran::CALL_QUERY,
222            reference_query: Some(fortran::REFERENCE_QUERY),
223            import_query: Some(fortran::IMPORT_QUERY),
224            impl_query: None,
225            impl_trait_query: None,
226            defuse_query: None,
227            extract_function_name: Some(fortran::extract_function_name),
228            find_method_for_receiver: Some(fortran::find_method_for_receiver),
229            find_receiver_type: Some(fortran::find_receiver_type),
230            extract_inheritance: Some(fortran::extract_inheritance),
231        }),
232        #[cfg(feature = "lang-csharp")]
233        "csharp" => Some(LanguageInfo {
234            name: "csharp",
235            language: tree_sitter_c_sharp::LANGUAGE.into(),
236            element_query: csharp::ELEMENT_QUERY,
237            call_query: csharp::CALL_QUERY,
238            reference_query: Some(csharp::REFERENCE_QUERY),
239            import_query: Some(csharp::IMPORT_QUERY),
240            impl_query: None,
241            impl_trait_query: None,
242            defuse_query: Some(csharp::DEFUSE_QUERY),
243            extract_function_name: Some(csharp::extract_function_name),
244            find_method_for_receiver: Some(csharp::find_method_for_receiver),
245            find_receiver_type: Some(csharp::find_receiver_type),
246            extract_inheritance: Some(csharp::extract_inheritance),
247        }),
248        #[cfg(feature = "lang-javascript")]
249        "javascript" => Some(LanguageInfo {
250            name: "javascript",
251            language: tree_sitter_javascript::LANGUAGE.into(),
252            element_query: javascript::ELEMENT_QUERY,
253            call_query: javascript::CALL_QUERY,
254            reference_query: None,
255            import_query: Some(javascript::IMPORT_QUERY),
256            impl_query: None,
257            impl_trait_query: None,
258            defuse_query: Some(javascript::DEFUSE_QUERY),
259            extract_function_name: Some(javascript::extract_function_name),
260            find_method_for_receiver: Some(javascript::find_method_for_receiver),
261            find_receiver_type: Some(javascript::find_receiver_type),
262            extract_inheritance: Some(javascript::extract_inheritance),
263        }),
264        // HTML is a reserved feature stub. `tree-sitter-html` 0.23.x is incompatible with the
265        // tree-sitter 0.26 API used by this crate; full HTML support is blocked on the
266        // tree-sitter-html ^0.25 release. Until then, analysis of `.html`/`.htm` files returns
267        // `None` here, which causes `analyze_file` to emit an INVALID_PARAMS error with the
268        // message "unsupported language: html". This is intentional: the extension is registered
269        // so that the file-type is recognised and a clear error surfaces rather than silently
270        // skipping the file.
271        // TODO: implement once tree-sitter-html ^0.25 ships.
272        //       Track releases: https://github.com/tree-sitter/tree-sitter-html/releases
273        #[cfg(feature = "lang-html")]
274        "html" => None,
275        #[cfg(feature = "lang-markdown")]
276        "markdown" => Some(LanguageInfo {
277            name: "markdown",
278            language: tree_sitter_md::LANGUAGE.into(),
279            element_query: markdown::ELEMENT_QUERY,
280            call_query: markdown::CALL_QUERY,
281            reference_query: None,
282            import_query: None,
283            impl_query: None,
284            impl_trait_query: None,
285            defuse_query: None,
286            extract_function_name: None,
287            find_method_for_receiver: None,
288            find_receiver_type: None,
289            extract_inheritance: None,
290        }),
291        #[cfg(feature = "lang-css")]
292        "css" => Some(LanguageInfo {
293            name: "css",
294            language: tree_sitter_css::LANGUAGE.into(),
295            element_query: css::ELEMENT_QUERY,
296            call_query: css::CALL_QUERY,
297            reference_query: None,
298            import_query: Some(css::IMPORT_QUERY),
299            impl_query: None,
300            impl_trait_query: None,
301            defuse_query: None,
302            extract_function_name: None,
303            find_method_for_receiver: None,
304            find_receiver_type: None,
305            extract_inheritance: None,
306        }),
307        #[cfg(feature = "lang-yaml")]
308        "yaml" => Some(LanguageInfo {
309            name: "yaml",
310            language: tree_sitter_yaml::LANGUAGE.into(),
311            element_query: yaml::ELEMENT_QUERY,
312            call_query: yaml::CALL_QUERY,
313            reference_query: None,
314            import_query: None,
315            impl_query: None,
316            impl_trait_query: None,
317            defuse_query: None,
318            extract_function_name: None,
319            find_method_for_receiver: None,
320            find_receiver_type: None,
321            extract_inheritance: None,
322        }),
323        _ => None,
324    }
325}
326
327/// Get the tree-sitter Language object for a given language name.
328///
329/// Returns `None` if the language is not supported or not compiled in.
330#[must_use]
331pub fn get_ts_language(lang_name: &str) -> Option<Language> {
332    match lang_name {
333        #[cfg(feature = "lang-rust")]
334        "rust" => Some(tree_sitter_rust::LANGUAGE.into()),
335        #[cfg(feature = "lang-python")]
336        "python" => Some(tree_sitter_python::LANGUAGE.into()),
337        #[cfg(feature = "lang-typescript")]
338        "typescript" => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
339        #[cfg(feature = "lang-tsx")]
340        "tsx" => Some(tree_sitter_typescript::LANGUAGE_TSX.into()),
341        #[cfg(feature = "lang-go")]
342        "go" => Some(tree_sitter_go::LANGUAGE.into()),
343        #[cfg(feature = "lang-cpp")]
344        "c" | "cpp" => Some(tree_sitter_cpp::LANGUAGE.into()),
345        #[cfg(feature = "lang-java")]
346        "java" => Some(tree_sitter_java::LANGUAGE.into()),
347        #[cfg(feature = "lang-kotlin")]
348        "kotlin" => Some(tree_sitter_kotlin_ng::LANGUAGE.into()),
349        #[cfg(feature = "lang-fortran")]
350        "fortran" => Some(tree_sitter_fortran::LANGUAGE.into()),
351        #[cfg(feature = "lang-csharp")]
352        "csharp" => Some(tree_sitter_c_sharp::LANGUAGE.into()),
353        #[cfg(feature = "lang-javascript")]
354        "javascript" => Some(tree_sitter_javascript::LANGUAGE.into()),
355        #[cfg(feature = "lang-css")]
356        "css" => Some(tree_sitter_css::LANGUAGE.into()),
357        #[cfg(feature = "lang-yaml")]
358        "yaml" => Some(tree_sitter_yaml::LANGUAGE.into()),
359        _ => None,
360    }
361}
362
363/// Attempt regex-based extraction for formats without a tree-sitter grammar.
364///
365/// Returns `Some(SemanticAnalysis)` for CSS, YAML, JSON, TOML, and Astro;
366/// `None` for all other language identifiers (caller should treat as unsupported).
367#[must_use]
368pub fn try_regex_fallback(source: &str, language: &str) -> Option<crate::types::SemanticAnalysis> {
369    match language {
370        #[cfg(not(feature = "lang-css"))]
371        "css" => Some(regex_fallback::extract_css(source)),
372        #[cfg(not(feature = "lang-yaml"))]
373        "yaml" => Some(regex_fallback::extract_yaml(source)),
374        "json" => Some(regex_fallback::extract_json(source)),
375        "toml" => Some(regex_fallback::extract_toml(source)),
376        "astro" => Some(regex_fallback::extract_astro(source)),
377        _ => None,
378    }
379}
380
// Unit tests for the language lookup functions. Assertions that depend on an
// optional `lang-*` feature are gated on that feature so the suite also passes
// when the crate is built with a reduced feature set.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_language_info_known() {
        // Happy path: every compiled-in language returns Some
        #[cfg(feature = "lang-rust")]
        assert!(
            get_language_info("rust").is_some(),
            "expected Some for 'rust'"
        );
        #[cfg(feature = "lang-go")]
        assert!(get_language_info("go").is_some(), "expected Some for 'go'");
        #[cfg(feature = "lang-python")]
        assert!(
            get_language_info("python").is_some(),
            "expected Some for 'python'"
        );
    }

    #[cfg(feature = "lang-cpp")]
    #[test]
    fn test_get_language_info_c_and_cpp_names() {
        // Both keys share the C++ grammar but must report the key they were looked up with
        assert_eq!(get_language_info("c").map(|info| info.name), Some("c"));
        assert_eq!(get_language_info("cpp").map(|info| info.name), Some("cpp"));
    }

    #[test]
    fn test_get_language_info_unknown() {
        // Edge case: unknown language returns None
        assert!(
            get_language_info("cobol").is_none(),
            "expected None for 'cobol'"
        );
    }

    #[test]
    fn test_get_language_info_html_stub() {
        // HTML is a reserved stub: None with or without the `lang-html` feature
        assert!(
            get_language_info("html").is_none(),
            "expected None for 'html'"
        );
    }

    #[cfg(feature = "lang-rust")]
    #[test]
    fn test_get_ts_language_known() {
        // Happy path: known language returns Some
        assert!(
            get_ts_language("rust").is_some(),
            "expected Some for 'rust'"
        );
    }

    #[test]
    fn test_get_ts_language_unknown() {
        // Edge case: unknown language returns None
        assert!(
            get_ts_language("cobol").is_none(),
            "expected None for 'cobol'"
        );
    }

    #[test]
    fn test_try_regex_fallback_unknown() {
        // Edge case: identifiers without a regex extractor return None
        assert!(
            try_regex_fallback("", "cobol").is_none(),
            "expected None for 'cobol'"
        );
    }
}