Skip to main content

normalize_languages/
ocaml.rs

1//! OCaml language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols};
4use tree_sitter::Node;
5
/// OCaml language support.
///
/// A stateless unit struct: all behavior lives in its [`Language`] and
/// [`LanguageSymbols`] implementations in this file.
pub struct OCaml;
8
9impl Language for OCaml {
10    fn name(&self) -> &'static str {
11        "OCaml"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["ml", "mli"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "ocaml"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
25        extract_ocamldoc(node, content)
26    }
27
28    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
29        if node.kind() != "open_module" {
30            return Vec::new();
31        }
32
33        let text = &content[node.byte_range()];
34        let line = node.start_position().row + 1;
35
36        // Extract module name: "open Module.Path"
37        if let Some(rest) = text.strip_prefix("open ") {
38            let module = rest.trim().to_string();
39            return vec![Import {
40                module,
41                names: Vec::new(),
42                alias: None,
43                is_wildcard: true,
44                is_relative: false,
45                line,
46            }];
47        }
48
49        Vec::new()
50    }
51
52    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
53        // OCaml: open Module
54        format!("open {}", import.module)
55    }
56
57    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
58        let name = symbol.name.as_str();
59        match symbol.kind {
60            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
61            crate::SymbolKind::Module => name == "tests" || name == "test",
62            _ => false,
63        }
64    }
65
66    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
67        match node.kind() {
68            "module_definition" => {
69                // module_definition → module_binding → body (structure/functor)
70                let mut c = node.walk();
71                node.children(&mut c)
72                    .find(|n| n.kind() == "module_binding")
73                    .and_then(|binding| binding.child_by_field_name("body"))
74            }
75            _ => node.child_by_field_name("body"),
76        }
77    }
78
79    fn analyze_container_body(
80        &self,
81        body_node: &Node,
82        content: &str,
83        inner_indent: &str,
84    ) -> Option<ContainerBody> {
85        // OCaml module bodies: "struct ... end" or "sig ... end" —
86        // skip the opening keyword line, strip "end" from the tail
87        crate::body::analyze_keyword_end_body(body_node, content, inner_indent)
88    }
89
90    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
91        // Try standard field names first
92        if let Some(n) = node.child_by_field_name("name") {
93            return Some(&content[n.byte_range()]);
94        }
95
96        let kind = node.kind();
97        let mut cursor = node.walk();
98
99        match kind {
100            // value_definition > let_binding > value_name (first)
101            "value_definition" => {
102                for child in node.children(&mut cursor) {
103                    if child.kind() == "let_binding" {
104                        let mut inner = child.walk();
105                        for c in child.children(&mut inner) {
106                            if c.kind() == "value_name" {
107                                return Some(&content[c.byte_range()]);
108                            }
109                        }
110                    }
111                }
112                None
113            }
114            // module_definition > module_binding > module_name
115            "module_definition" => {
116                for child in node.children(&mut cursor) {
117                    if child.kind() == "module_binding" {
118                        let mut inner = child.walk();
119                        for c in child.children(&mut inner) {
120                            if c.kind() == "module_name" {
121                                return Some(&content[c.byte_range()]);
122                            }
123                        }
124                    }
125                }
126                None
127            }
128            // module_type_definition > module_type_name (direct child)
129            "module_type_definition" => {
130                for child in node.children(&mut cursor) {
131                    if child.kind() == "module_type_name" {
132                        return Some(&content[child.byte_range()]);
133                    }
134                }
135                None
136            }
137            // type_definition > type_binding > type_constructor (via name: field)
138            "type_definition" => {
139                for child in node.children(&mut cursor) {
140                    if child.kind() == "type_binding"
141                        && let Some(n) = child.child_by_field_name("name")
142                    {
143                        return Some(&content[n.byte_range()]);
144                    }
145                }
146                None
147            }
148            _ => None,
149        }
150    }
151}
152
// Nothing is overridden here: OCaml relies entirely on the default
// implementations provided by `LanguageSymbols`.
impl LanguageSymbols for OCaml {}
154
155/// Extract an OCamldoc comment (`(** ... *)`) preceding a definition node.
156///
157/// OCamldoc comments are parsed as `comment` nodes by tree-sitter-ocaml.
158/// We look for a prev sibling `comment` that starts with `(**`.
159fn extract_ocamldoc(node: &Node, content: &str) -> Option<String> {
160    let sibling = node.prev_sibling()?;
161    if sibling.kind() != "comment" {
162        return None;
163    }
164    let text = &content[sibling.byte_range()];
165    if text.starts_with("(**") && !text.starts_with("(***") {
166        Some(clean_ocamldoc(text))
167    } else {
168        None
169    }
170}
171
/// Clean an OCamldoc comment `(** ... *)` into plain text.
///
/// The opening `(**` and closing `*)` delimiters are stripped independently,
/// so an unterminated comment still loses its opening delimiter (and vice
/// versa). Each remaining line is trimmed, blank lines are dropped, and the
/// rest is joined with single spaces.
///
/// The degenerate `(**)` — where both delimiters share the middle `*` —
/// yields an empty string.
fn clean_ocamldoc(text: &str) -> String {
    // The delimiters overlap here, so sequential stripping would leave a
    // stray `)` behind.
    if text == "(**)" {
        return String::new();
    }

    let inner = text.strip_prefix("(**").unwrap_or(text);
    // Fall back to the already prefix-stripped text, not the original input.
    let inner = inner.strip_suffix("*)").unwrap_or(inner);

    inner
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
186
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Runs the crate's unused-node-kinds audit for OCaml against the list of
    /// grammar node kinds documented below; the test fails if the audit
    /// returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        // Kept in its hand-packed layout; rustfmt would put one entry per line.
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            "abstract_type", "add_operator", "aliased_type", "and_operator",
            "application_expression", "array_expression", "array_get_expression",
            "assert_expression", "assign_operator", "bigarray_get_expression",
            "class_application", "class_binding", "class_body_type",
            "class_definition", "class_function", "class_function_type",
            "class_initializer", "class_name", "class_path", "class_type_binding",
            "class_type_definition", "class_type_name", "class_type_path",
            "coercion_expression", "concat_operator", "cons_expression",
            "constrain_module", "constrain_module_type", "constrain_type",
            "constructed_type", "constructor_declaration", "constructor_name",
            "constructor_path", "constructor_pattern", "conversion_specification",
            "do_clause", "else_clause", "exception_definition", "exception_pattern",
            "expression_item", "extended_module_path", "field_declaration",
            "field_expression", "field_get_expression", "for_expression",
            "fun_expression", "function_type", "functor_type", "hash_expression",
            "hash_operator", "hash_type", "include_module", "include_module_type", "infix_expression",
            "indexing_operator", "indexing_operator_path", "inheritance_definition",
            "inheritance_specification", "instance_variable_definition",
            "instance_variable_expression", "instance_variable_specification",
            "instantiated_class", "instantiated_class_type", "labeled_argument_type",
            "labeled_tuple_element_type", "lazy_expression", "let_and_operator",
            "let_class_expression", "let_exception_expression",
            "let_module_expression", "let_open_class_expression",
            "let_open_class_type", "let_open_expression", "let_operator",
            "list_expression", "local_open_expression", "local_open_type",
            "match_operator", "method_definition", "method_invocation",
            "method_name", "method_specification", "method_type", "module_application", "module_parameter", "module_path",
            "module_type_constraint", "module_type_of",
            "module_type_path", "mult_operator", "new_expression", "object_copy_expression",
            "object_expression", "object_type", "or_operator",
            "package_expression", "package_type", "packed_module",
            "parenthesized_class_expression", "parenthesized_expression",
            "parenthesized_module_expression", "parenthesized_module_type",
            "parenthesized_operator", "parenthesized_type", "polymorphic_type",
            "polymorphic_variant_type", "pow_operator", "prefix_expression",
            "prefix_operator", "record_declaration", "record_expression",
            "refutation_case", "rel_operator", "sequence_expression",
            "set_expression", "sign_expression", "sign_operator",
            "string_get_expression", "structure", "tag_specification",
            "then_clause", "tuple_expression", "tuple_type",
            "type_constraint", "type_constructor", "type_constructor_path",
            "type_parameter_constraint", "type_variable", "typed_class_expression",
            "typed_expression", "typed_module_expression", "typed_pattern",
            "value_specification", "variant_declaration", "while_expression",
            // control flow and module opens — not extracted as symbols
            // (`open_module` is consumed by `extract_imports` instead)
            "match_expression",
            "open_module",
            "let_expression",
            "match_case",
            "function_expression",
            "if_expression",
            "try_expression",
        ];
        validate_unused_kinds_audit(&OCaml, documented_unused)
            .expect("OCaml unused node kinds audit failed");
    }
}