Skip to main content

normalize_languages/
ocaml.rs

1//! OCaml language support.
2
3use crate::traits::{ImportSpec, ModuleId, ModuleResolver, Resolution, ResolverConfig};
4use crate::{ContainerBody, Import, Language, LanguageSymbols};
5use std::path::Path;
6use tree_sitter::Node;
7
8/// OCaml language support.
9pub struct OCaml;
10
11impl Language for OCaml {
12    fn name(&self) -> &'static str {
13        "OCaml"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["ml", "mli"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "ocaml"
20    }
21
22    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
23        Some(self)
24    }
25
26    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
27        extract_ocamldoc(node, content)
28    }
29
30    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
31        if node.kind() != "open_module" {
32            return Vec::new();
33        }
34
35        let text = &content[node.byte_range()];
36        let line = node.start_position().row + 1;
37
38        // Extract module name: "open Module.Path"
39        if let Some(rest) = text.strip_prefix("open ") {
40            let module = rest.trim().to_string();
41            return vec![Import {
42                module,
43                names: Vec::new(),
44                alias: None,
45                is_wildcard: true,
46                is_relative: false,
47                line,
48            }];
49        }
50
51        Vec::new()
52    }
53
54    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
55        // OCaml: open Module
56        format!("open {}", import.module)
57    }
58
59    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
60        let name = symbol.name.as_str();
61        match symbol.kind {
62            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
63            crate::SymbolKind::Module => name == "tests" || name == "test",
64            _ => false,
65        }
66    }
67
68    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
69        match node.kind() {
70            "module_definition" => {
71                // module_definition → module_binding → body (structure/functor)
72                let mut c = node.walk();
73                node.children(&mut c)
74                    .find(|n| n.kind() == "module_binding")
75                    .and_then(|binding| binding.child_by_field_name("body"))
76            }
77            _ => node.child_by_field_name("body"),
78        }
79    }
80
81    fn analyze_container_body(
82        &self,
83        body_node: &Node,
84        content: &str,
85        inner_indent: &str,
86    ) -> Option<ContainerBody> {
87        // OCaml module bodies: "struct ... end" or "sig ... end" —
88        // skip the opening keyword line, strip "end" from the tail
89        crate::body::analyze_keyword_end_body(body_node, content, inner_indent)
90    }
91
92    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
93        // Try standard field names first
94        if let Some(n) = node.child_by_field_name("name") {
95            return Some(&content[n.byte_range()]);
96        }
97
98        let kind = node.kind();
99        let mut cursor = node.walk();
100
101        match kind {
102            // value_definition > let_binding > value_name (first)
103            "value_definition" => {
104                for child in node.children(&mut cursor) {
105                    if child.kind() == "let_binding" {
106                        let mut inner = child.walk();
107                        for c in child.children(&mut inner) {
108                            if c.kind() == "value_name" {
109                                return Some(&content[c.byte_range()]);
110                            }
111                        }
112                    }
113                }
114                None
115            }
116            // module_definition > module_binding > module_name
117            "module_definition" => {
118                for child in node.children(&mut cursor) {
119                    if child.kind() == "module_binding" {
120                        let mut inner = child.walk();
121                        for c in child.children(&mut inner) {
122                            if c.kind() == "module_name" {
123                                return Some(&content[c.byte_range()]);
124                            }
125                        }
126                    }
127                }
128                None
129            }
130            // module_type_definition > module_type_name (direct child)
131            "module_type_definition" => {
132                for child in node.children(&mut cursor) {
133                    if child.kind() == "module_type_name" {
134                        return Some(&content[child.byte_range()]);
135                    }
136                }
137                None
138            }
139            // type_definition > type_binding > type_constructor (via name: field)
140            "type_definition" => {
141                for child in node.children(&mut cursor) {
142                    if child.kind() == "type_binding"
143                        && let Some(n) = child.child_by_field_name("name")
144                    {
145                        return Some(&content[n.byte_range()]);
146                    }
147                }
148                None
149            }
150            _ => None,
151        }
152    }
153
154    fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
155        static RESOLVER: OCamlModuleResolver = OCamlModuleResolver;
156        Some(&RESOLVER)
157    }
158}
159
160impl LanguageSymbols for OCaml {}
161
162// =============================================================================
163// OCaml Module Resolver
164// =============================================================================
165
166/// Module resolver for OCaml (dune/opam conventions).
167///
168/// OCaml module name = capitalized filename stem. `open Utils` → `utils.ml`.
169pub struct OCamlModuleResolver;
170
171impl ModuleResolver for OCamlModuleResolver {
172    fn workspace_config(&self, root: &Path) -> ResolverConfig {
173        ResolverConfig {
174            workspace_root: root.to_path_buf(),
175            path_mappings: Vec::new(),
176            search_roots: vec![root.to_path_buf(), root.join("lib"), root.join("src")],
177        }
178    }
179
180    fn module_of_file(&self, _root: &Path, file: &Path, _cfg: &ResolverConfig) -> Vec<ModuleId> {
181        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
182        if ext != "ml" && ext != "mli" {
183            return Vec::new();
184        }
185        if let Some(stem) = file.file_stem().and_then(|s| s.to_str()) {
186            // OCaml module name = capitalized stem
187            let module_name = {
188                let mut chars = stem.chars();
189                match chars.next() {
190                    None => String::new(),
191                    Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
192                }
193            };
194            return vec![ModuleId {
195                canonical_path: module_name,
196            }];
197        }
198        Vec::new()
199    }
200
201    fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution {
202        let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or("");
203        if ext != "ml" && ext != "mli" {
204            return Resolution::NotApplicable;
205        }
206        // Strip "open " prefix if present
207        let raw = spec.raw.strip_prefix("open ").unwrap_or(&spec.raw).trim();
208
209        // Convert module name to file: Utils → utils.ml
210        // For dotted paths: Utils.Foo → try Utils/Foo.ml first, then fall back to utils.ml
211        let parts: Vec<&str> = raw.split('.').collect();
212        let exported_name = parts.last().copied().unwrap_or(raw).to_string();
213
214        // Try same directory as from_file first (common case)
215        if let Some(parent) = from_file.parent() {
216            let file_name = parts.last().copied().unwrap_or(raw).to_lowercase();
217            for ext_try in &["ml", "mli"] {
218                let candidate = parent.join(format!("{}.{}", file_name, ext_try));
219                if candidate.exists() {
220                    return Resolution::Resolved(candidate, exported_name.clone());
221                }
222            }
223        }
224
225        // Search in search_roots
226        for search_root in &cfg.search_roots {
227            let file_name = parts.last().copied().unwrap_or(raw).to_lowercase();
228            for ext_try in &["ml", "mli"] {
229                let candidate = search_root.join(format!("{}.{}", file_name, ext_try));
230                if candidate.exists() {
231                    return Resolution::Resolved(candidate, exported_name.clone());
232                }
233            }
234        }
235        Resolution::NotFound
236    }
237}
238
239/// Extract an OCamldoc comment (`(** ... *)`) preceding a definition node.
240///
241/// OCamldoc comments are parsed as `comment` nodes by tree-sitter-ocaml.
242/// We look for a prev sibling `comment` that starts with `(**`.
243fn extract_ocamldoc(node: &Node, content: &str) -> Option<String> {
244    let sibling = node.prev_sibling()?;
245    if sibling.kind() != "comment" {
246        return None;
247    }
248    let text = &content[sibling.byte_range()];
249    if text.starts_with("(**") && !text.starts_with("(***") {
250        Some(clean_ocamldoc(text))
251    } else {
252        None
253    }
254}
255
256/// Clean an OCamldoc comment `(** ... *)` into plain text.
257fn clean_ocamldoc(text: &str) -> String {
258    let inner = text
259        .strip_prefix("(**")
260        .unwrap_or(text)
261        .strip_suffix("*)")
262        .unwrap_or(text);
263    let lines: Vec<&str> = inner
264        .lines()
265        .map(|l| l.trim())
266        .filter(|l| !l.is_empty())
267        .collect();
268    lines.join(" ")
269}
270
271#[cfg(test)]
272mod tests {
273    use super::*;
274    use crate::validate_unused_kinds_audit;
275
276    #[test]
277    fn unused_node_kinds_audit() {
278        #[rustfmt::skip]
279        let documented_unused: &[&str] = &[
280            "abstract_type", "add_operator", "aliased_type", "and_operator",
281            "application_expression", "array_expression", "array_get_expression",
282            "assert_expression", "assign_operator", "bigarray_get_expression",
283            "class_application", "class_binding", "class_body_type",
284            "class_definition", "class_function", "class_function_type",
285            "class_initializer", "class_name", "class_path", "class_type_binding",
286            "class_type_definition", "class_type_name", "class_type_path",
287            "coercion_expression", "concat_operator", "cons_expression",
288            "constrain_module", "constrain_module_type", "constrain_type",
289            "constructed_type", "constructor_declaration", "constructor_name",
290            "constructor_path", "constructor_pattern", "conversion_specification",
291            "do_clause", "else_clause", "exception_definition", "exception_pattern",
292            "expression_item", "extended_module_path", "field_declaration",
293            "field_expression", "field_get_expression", "for_expression",
294            "fun_expression", "function_type", "functor_type", "hash_expression",
295            "hash_operator", "hash_type", "include_module", "include_module_type", "infix_expression",
296            "indexing_operator", "indexing_operator_path", "inheritance_definition",
297            "inheritance_specification", "instance_variable_definition",
298            "instance_variable_expression", "instance_variable_specification",
299            "instantiated_class", "instantiated_class_type", "labeled_argument_type",
300            "labeled_tuple_element_type", "lazy_expression", "let_and_operator",
301            "let_class_expression", "let_exception_expression",
302            "let_module_expression", "let_open_class_expression",
303            "let_open_class_type", "let_open_expression", "let_operator",
304            "list_expression", "local_open_expression", "local_open_type",
305            "match_operator", "method_definition", "method_invocation",
306            "method_name", "method_specification", "method_type", "module_application", "module_parameter", "module_path",
307            "module_type_constraint", "module_type_of",
308            "module_type_path", "mult_operator", "new_expression", "object_copy_expression",
309            "object_expression", "object_type", "or_operator",
310            "package_expression", "package_type", "packed_module",
311            "parenthesized_class_expression", "parenthesized_expression",
312            "parenthesized_module_expression", "parenthesized_module_type",
313            "parenthesized_operator", "parenthesized_type", "polymorphic_type",
314            "polymorphic_variant_type", "pow_operator", "prefix_expression",
315            "prefix_operator", "record_declaration", "record_expression",
316            "refutation_case", "rel_operator", "sequence_expression",
317            "set_expression", "sign_expression", "sign_operator",
318            "string_get_expression", "structure", "tag_specification",
319            "then_clause", "tuple_expression", "tuple_type",
320            "type_constraint", "type_constructor", "type_constructor_path",
321            "type_parameter_constraint", "type_variable", "typed_class_expression",
322            "typed_expression", "typed_module_expression", "typed_pattern",
323            "value_specification", "variant_declaration", "while_expression",
324            // control flow — not extracted as symbols
325            "match_expression",
326            "open_module",
327            "let_expression",
328            "match_case",
329            "function_expression",
330            "if_expression",
331            "try_expression",
332        ];
333        validate_unused_kinds_audit(&OCaml, documented_unused)
334            .expect("OCaml unused node kinds audit failed");
335    }
336}