Skip to main content

normalize_languages/
ocaml.rs

1//! OCaml language support.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{
5    Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism,
6    simple_function_symbol,
7};
8use std::path::{Path, PathBuf};
9use tree_sitter::Node;
10
/// Zero-sized marker type for the OCaml language.
///
/// All OCaml-specific behaviour lives in this type's [`Language`] implementation.
pub struct OCaml;
13
14impl Language for OCaml {
15    fn name(&self) -> &'static str {
16        "OCaml"
17    }
18    fn extensions(&self) -> &'static [&'static str] {
19        &["ml", "mli"]
20    }
21    fn grammar_name(&self) -> &'static str {
22        "ocaml"
23    }
24
25    fn has_symbols(&self) -> bool {
26        true
27    }
28
29    fn container_kinds(&self) -> &'static [&'static str] {
30        &[
31            "module_definition",
32            "module_type_definition",
33            "type_definition",
34        ]
35    }
36
37    fn function_kinds(&self) -> &'static [&'static str] {
38        &["value_definition", "let_binding"]
39    }
40
41    fn type_kinds(&self) -> &'static [&'static str] {
42        &["type_definition"]
43    }
44
45    fn import_kinds(&self) -> &'static [&'static str] {
46        &["open_module"]
47    }
48
49    fn public_symbol_kinds(&self) -> &'static [&'static str] {
50        &["value_definition", "type_definition", "module_definition"]
51    }
52
53    fn visibility_mechanism(&self) -> VisibilityMechanism {
54        VisibilityMechanism::ExplicitExport // .mli interface files
55    }
56
57    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
58        let name = match self.node_name(node, content) {
59            Some(n) => n.to_string(),
60            None => return Vec::new(),
61        };
62
63        let kind = match node.kind() {
64            "value_definition" | "let_binding" => SymbolKind::Function,
65            "type_definition" => SymbolKind::Type,
66            "module_definition" => SymbolKind::Module,
67            "module_type_definition" => SymbolKind::Interface,
68            _ => return Vec::new(),
69        };
70
71        vec![Export {
72            name,
73            kind,
74            line: node.start_position().row + 1,
75        }]
76    }
77
78    fn scope_creating_kinds(&self) -> &'static [&'static str] {
79        &["let_expression", "function_expression", "match_expression"]
80    }
81
82    fn control_flow_kinds(&self) -> &'static [&'static str] {
83        &["if_expression", "match_expression", "try_expression"]
84    }
85
86    fn complexity_nodes(&self) -> &'static [&'static str] {
87        &["if_expression", "match_expression", "match_case"]
88    }
89
90    fn nesting_nodes(&self) -> &'static [&'static str] {
91        &["let_expression", "module_definition", "match_expression"]
92    }
93
94    fn signature_suffix(&self) -> &'static str {
95        ""
96    }
97
98    fn extract_function(&self, node: &Node, content: &str, _in_container: bool) -> Option<Symbol> {
99        let name = self.node_name(node, content)?;
100        Some(simple_function_symbol(
101            node,
102            content,
103            name,
104            self.extract_docstring(node, content),
105        ))
106    }
107
108    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol> {
109        let name = self.node_name(node, content)?;
110
111        let (kind, keyword) = match node.kind() {
112            "module_definition" => (SymbolKind::Module, "module"),
113            "module_type_definition" => (SymbolKind::Interface, "module type"),
114            "type_definition" => (SymbolKind::Type, "type"),
115            _ => return None,
116        };
117
118        Some(Symbol {
119            name: name.to_string(),
120            kind,
121            signature: format!("{} {}", keyword, name),
122            docstring: self.extract_docstring(node, content),
123            attributes: Vec::new(),
124            start_line: node.start_position().row + 1,
125            end_line: node.end_position().row + 1,
126            visibility: Visibility::Public,
127            children: Vec::new(),
128            is_interface_impl: false,
129            implements: Vec::new(),
130        })
131    }
132
133    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol> {
134        if node.kind() != "type_definition" {
135            return None;
136        }
137        self.extract_container(node, content)
138    }
139
140    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
141        // OCaml uses (** ... *) for ocamldoc
142        let mut prev = node.prev_sibling();
143        while let Some(sibling) = prev {
144            let text = &content[sibling.byte_range()];
145            if sibling.kind() == "comment" && text.starts_with("(**") {
146                let inner = text
147                    .strip_prefix("(**")
148                    .unwrap_or(text)
149                    .strip_suffix("*)")
150                    .unwrap_or(text)
151                    .trim();
152                if !inner.is_empty() {
153                    return Some(inner.to_string());
154                }
155            }
156            prev = sibling.prev_sibling();
157        }
158        None
159    }
160
161    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
162        Vec::new()
163    }
164
165    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
166        if node.kind() != "open_module" {
167            return Vec::new();
168        }
169
170        let text = &content[node.byte_range()];
171        let line = node.start_position().row + 1;
172
173        // Extract module name: "open Module.Path"
174        if let Some(rest) = text.strip_prefix("open ") {
175            let module = rest.trim().to_string();
176            return vec![Import {
177                module,
178                names: Vec::new(),
179                alias: None,
180                is_wildcard: true,
181                is_relative: false,
182                line,
183            }];
184        }
185
186        Vec::new()
187    }
188
189    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
190        // OCaml: open Module
191        format!("open {}", import.module)
192    }
193
194    fn is_public(&self, _node: &Node, _content: &str) -> bool {
195        true
196    }
197    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
198        Visibility::Public
199    }
200
201    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
202        let name = symbol.name.as_str();
203        match symbol.kind {
204            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
205            crate::SymbolKind::Module => name == "tests" || name == "test",
206            _ => false,
207        }
208    }
209
210    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
211        None
212    }
213
214    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
215        node.child_by_field_name("body")
216    }
217
218    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
219        false
220    }
221
222    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
223        node.child_by_field_name("name")
224            .map(|n| &content[n.byte_range()])
225    }
226
227    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
228        let ext = path.extension()?.to_str()?;
229        if ext != "ml" && ext != "mli" {
230            return None;
231        }
232        let stem = path.file_stem()?.to_str()?;
233        // OCaml module names are capitalized
234        let mut chars: Vec<char> = stem.chars().collect();
235        if let Some(c) = chars.first_mut() {
236            *c = c.to_ascii_uppercase();
237        }
238        Some(chars.into_iter().collect())
239    }
240
241    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
242        let lower = module.to_lowercase();
243        vec![format!("{}.ml", lower), format!("{}.mli", lower)]
244    }
245
246    fn lang_key(&self) -> &'static str {
247        "ocaml"
248    }
249
250    fn is_stdlib_import(&self, import_name: &str, _project_root: &Path) -> bool {
251        // Core OCaml modules
252        matches!(
253            import_name,
254            "Stdlib"
255                | "Pervasives"
256                | "Printf"
257                | "List"
258                | "Array"
259                | "String"
260                | "Bytes"
261                | "Char"
262                | "Int"
263                | "Float"
264                | "Bool"
265                | "Unit"
266                | "Fun"
267                | "Option"
268                | "Result"
269                | "Seq"
270                | "Map"
271                | "Set"
272                | "Hashtbl"
273                | "Stack"
274                | "Queue"
275                | "Stream"
276                | "Buffer"
277                | "Format"
278                | "Scanf"
279                | "Arg"
280                | "Filename"
281                | "Sys"
282                | "Unix"
283        )
284    }
285
286    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
287        None
288    }
289
290    fn resolve_local_import(
291        &self,
292        import: &str,
293        _current_file: &Path,
294        project_root: &Path,
295    ) -> Option<PathBuf> {
296        let lower = import.to_lowercase();
297        for ext in &["ml", "mli"] {
298            let candidates = [
299                project_root.join("lib").join(format!("{}.{}", lower, ext)),
300                project_root.join("src").join(format!("{}.{}", lower, ext)),
301                project_root.join(format!("{}.{}", lower, ext)),
302            ];
303            for c in &candidates {
304                if c.is_file() {
305                    return Some(c.clone());
306                }
307            }
308        }
309        None
310    }
311
312    fn resolve_external_import(
313        &self,
314        _import_name: &str,
315        _project_root: &Path,
316    ) -> Option<ResolvedPackage> {
317        None
318    }
319
320    fn get_version(&self, project_root: &Path) -> Option<String> {
321        // Check for dune or opam files
322        if project_root.join("dune-project").is_file() {
323            return Some("dune".to_string());
324        }
325        let opam_files: Vec<_> = std::fs::read_dir(project_root)
326            .ok()?
327            .filter_map(|e| e.ok())
328            .filter(|e| e.path().extension().map_or(false, |ext| ext == "opam"))
329            .collect();
330        if !opam_files.is_empty() {
331            return Some("opam".to_string());
332        }
333        None
334    }
335
336    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
337        if let Some(home) = std::env::var_os("HOME") {
338            let opam = PathBuf::from(home).join(".opam");
339            if opam.is_dir() {
340                return Some(opam);
341            }
342        }
343        None
344    }
345
346    fn indexable_extensions(&self) -> &'static [&'static str] {
347        &["ml", "mli"]
348    }
349    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
350        Vec::new()
351    }
352
353    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
354        use crate::traits::{has_extension, skip_dotfiles};
355        if skip_dotfiles(name) {
356            return true;
357        }
358        if is_dir && name == "_build" {
359            return true;
360        }
361        !is_dir && !has_extension(name, self.indexable_extensions())
362    }
363
364    fn discover_packages(&self, _source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
365        Vec::new()
366    }
367
368    fn package_module_name(&self, entry_name: &str) -> String {
369        let stem = entry_name
370            .strip_suffix(".ml")
371            .or_else(|| entry_name.strip_suffix(".mli"))
372            .unwrap_or(entry_name);
373        // Capitalize for OCaml module name
374        let mut chars: Vec<char> = stem.chars().collect();
375        if let Some(c) = chars.first_mut() {
376            *c = c.to_ascii_uppercase();
377        }
378        chars.into_iter().collect()
379    }
380
381    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
382        if path.is_file() {
383            Some(path.to_path_buf())
384        } else {
385            None
386        }
387    }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Passes the list of node kinds documented as unused to
    /// `validate_unused_kinds_audit` for the OCaml language and fails the test
    /// if that helper returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        // Node kinds documented as unused by this language implementation.
        // `rustfmt::skip` keeps the hand-packed layout of the list.
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            "abstract_type", "add_operator", "aliased_type", "and_operator",
            "application_expression", "array_expression", "array_get_expression",
            "assert_expression", "assign_operator", "bigarray_get_expression",
            "class_application", "class_binding", "class_body_type",
            "class_definition", "class_function", "class_function_type",
            "class_initializer", "class_name", "class_path", "class_type_binding",
            "class_type_definition", "class_type_name", "class_type_path",
            "coercion_expression", "concat_operator", "cons_expression",
            "constrain_module", "constrain_module_type", "constrain_type",
            "constructed_type", "constructor_declaration", "constructor_name",
            "constructor_path", "constructor_pattern", "conversion_specification",
            "do_clause", "else_clause", "exception_definition", "exception_pattern",
            "expression_item", "extended_module_path", "field_declaration",
            "field_expression", "field_get_expression", "for_expression",
            "fun_expression", "function_type", "functor_type", "hash_expression",
            "hash_operator", "hash_type", "include_module", "include_module_type", "infix_expression",
            "indexing_operator", "indexing_operator_path", "inheritance_definition",
            "inheritance_specification", "instance_variable_definition",
            "instance_variable_expression", "instance_variable_specification",
            "instantiated_class", "instantiated_class_type", "labeled_argument_type",
            "labeled_tuple_element_type", "lazy_expression", "let_and_operator",
            "let_class_expression", "let_exception_expression",
            "let_module_expression", "let_open_class_expression",
            "let_open_class_type", "let_open_expression", "let_operator",
            "list_expression", "local_open_expression", "local_open_type",
            "match_operator", "method_definition", "method_invocation",
            "method_name", "method_specification", "method_type", "module_application",
            "module_binding", "module_name", "module_parameter", "module_path",
            "module_type_constraint", "module_type_name", "module_type_of",
            "module_type_path", "mult_operator", "new_expression", "object_copy_expression",
            "object_expression", "object_type", "or_operator",
            "package_expression", "package_type", "packed_module",
            "parenthesized_class_expression", "parenthesized_expression",
            "parenthesized_module_expression", "parenthesized_module_type",
            "parenthesized_operator", "parenthesized_type", "polymorphic_type",
            "polymorphic_variant_type", "pow_operator", "prefix_expression",
            "prefix_operator", "record_declaration", "record_expression",
            "refutation_case", "rel_operator", "sequence_expression",
            "set_expression", "sign_expression", "sign_operator",
            "string_get_expression", "structure", "tag_specification",
            "then_clause", "tuple_expression", "tuple_type", "type_binding",
            "type_constraint", "type_constructor", "type_constructor_path",
            "type_parameter_constraint", "type_variable", "typed_class_expression",
            "typed_expression", "typed_module_expression", "typed_pattern",
            "value_specification", "variant_declaration", "while_expression",
        ];
        validate_unused_kinds_audit(&OCaml, documented_unused)
            .expect("OCaml unused node kinds audit failed");
    }
}