Skip to main content

normalize_languages/
haskell.rs

1//! Haskell language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols};
4use tree_sitter::Node;
5
6/// Haskell language support.
7pub struct Haskell;
8
9impl Language for Haskell {
10    fn name(&self) -> &'static str {
11        "Haskell"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["hs", "lhs"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "haskell"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
25        extract_haddock(node, content)
26    }
27
28    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
29        if node.kind() != "import" {
30            return Vec::new();
31        }
32
33        let text = &content[node.byte_range()];
34        let line = node.start_position().row + 1;
35
36        // Extract module name after "import" keyword
37        // import qualified Data.Map as M
38        let parts: Vec<&str> = text.split_whitespace().collect();
39        let mut idx = 1;
40        if parts.get(idx) == Some(&"qualified") {
41            idx += 1;
42        }
43
44        if let Some(module) = parts.get(idx) {
45            return vec![Import {
46                module: module.to_string(),
47                names: Vec::new(),
48                alias: None,
49                is_wildcard: !text.contains('('),
50                is_relative: false,
51                line,
52            }];
53        }
54
55        Vec::new()
56    }
57
58    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
59        // Haskell: import Module or import Module (a, b, c)
60        let names_to_use: Vec<&str> = names
61            .map(|n| n.to_vec())
62            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
63        if names_to_use.is_empty() {
64            format!("import {}", import.module)
65        } else {
66            format!("import {} ({})", import.module, names_to_use.join(", "))
67        }
68    }
69
70    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
71        let name = symbol.name.as_str();
72        match symbol.kind {
73            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
74            crate::SymbolKind::Module => name == "tests" || name == "test",
75            _ => false,
76        }
77    }
78
79    fn test_file_globs(&self) -> &'static [&'static str] {
80        &["**/test/**/*.hs", "**/*Spec.hs", "**/*Test.hs"]
81    }
82
83    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
84        // instance MyClass Foo where → symbol name is "MyClass", implements = ["MyClass"]
85        if node.kind() == "instance"
86            && let Some(name_node) = node.child_by_field_name("name")
87        {
88            let class_name = content[name_node.byte_range()].to_string();
89            return crate::ImplementsInfo {
90                is_interface: false,
91                implements: vec![class_name],
92            };
93        }
94        crate::ImplementsInfo::default()
95    }
96
97    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
98        // tree-sitter-haskell uses "declarations" (not "where") for the body
99        node.child_by_field_name("declarations")
100    }
101
102    fn analyze_container_body(
103        &self,
104        body_node: &Node,
105        content: &str,
106        inner_indent: &str,
107    ) -> Option<ContainerBody> {
108        // class_declarations / instance_declarations contain declarations
109        // directly, with no enclosing keywords in the node itself
110        crate::body::analyze_end_body(body_node, content, inner_indent)
111    }
112}
113
114impl LanguageSymbols for Haskell {}
115
116/// Extract a Haddock documentation comment preceding a definition node.
117///
118/// Haddock comments use `-- |` (preceding) or `-- ^` (following) syntax.
119/// The tree-sitter-haskell grammar parses these as `haddock` nodes.
120///
121/// The `haddock` node is a sibling of the `declarations` container, not a
122/// sibling of the `function`/`data_type`/etc. inside it. So we walk up to the
123/// parent (`declarations`) and check the parent's prev sibling.
124fn extract_haddock(node: &Node, content: &str) -> Option<String> {
125    // First check immediate prev siblings (within declarations)
126    let mut prev = node.prev_sibling();
127    while let Some(sibling) = prev {
128        match sibling.kind() {
129            "haddock" => {
130                return Some(clean_haddock(&content[sibling.byte_range()]));
131            }
132            "signature" => {
133                // Skip type signature between haddock and function definition
134            }
135            _ => break,
136        }
137        prev = sibling.prev_sibling();
138    }
139
140    // Check if the parent's prev sibling is a haddock node.
141    // This handles the case where haddock is at the top level (sibling of
142    // `declarations`) while the definition node is inside `declarations`.
143    if let Some(parent) = node.parent()
144        && let Some(sibling) = parent.prev_sibling()
145        && sibling.kind() == "haddock"
146    {
147        return Some(clean_haddock(&content[sibling.byte_range()]));
148    }
149
150    None
151}
152
153/// Clean a Haddock comment into plain text.
154///
155/// Strips `-- |`, `-- ^`, and `--` prefixes from each line.
156fn clean_haddock(text: &str) -> String {
157    let lines: Vec<&str> = text
158        .lines()
159        .map(|l| {
160            let l = l.trim();
161            if let Some(rest) = l.strip_prefix("-- |") {
162                rest.trim()
163            } else if let Some(rest) = l.strip_prefix("-- ^") {
164                rest.trim()
165            } else if let Some(rest) = l.strip_prefix("--") {
166                rest.strip_prefix(' ').unwrap_or(rest)
167            } else {
168                l
169            }
170        })
171        .filter(|l| !l.is_empty())
172        .collect();
173    lines.join(" ")
174}
175
176#[cfg(test)]
177mod tests {
178    use super::*;
179    use crate::validate_unused_kinds_audit;
180
181    #[test]
182    fn unused_node_kinds_audit() {
183        #[rustfmt::skip]
184        let documented_unused: &[&str] = &[
185            "associated_type", "class_declarations", "constructor",
186            "constructor_operator", "constructor_synonym", "constructor_synonyms",
187            "data_constructor", "data_constructors", "declarations",
188            "default_types", "do_module", "explicit_type", "export", "exports",
189            "forall", "forall_required", "foreign_export", "foreign_import",
190            "function_head_parens", "gadt_constructor", "gadt_constructors",
191            "generator", "import_list", "import_name", "import_package", "imports",
192            "instance_declarations", "lambda_case", "lambda_cases",
193            "linear_function", "list_comprehension", "modifier", "module",
194            "module_export", "module_id", "multi_way_if", "newtype_constructor",
195            "operator", "qualified", "qualifiers", "quantified_variables",
196            "quasiquote_body", "quoted_expression", "quoted_type", "transform",
197            "type_application", "type_binder", "type_family",
198            "type_family_injectivity", "type_family_result", "type_instance",
199            "type_params", "type_patterns", "type_role",
200            "typed_quote",
201            // control flow — not extracted as symbols
202            "lambda",
203            "case",
204            "match",
205            "import",
206        ];
207        validate_unused_kinds_audit(&Haskell, documented_unused)
208            .expect("Haskell unused node kinds audit failed");
209    }
210}