Skip to main content

normalize_languages/
haskell.rs

1//! Haskell language support.
2
3use crate::traits::{ImportSpec, ModuleId, ModuleResolver, Resolution, ResolverConfig};
4use crate::{ContainerBody, Import, Language, LanguageSymbols};
5use std::path::{Path, PathBuf};
6use tree_sitter::Node;
7
8/// Haskell language support.
9pub struct Haskell;
10
11impl Language for Haskell {
12    fn name(&self) -> &'static str {
13        "Haskell"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["hs", "lhs"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "haskell"
20    }
21
22    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
23        Some(self)
24    }
25
26    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
27        extract_haddock(node, content)
28    }
29
30    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
31        if node.kind() != "import" {
32            return Vec::new();
33        }
34
35        let text = &content[node.byte_range()];
36        let line = node.start_position().row + 1;
37
38        // Extract module name after "import" keyword
39        // import qualified Data.Map as M
40        let parts: Vec<&str> = text.split_whitespace().collect();
41        let mut idx = 1;
42        if parts.get(idx) == Some(&"qualified") {
43            idx += 1;
44        }
45
46        if let Some(module) = parts.get(idx) {
47            return vec![Import {
48                module: module.to_string(),
49                names: Vec::new(),
50                alias: None,
51                is_wildcard: !text.contains('('),
52                is_relative: false,
53                line,
54            }];
55        }
56
57        Vec::new()
58    }
59
60    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
61        // Haskell: import Module or import Module (a, b, c)
62        let names_to_use: Vec<&str> = names
63            .map(|n| n.to_vec())
64            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
65        if names_to_use.is_empty() {
66            format!("import {}", import.module)
67        } else {
68            format!("import {} ({})", import.module, names_to_use.join(", "))
69        }
70    }
71
72    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
73        let name = symbol.name.as_str();
74        match symbol.kind {
75            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
76            crate::SymbolKind::Module => name == "tests" || name == "test",
77            _ => false,
78        }
79    }
80
81    fn test_file_globs(&self) -> &'static [&'static str] {
82        &["**/test/**/*.hs", "**/*Spec.hs", "**/*Test.hs"]
83    }
84
85    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
86        // instance MyClass Foo where → symbol name is "MyClass", implements = ["MyClass"]
87        if node.kind() == "instance"
88            && let Some(name_node) = node.child_by_field_name("name")
89        {
90            let class_name = content[name_node.byte_range()].to_string();
91            return crate::ImplementsInfo {
92                is_interface: false,
93                implements: vec![class_name],
94            };
95        }
96        crate::ImplementsInfo::default()
97    }
98
99    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
100        // tree-sitter-haskell uses "declarations" (not "where") for the body
101        node.child_by_field_name("declarations")
102    }
103
104    fn analyze_container_body(
105        &self,
106        body_node: &Node,
107        content: &str,
108        inner_indent: &str,
109    ) -> Option<ContainerBody> {
110        // class_declarations / instance_declarations contain declarations
111        // directly, with no enclosing keywords in the node itself
112        crate::body::analyze_end_body(body_node, content, inner_indent)
113    }
114
115    fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
116        static RESOLVER: HaskellModuleResolver = HaskellModuleResolver;
117        Some(&RESOLVER)
118    }
119}
120
121impl LanguageSymbols for Haskell {}
122
123// =============================================================================
124// Haskell Module Resolver
125// =============================================================================
126
127/// Module resolver for Haskell (Cabal/Stack conventions).
128///
129/// `import Data.Map` → `Data/Map.hs` (or `.lhs`) in the source directory.
130pub struct HaskellModuleResolver;
131
132impl ModuleResolver for HaskellModuleResolver {
133    fn workspace_config(&self, root: &Path) -> ResolverConfig {
134        let mut search_roots: Vec<PathBuf> = Vec::new();
135
136        // Try to find source roots from .cabal file
137        let found_cabal = std::fs::read_dir(root).ok().and_then(|entries| {
138            entries.flatten().find(|e| {
139                e.path()
140                    .extension()
141                    .and_then(|x| x.to_str())
142                    .map(|x| x == "cabal")
143                    .unwrap_or(false)
144            })
145        });
146
147        if let Some(cabal_entry) = found_cabal
148            && let Ok(content) = std::fs::read_to_string(cabal_entry.path())
149        {
150            for line in content.lines() {
151                let trimmed = line.trim();
152                if let Some(rest) = trimmed.strip_prefix("hs-source-dirs:") {
153                    for dir in rest.split_whitespace() {
154                        let candidate = root.join(dir.trim_matches(','));
155                        if candidate.is_dir() {
156                            search_roots.push(candidate);
157                        }
158                    }
159                }
160            }
161        }
162
163        if search_roots.is_empty() {
164            // Default source roots
165            let src = root.join("src");
166            if src.is_dir() {
167                search_roots.push(src);
168            }
169            search_roots.push(root.to_path_buf());
170        }
171
172        ResolverConfig {
173            workspace_root: root.to_path_buf(),
174            path_mappings: Vec::new(),
175            search_roots,
176        }
177    }
178
179    fn module_of_file(&self, _root: &Path, file: &Path, cfg: &ResolverConfig) -> Vec<ModuleId> {
180        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
181        if ext != "hs" && ext != "lhs" {
182            return Vec::new();
183        }
184        for search_root in &cfg.search_roots {
185            if let Ok(rel) = file.strip_prefix(search_root) {
186                let module_path = rel
187                    .to_str()
188                    .unwrap_or("")
189                    .trim_end_matches(".lhs")
190                    .trim_end_matches(".hs")
191                    .replace(['/', '\\'], ".");
192                if !module_path.is_empty() {
193                    return vec![ModuleId {
194                        canonical_path: module_path,
195                    }];
196                }
197            }
198        }
199        Vec::new()
200    }
201
202    fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution {
203        let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or("");
204        if ext != "hs" && ext != "lhs" {
205            return Resolution::NotApplicable;
206        }
207        let raw = &spec.raw;
208        let path_part = raw.replace('.', "/");
209        let exported_name = raw.rsplit('.').next().unwrap_or(raw).to_string();
210
211        for search_root in &cfg.search_roots {
212            for ext_try in &["hs", "lhs"] {
213                let candidate = search_root.join(format!("{}.{}", path_part, ext_try));
214                if candidate.exists() {
215                    return Resolution::Resolved(candidate, exported_name.clone());
216                }
217            }
218        }
219        Resolution::NotFound
220    }
221}
222
223/// Extract a Haddock documentation comment preceding a definition node.
224///
225/// Haddock comments use `-- |` (preceding) or `-- ^` (following) syntax.
226/// The tree-sitter-haskell grammar parses these as `haddock` nodes.
227///
228/// The `haddock` node is a sibling of the `declarations` container, not a
229/// sibling of the `function`/`data_type`/etc. inside it. So we walk up to the
230/// parent (`declarations`) and check the parent's prev sibling.
231fn extract_haddock(node: &Node, content: &str) -> Option<String> {
232    // First check immediate prev siblings (within declarations)
233    let mut prev = node.prev_sibling();
234    while let Some(sibling) = prev {
235        match sibling.kind() {
236            "haddock" => {
237                return Some(clean_haddock(&content[sibling.byte_range()]));
238            }
239            "signature" => {
240                // Skip type signature between haddock and function definition
241            }
242            _ => break,
243        }
244        prev = sibling.prev_sibling();
245    }
246
247    // Check if the parent's prev sibling is a haddock node.
248    // This handles the case where haddock is at the top level (sibling of
249    // `declarations`) while the definition node is inside `declarations`.
250    if let Some(parent) = node.parent()
251        && let Some(sibling) = parent.prev_sibling()
252        && sibling.kind() == "haddock"
253    {
254        return Some(clean_haddock(&content[sibling.byte_range()]));
255    }
256
257    None
258}
259
260/// Clean a Haddock comment into plain text.
261///
262/// Strips `-- |`, `-- ^`, and `--` prefixes from each line.
263fn clean_haddock(text: &str) -> String {
264    let lines: Vec<&str> = text
265        .lines()
266        .map(|l| {
267            let l = l.trim();
268            if let Some(rest) = l.strip_prefix("-- |") {
269                rest.trim()
270            } else if let Some(rest) = l.strip_prefix("-- ^") {
271                rest.trim()
272            } else if let Some(rest) = l.strip_prefix("--") {
273                rest.strip_prefix(' ').unwrap_or(rest)
274            } else {
275                l
276            }
277        })
278        .filter(|l| !l.is_empty())
279        .collect();
280    lines.join(" ")
281}
282
283#[cfg(test)]
284mod tests {
285    use super::*;
286    use crate::validate_unused_kinds_audit;
287
288    #[test]
289    fn unused_node_kinds_audit() {
290        #[rustfmt::skip]
291        let documented_unused: &[&str] = &[
292            "associated_type", "class_declarations", "constructor",
293            "constructor_operator", "constructor_synonym", "constructor_synonyms",
294            "data_constructor", "data_constructors", "declarations",
295            "default_types", "do_module", "explicit_type", "export", "exports",
296            "forall", "forall_required", "foreign_export", "foreign_import",
297            "function_head_parens", "gadt_constructor", "gadt_constructors",
298            "generator", "import_list", "import_name", "import_package", "imports",
299            "instance_declarations", "lambda_case", "lambda_cases",
300            "linear_function", "list_comprehension", "modifier", "module",
301            "module_export", "module_id", "multi_way_if", "newtype_constructor",
302            "operator", "qualified", "qualifiers", "quantified_variables",
303            "quasiquote_body", "quoted_expression", "quoted_type", "transform",
304            "type_application", "type_binder", "type_family",
305            "type_family_injectivity", "type_family_result", "type_instance",
306            "type_params", "type_patterns", "type_role",
307            "typed_quote",
308            // control flow — not extracted as symbols
309            "lambda",
310            "case",
311            "match",
312            "import",
313        ];
314        validate_unused_kinds_audit(&Haskell, documented_unused)
315            .expect("Haskell unused node kinds audit failed");
316    }
317}