Skip to main content

normalize_languages/
haskell.rs

1//! Haskell language support.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{
5    Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism,
6    simple_function_symbol,
7};
8use std::path::{Path, PathBuf};
9use tree_sitter::Node;
10
/// Haskell language support.
///
/// Unit struct carrying no state; all behaviour is provided by its
/// [`Language`] implementation in this file.
pub struct Haskell;
13
14impl Language for Haskell {
15    fn name(&self) -> &'static str {
16        "Haskell"
17    }
18    fn extensions(&self) -> &'static [&'static str] {
19        &["hs", "lhs"]
20    }
21    fn grammar_name(&self) -> &'static str {
22        "haskell"
23    }
24
25    fn has_symbols(&self) -> bool {
26        true
27    }
28
29    fn container_kinds(&self) -> &'static [&'static str] {
30        &["data_type", "newtype", "type_synomym", "class", "instance"]
31    }
32
33    fn function_kinds(&self) -> &'static [&'static str] {
34        &["function", "signature"]
35    }
36
37    fn type_kinds(&self) -> &'static [&'static str] {
38        &["data_type", "newtype", "type_synomym"]
39    }
40
41    fn import_kinds(&self) -> &'static [&'static str] {
42        &["import"]
43    }
44
45    fn public_symbol_kinds(&self) -> &'static [&'static str] {
46        &["function", "data_type", "newtype", "class"]
47    }
48
49    fn visibility_mechanism(&self) -> VisibilityMechanism {
50        VisibilityMechanism::ExplicitExport // module export list
51    }
52
53    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
54        let name = match self.node_name(node, content) {
55            Some(n) => n.to_string(),
56            None => return Vec::new(),
57        };
58
59        let kind = match node.kind() {
60            "function" | "signature" => SymbolKind::Function,
61            "data_type" | "newtype" => SymbolKind::Struct,
62            "type_synomym" => SymbolKind::Type,
63            "class" => SymbolKind::Interface,
64            _ => return Vec::new(),
65        };
66
67        vec![Export {
68            name,
69            kind,
70            line: node.start_position().row + 1,
71        }]
72    }
73
74    fn scope_creating_kinds(&self) -> &'static [&'static str] {
75        &["let", "where", "do", "lambda"]
76    }
77
78    fn control_flow_kinds(&self) -> &'static [&'static str] {
79        &["conditional", "case", "match", "guard"]
80    }
81
82    fn complexity_nodes(&self) -> &'static [&'static str] {
83        &["conditional", "case", "match", "guard", "lambda"]
84    }
85
86    fn nesting_nodes(&self) -> &'static [&'static str] {
87        &["function", "let", "where", "do", "case"]
88    }
89
90    fn signature_suffix(&self) -> &'static str {
91        ""
92    }
93
94    fn extract_function(&self, node: &Node, content: &str, _in_container: bool) -> Option<Symbol> {
95        let name = self.node_name(node, content)?;
96        Some(simple_function_symbol(
97            node,
98            content,
99            name,
100            self.extract_docstring(node, content),
101        ))
102    }
103
104    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol> {
105        let name = self.node_name(node, content)?;
106
107        let (kind, keyword) = match node.kind() {
108            "data_type" => (SymbolKind::Struct, "data"),
109            "newtype" => (SymbolKind::Struct, "newtype"),
110            "type_synomym" => (SymbolKind::Type, "type"),
111            "class" => (SymbolKind::Interface, "class"),
112            "instance" => (SymbolKind::Class, "instance"),
113            _ => return None,
114        };
115
116        Some(Symbol {
117            name: name.to_string(),
118            kind,
119            signature: format!("{} {}", keyword, name),
120            docstring: self.extract_docstring(node, content),
121            attributes: Vec::new(),
122            start_line: node.start_position().row + 1,
123            end_line: node.end_position().row + 1,
124            visibility: Visibility::Public,
125            children: Vec::new(),
126            is_interface_impl: false,
127            implements: Vec::new(),
128        })
129    }
130
131    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol> {
132        self.extract_container(node, content)
133    }
134
135    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
136        // Haskell uses -- | or {- | -} for Haddock docs
137        let mut prev = node.prev_sibling();
138        let mut doc_lines = Vec::new();
139
140        while let Some(sibling) = prev {
141            let text = &content[sibling.byte_range()];
142            if sibling.kind() == "comment" {
143                if text.starts_with("-- |") || text.starts_with("-- ^") {
144                    let line = text
145                        .strip_prefix("-- |")
146                        .or_else(|| text.strip_prefix("-- ^"))
147                        .unwrap_or(text)
148                        .trim();
149                    doc_lines.push(line.to_string());
150                } else if text.starts_with("--") {
151                    let line = text.strip_prefix("--").unwrap_or(text).trim();
152                    doc_lines.push(line.to_string());
153                }
154                prev = sibling.prev_sibling();
155            } else {
156                break;
157            }
158        }
159
160        if doc_lines.is_empty() {
161            return None;
162        }
163
164        doc_lines.reverse();
165        Some(doc_lines.join(" "))
166    }
167
168    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
169        Vec::new()
170    }
171
172    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
173        if node.kind() != "import" {
174            return Vec::new();
175        }
176
177        let text = &content[node.byte_range()];
178        let line = node.start_position().row + 1;
179
180        // Extract module name after "import" keyword
181        // import qualified Data.Map as M
182        let parts: Vec<&str> = text.split_whitespace().collect();
183        let mut idx = 1;
184        if parts.get(idx) == Some(&"qualified") {
185            idx += 1;
186        }
187
188        if let Some(module) = parts.get(idx) {
189            return vec![Import {
190                module: module.to_string(),
191                names: Vec::new(),
192                alias: None,
193                is_wildcard: !text.contains('('),
194                is_relative: false,
195                line,
196            }];
197        }
198
199        Vec::new()
200    }
201
202    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
203        // Haskell: import Module or import Module (a, b, c)
204        let names_to_use: Vec<&str> = names
205            .map(|n| n.to_vec())
206            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
207        if names_to_use.is_empty() {
208            format!("import {}", import.module)
209        } else {
210            format!("import {} ({})", import.module, names_to_use.join(", "))
211        }
212    }
213
214    fn is_public(&self, _node: &Node, _content: &str) -> bool {
215        true
216    }
217    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
218        Visibility::Public
219    }
220
221    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
222        let name = symbol.name.as_str();
223        match symbol.kind {
224            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
225            crate::SymbolKind::Module => name == "tests" || name == "test",
226            _ => false,
227        }
228    }
229
230    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
231        None
232    }
233
234    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
235        node.child_by_field_name("where")
236    }
237
238    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
239        false
240    }
241
242    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
243        node.child_by_field_name("name")
244            .map(|n| &content[n.byte_range()])
245    }
246
247    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
248        let ext = path.extension()?.to_str()?;
249        if ext != "hs" && ext != "lhs" {
250            return None;
251        }
252        let stem = path.file_stem()?.to_str()?;
253        Some(stem.to_string())
254    }
255
256    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
257        let path = module.replace('.', "/");
258        vec![format!("{}.hs", path), format!("{}.lhs", path)]
259    }
260
261    fn lang_key(&self) -> &'static str {
262        "haskell"
263    }
264
265    fn is_stdlib_import(&self, import_name: &str, _project_root: &Path) -> bool {
266        // Common base libraries
267        import_name.starts_with("Prelude")
268            || import_name.starts_with("Data.")
269            || import_name.starts_with("Control.")
270            || import_name.starts_with("System.")
271            || import_name.starts_with("GHC.")
272    }
273
274    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
275        None
276    }
277
278    fn resolve_local_import(
279        &self,
280        import: &str,
281        _current_file: &Path,
282        project_root: &Path,
283    ) -> Option<PathBuf> {
284        let path = import.replace('.', "/");
285        for ext in &["hs", "lhs"] {
286            let candidates = [
287                project_root.join("src").join(format!("{}.{}", path, ext)),
288                project_root.join("lib").join(format!("{}.{}", path, ext)),
289                project_root.join(format!("{}.{}", path, ext)),
290            ];
291            for c in &candidates {
292                if c.is_file() {
293                    return Some(c.clone());
294                }
295            }
296        }
297        None
298    }
299
300    fn resolve_external_import(
301        &self,
302        _import_name: &str,
303        _project_root: &Path,
304    ) -> Option<ResolvedPackage> {
305        None
306    }
307
308    fn get_version(&self, project_root: &Path) -> Option<String> {
309        // Check cabal or package.yaml
310        let cabal_files: Vec<_> = std::fs::read_dir(project_root)
311            .ok()?
312            .filter_map(|e| e.ok())
313            .filter(|e| e.path().extension().map_or(false, |ext| ext == "cabal"))
314            .collect();
315
316        if !cabal_files.is_empty() {
317            return Some("cabal".to_string());
318        }
319
320        if project_root.join("package.yaml").is_file() {
321            return Some("stack".to_string());
322        }
323        None
324    }
325
326    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
327        if let Some(home) = std::env::var_os("HOME") {
328            let cabal = PathBuf::from(&home).join(".cabal/store");
329            if cabal.is_dir() {
330                return Some(cabal);
331            }
332            let stack = PathBuf::from(&home).join(".stack");
333            if stack.is_dir() {
334                return Some(stack);
335            }
336        }
337        None
338    }
339
340    fn indexable_extensions(&self) -> &'static [&'static str] {
341        &["hs", "lhs"]
342    }
343    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
344        Vec::new()
345    }
346
347    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
348        use crate::traits::{has_extension, skip_dotfiles};
349        if skip_dotfiles(name) {
350            return true;
351        }
352        if is_dir && (name == "dist" || name == "dist-newstyle" || name == ".stack-work") {
353            return true;
354        }
355        !is_dir && !has_extension(name, self.indexable_extensions())
356    }
357
358    fn discover_packages(&self, _source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
359        Vec::new()
360    }
361
362    fn package_module_name(&self, entry_name: &str) -> String {
363        entry_name
364            .strip_suffix(".hs")
365            .or_else(|| entry_name.strip_suffix(".lhs"))
366            .unwrap_or(entry_name)
367            .to_string()
368    }
369
370    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
371        if path.is_file() {
372            Some(path.to_path_buf())
373        } else {
374            None
375        }
376    }
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Node kinds that are passed to the audit helper as the documented-unused
    /// list for the Haskell language.
    #[rustfmt::skip]
    const DOCUMENTED_UNUSED: &[&str] = &[
        "associated_type", "class_declarations", "constructor",
        "constructor_operator", "constructor_synonym", "constructor_synonyms",
        "data_constructor", "data_constructors", "declarations",
        "default_types", "do_module", "explicit_type", "export", "exports",
        "forall", "forall_required", "foreign_export", "foreign_import",
        "function_head_parens", "gadt_constructor", "gadt_constructors",
        "generator", "import_list", "import_name", "import_package", "imports",
        "instance_declarations", "lambda_case", "lambda_cases",
        "linear_function", "list_comprehension", "modifier", "module",
        "module_export", "module_id", "multi_way_if", "newtype_constructor",
        "operator", "qualified", "qualifiers", "quantified_variables",
        "quasiquote_body", "quoted_expression", "quoted_type", "transform",
        "type_application", "type_binder", "type_family",
        "type_family_injectivity", "type_family_result", "type_instance",
        "type_params", "type_patterns", "type_role",
        "typed_quote",
    ];

    /// Runs the audit helper for `Haskell` and fails the test if it reports an error.
    #[test]
    fn unused_node_kinds_audit() {
        let outcome = validate_unused_kinds_audit(&Haskell, DOCUMENTED_UNUSED);
        outcome.expect("Haskell unused node kinds audit failed");
    }
}
408}