Skip to main content

mati_core/analysis/resolvers/
haskell.rs

1//! Haskell import resolver.
2//!
3//! Converts dot-separated module names to slash-separated paths
4//! (`MyLib.Utils` → `MyLib/Utils.hs`). Checks under `src/`, `app/`,
5//! and the project root. Standard library modules are skipped using an
6//! explicit allowlist of GHC boot-package modules (base, containers,
7//! bytestring, text, array, etc.).
8//!
9//! # Known limitations
10//!
11//! - Cabal and Stack multi-library layouts with custom `hs-source-dirs`
12//!   are not detected — only `src/`, `app/`, and project root are
13//!   searched
14//! - Re-exported modules (`module X (module Y)`) are not followed
15//! - Backpack module signatures (`.hsig` files) are not resolved
16//! - CPP-guarded imports (`#ifdef`-controlled imports via Haskell's
17//!   `{-# LANGUAGE CPP #-}`) are always counted
18//! - Template Haskell splices that generate imports are invisible to
19//!   tree-sitter
20//!
21//! These limitations mean Haskell projects using Backpack, custom
22//! source directories, or heavy Template Haskell will have lower edge
23//! counts. Standard Stack/Cabal projects with `src/` layout get good
24//! coverage.
25
26use super::{FileIndex, LanguageResolver};
27use crate::analysis::parser::ImportStatement;
28use crate::analysis::walker::Language;
29
30pub struct HaskellResolver;
31
32impl LanguageResolver for HaskellResolver {
33    fn resolve(
34        &self,
35        import: &ImportStatement,
36        _importing_file: &str,
37        file_index: &FileIndex,
38    ) -> Option<String> {
39        resolve_haskell(&import.path, file_index)
40    }
41
42    fn language(&self) -> Language {
43        Language::Haskell
44    }
45
46    fn name(&self) -> &'static str {
47        "haskell"
48    }
49}
50
51fn resolve_haskell(module_path: &str, file_index: &FileIndex) -> Option<String> {
52    if is_haskell_stdlib(module_path) {
53        return None;
54    }
55
56    // Convert dots to slashes: MyLib.Utils → MyLib/Utils
57    let rel = module_path.replace('.', "/");
58
59    // Try direct: MyLib/Utils.hs
60    let direct = format!("{rel}.hs");
61    if file_index.contains(&direct) {
62        return Some(direct);
63    }
64
65    // Try under src/: src/MyLib/Utils.hs
66    let src = format!("src/{rel}.hs");
67    if file_index.contains(&src) {
68        return Some(src);
69    }
70
71    // Try under app/: app/MyLib/Utils.hs
72    let app = format!("app/{rel}.hs");
73    if file_index.contains(&app) {
74        return Some(app);
75    }
76
77    // Try literate Haskell: MyLib/Utils.lhs
78    let lhs = format!("{rel}.lhs");
79    if file_index.contains(&lhs) {
80        return Some(lhs);
81    }
82
83    let src_lhs = format!("src/{rel}.lhs");
84    if file_index.contains(&src_lhs) {
85        return Some(src_lhs);
86    }
87
88    None
89}
90
/// Decide whether a Haskell module name should be treated as standard
/// library rather than project code.
///
/// Only the first two dot-separated segments are inspected:
///
/// - A first segment of `GHC`, `Prelude`, `Foreign`, `Numeric`, `Debug`,
///   `Unsafe` or `Type` is always stdlib, whatever follows.
/// - For the shared namespaces `Data`, `Control`, `System` and `Text`, the
///   second segment must appear in a fixed allowlist (grouped below by the
///   package the entries come from: base, containers, bytestring, text,
///   array, time, deepseq, filepath, directory, process, random, pretty,
///   parsec, regex-base). Anything else, such as `Data.Aeson`, is not stdlib.
/// - A bare shared namespace (`Data` alone) and every other first segment
///   are not stdlib.
fn is_haskell_stdlib(module: &str) -> bool {
    // Top-level namespaces that are matched on the first segment alone.
    const OWNED_NAMESPACES: &[&str] = &[
        "GHC", "Prelude", "Foreign", "Numeric", "Debug", "Unsafe", "Type",
    ];

    const DATA_MODULES: &[&str] = &[
        // ── base ──
        "Bifoldable",
        "Bifunctor",
        "Bitraversable",
        "Bits",
        "Bool",
        "Char",
        "Coerce",
        "Complex",
        "Data",
        "Dynamic",
        "Either",
        "Eq",
        "Fixed",
        "Foldable",
        "Function",
        "Functor",
        "IORef",
        "Int",
        "Ix",
        "Kind",
        "List",
        "Maybe",
        "Monoid",
        "Ord",
        "Proxy",
        "Ratio",
        "STRef",
        "Semigroup",
        "String",
        "Traversable",
        "Tuple",
        "Type",
        "Typeable",
        "Unique",
        "Void",
        "Version",
        "Word",
        // ── containers ──
        "Map",
        "Set",
        "IntMap",
        "IntSet",
        "Sequence",
        "Tree",
        "Graph",
        // ── bytestring ──
        "ByteString",
        // ── text ──
        "Text",
        // ── array ──
        "Array",
        // ── time ──
        "Time",
    ];

    const CONTROL_MODULES: &[&str] = &[
        // ── base ──
        "Applicative",
        "Arrow",
        "Category",
        "Concurrent",
        "Exception",
        "Monad",
        // ── deepseq ──
        "DeepSeq",
    ];

    const SYSTEM_MODULES: &[&str] = &[
        // ── base ──
        "CPUTime",
        "Console",
        "Environment",
        "Exit",
        "IO",
        "Info",
        "Mem",
        "Posix",
        "Timeout",
        // ── filepath ──
        "FilePath",
        // ── directory ──
        "Directory",
        // ── process ──
        "Process",
        // ── random ──
        "Random",
    ];

    const TEXT_MODULES: &[&str] = &[
        // ── base ──
        "ParserCombinators",
        "Printf",
        "Read",
        "Show",
        // ── pretty ──
        "PrettyPrint",
        // ── parsec ──
        "Parsec",
        // ── regex-base ──
        "Regex",
    ];

    let mut segments = module.split('.');
    let namespace = segments.next().unwrap_or("");

    if OWNED_NAMESPACES.contains(&namespace) {
        return true;
    }

    // Pick the allowlist for a shared namespace; everything else is project
    // or third-party code.
    let allowlist = match namespace {
        "Data" => DATA_MODULES,
        "Control" => CONTROL_MODULES,
        "System" => SYSTEM_MODULES,
        "Text" => TEXT_MODULES,
        _ => return false,
    };

    // A bare "Data" / "Control" etc. has no second segment and is not a
    // real module import.
    match segments.next() {
        Some(submodule) => allowlist.contains(&submodule),
        None => false,
    }
}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::parser::import::ImportKind;

    /// Build a file index from a list of project-relative paths.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::new(paths.iter().map(|s| s.to_string()))
    }

    /// Build a normal import of `path` located on line 1.
    fn import(path: &str) -> ImportStatement {
        ImportStatement::new(path, ImportKind::Normal, 1)
    }

    #[test]
    fn stdlib_skipped() {
        let file_index = idx(&["src/Main.hs"]);
        assert_eq!(
            HaskellResolver.resolve(&import("Data.List"), "src/Main.hs", &file_index),
            None
        );
        assert_eq!(
            HaskellResolver.resolve(&import("Control.Monad"), "src/Main.hs", &file_index),
            None
        );
        assert_eq!(
            HaskellResolver.resolve(&import("Prelude"), "src/Main.hs", &file_index),
            None
        );
    }

    #[test]
    fn local_module_resolves_under_src() {
        let file_index = idx(&["src/Main.hs", "src/MyLib/Utils.hs"]);
        let result = HaskellResolver.resolve(&import("MyLib.Utils"), "src/Main.hs", &file_index);
        assert_eq!(result, Some("src/MyLib/Utils.hs".into()));
    }

    #[test]
    fn local_module_resolves_under_app() {
        let file_index = idx(&["app/Main.hs", "app/Cli/Options.hs"]);
        let result = HaskellResolver.resolve(&import("Cli.Options"), "app/Main.hs", &file_index);
        assert_eq!(result, Some("app/Cli/Options.hs".into()));
    }

    #[test]
    fn local_module_resolves_at_root() {
        let file_index = idx(&["Main.hs", "Lib/Helper.hs"]);
        let result = HaskellResolver.resolve(&import("Lib.Helper"), "Main.hs", &file_index);
        assert_eq!(result, Some("Lib/Helper.hs".into()));
    }

    #[test]
    fn literate_haskell_resolves() {
        let file_index = idx(&["src/Main.hs", "src/MyLib/Doc.lhs"]);
        let result = HaskellResolver.resolve(&import("MyLib.Doc"), "src/Main.hs", &file_index);
        assert_eq!(result, Some("src/MyLib/Doc.lhs".into()));
    }

    #[test]
    fn literate_haskell_resolves_at_root() {
        let file_index = idx(&["Main.hs", "Lib/Notes.lhs"]);
        let result = HaskellResolver.resolve(&import("Lib.Notes"), "Main.hs", &file_index);
        assert_eq!(result, Some("Lib/Notes.lhs".into()));
    }

    #[test]
    fn root_takes_precedence_over_src() {
        let file_index = idx(&["Lib/Helper.hs", "src/Lib/Helper.hs"]);
        let result = HaskellResolver.resolve(&import("Lib.Helper"), "Main.hs", &file_index);
        assert_eq!(result, Some("Lib/Helper.hs".into()));
    }

    #[test]
    fn plain_source_takes_precedence_over_literate() {
        // A `.hs` file in a lower-priority root still wins over a root-level `.lhs`.
        let file_index = idx(&["Lib/Helper.lhs", "src/Lib/Helper.hs"]);
        let result = HaskellResolver.resolve(&import("Lib.Helper"), "Main.hs", &file_index);
        assert_eq!(result, Some("src/Lib/Helper.hs".into()));
    }

    #[test]
    fn nonexistent_returns_none() {
        let file_index = idx(&["src/Main.hs"]);
        assert_eq!(
            HaskellResolver.resolve(&import("Missing.Module"), "src/Main.hs", &file_index),
            None
        );
    }

    // ── stdlib allowlist tests ─────────────────────────────────────────

    #[test]
    fn data_list_is_stdlib() {
        assert!(is_haskell_stdlib("Data.List"));
        assert!(is_haskell_stdlib("Data.List.NonEmpty"));
    }

    #[test]
    fn data_map_is_stdlib() {
        assert!(is_haskell_stdlib("Data.Map"));
        assert!(is_haskell_stdlib("Data.Map.Strict"));
    }

    #[test]
    fn data_aeson_is_not_stdlib() {
        assert!(!is_haskell_stdlib("Data.Aeson"));
        assert!(!is_haskell_stdlib("Data.Aeson.Types"));
    }

    #[test]
    fn data_aeson_types_is_not_stdlib() {
        // Covers sub-module paths too.
        assert!(!is_haskell_stdlib("Data.Aeson.Types.Internal"));
        assert!(!is_haskell_stdlib("Data.Aeson.Key"));
    }

    #[test]
    fn control_monad_is_stdlib() {
        assert!(is_haskell_stdlib("Control.Monad"));
        assert!(is_haskell_stdlib("Control.Monad.IO.Class"));
        assert!(is_haskell_stdlib("Control.Exception"));
    }

    #[test]
    fn owned_namespaces_are_stdlib() {
        assert!(is_haskell_stdlib("Prelude"));
        assert!(is_haskell_stdlib("GHC.Generics"));
        assert!(is_haskell_stdlib("Debug.Trace"));
        assert!(is_haskell_stdlib("Foreign.Ptr"));
    }

    #[test]
    fn bare_shared_namespace_is_not_stdlib() {
        assert!(!is_haskell_stdlib("Data"));
        assert!(!is_haskell_stdlib("Control"));
        assert!(!is_haskell_stdlib(""));
    }

    #[test]
    fn user_module_is_not_stdlib() {
        assert!(!is_haskell_stdlib("MyApp.Foo"));
        assert!(!is_haskell_stdlib("Lib.Internal.Utils"));
        assert!(!is_haskell_stdlib("Network.HTTP"));
    }
}