Skip to main content

coding_tools/
modgraph.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! The intra-crate **module dependency graph**, built heuristically from `use`
5//! statements — the lean, pure-Rust alternative to booting a semantic engine
6//! (the same honesty class as [`crate::outline`]). It is the single-crate
7//! analogue of [`crate::deps`]: it produces a [`deps::Graph`] whose nodes are
8//! *modules* (crate-relative paths like `domain::entity`) and whose edges are
9//! `use` dependencies, so the very same [`deps::forbid_path`] /
10//! [`deps::cycles`] / [`deps::layer_violations`] assertions apply at module
11//! granularity.
12//!
13//! ## What it sees, and what it does not
14//!
15//! Nodes come from the source files walked (one module per file; `lib.rs` /
16//! `main.rs` / `mod.rs` map to their containing module). Edges come from
17//! `use crate::…`, `use self::…`, and `use super::…` statements — brace groups
18//! are expanded, `as` aliases stripped, globs and `self` segments folded to the
19//! enclosing module, and each path resolved to the **longest known module
20//! prefix**. External imports (`use std::…`, `use serde::…`) are ignored.
21//!
22//! Heuristic, not a parser: each `use` is read from the start of its line (how
23//! rustfmt always writes them); a `use` reaching through a `pub use` re-export
24//! resolves to the re-exporting module (not the origin); `use` inside macros or
25//! `#[cfg(...)]`-disabled code, fully-qualified paths written inline without a
26//! `use`, and the nesting of inline `mod foo { … }` blocks are not modelled —
27//! the latter fold into their file's module. When in doubt the graph
28//! under-reports rather than inventing edges.
29
30use std::collections::{BTreeSet, HashMap, HashSet};
31use std::path::Path;
32use std::sync::OnceLock;
33use std::time::{Duration, Instant};
34
35use clap::{CommandFactory, Parser};
36use regex::Regex;
37
38use crate::deps::{flag_kinds, EdgeKind, Graph, Package};
39use crate::pattern;
40use crate::rules::ProbeOutcome;
41use crate::walk::{self, EntryType};
42
43/// The crate-relative module path for a source file, given its path **relative
44/// to the crate source root**. The crate root file (`lib.rs` / `main.rs`) and
45/// any `mod.rs` map to their containing module; every other file adds its stem.
46///
47/// # Examples
48///
49/// ```
50/// use std::path::Path;
51/// use coding_tools::modgraph::module_name;
52///
53/// assert_eq!(module_name(Path::new("lib.rs")), "crate");
54/// assert_eq!(module_name(Path::new("main.rs")), "crate");
55/// assert_eq!(module_name(Path::new("domain.rs")), "domain");
56/// assert_eq!(module_name(Path::new("domain/mod.rs")), "domain");
57/// assert_eq!(module_name(Path::new("domain/entity.rs")), "domain::entity");
58/// ```
59pub fn module_name(rel: &Path) -> String {
60    let mut segs: Vec<String> = rel
61        .components()
62        .filter_map(|c| match c {
63            std::path::Component::Normal(s) => Some(s.to_string_lossy().into_owned()),
64            _ => None,
65        })
66        .collect();
67    if let Some(file) = segs.pop() {
68        let stem = Path::new(&file)
69            .file_stem()
70            .map(|s| s.to_string_lossy().into_owned())
71            .unwrap_or(file);
72        if stem != "mod" && stem != "lib" && stem != "main" {
73            segs.push(stem);
74        }
75    }
76    if segs.is_empty() {
77        "crate".to_string()
78    } else {
79        segs.join("::")
80    }
81}
82
83/// Build the module-use [`Graph`] from `(module_name, file_contents)` pairs.
84/// Nodes are the module names; an edge `A -> B` means a file in module `A` has
85/// a `use` resolving into module `B`. Self-edges are dropped.
86pub fn build_graph(files: &[(String, String)]) -> Graph {
87    let modules: HashSet<&str> = files.iter().map(|(n, _)| n.as_str()).collect();
88    let mut packages = HashMap::new();
89    let mut edges: HashMap<String, Vec<(String, Vec<EdgeKind>)>> = HashMap::new();
90    let mut members: Vec<String> = Vec::new();
91    for (name, content) in files {
92        packages.insert(
93            name.clone(),
94            Package { name: name.clone(), version: String::new() },
95        );
96        members.push(name.clone());
97        let current = name_segs(name);
98        let mut targets: BTreeSet<String> = BTreeSet::new();
99        for raw in use_targets(content) {
100            if let Some(t) = resolve(&raw, &current, &modules)
101                && t != *name
102            {
103                targets.insert(t);
104            }
105        }
106        edges.insert(
107            name.clone(),
108            targets.into_iter().map(|t| (t, vec![EdgeKind::Normal])).collect(),
109        );
110    }
111    members.sort();
112    members.dedup();
113    Graph { packages, edges, members }
114}
115
116/// The intra-crate `use` targets in a source file: each returned string is a
117/// brace-expanded, alias-stripped path beginning with `crate`, `self`, or
118/// `super` (external imports are dropped).
119pub fn use_targets(content: &str) -> Vec<String> {
120    let mut out = Vec::new();
121    for stmt in use_statements(content) {
122        for leaf in expand_braces(&strip_aliases(&stmt)) {
123            let leaf = leaf.trim();
124            let head = leaf.split("::").next().unwrap_or("");
125            if matches!(head, "crate" | "self" | "super") {
126                out.push(leaf.to_string());
127            }
128        }
129    }
130    out
131}
132
133/// Resolve one raw `use` path (e.g. `crate::a::b::Item`) against the set of
134/// known module names, relative to the `current` module's segments. Returns the
135/// longest known module prefix, or `None` for an unresolvable / external path.
136fn resolve(raw: &str, current: &[String], modules: &HashSet<&str>) -> Option<String> {
137    let parts: Vec<&str> = raw.split("::").map(str::trim).filter(|s| !s.is_empty()).collect();
138    let mut abs: Vec<String> = Vec::new();
139    let mut i = 0;
140    match *parts.first()? {
141        "crate" => i = 1,
142        "self" => {
143            abs = current.to_vec();
144            i = 1;
145        }
146        "super" => {
147            abs = current.to_vec();
148            while parts.get(i) == Some(&"super") {
149                abs.pop();
150                i += 1;
151            }
152        }
153        _ => return None, // an external crate
154    }
155    for p in &parts[i..] {
156        if *p == "self" || *p == "*" {
157            continue; // `a::{self}` / `a::*` both mean module `a`
158        }
159        abs.push((*p).to_string());
160    }
161    // Longest-prefix match: the trailing segments are usually item names.
162    loop {
163        let name = if abs.is_empty() { "crate".to_string() } else { abs.join("::") };
164        if modules.contains(name.as_str()) {
165            return Some(name);
166        }
167        abs.pop()?;
168    }
169}
170
171/// The module-path segments of a module name (`crate` is the empty root).
172fn name_segs(name: &str) -> Vec<String> {
173    if name == "crate" {
174        Vec::new()
175    } else {
176        name.split("::").map(String::from).collect()
177    }
178}
179
180/// Gather `use` statements as path-tree strings (text between `use` and `;`),
181/// joining continuation lines so multi-line brace groups arrive whole.
182fn use_statements(content: &str) -> Vec<String> {
183    let mut stmts = Vec::new();
184    let mut lines = content.lines();
185    while let Some(line) = lines.next() {
186        let Some(rest) = strip_vis_use(line) else {
187            continue;
188        };
189        let mut body = rest.to_string();
190        loop {
191            if let Some(idx) = body.find(';') {
192                body.truncate(idx);
193                stmts.push(body);
194                break;
195            }
196            match lines.next() {
197                Some(next) => {
198                    body.push(' ');
199                    body.push_str(next.trim());
200                }
201                None => {
202                    stmts.push(body);
203                    break;
204                }
205            }
206        }
207    }
208    stmts
209}
210
211/// If `line` is a `use` item (optionally `pub` / `pub(...)`), return the text
212/// after the `use` keyword; otherwise `None`.
213fn strip_vis_use(line: &str) -> Option<&str> {
214    let t = line.trim_start();
215    let after_vis = if let Some(r) = t.strip_prefix("pub") {
216        let r = r.trim_start();
217        let r = if r.starts_with('(') {
218            r.find(')').map(|i| &r[i + 1..]).unwrap_or(r)
219        } else {
220            r
221        };
222        r.trim_start()
223    } else {
224        t
225    };
226    after_vis
227        .strip_prefix("use")
228        .filter(|r| r.starts_with(char::is_whitespace))
229        .map(str::trim_start)
230}
231
232/// Remove ` as Alias` runs (the import-alias keyword) from a use-tree string.
233fn strip_aliases(s: &str) -> std::borrow::Cow<'_, str> {
234    static RE: OnceLock<Regex> = OnceLock::new();
235    let re = RE.get_or_init(|| Regex::new(r"\s+as\s+[A-Za-z_][A-Za-z0-9_]*").unwrap());
236    re.replace_all(s, "")
237}
238
239/// Expand `{…}` groups in a use path into one leaf path per branch.
240fn expand_braces(s: &str) -> Vec<String> {
241    let s = s.trim();
242    match s.find('{') {
243        None => {
244            if s.is_empty() {
245                vec![]
246            } else {
247                vec![s.to_string()]
248            }
249        }
250        Some(open) => {
251            let Some(close) = matching_brace(s.as_bytes(), open) else {
252                return vec![s.to_string()]; // unbalanced: leave verbatim
253            };
254            let prefix = &s[..open];
255            let inner = &s[open + 1..close];
256            let suffix = &s[close + 1..];
257            let mut out = Vec::new();
258            for part in split_top_commas(inner) {
259                let part = part.trim();
260                if part.is_empty() {
261                    continue;
262                }
263                out.extend(expand_braces(&format!("{prefix}{part}{suffix}")));
264            }
265            out
266        }
267    }
268}
269
270/// The index of the `}` matching the `{` at `open`, by depth.
271fn matching_brace(bytes: &[u8], open: usize) -> Option<usize> {
272    let mut depth = 0usize;
273    for (i, &b) in bytes.iter().enumerate().skip(open) {
274        match b {
275            b'{' => depth += 1,
276            b'}' => {
277                depth -= 1;
278                if depth == 0 {
279                    return Some(i);
280                }
281            }
282            _ => {}
283        }
284    }
285    None
286}
287
288/// Split on commas that sit at brace depth zero.
289fn split_top_commas(s: &str) -> Vec<&str> {
290    let mut parts = Vec::new();
291    let mut depth = 0i32;
292    let mut start = 0;
293    for (i, c) in s.char_indices() {
294        match c {
295            '{' => depth += 1,
296            '}' => depth -= 1,
297            ',' if depth == 0 => {
298                parts.push(&s[start..i]);
299                start = i + 1;
300            }
301            _ => {}
302        }
303    }
304    parts.push(&s[start..]);
305    parts
306}
307
308// ----- The `mods` built-in check -------------------------------------------------
309
310/// The targeting + assertion flags of a `mods` built-in check. No framing — the
311/// rule layer owns the verdict.
312#[derive(Parser, Debug)]
313#[command(no_binary_name = true, disable_help_flag = true)]
314struct ModsCheck {
315    #[arg(long, default_value = "src")]
316    base: String,
317    #[arg(long)]
318    name: Option<String>,
319    #[arg(long, value_delimiter = ',')]
320    ext: Vec<String>,
321    #[arg(long)]
322    hidden: bool,
323    #[arg(long)]
324    follow: bool,
325    #[arg(long, value_name = "A=>B")]
326    forbid: Vec<String>,
327    #[arg(long)]
328    acyclic: bool,
329    #[arg(long, value_name = "L0,L1,...", value_delimiter = ',')]
330    layers: Vec<String>,
331    #[arg(long)]
332    layers_closed: bool,
333}
334
335/// The `mods` check's flags as `(name, kind)` pairs, read straight from the clap
336/// grammar (via [`crate::deps::flag_kinds`]). The single source of truth behind
337/// the published `docs/explain/mods.json` schema (a test reconciles the two) and
338/// the valid-flags hint on a bad argument.
339pub fn check_flags() -> Vec<(String, &'static str)> {
340    flag_kinds(ModsCheck::command())
341}
342
343/// Run a `mods` built-in check: walk the crate source under `root`/`--base`,
344/// build the module-use graph, and assert against it. Returns the probe
345/// outcome, a one-line reason, and the violation report. Spec / walk errors are
346/// [`ProbeOutcome::Broken`].
347pub fn check(args: &[String], root: &Path, timeout: Option<Duration>) -> (ProbeOutcome, String, String) {
348    let started = Instant::now();
349    let broken = |msg: String| (ProbeOutcome::Broken, msg, String::new());
350    let cli = match ModsCheck::try_parse_from(args.iter().map(String::as_str)) {
351        Ok(c) => c,
352        Err(e) => {
353            let valid = check_flags().iter().map(|(f, _)| format!("--{f}")).collect::<Vec<_>>().join(" ");
354            return broken(format!(
355                "mods: {} (valid flags: {valid})",
356                e.to_string().lines().next().unwrap_or("bad arguments")
357            ));
358        }
359    };
360    if cli.forbid.is_empty() && !cli.acyclic && cli.layers.is_empty() {
361        return broken("mods: nothing to assert (--forbid/--acyclic/--layers)".to_string());
362    }
363    if cli.layers_closed && cli.layers.is_empty() {
364        return broken("mods: --layers-closed requires --layers".to_string());
365    }
366    let forbids: Vec<(String, String)> = match cli
367        .forbid
368        .iter()
369        .map(|spec| {
370            spec.split_once("=>")
371                .map(|(a, b)| (a.trim().to_string(), b.trim().to_string()))
372                .filter(|(a, b)| !a.is_empty() && !b.is_empty())
373                .ok_or_else(|| format!("mods: --forbid needs 'A=>B', got '{spec}'"))
374        })
375        .collect()
376    {
377        Ok(f) => f,
378        Err(e) => return broken(e),
379    };
380
381    let mut name_spec = cli.name.clone().unwrap_or_default();
382    let exts: Vec<String> = if cli.ext.is_empty() { vec!["rs".to_string()] } else { cli.ext.clone() };
383    for e in &exts {
384        let e = e.trim().trim_start_matches('.');
385        if e.is_empty() {
386            continue;
387        }
388        if !name_spec.is_empty() {
389            name_spec.push('|');
390        }
391        name_spec.push_str(&format!("*.{e}"));
392    }
393    let names = match pattern::compile_name_set(&name_spec) {
394        Ok(n) => n,
395        Err(e) => return broken(format!("mods: invalid --name/--ext: {e}")),
396    };
397    let base = root.join(&cli.base);
398    let selector = walk::Selector {
399        base: base.clone(),
400        names: Some(names),
401        types: vec![EntryType::F],
402        size: None,
403        hidden: cli.hidden,
404        follow: cli.follow,
405        // The module graph always respects ignores; build trees hold no modules.
406        no_ignore: false,
407    };
408    let mut files: Vec<(String, String)> = Vec::new();
409    for entry in selector.walk() {
410        // The walk + per-file read is the unbounded cost on a large tree; honor
411        // the rule's --timeout here (deps bounds its cargo metadata the same way).
412        if let Some(limit) = timeout
413            && started.elapsed() >= limit
414        {
415            return broken(format!("mods: timed out after {:.1}s", limit.as_secs_f64()));
416        }
417        let entry = match entry {
418            Ok(e) => e,
419            Err(e) => return broken(format!("mods: {e}")),
420        };
421        if !entry.file_type().is_some_and(|t| t.is_file()) {
422            continue;
423        }
424        let path = entry.path();
425        let rel = path.strip_prefix(&base).unwrap_or(path);
426        let Ok(text) = std::fs::read_to_string(path) else {
427            continue; // unreadable / non-UTF-8 in a walk: skipped
428        };
429        files.push((module_name(rel), text));
430    }
431    if files.is_empty() {
432        return broken(format!("mods: no source files under {}", base.display()));
433    }
434    let graph = build_graph(&files);
435
436    let allowed: HashSet<EdgeKind> = [EdgeKind::Normal, EdgeKind::Build, EdgeKind::Dev].into_iter().collect();
437    let mut violations: Vec<crate::deps::Violation> = Vec::new();
438    for (from, to) in &forbids {
439        match crate::deps::forbid_path(&graph, from, to, &allowed) {
440            Ok(v) => violations.extend(v),
441            Err(e) => return broken(format!("mods: {e}")),
442        }
443    }
444    if cli.acyclic {
445        violations.extend(crate::deps::cycles(&graph, &allowed, false));
446    }
447    if !cli.layers.is_empty() {
448        let compiled = match cli.layers.iter().map(|p| pattern::compile_anchored(p)).collect::<Result<Vec<_>, _>>() {
449            Ok(c) => c,
450            Err(e) => return broken(format!("mods: --layers invalid pattern: {e}")),
451        };
452        let (layers, unassigned) =
453            match crate::deps::assign_layers(&graph, &cli.layers, |i, n| compiled[i].is_match(n)) {
454                Ok(r) => r,
455                Err(e) => return broken(format!("mods: --layers: {e}")),
456            };
457        violations.extend(crate::deps::layer_violations(&graph, &layers, &allowed));
458        if cli.layers_closed {
459            violations.extend(unassigned.into_iter().map(|name| crate::deps::Violation {
460                check: "layers-closed".to_string(),
461                subject: name,
462                evidence: "matches no layer".to_string(),
463            }));
464        }
465    }
466
467    crate::deps::report_outcome("mods", violations)
468}
469
470#[cfg(test)]
471mod tests {
472    use super::*;
473    use crate::deps::{self, EdgeKind};
474
475    fn all_edges() -> HashSet<EdgeKind> {
476        [EdgeKind::Normal, EdgeKind::Build, EdgeKind::Dev].into_iter().collect()
477    }
478
479    #[test]
480    fn use_targets_handles_the_common_forms() {
481        let src = r#"
482            // a leading comment with the word use in it
483            use std::collections::HashMap;          // external: dropped
484            use crate::domain::Entity;
485            pub use crate::infra::{Db, cache::Lru};  // re-export + nested brace
486            use self::helpers::go as g;              // self + alias
487            use super::sibling::Thing;
488            use crate::a::{self, b};                 // self segment (folds in resolve)
489            fn body() {
490                use crate::late::Local;             // own-line local import: counts
491            }
492        "#;
493        let mut t = use_targets(src);
494        t.sort();
495        // Raw, pre-resolution paths: brace-expanded, alias-stripped, intra-crate
496        // only (`self`/`*` segments are folded later, by `resolve`).
497        assert_eq!(
498            t,
499            vec![
500                "crate::a::b",
501                "crate::a::self",
502                "crate::domain::Entity",
503                "crate::infra::Db",
504                "crate::infra::cache::Lru",
505                "crate::late::Local",
506                "self::helpers::go",
507                "super::sibling::Thing",
508            ]
509        );
510    }
511
512    #[test]
513    fn use_targets_joins_multiline_groups() {
514        let src = "use crate::a::{\n    b,\n    c::d,\n};\n";
515        let mut t = use_targets(src);
516        t.sort();
517        assert_eq!(t, vec!["crate::a::b", "crate::a::c::d"]);
518    }
519
520    #[test]
521    fn resolve_picks_the_longest_known_module() {
522        let modules: HashSet<&str> = ["crate", "a", "a::b", "domain"].into_iter().collect();
523        // `a::b::Item` -> module `a::b`; `a::Item` -> module `a`.
524        assert_eq!(resolve("crate::a::b::Item", &[], &modules).as_deref(), Some("a::b"));
525        assert_eq!(resolve("crate::a::Item", &[], &modules).as_deref(), Some("a"));
526        // self/super resolve relative to the current module.
527        let cur = name_segs("a::b");
528        assert_eq!(resolve("super::Item", &cur, &modules).as_deref(), Some("a"));
529        assert_eq!(resolve("self::Item", &cur, &modules).as_deref(), Some("a::b"));
530        // An item in the crate root folds to `crate`; externals are None.
531        assert_eq!(resolve("crate::TopItem", &[], &modules).as_deref(), Some("crate"));
532        assert_eq!(resolve("serde::Deserialize", &[], &modules), None);
533    }
534
535    /// crate -> domain -> infra (a clean three-module layering).
536    fn sample_crate() -> Vec<(String, String)> {
537        vec![
538            ("crate".into(), "mod domain;\nmod infra;\nuse crate::domain::Entity;\n".into()),
539            ("domain".into(), "use crate::infra::Db;\npub struct Entity;\n".into()),
540            ("infra".into(), "pub struct Db;\n".into()),
541        ]
542    }
543
544    #[test]
545    fn build_graph_edges_and_forbid() {
546        let g = build_graph(&sample_crate());
547        // domain reaches infra; the reverse does not hold.
548        let v = deps::forbid_path(&g, "domain", "infra", &all_edges()).unwrap().unwrap();
549        assert_eq!(v.subject, "domain=>infra");
550        assert_eq!(v.evidence, "domain -> infra");
551        assert!(deps::forbid_path(&g, "infra", "domain", &all_edges()).unwrap().is_none());
552    }
553
554    #[test]
555    fn build_graph_layers_flag_an_upward_module_edge() {
556        let g = build_graph(&sample_crate());
557        // infra is the lowest layer (highest first): domain must not reach infra.
558        let labels = vec!["infra".to_string(), "domain".to_string()];
559        let (layers, _) = deps::assign_layers(&g, &labels, |i, name| labels[i] == name).unwrap();
560        let viol = deps::layer_violations(&g, &layers, &all_edges());
561        assert_eq!(viol.len(), 1);
562        assert_eq!(viol[0].subject, "domain => infra");
563        assert_eq!(viol[0].evidence, "domain -> infra");
564    }
565
566    #[test]
567    fn build_graph_detects_a_module_cycle() {
568        let files = vec![
569            ("crate".into(), "mod a;\nmod b;\n".to_string()),
570            ("a".into(), "use crate::b::Thing;\n".to_string()),
571            ("b".into(), "use crate::a::Other;\n".to_string()),
572        ];
573        let g = build_graph(&files);
574        let cycles = deps::cycles(&g, &all_edges(), false);
575        assert_eq!(cycles.len(), 1);
576        assert_eq!(cycles[0].subject, "a, b");
577        assert_eq!(cycles[0].evidence, "a -> b -> a");
578    }
579}