Skip to main content

coding_tools/
modgraph.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! The intra-crate **module dependency graph**, built heuristically from `use`
5//! statements — the lean, pure-Rust alternative to booting a semantic engine
6//! (the same honesty class as [`crate::outline`]). It is the single-crate
7//! analogue of [`crate::deps`]: it produces a [`deps::Graph`] whose nodes are
8//! *modules* (crate-relative paths like `domain::entity`) and whose edges are
9//! `use` dependencies, so the very same [`deps::forbid_path`] /
10//! [`deps::cycles`] / [`deps::layer_violations`] assertions apply at module
11//! granularity.
12//!
13//! ## What it sees, and what it does not
14//!
15//! Nodes come from the source files walked (one module per file; `lib.rs` /
16//! `main.rs` / `mod.rs` map to their containing module). Edges come from
17//! `use crate::…`, `use self::…`, and `use super::…` statements — brace groups
18//! are expanded, `as` aliases stripped, globs and `self` segments folded to the
19//! enclosing module, and each path resolved to the **longest known module
20//! prefix**. External imports (`use std::…`, `use serde::…`) are ignored.
21//!
22//! Heuristic, not a parser: each `use` is read from the start of its line (how
23//! rustfmt always writes them); a `use` reaching through a `pub use` re-export
24//! resolves to the re-exporting module (not the origin); `use` inside macros or
25//! `#[cfg(...)]`-disabled code, fully-qualified paths written inline without a
26//! `use`, and the nesting of inline `mod foo { … }` blocks are not modelled —
27//! the latter fold into their file's module. When in doubt the graph
28//! under-reports rather than inventing edges.
29
30use std::collections::{BTreeSet, HashMap, HashSet};
31use std::path::Path;
32use std::sync::OnceLock;
33use std::time::{Duration, Instant};
34
35use clap::{CommandFactory, Parser};
36use regex::Regex;
37
38use crate::deps::{EdgeKind, Graph, Package, grammar};
39use crate::pattern;
40use crate::rules::ProbeOutcome;
41use crate::walk::{self, EntryType};
42
43/// The crate-relative module path for a source file, given its path **relative
44/// to the crate source root**. The crate root file (`lib.rs` / `main.rs`) and
45/// any `mod.rs` map to their containing module; every other file adds its stem.
46///
47/// # Examples
48///
49/// ```
50/// use std::path::Path;
51/// use coding_tools::modgraph::module_name;
52///
53/// assert_eq!(module_name(Path::new("lib.rs")), "crate");
54/// assert_eq!(module_name(Path::new("main.rs")), "crate");
55/// assert_eq!(module_name(Path::new("domain.rs")), "domain");
56/// assert_eq!(module_name(Path::new("domain/mod.rs")), "domain");
57/// assert_eq!(module_name(Path::new("domain/entity.rs")), "domain::entity");
58/// ```
59pub fn module_name(rel: &Path) -> String {
60    let mut segs: Vec<String> = rel
61        .components()
62        .filter_map(|c| match c {
63            std::path::Component::Normal(s) => Some(s.to_string_lossy().into_owned()),
64            _ => None,
65        })
66        .collect();
67    if let Some(file) = segs.pop() {
68        let stem = Path::new(&file)
69            .file_stem()
70            .map(|s| s.to_string_lossy().into_owned())
71            .unwrap_or(file);
72        if stem != "mod" && stem != "lib" && stem != "main" {
73            segs.push(stem);
74        }
75    }
76    if segs.is_empty() {
77        "crate".to_string()
78    } else {
79        segs.join("::")
80    }
81}
82
83/// Build the module-use [`Graph`] from `(module_name, file_contents)` pairs.
84/// Nodes are the module names; an edge `A -> B` means a file in module `A` has
85/// a `use` resolving into module `B`. Self-edges are dropped.
86pub fn build_graph(files: &[(String, String)]) -> Graph {
87    let modules: HashSet<&str> = files.iter().map(|(n, _)| n.as_str()).collect();
88    let mut packages = HashMap::new();
89    let mut edges: HashMap<String, Vec<(String, Vec<EdgeKind>)>> = HashMap::new();
90    let mut members: Vec<String> = Vec::new();
91    for (name, content) in files {
92        packages.insert(
93            name.clone(),
94            Package {
95                name: name.clone(),
96                version: String::new(),
97            },
98        );
99        members.push(name.clone());
100        let current = name_segs(name);
101        let mut targets: BTreeSet<String> = BTreeSet::new();
102        for raw in use_targets(content) {
103            if let Some(t) = resolve(&raw, &current, &modules)
104                && t != *name
105            {
106                targets.insert(t);
107            }
108        }
109        edges.insert(
110            name.clone(),
111            targets
112                .into_iter()
113                .map(|t| (t, vec![EdgeKind::Normal]))
114                .collect(),
115        );
116    }
117    members.sort();
118    members.dedup();
119    Graph {
120        packages,
121        edges,
122        members,
123    }
124}
125
126/// The intra-crate `use` targets in a source file: each returned string is a
127/// brace-expanded, alias-stripped path beginning with `crate`, `self`, or
128/// `super` (external imports are dropped).
129pub fn use_targets(content: &str) -> Vec<String> {
130    let mut out = Vec::new();
131    for stmt in use_statements(content) {
132        for leaf in expand_braces(&strip_aliases(&stmt)) {
133            let leaf = leaf.trim();
134            let head = leaf.split("::").next().unwrap_or("");
135            if matches!(head, "crate" | "self" | "super") {
136                out.push(leaf.to_string());
137            }
138        }
139    }
140    out
141}
142
143/// Resolve one raw `use` path (e.g. `crate::a::b::Item`) against the set of
144/// known module names, relative to the `current` module's segments. Returns the
145/// longest known module prefix, or `None` for an unresolvable / external path.
146fn resolve(raw: &str, current: &[String], modules: &HashSet<&str>) -> Option<String> {
147    let parts: Vec<&str> = raw
148        .split("::")
149        .map(str::trim)
150        .filter(|s| !s.is_empty())
151        .collect();
152    let mut abs: Vec<String> = Vec::new();
153    let mut i = 0;
154    match *parts.first()? {
155        "crate" => i = 1,
156        "self" => {
157            abs = current.to_vec();
158            i = 1;
159        }
160        "super" => {
161            abs = current.to_vec();
162            while parts.get(i) == Some(&"super") {
163                abs.pop();
164                i += 1;
165            }
166        }
167        _ => return None, // an external crate
168    }
169    for p in &parts[i..] {
170        if *p == "self" || *p == "*" {
171            continue; // `a::{self}` / `a::*` both mean module `a`
172        }
173        abs.push((*p).to_string());
174    }
175    // Longest-prefix match: the trailing segments are usually item names.
176    loop {
177        let name = if abs.is_empty() {
178            "crate".to_string()
179        } else {
180            abs.join("::")
181        };
182        if modules.contains(name.as_str()) {
183            return Some(name);
184        }
185        abs.pop()?;
186    }
187}
188
189/// The module-path segments of a module name (`crate` is the empty root).
190fn name_segs(name: &str) -> Vec<String> {
191    if name == "crate" {
192        Vec::new()
193    } else {
194        name.split("::").map(String::from).collect()
195    }
196}
197
198/// Gather `use` statements as path-tree strings (text between `use` and `;`),
199/// joining continuation lines so multi-line brace groups arrive whole.
200fn use_statements(content: &str) -> Vec<String> {
201    let mut stmts = Vec::new();
202    let mut lines = content.lines();
203    while let Some(line) = lines.next() {
204        let Some(rest) = strip_vis_use(line) else {
205            continue;
206        };
207        let mut body = rest.to_string();
208        loop {
209            if let Some(idx) = body.find(';') {
210                body.truncate(idx);
211                stmts.push(body);
212                break;
213            }
214            match lines.next() {
215                Some(next) => {
216                    body.push(' ');
217                    body.push_str(next.trim());
218                }
219                None => {
220                    stmts.push(body);
221                    break;
222                }
223            }
224        }
225    }
226    stmts
227}
228
229/// If `line` is a `use` item (optionally `pub` / `pub(...)`), return the text
230/// after the `use` keyword; otherwise `None`.
231fn strip_vis_use(line: &str) -> Option<&str> {
232    let t = line.trim_start();
233    let after_vis = if let Some(r) = t.strip_prefix("pub") {
234        let r = r.trim_start();
235        let r = if r.starts_with('(') {
236            r.find(')').map(|i| &r[i + 1..]).unwrap_or(r)
237        } else {
238            r
239        };
240        r.trim_start()
241    } else {
242        t
243    };
244    after_vis
245        .strip_prefix("use")
246        .filter(|r| r.starts_with(char::is_whitespace))
247        .map(str::trim_start)
248}
249
250/// Remove ` as Alias` runs (the import-alias keyword) from a use-tree string.
251fn strip_aliases(s: &str) -> std::borrow::Cow<'_, str> {
252    static RE: OnceLock<Regex> = OnceLock::new();
253    let re = RE.get_or_init(|| Regex::new(r"\s+as\s+[A-Za-z_][A-Za-z0-9_]*").unwrap());
254    re.replace_all(s, "")
255}
256
257/// Expand `{…}` groups in a use path into one leaf path per branch.
258fn expand_braces(s: &str) -> Vec<String> {
259    let s = s.trim();
260    match s.find('{') {
261        None => {
262            if s.is_empty() {
263                vec![]
264            } else {
265                vec![s.to_string()]
266            }
267        }
268        Some(open) => {
269            let Some(close) = matching_brace(s.as_bytes(), open) else {
270                return vec![s.to_string()]; // unbalanced: leave verbatim
271            };
272            let prefix = &s[..open];
273            let inner = &s[open + 1..close];
274            let suffix = &s[close + 1..];
275            let mut out = Vec::new();
276            for part in split_top_commas(inner) {
277                let part = part.trim();
278                if part.is_empty() {
279                    continue;
280                }
281                out.extend(expand_braces(&format!("{prefix}{part}{suffix}")));
282            }
283            out
284        }
285    }
286}
287
288/// The index of the `}` matching the `{` at `open`, by depth.
289fn matching_brace(bytes: &[u8], open: usize) -> Option<usize> {
290    let mut depth = 0usize;
291    for (i, &b) in bytes.iter().enumerate().skip(open) {
292        match b {
293            b'{' => depth += 1,
294            b'}' => {
295                depth -= 1;
296                if depth == 0 {
297                    return Some(i);
298                }
299            }
300            _ => {}
301        }
302    }
303    None
304}
305
306/// Split on commas that sit at brace depth zero.
307fn split_top_commas(s: &str) -> Vec<&str> {
308    let mut parts = Vec::new();
309    let mut depth = 0i32;
310    let mut start = 0;
311    for (i, c) in s.char_indices() {
312        match c {
313            '{' => depth += 1,
314            '}' => depth -= 1,
315            ',' if depth == 0 => {
316                parts.push(&s[start..i]);
317                start = i + 1;
318            }
319            _ => {}
320        }
321    }
322    parts.push(&s[start..]);
323    parts
324}
325
326// ----- The `mods` built-in check -------------------------------------------------
327
328/// The targeting + assertion flags of a `mods` built-in check. No framing — the
329/// rule layer owns the verdict.
330#[derive(Parser, Debug)]
331#[command(no_binary_name = true, disable_help_flag = true)]
332struct ModsCheck {
333    #[arg(long, default_value = "src")]
334    base: String,
335    #[arg(long)]
336    name: Option<String>,
337    #[arg(long, value_delimiter = ',')]
338    ext: Vec<String>,
339    #[arg(long)]
340    hidden: bool,
341    #[arg(long)]
342    follow: bool,
343    #[arg(long, value_name = "A=>B")]
344    forbid: Vec<String>,
345    #[arg(long)]
346    acyclic: bool,
347    #[arg(long, value_name = "L0,L1,...", value_delimiter = ',')]
348    layers: Vec<String>,
349    #[arg(long)]
350    layers_closed: bool,
351}
352
353/// The `mods` check's introspected grammar (see [`crate::deps::grammar`]).
354pub fn check_grammar() -> crate::deps::Grammar {
355    grammar(ModsCheck::command())
356}
357
358/// Run a `mods` built-in check: walk the crate source under `root`/`--base`,
359/// build the module-use graph, and assert against it. Returns the probe
360/// outcome, a one-line reason, and the violation report. Spec / walk errors are
361/// [`ProbeOutcome::Broken`].
362pub fn check(
363    args: &[String],
364    root: &Path,
365    timeout: Option<Duration>,
366) -> (ProbeOutcome, String, String) {
367    let started = Instant::now();
368    let broken = |msg: String| (ProbeOutcome::Broken, msg, String::new());
369    let cli = match ModsCheck::try_parse_from(args.iter().map(String::as_str)) {
370        Ok(c) => c,
371        Err(e) => {
372            let valid = check_grammar()
373                .flags
374                .iter()
375                .map(|s| format!("--{}", s.name))
376                .collect::<Vec<_>>()
377                .join(" ");
378            return broken(format!(
379                "mods: {} (valid flags: {valid})",
380                e.to_string().lines().next().unwrap_or("bad arguments")
381            ));
382        }
383    };
384    if cli.forbid.is_empty() && !cli.acyclic && cli.layers.is_empty() {
385        return broken("mods: nothing to assert (--forbid/--acyclic/--layers)".to_string());
386    }
387    if cli.layers_closed && cli.layers.is_empty() {
388        return broken("mods: --layers-closed requires --layers".to_string());
389    }
390    let forbids: Vec<(String, String)> = match cli
391        .forbid
392        .iter()
393        .map(|spec| {
394            spec.split_once("=>")
395                .map(|(a, b)| (a.trim().to_string(), b.trim().to_string()))
396                .filter(|(a, b)| !a.is_empty() && !b.is_empty())
397                .ok_or_else(|| format!("mods: --forbid needs 'A=>B', got '{spec}'"))
398        })
399        .collect()
400    {
401        Ok(f) => f,
402        Err(e) => return broken(e),
403    };
404
405    let mut name_spec = cli.name.clone().unwrap_or_default();
406    let exts: Vec<String> = if cli.ext.is_empty() {
407        vec!["rs".to_string()]
408    } else {
409        cli.ext.clone()
410    };
411    for e in &exts {
412        let e = e.trim().trim_start_matches('.');
413        if e.is_empty() {
414            continue;
415        }
416        if !name_spec.is_empty() {
417            name_spec.push('|');
418        }
419        name_spec.push_str(&format!("*.{e}"));
420    }
421    let names = match pattern::compile_name_set(&name_spec) {
422        Ok(n) => n,
423        Err(e) => return broken(format!("mods: invalid --name/--ext: {e}")),
424    };
425    let base = root.join(&cli.base);
426    let selector = walk::Selector {
427        base: base.clone(),
428        names: Some(names),
429        types: vec![EntryType::F],
430        size: None,
431        hidden: cli.hidden,
432        follow: cli.follow,
433        // The module graph always respects ignores; build trees hold no modules.
434        no_ignore: false,
435    };
436    let mut files: Vec<(String, String)> = Vec::new();
437    for entry in selector.walk() {
438        // The walk + per-file read is the unbounded cost on a large tree; honor
439        // the rule's --timeout here (deps bounds its cargo metadata the same way).
440        if let Some(limit) = timeout
441            && started.elapsed() >= limit
442        {
443            return broken(format!("mods: timed out after {:.1}s", limit.as_secs_f64()));
444        }
445        let entry = match entry {
446            Ok(e) => e,
447            Err(e) => return broken(format!("mods: {e}")),
448        };
449        if !entry.file_type().is_some_and(|t| t.is_file()) {
450            continue;
451        }
452        let path = entry.path();
453        let rel = path.strip_prefix(&base).unwrap_or(path);
454        let Ok(text) = std::fs::read_to_string(path) else {
455            continue; // unreadable / non-UTF-8 in a walk: skipped
456        };
457        files.push((module_name(rel), text));
458    }
459    if files.is_empty() {
460        return broken(format!("mods: no source files under {}", base.display()));
461    }
462    let graph = build_graph(&files);
463
464    let allowed: HashSet<EdgeKind> = [EdgeKind::Normal, EdgeKind::Build, EdgeKind::Dev]
465        .into_iter()
466        .collect();
467    let mut violations: Vec<crate::deps::Violation> = Vec::new();
468    for (from, to) in &forbids {
469        match crate::deps::forbid_path(&graph, from, to, &allowed) {
470            Ok(v) => violations.extend(v),
471            Err(e) => return broken(format!("mods: {e}")),
472        }
473    }
474    if cli.acyclic {
475        violations.extend(crate::deps::cycles(&graph, &allowed, false));
476    }
477    if !cli.layers.is_empty() {
478        let compiled = match cli
479            .layers
480            .iter()
481            .map(|p| pattern::compile_anchored(p))
482            .collect::<Result<Vec<_>, _>>()
483        {
484            Ok(c) => c,
485            Err(e) => return broken(format!("mods: --layers invalid pattern: {e}")),
486        };
487        let (layers, unassigned) =
488            match crate::deps::assign_layers(&graph, &cli.layers, |i, n| compiled[i].is_match(n)) {
489                Ok(r) => r,
490                Err(e) => return broken(format!("mods: --layers: {e}")),
491            };
492        violations.extend(crate::deps::layer_violations(&graph, &layers, &allowed));
493        if cli.layers_closed {
494            violations.extend(unassigned.into_iter().map(|name| crate::deps::Violation {
495                check: "layers-closed".to_string(),
496                subject: name,
497                evidence: "matches no layer".to_string(),
498            }));
499        }
500    }
501
502    crate::deps::report_outcome("mods", violations)
503}
504
505#[cfg(test)]
506mod tests {
507    use super::*;
508    use crate::deps::{self, EdgeKind};
509
510    fn all_edges() -> HashSet<EdgeKind> {
511        [EdgeKind::Normal, EdgeKind::Build, EdgeKind::Dev]
512            .into_iter()
513            .collect()
514    }
515
516    #[test]
517    fn use_targets_handles_the_common_forms() {
518        let src = r#"
519            // a leading comment with the word use in it
520            use std::collections::HashMap;          // external: dropped
521            use crate::domain::Entity;
522            pub use crate::infra::{Db, cache::Lru};  // re-export + nested brace
523            use self::helpers::go as g;              // self + alias
524            use super::sibling::Thing;
525            use crate::a::{self, b};                 // self segment (folds in resolve)
526            fn body() {
527                use crate::late::Local;             // own-line local import: counts
528            }
529        "#;
530        let mut t = use_targets(src);
531        t.sort();
532        // Raw, pre-resolution paths: brace-expanded, alias-stripped, intra-crate
533        // only (`self`/`*` segments are folded later, by `resolve`).
534        assert_eq!(
535            t,
536            vec![
537                "crate::a::b",
538                "crate::a::self",
539                "crate::domain::Entity",
540                "crate::infra::Db",
541                "crate::infra::cache::Lru",
542                "crate::late::Local",
543                "self::helpers::go",
544                "super::sibling::Thing",
545            ]
546        );
547    }
548
549    #[test]
550    fn use_targets_joins_multiline_groups() {
551        let src = "use crate::a::{\n    b,\n    c::d,\n};\n";
552        let mut t = use_targets(src);
553        t.sort();
554        assert_eq!(t, vec!["crate::a::b", "crate::a::c::d"]);
555    }
556
557    #[test]
558    fn resolve_picks_the_longest_known_module() {
559        let modules: HashSet<&str> = ["crate", "a", "a::b", "domain"].into_iter().collect();
560        // `a::b::Item` -> module `a::b`; `a::Item` -> module `a`.
561        assert_eq!(
562            resolve("crate::a::b::Item", &[], &modules).as_deref(),
563            Some("a::b")
564        );
565        assert_eq!(
566            resolve("crate::a::Item", &[], &modules).as_deref(),
567            Some("a")
568        );
569        // self/super resolve relative to the current module.
570        let cur = name_segs("a::b");
571        assert_eq!(resolve("super::Item", &cur, &modules).as_deref(), Some("a"));
572        assert_eq!(
573            resolve("self::Item", &cur, &modules).as_deref(),
574            Some("a::b")
575        );
576        // An item in the crate root folds to `crate`; externals are None.
577        assert_eq!(
578            resolve("crate::TopItem", &[], &modules).as_deref(),
579            Some("crate")
580        );
581        assert_eq!(resolve("serde::Deserialize", &[], &modules), None);
582    }
583
584    /// crate -> domain -> infra (a clean three-module layering).
585    fn sample_crate() -> Vec<(String, String)> {
586        vec![
587            (
588                "crate".into(),
589                "mod domain;\nmod infra;\nuse crate::domain::Entity;\n".into(),
590            ),
591            (
592                "domain".into(),
593                "use crate::infra::Db;\npub struct Entity;\n".into(),
594            ),
595            ("infra".into(), "pub struct Db;\n".into()),
596        ]
597    }
598
599    #[test]
600    fn build_graph_edges_and_forbid() {
601        let g = build_graph(&sample_crate());
602        // domain reaches infra; the reverse does not hold.
603        let v = deps::forbid_path(&g, "domain", "infra", &all_edges())
604            .unwrap()
605            .unwrap();
606        assert_eq!(v.subject, "domain=>infra");
607        assert_eq!(v.evidence, "domain -> infra");
608        assert!(
609            deps::forbid_path(&g, "infra", "domain", &all_edges())
610                .unwrap()
611                .is_none()
612        );
613    }
614
615    #[test]
616    fn build_graph_layers_flag_an_upward_module_edge() {
617        let g = build_graph(&sample_crate());
618        // infra is the lowest layer (highest first): domain must not reach infra.
619        let labels = vec!["infra".to_string(), "domain".to_string()];
620        let (layers, _) = deps::assign_layers(&g, &labels, |i, name| labels[i] == name).unwrap();
621        let viol = deps::layer_violations(&g, &layers, &all_edges());
622        assert_eq!(viol.len(), 1);
623        assert_eq!(viol[0].subject, "domain => infra");
624        assert_eq!(viol[0].evidence, "domain -> infra");
625    }
626
627    #[test]
628    fn build_graph_detects_a_module_cycle() {
629        let files = vec![
630            ("crate".into(), "mod a;\nmod b;\n".to_string()),
631            ("a".into(), "use crate::b::Thing;\n".to_string()),
632            ("b".into(), "use crate::a::Other;\n".to_string()),
633        ];
634        let g = build_graph(&files);
635        let cycles = deps::cycles(&g, &all_edges(), false);
636        assert_eq!(cycles.len(), 1);
637        assert_eq!(cycles[0].subject, "a, b");
638        assert_eq!(cycles[0].evidence, "a -> b -> a");
639    }
640}