coding_tools/
survey.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! `ct-survey`'s format-contextualized codebase survey.
5//!
6//! Where [`crate::tree`] reports file-generic line/word/character counts over any
7//! tree, `ct-survey` reports them **bucketed by the units a build system defines**
8//! — for Rust, the workspace → crate → module hierarchy. The honesty classes are
9//! kept distinct and carried into the output so they are never silently conflated:
10//!
11//! * **authoritative** — crate identity, workspace membership, and cargo target
12//!   kinds, read from `cargo metadata` (the same mechanism [`crate::deps`] uses);
13//! * **exact** — file, line, word, and character counts;
14//! * **heuristic** — the module bucketing (via [`crate::modgraph::module_name`])
15//!   and the `#[test]` tally, which a scan approximates rather than proves.
16//!
17//! The pure pieces here (metadata parse, the test scan, the roll-up, rendering)
18//! are doctested; `src/bin/ct-survey.rs` is the thin IO shell that walks the
19//! filesystem and drives them.
20
21use std::collections::BTreeMap;
22use std::path::{Path, PathBuf};
23use std::sync::OnceLock;
24
25use regex::Regex;
26use serde_json::{Value, json};
27
28use crate::modgraph::module_name;
29
/// Which contextual group type frames a survey.
///
/// [`GroupKind::label`] gives each variant's textual token, and
/// [`infer_group`] chooses a variant from the text of a `Cargo.toml`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum GroupKind {
    /// A cargo workspace: the elements are its member crates.
    CargoWorkspace,
    /// A single cargo crate: the element is that crate alone.
    CargoCrate,
}
38
39impl GroupKind {
40    /// The `--group` token / JSON label.
41    pub fn label(self) -> &'static str {
42        match self {
43            GroupKind::CargoWorkspace => "cargo-workspace",
44            GroupKind::CargoCrate => "cargo-crate",
45        }
46    }
47}
48
/// How deep the survey graph descends.
///
/// [`render_text`] prints the per-module lines only for [`Depth::Module`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum Depth {
    /// Stop at crates (no per-module breakdown).
    Crate,
    /// Descend into each crate's modules (the default).
    Module,
}
57
/// Sort key for crates and, within each crate, its modules.
///
/// The count-based keys order largest first and fall back to ascending name
/// when two entries have the same count.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum SortKey {
    /// By name, ascending (the default).
    Name,
    /// By file count, largest first.
    Files,
    /// By line count, largest first.
    Lines,
    /// By heuristic test count, largest first.
    Tests,
}
70
71/// Infer the contextual group type from a `Cargo.toml`'s text: a manifest that
72/// declares a `[workspace]` table is a [`GroupKind::CargoWorkspace`], otherwise a
73/// [`GroupKind::CargoCrate`]. This probes only the provided manifest — the
74/// authoritative member and target data still comes from `cargo metadata`.
75///
76/// # Examples
77///
78/// ```
79/// use coding_tools::survey::{infer_group, GroupKind};
80///
81/// assert_eq!(infer_group("[workspace]\nmembers = [\"a\"]\n"), GroupKind::CargoWorkspace);
82/// assert_eq!(infer_group("[workspace.package]\nversion = \"1\"\n"), GroupKind::CargoWorkspace);
83/// assert_eq!(infer_group("[package]\nname = \"x\"\n"), GroupKind::CargoCrate);
84/// // A commented-out header does not count.
85/// assert_eq!(infer_group("# [workspace]\n[package]\n"), GroupKind::CargoCrate);
86/// ```
87pub fn infer_group(manifest_text: &str) -> GroupKind {
88    for line in manifest_text.lines() {
89        let t = line.trim();
90        if t.starts_with("[workspace]") || t.starts_with("[workspace.") {
91            return GroupKind::CargoWorkspace;
92        }
93    }
94    GroupKind::CargoCrate
95}
96
/// One cargo target within a package.
///
/// [`parse_metadata`] builds these from the entries of a package's `targets`
/// array: a missing `kind` array becomes an empty `kinds`, and a missing
/// `src_path` becomes the empty string.
#[derive(Debug, Clone)]
pub struct Target {
    /// Cargo target kinds, e.g. `["lib"]`, `["bin"]`, `["test"]`, `["bench"]`.
    pub kinds: Vec<String>,
    /// Absolute path to the target's entry source file.
    pub src_path: String,
}
105
/// One package as `cargo metadata` reports it (the subset a survey needs).
///
/// When produced by [`parse_metadata`], `id` is always present; `name`,
/// `version`, and `manifest_path` fall back to the empty string if the
/// metadata omits them, and `targets` falls back to an empty list.
#[derive(Debug, Clone)]
pub struct PkgMeta {
    /// Opaque package id (the metadata graph key).
    pub id: String,
    /// Crate name.
    pub name: String,
    /// Resolved version.
    pub version: String,
    /// Absolute path to the package's `Cargo.toml`.
    pub manifest_path: String,
    /// The package's build targets.
    pub targets: Vec<Target>,
}
120
121impl PkgMeta {
122    /// The package directory (its `Cargo.toml`'s parent).
123    pub fn dir(&self) -> PathBuf {
124        Path::new(&self.manifest_path)
125            .parent()
126            .map(Path::to_path_buf)
127            .unwrap_or_else(|| PathBuf::from("."))
128    }
129
130    /// The primary source root for module bucketing: the directory of the `lib`
131    /// target's entry file, else the first `bin`, else the first target. `None`
132    /// when no target carries a source path.
133    pub fn src_root(&self) -> Option<PathBuf> {
134        let pick = self
135            .targets
136            .iter()
137            .find(|t| t.kinds.iter().any(|k| k == "lib"))
138            .or_else(|| {
139                self.targets
140                    .iter()
141                    .find(|t| t.kinds.iter().any(|k| k == "bin"))
142            })
143            .or_else(|| self.targets.first())?;
144        Path::new(&pick.src_path).parent().map(Path::to_path_buf)
145    }
146
147    /// Authoritative count of cargo test targets (a `kind` of `test`).
148    pub fn test_targets(&self) -> u64 {
149        self.targets
150            .iter()
151            .filter(|t| t.kinds.iter().any(|k| k == "test"))
152            .count() as u64
153    }
154
155    /// Authoritative count of cargo bench targets (a `kind` of `bench`).
156    pub fn bench_targets(&self) -> u64 {
157        self.targets
158            .iter()
159            .filter(|t| t.kinds.iter().any(|k| k == "bench"))
160            .count() as u64
161    }
162}
163
/// The parsed subset of `cargo metadata`: packages by id, workspace member ids,
/// and the workspace root directory.
///
/// Produced by [`parse_metadata`], which keys `packages` by each package's
/// `id` and leaves `workspace_root` empty if the metadata does not carry one.
#[derive(Debug, Clone)]
pub struct Metadata {
    /// Package id → its metadata.
    pub packages: BTreeMap<String, PkgMeta>,
    /// Workspace member package ids.
    pub members: Vec<String>,
    /// The workspace root directory.
    pub workspace_root: String,
}
175
176/// Parse `cargo metadata --format-version 1` JSON into the survey [`Metadata`].
177/// Errors on malformed JSON or a missing `packages`/`workspace_members` array —
178/// a defective read, never a silent empty survey.
179pub fn parse_metadata(text: &str) -> Result<Metadata, String> {
180    let v: Value = serde_json::from_str(text).map_err(|e| format!("cargo metadata JSON: {e}"))?;
181    let mut packages = BTreeMap::new();
182    for p in v["packages"]
183        .as_array()
184        .ok_or("metadata missing packages")?
185    {
186        let id = p["id"].as_str().ok_or("package missing id")?.to_string();
187        let targets = p["targets"]
188            .as_array()
189            .map(|ts| {
190                ts.iter()
191                    .map(|t| Target {
192                        kinds: t["kind"]
193                            .as_array()
194                            .map(|ks| {
195                                ks.iter()
196                                    .filter_map(|k| k.as_str().map(String::from))
197                                    .collect()
198                            })
199                            .unwrap_or_default(),
200                        src_path: t["src_path"].as_str().unwrap_or("").to_string(),
201                    })
202                    .collect()
203            })
204            .unwrap_or_default();
205        packages.insert(
206            id.clone(),
207            PkgMeta {
208                id,
209                name: p["name"].as_str().unwrap_or("").to_string(),
210                version: p["version"].as_str().unwrap_or("").to_string(),
211                manifest_path: p["manifest_path"].as_str().unwrap_or("").to_string(),
212                targets,
213            },
214        );
215    }
216    let members = v["workspace_members"]
217        .as_array()
218        .ok_or("metadata missing workspace_members")?
219        .iter()
220        .filter_map(|m| m.as_str().map(String::from))
221        .collect();
222    let workspace_root = v["workspace_root"].as_str().unwrap_or("").to_string();
223    Ok(Metadata {
224        packages,
225        members,
226        workspace_root,
227    })
228}
229
230/// Heuristic count of test functions in a Rust source: attributes whose final
231/// path segment is `test` — `#[test]`, `#[tokio::test]`, `#[test_case::test]`,
232/// and the like. A comprehension aid, not a parser: it does not discount
233/// attributes inside strings or comments, and `#[cfg(test)]` (a module gate, not
234/// a test) is deliberately excluded. Always reported as a heuristic value.
235///
236/// # Examples
237///
238/// ```
239/// use coding_tools::survey::count_tests;
240///
241/// assert_eq!(count_tests("#[test]\nfn a() {}\n#[tokio::test]\nasync fn b() {}"), 2);
242/// // `#[cfg(test)]` gates a module; it is not a test.
243/// assert_eq!(count_tests("#[cfg(test)]\nmod tests { fn helper() {} }"), 0);
244/// assert_eq!(count_tests("fn not_a_test() {}"), 0);
245/// ```
246pub fn count_tests(src: &str) -> u64 {
247    static RE: OnceLock<Regex> = OnceLock::new();
248    let re = RE.get_or_init(|| {
249        Regex::new(r"#\[\s*(?:[A-Za-z_]\w*\s*::\s*)*test\s*[\](]").expect("a valid regex")
250    });
251    re.find_iter(src).count() as u64
252}
253
/// One walked source file's contribution: its path relative to the crate's
/// source root (`None` when it lies outside that root, e.g. an integration test
/// under `tests/`), its exact counts, and its heuristic test tally.
///
/// [`roll_up`] adds every file to the crate total and uses `rel_to_src` to
/// decide which module, if any, the file also counts toward.
#[derive(Debug, Clone)]
pub struct FileStat {
    /// Path relative to the crate source root, `/`-separated; `None` if outside.
    pub rel_to_src: Option<String>,
    /// Exact line count.
    pub lines: u64,
    /// Exact word count.
    pub words: u64,
    /// Exact character count.
    pub chars: u64,
    /// Heuristic `#[test]` count.
    pub tests: u64,
}
270
/// A rolled-up count block (a crate's or a module's).
///
/// The derived `Default` is the all-zero block that roll-ups start from.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Counts {
    /// Number of source files.
    pub files: u64,
    /// Total lines.
    pub lines: u64,
    /// Total words.
    pub words: u64,
    /// Total characters.
    pub chars: u64,
    /// Total heuristic test count.
    pub tests: u64,
}
285
/// One module node in the survey graph.
///
/// [`roll_up`] produces one node per distinct module name among the files
/// that lie under the crate's source root.
#[derive(Debug, Clone)]
pub struct ModuleNode {
    /// Crate-relative module path (e.g. `domain::entity`).
    pub name: String,
    /// The module's counts.
    pub counts: Counts,
}
294
295/// Roll a crate's [`FileStat`]s into whole-crate [`Counts`] (every file) plus a
296/// per-module breakdown (only files under the source root, bucketed by
297/// [`module_name`]), the modules sorted by name. The whole-crate total can
298/// exceed the module sum: files outside the source root (integration tests,
299/// benches) count toward the crate but belong to no module.
300///
301/// # Examples
302///
303/// ```
304/// use coding_tools::survey::{roll_up, FileStat};
305///
306/// let files = vec![
307///     FileStat { rel_to_src: Some("lib.rs".into()), lines: 10, words: 20, chars: 100, tests: 1 },
308///     FileStat { rel_to_src: Some("a/mod.rs".into()), lines: 5, words: 8, chars: 40, tests: 0 },
309///     FileStat { rel_to_src: None, lines: 3, words: 4, chars: 20, tests: 2 }, // a tests/ file
310/// ];
311/// let (crate_counts, modules) = roll_up(&files);
312/// assert_eq!(crate_counts.files, 3);
313/// assert_eq!(crate_counts.lines, 18);
314/// assert_eq!(crate_counts.tests, 3);
315/// // Two modules: `a` and `crate` (lib.rs); the tests/ file is in neither.
316/// assert_eq!(modules.len(), 2);
317/// assert_eq!(modules[0].name, "a");
318/// assert_eq!(modules[1].name, "crate");
319/// ```
320pub fn roll_up(files: &[FileStat]) -> (Counts, Vec<ModuleNode>) {
321    let mut crate_counts = Counts::default();
322    let mut by_mod: BTreeMap<String, Counts> = BTreeMap::new();
323    for f in files {
324        crate_counts.files += 1;
325        crate_counts.lines += f.lines;
326        crate_counts.words += f.words;
327        crate_counts.chars += f.chars;
328        crate_counts.tests += f.tests;
329        if let Some(rel) = &f.rel_to_src {
330            let m = by_mod.entry(module_name(Path::new(rel))).or_default();
331            m.files += 1;
332            m.lines += f.lines;
333            m.words += f.words;
334            m.chars += f.chars;
335            m.tests += f.tests;
336        }
337    }
338    let modules = by_mod
339        .into_iter()
340        .map(|(name, counts)| ModuleNode { name, counts })
341        .collect();
342    (crate_counts, modules)
343}
344
/// One crate node in the survey graph.
///
/// `counts` and `modules` are the heuristic/exact figures of the kind
/// [`roll_up`] returns; `test_targets` and `bench_targets` are the
/// authoritative cargo target counts.
#[derive(Debug, Clone)]
pub struct CrateNode {
    /// Crate name.
    pub name: String,
    /// Resolved version.
    pub version: String,
    /// The crate's rolled-up counts (every source file).
    pub counts: Counts,
    /// Authoritative cargo test-target count.
    pub test_targets: u64,
    /// Authoritative cargo bench-target count.
    pub bench_targets: u64,
    /// The crate's modules (empty at `--depth crate`).
    pub modules: Vec<ModuleNode>,
}
361
/// A complete survey graph.
///
/// Consumed by [`totals`], [`render_text`], and [`to_json`]; reorder it in
/// place with [`Survey::sort`].
#[derive(Debug, Clone)]
pub struct Survey {
    /// The contextual group type this survey was built under.
    pub group: GroupKind,
    /// Workspace (or lone crate) display name.
    pub name: String,
    /// Workspace root (or lone crate) directory.
    pub root: String,
    /// The surveyed crates.
    pub crates: Vec<CrateNode>,
}
374
375fn order(a_name: &str, b_name: &str, a: u64, b: u64, key: SortKey) -> std::cmp::Ordering {
376    match key {
377        SortKey::Name => a_name.cmp(b_name),
378        // Count keys descend (largest first); ties break by name.
379        _ => b.cmp(&a).then_with(|| a_name.cmp(b_name)),
380    }
381}
382
383fn count_for(c: &Counts, key: SortKey) -> u64 {
384    match key {
385        SortKey::Name | SortKey::Files => c.files,
386        SortKey::Lines => c.lines,
387        SortKey::Tests => c.tests,
388    }
389}
390
391impl Survey {
392    /// Sort crates, and each crate's modules, by `key` in place.
393    pub fn sort(&mut self, key: SortKey) {
394        self.crates.sort_by(|a, b| {
395            order(
396                &a.name,
397                &b.name,
398                count_for(&a.counts, key),
399                count_for(&b.counts, key),
400                key,
401            )
402        });
403        for c in &mut self.crates {
404            c.modules.sort_by(|a, b| {
405                order(
406                    &a.name,
407                    &b.name,
408                    count_for(&a.counts, key),
409                    count_for(&b.counts, key),
410                    key,
411                )
412            });
413        }
414    }
415}
416
417/// The whole-survey totals: rolled-up [`Counts`] plus authoritative test- and
418/// bench-target counts across every crate.
419pub fn totals(survey: &Survey) -> (Counts, u64, u64) {
420    let mut c = Counts::default();
421    let mut test_targets = 0;
422    let mut bench_targets = 0;
423    for cr in &survey.crates {
424        c.files += cr.counts.files;
425        c.lines += cr.counts.lines;
426        c.words += cr.counts.words;
427        c.chars += cr.counts.chars;
428        c.tests += cr.counts.tests;
429        test_targets += cr.test_targets;
430        bench_targets += cr.bench_targets;
431    }
432    (c, test_targets, bench_targets)
433}
434
435/// Render the survey as indented text. Heuristic values (test counts) wear a
436/// trailing `~`; a closing legend explains the marks.
437///
438/// # Examples
439///
440/// ```
441/// use coding_tools::survey::{render_text, CrateNode, Counts, Depth, GroupKind, Survey};
442///
443/// let survey = Survey {
444///     group: GroupKind::CargoCrate,
445///     name: "demo".into(),
446///     root: "/demo".into(),
447///     crates: vec![CrateNode {
448///         name: "demo".into(),
449///         version: "0.1.0".into(),
450///         counts: Counts { files: 2, lines: 30, words: 40, chars: 300, tests: 3 },
451///         test_targets: 1,
452///         bench_targets: 0,
453///         modules: vec![],
454///     }],
455/// };
456/// let text = render_text(&survey, Depth::Crate);
457/// assert!(text.starts_with("crate demo"));
458/// assert!(text.contains("tests 3~"));
459/// assert!(text.contains("test-targets 1"));
460/// ```
461pub fn render_text(survey: &Survey, depth: Depth) -> String {
462    let mut out = String::new();
463    match survey.group {
464        GroupKind::CargoWorkspace => out.push_str(&format!(
465            "workspace {} — {} crate(s)   [grouping: authoritative via cargo metadata]\n",
466            survey.name,
467            survey.crates.len()
468        )),
469        GroupKind::CargoCrate => out.push_str(&format!(
470            "crate {}   [grouping: authoritative via cargo metadata]\n",
471            survey.name
472        )),
473    }
474    for c in &survey.crates {
475        out.push_str(&format!(
476            "  {} v{}  files {}  lines {}  tests {}~  test-targets {}  benches {}\n",
477            c.name,
478            c.version,
479            c.counts.files,
480            c.counts.lines,
481            c.counts.tests,
482            c.test_targets,
483            c.bench_targets
484        ));
485        if depth == Depth::Module {
486            for m in &c.modules {
487                out.push_str(&format!(
488                    "    {}  files {}  lines {}  tests {}~\n",
489                    m.name, m.counts.files, m.counts.lines, m.counts.tests
490                ));
491            }
492        }
493    }
494    let (tot, test_targets, bench_targets) = totals(survey);
495    out.push_str(&format!(
496        "totals  files {}  lines {}  tests {}~  test-targets {}  benches {}\n",
497        tot.files, tot.lines, tot.tests, test_targets, bench_targets
498    ));
499    out.push_str(
500        "(~ = heuristic; file/line counts exact; grouping and target counts authoritative)\n",
501    );
502    out
503}
504
505/// The survey as a structured JSON value, each metric block tagged with the
506/// honesty class it belongs to (so an exact line count is never read as a
507/// heuristic test count).
508pub fn to_json(survey: &Survey) -> Value {
509    let (tot, test_targets, bench_targets) = totals(survey);
510    let crates: Vec<Value> = survey
511        .crates
512        .iter()
513        .map(|c| {
514            let modules: Vec<Value> = c
515                .modules
516                .iter()
517                .map(|m| {
518                    json!({
519                        "name": m.name,
520                        "files": m.counts.files,
521                        "lines": m.counts.lines,
522                        "words": m.counts.words,
523                        "chars": m.counts.chars,
524                        "tests": m.counts.tests,
525                    })
526                })
527                .collect();
528            json!({
529                "name": c.name,
530                "version": c.version,
531                "files": c.counts.files,
532                "lines": c.counts.lines,
533                "words": c.counts.words,
534                "chars": c.counts.chars,
535                "tests": c.counts.tests,
536                "test_targets": c.test_targets,
537                "bench_targets": c.bench_targets,
538                "modules": modules,
539            })
540        })
541        .collect();
542    json!({
543        "tool": "ct-survey",
544        "group": survey.group.label(),
545        "name": survey.name,
546        "root": survey.root,
547        "honesty": {
548            "grouping": "authoritative",
549            "counts": "exact",
550            "tests": "heuristic",
551            "test_targets": "authoritative",
552            "modules": "heuristic",
553        },
554        "crates": crates,
555        "totals": {
556            "crates": survey.crates.len(),
557            "files": tot.files,
558            "lines": tot.lines,
559            "words": tot.words,
560            "chars": tot.chars,
561            "tests": tot.tests,
562            "test_targets": test_targets,
563            "bench_targets": bench_targets,
564        },
565    })
566}
567
// Unit tests for the pure pieces: metadata parsing, the test-attribute scan,
// and survey sorting.
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-package metadata document with lib/bin/test/bench targets.
    fn sample() -> &'static str {
        r#"{
          "packages": [
            {"id": "app 0.1.0 (path+file:///w/app)", "name": "app", "version": "0.1.0",
             "manifest_path": "/w/app/Cargo.toml",
             "targets": [
               {"kind": ["lib"], "src_path": "/w/app/src/lib.rs"},
               {"kind": ["bin"], "src_path": "/w/app/src/bin/tool.rs"},
               {"kind": ["test"], "src_path": "/w/app/tests/it.rs"},
               {"kind": ["bench"], "src_path": "/w/app/benches/b.rs"}
             ]}
          ],
          "workspace_members": ["app 0.1.0 (path+file:///w/app)"],
          "workspace_root": "/w"
        }"#
    }

    /// The sample document parses into one member and one package, and the
    /// package's derived accessors (target counts, directory, source root)
    /// agree with its targets.
    #[test]
    fn parses_packages_members_and_targets() {
        let m = parse_metadata(sample()).unwrap();
        assert_eq!(m.members.len(), 1);
        assert_eq!(m.workspace_root, "/w");
        let p = m.packages.values().next().unwrap();
        assert_eq!(p.name, "app");
        assert_eq!(p.version, "0.1.0");
        assert_eq!(p.test_targets(), 1);
        assert_eq!(p.bench_targets(), 1);
        assert_eq!(p.dir(), Path::new("/w/app"));
        // The lib target wins the source root, not the bin.
        assert_eq!(p.src_root().unwrap(), Path::new("/w/app/src"));
    }

    /// Invalid JSON and a document lacking the required arrays both error.
    #[test]
    fn malformed_or_incomplete_metadata_errors() {
        assert!(parse_metadata("{ not json").is_err());
        assert!(parse_metadata("{}").is_err());
    }

    /// `#[test]` and `#[tokio::test]` are counted; the `#[cfg(test)]` gate on
    /// the enclosing module is not.
    #[test]
    fn test_scan_counts_attributes_not_cfg_gates() {
        let src =
            "#[cfg(test)]\nmod t {\n  #[test]\n  fn a() {}\n  #[tokio::test]\n  async fn b() {}\n}";
        assert_eq!(count_tests(src), 2);
    }

    /// Sorting by files descends with a name tie-break; sorting by name
    /// ascends.
    #[test]
    fn sort_orders_crates_and_breaks_ties_by_name() {
        // Builds a crate node that differs only in name and file count.
        let mk = |name: &str, files: u64| CrateNode {
            name: name.into(),
            version: "0".into(),
            counts: Counts {
                files,
                ..Counts::default()
            },
            test_targets: 0,
            bench_targets: 0,
            modules: vec![],
        };
        let mut s = Survey {
            group: GroupKind::CargoWorkspace,
            name: "w".into(),
            root: "/w".into(),
            crates: vec![mk("b", 1), mk("a", 3), mk("c", 3)],
        };
        s.sort(SortKey::Files);
        // Descending by files; a and c tie at 3, name breaks the tie.
        let order: Vec<&str> = s.crates.iter().map(|c| c.name.as_str()).collect();
        assert_eq!(order, ["a", "c", "b"]);
        s.sort(SortKey::Name);
        let order: Vec<&str> = s.crates.iter().map(|c| c.name.as_str()).collect();
        assert_eq!(order, ["a", "b", "c"]);
    }
}
coding_tools/survey.rs

coding_tools/
survey.rs