coding_tools/survey.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! `ct-survey`'s format-contextualized codebase survey.
5//!
6//! Where [`crate::tree`] reports file-generic line/word/character counts over any
7//! tree, `ct-survey` reports them **bucketed by the units a build system defines**
8//! — for Rust, the workspace → crate → module hierarchy. The honesty classes are
9//! kept distinct and carried into the output so they are never silently conflated:
10//!
11//! * **authoritative** — crate identity, workspace membership, and cargo target
12//! kinds, read from `cargo metadata` (the same mechanism [`crate::deps`] uses);
13//! * **exact** — file, line, word, and character counts;
14//! * **heuristic** — the module bucketing (via [`crate::modgraph::module_name`])
15//! and the `#[test]` tally, which a scan approximates rather than proves.
16//!
17//! The pure pieces here (metadata parse, the test scan, the roll-up, rendering)
18//! are doctested; `src/bin/ct-survey.rs` is the thin IO shell that walks the
19//! filesystem and drives them.
20
21use std::collections::BTreeMap;
22use std::path::{Path, PathBuf};
23use std::sync::OnceLock;
24
25use regex::Regex;
26use serde_json::{Value, json};
27
28use crate::modgraph::module_name;
29
/// Which contextual group type frames a survey.
///
/// [`infer_group`] derives this from a manifest's text, and
/// [`GroupKind::label`] yields the token used for `--group` and in JSON output.
// NOTE(review): with `clap::ValueEnum` derived, the variant doc comments below
// are presumably surfaced as per-value CLI help — treat them as user-facing
// text and confirm before rewording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum GroupKind {
    /// A cargo workspace: the elements are its member crates.
    CargoWorkspace,
    /// A single cargo crate: the element is that crate alone.
    CargoCrate,
}
38
39impl GroupKind {
40 /// The `--group` token / JSON label.
41 pub fn label(self) -> &'static str {
42 match self {
43 GroupKind::CargoWorkspace => "cargo-workspace",
44 GroupKind::CargoCrate => "cargo-crate",
45 }
46 }
47}
48
/// How deep the survey graph descends.
///
/// [`render_text`] prints per-module rows only at [`Depth::Module`]; at
/// [`Depth::Crate`] each crate is a single row.
// NOTE(review): with `clap::ValueEnum` derived, the variant doc comments below
// are presumably surfaced as per-value CLI help — treat them as user-facing
// text and confirm before rewording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum Depth {
    /// Stop at crates (no per-module breakdown).
    Crate,
    /// Descend into each crate's modules (the default).
    Module,
}
57
/// Sort key for crates and, within each crate, its modules.
///
/// Applied by [`Survey::sort`]. `Name` sorts ascending; every count key sorts
/// descending (largest first) and falls back to the name to break ties.
// NOTE(review): with `clap::ValueEnum` derived, the variant doc comments below
// are presumably surfaced as per-value CLI help — treat them as user-facing
// text and confirm before rewording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum SortKey {
    /// By name, ascending (the default).
    Name,
    /// By file count, largest first.
    Files,
    /// By line count, largest first.
    Lines,
    /// By heuristic test count, largest first.
    Tests,
}
70
71/// Infer the contextual group type from a `Cargo.toml`'s text: a manifest that
72/// declares a `[workspace]` table is a [`GroupKind::CargoWorkspace`], otherwise a
73/// [`GroupKind::CargoCrate`]. This probes only the provided manifest — the
74/// authoritative member and target data still comes from `cargo metadata`.
75///
76/// # Examples
77///
78/// ```
79/// use coding_tools::survey::{infer_group, GroupKind};
80///
81/// assert_eq!(infer_group("[workspace]\nmembers = [\"a\"]\n"), GroupKind::CargoWorkspace);
82/// assert_eq!(infer_group("[workspace.package]\nversion = \"1\"\n"), GroupKind::CargoWorkspace);
83/// assert_eq!(infer_group("[package]\nname = \"x\"\n"), GroupKind::CargoCrate);
84/// // A commented-out header does not count.
85/// assert_eq!(infer_group("# [workspace]\n[package]\n"), GroupKind::CargoCrate);
86/// ```
87pub fn infer_group(manifest_text: &str) -> GroupKind {
88 for line in manifest_text.lines() {
89 let t = line.trim();
90 if t.starts_with("[workspace]") || t.starts_with("[workspace.") {
91 return GroupKind::CargoWorkspace;
92 }
93 }
94 GroupKind::CargoCrate
95}
96
/// A single build target of a package, as listed under `targets` in
/// `cargo metadata`.
#[derive(Debug, Clone)]
pub struct Target {
    /// The target's cargo kinds — for instance `["lib"]`, `["bin"]`,
    /// `["test"]`, or `["bench"]`.
    pub kinds: Vec<String>,
    /// Path of the target's entry source file (absolute in cargo's output;
    /// empty when the metadata carried none).
    pub src_path: String,
}
105
106/// One package as `cargo metadata` reports it (the subset a survey needs).
107#[derive(Debug, Clone)]
108pub struct PkgMeta {
109 /// Opaque package id (the metadata graph key).
110 pub id: String,
111 /// Crate name.
112 pub name: String,
113 /// Resolved version.
114 pub version: String,
115 /// Absolute path to the package's `Cargo.toml`.
116 pub manifest_path: String,
117 /// The package's build targets.
118 pub targets: Vec<Target>,
119}
120
121impl PkgMeta {
122 /// The package directory (its `Cargo.toml`'s parent).
123 pub fn dir(&self) -> PathBuf {
124 Path::new(&self.manifest_path)
125 .parent()
126 .map(Path::to_path_buf)
127 .unwrap_or_else(|| PathBuf::from("."))
128 }
129
130 /// The primary source root for module bucketing: the directory of the `lib`
131 /// target's entry file, else the first `bin`, else the first target. `None`
132 /// when no target carries a source path.
133 pub fn src_root(&self) -> Option<PathBuf> {
134 let pick = self
135 .targets
136 .iter()
137 .find(|t| t.kinds.iter().any(|k| k == "lib"))
138 .or_else(|| {
139 self.targets
140 .iter()
141 .find(|t| t.kinds.iter().any(|k| k == "bin"))
142 })
143 .or_else(|| self.targets.first())?;
144 Path::new(&pick.src_path).parent().map(Path::to_path_buf)
145 }
146
147 /// Authoritative count of cargo test targets (a `kind` of `test`).
148 pub fn test_targets(&self) -> u64 {
149 self.targets
150 .iter()
151 .filter(|t| t.kinds.iter().any(|k| k == "test"))
152 .count() as u64
153 }
154
155 /// Authoritative count of cargo bench targets (a `kind` of `bench`).
156 pub fn bench_targets(&self) -> u64 {
157 self.targets
158 .iter()
159 .filter(|t| t.kinds.iter().any(|k| k == "bench"))
160 .count() as u64
161 }
162}
163
164/// The parsed subset of `cargo metadata`: packages by id, workspace member ids,
165/// and the workspace root directory.
166#[derive(Debug, Clone)]
167pub struct Metadata {
168 /// Package id → its metadata.
169 pub packages: BTreeMap<String, PkgMeta>,
170 /// Workspace member package ids.
171 pub members: Vec<String>,
172 /// The workspace root directory.
173 pub workspace_root: String,
174}
175
176/// Parse `cargo metadata --format-version 1` JSON into the survey [`Metadata`].
177/// Errors on malformed JSON or a missing `packages`/`workspace_members` array —
178/// a defective read, never a silent empty survey.
179pub fn parse_metadata(text: &str) -> Result<Metadata, String> {
180 let v: Value = serde_json::from_str(text).map_err(|e| format!("cargo metadata JSON: {e}"))?;
181 let mut packages = BTreeMap::new();
182 for p in v["packages"]
183 .as_array()
184 .ok_or("metadata missing packages")?
185 {
186 let id = p["id"].as_str().ok_or("package missing id")?.to_string();
187 let targets = p["targets"]
188 .as_array()
189 .map(|ts| {
190 ts.iter()
191 .map(|t| Target {
192 kinds: t["kind"]
193 .as_array()
194 .map(|ks| {
195 ks.iter()
196 .filter_map(|k| k.as_str().map(String::from))
197 .collect()
198 })
199 .unwrap_or_default(),
200 src_path: t["src_path"].as_str().unwrap_or("").to_string(),
201 })
202 .collect()
203 })
204 .unwrap_or_default();
205 packages.insert(
206 id.clone(),
207 PkgMeta {
208 id,
209 name: p["name"].as_str().unwrap_or("").to_string(),
210 version: p["version"].as_str().unwrap_or("").to_string(),
211 manifest_path: p["manifest_path"].as_str().unwrap_or("").to_string(),
212 targets,
213 },
214 );
215 }
216 let members = v["workspace_members"]
217 .as_array()
218 .ok_or("metadata missing workspace_members")?
219 .iter()
220 .filter_map(|m| m.as_str().map(String::from))
221 .collect();
222 let workspace_root = v["workspace_root"].as_str().unwrap_or("").to_string();
223 Ok(Metadata {
224 packages,
225 members,
226 workspace_root,
227 })
228}
229
230/// Heuristic count of test functions in a Rust source: attributes whose final
231/// path segment is `test` — `#[test]`, `#[tokio::test]`, `#[test_case::test]`,
232/// and the like. A comprehension aid, not a parser: it does not discount
233/// attributes inside strings or comments, and `#[cfg(test)]` (a module gate, not
234/// a test) is deliberately excluded. Always reported as a heuristic value.
235///
236/// # Examples
237///
238/// ```
239/// use coding_tools::survey::count_tests;
240///
241/// assert_eq!(count_tests("#[test]\nfn a() {}\n#[tokio::test]\nasync fn b() {}"), 2);
242/// // `#[cfg(test)]` gates a module; it is not a test.
243/// assert_eq!(count_tests("#[cfg(test)]\nmod tests { fn helper() {} }"), 0);
244/// assert_eq!(count_tests("fn not_a_test() {}"), 0);
245/// ```
246pub fn count_tests(src: &str) -> u64 {
247 static RE: OnceLock<Regex> = OnceLock::new();
248 let re = RE.get_or_init(|| {
249 Regex::new(r"#\[\s*(?:[A-Za-z_]\w*\s*::\s*)*test\s*[\](]").expect("a valid regex")
250 });
251 re.find_iter(src).count() as u64
252}
253
/// What a single walked source file contributes to a survey: where it sits
/// relative to the crate's source root, its exact size counts, and its
/// heuristic test tally.
#[derive(Debug, Clone)]
pub struct FileStat {
    /// `/`-separated path relative to the crate source root, or `None` when the
    /// file lies outside that root (e.g. an integration test under `tests/`).
    pub rel_to_src: Option<String>,
    /// Number of lines (exact).
    pub lines: u64,
    /// Number of words (exact).
    pub words: u64,
    /// Number of characters (exact).
    pub chars: u64,
    /// Number of `#[test]`-style attributes found (heuristic).
    pub tests: u64,
}
270
/// An aggregated block of counts, used for a whole crate, a single module, or
/// the survey totals. `Default` is the all-zero block.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Counts {
    /// How many source files were folded in.
    pub files: u64,
    /// Sum of line counts.
    pub lines: u64,
    /// Sum of word counts.
    pub words: u64,
    /// Sum of character counts.
    pub chars: u64,
    /// Sum of heuristic test counts.
    pub tests: u64,
}
285
286/// One module node in the survey graph.
287#[derive(Debug, Clone)]
288pub struct ModuleNode {
289 /// Crate-relative module path (e.g. `domain::entity`).
290 pub name: String,
291 /// The module's counts.
292 pub counts: Counts,
293}
294
295/// Roll a crate's [`FileStat`]s into whole-crate [`Counts`] (every file) plus a
296/// per-module breakdown (only files under the source root, bucketed by
297/// [`module_name`]), the modules sorted by name. The whole-crate total can
298/// exceed the module sum: files outside the source root (integration tests,
299/// benches) count toward the crate but belong to no module.
300///
301/// # Examples
302///
303/// ```
304/// use coding_tools::survey::{roll_up, FileStat};
305///
306/// let files = vec![
307/// FileStat { rel_to_src: Some("lib.rs".into()), lines: 10, words: 20, chars: 100, tests: 1 },
308/// FileStat { rel_to_src: Some("a/mod.rs".into()), lines: 5, words: 8, chars: 40, tests: 0 },
309/// FileStat { rel_to_src: None, lines: 3, words: 4, chars: 20, tests: 2 }, // a tests/ file
310/// ];
311/// let (crate_counts, modules) = roll_up(&files);
312/// assert_eq!(crate_counts.files, 3);
313/// assert_eq!(crate_counts.lines, 18);
314/// assert_eq!(crate_counts.tests, 3);
315/// // Two modules: `a` and `crate` (lib.rs); the tests/ file is in neither.
316/// assert_eq!(modules.len(), 2);
317/// assert_eq!(modules[0].name, "a");
318/// assert_eq!(modules[1].name, "crate");
319/// ```
320pub fn roll_up(files: &[FileStat]) -> (Counts, Vec<ModuleNode>) {
321 let mut crate_counts = Counts::default();
322 let mut by_mod: BTreeMap<String, Counts> = BTreeMap::new();
323 for f in files {
324 crate_counts.files += 1;
325 crate_counts.lines += f.lines;
326 crate_counts.words += f.words;
327 crate_counts.chars += f.chars;
328 crate_counts.tests += f.tests;
329 if let Some(rel) = &f.rel_to_src {
330 let m = by_mod.entry(module_name(Path::new(rel))).or_default();
331 m.files += 1;
332 m.lines += f.lines;
333 m.words += f.words;
334 m.chars += f.chars;
335 m.tests += f.tests;
336 }
337 }
338 let modules = by_mod
339 .into_iter()
340 .map(|(name, counts)| ModuleNode { name, counts })
341 .collect();
342 (crate_counts, modules)
343}
344
345/// One crate node in the survey graph.
346#[derive(Debug, Clone)]
347pub struct CrateNode {
348 /// Crate name.
349 pub name: String,
350 /// Resolved version.
351 pub version: String,
352 /// The crate's rolled-up counts (every source file).
353 pub counts: Counts,
354 /// Authoritative cargo test-target count.
355 pub test_targets: u64,
356 /// Authoritative cargo bench-target count.
357 pub bench_targets: u64,
358 /// The crate's modules (empty at `--depth crate`).
359 pub modules: Vec<ModuleNode>,
360}
361
362/// A complete survey graph.
363#[derive(Debug, Clone)]
364pub struct Survey {
365 /// The contextual group type this survey was built under.
366 pub group: GroupKind,
367 /// Workspace (or lone crate) display name.
368 pub name: String,
369 /// Workspace root (or lone crate) directory.
370 pub root: String,
371 /// The surveyed crates.
372 pub crates: Vec<CrateNode>,
373}
374
375fn order(a_name: &str, b_name: &str, a: u64, b: u64, key: SortKey) -> std::cmp::Ordering {
376 match key {
377 SortKey::Name => a_name.cmp(b_name),
378 // Count keys descend (largest first); ties break by name.
379 _ => b.cmp(&a).then_with(|| a_name.cmp(b_name)),
380 }
381}
382
383fn count_for(c: &Counts, key: SortKey) -> u64 {
384 match key {
385 SortKey::Name | SortKey::Files => c.files,
386 SortKey::Lines => c.lines,
387 SortKey::Tests => c.tests,
388 }
389}
390
391impl Survey {
392 /// Sort crates, and each crate's modules, by `key` in place.
393 pub fn sort(&mut self, key: SortKey) {
394 self.crates.sort_by(|a, b| {
395 order(
396 &a.name,
397 &b.name,
398 count_for(&a.counts, key),
399 count_for(&b.counts, key),
400 key,
401 )
402 });
403 for c in &mut self.crates {
404 c.modules.sort_by(|a, b| {
405 order(
406 &a.name,
407 &b.name,
408 count_for(&a.counts, key),
409 count_for(&b.counts, key),
410 key,
411 )
412 });
413 }
414 }
415}
416
417/// The whole-survey totals: rolled-up [`Counts`] plus authoritative test- and
418/// bench-target counts across every crate.
419pub fn totals(survey: &Survey) -> (Counts, u64, u64) {
420 let mut c = Counts::default();
421 let mut test_targets = 0;
422 let mut bench_targets = 0;
423 for cr in &survey.crates {
424 c.files += cr.counts.files;
425 c.lines += cr.counts.lines;
426 c.words += cr.counts.words;
427 c.chars += cr.counts.chars;
428 c.tests += cr.counts.tests;
429 test_targets += cr.test_targets;
430 bench_targets += cr.bench_targets;
431 }
432 (c, test_targets, bench_targets)
433}
434
435/// Render the survey as indented text. Heuristic values (test counts) wear a
436/// trailing `~`; a closing legend explains the marks.
437///
438/// # Examples
439///
440/// ```
441/// use coding_tools::survey::{render_text, CrateNode, Counts, Depth, GroupKind, Survey};
442///
443/// let survey = Survey {
444/// group: GroupKind::CargoCrate,
445/// name: "demo".into(),
446/// root: "/demo".into(),
447/// crates: vec![CrateNode {
448/// name: "demo".into(),
449/// version: "0.1.0".into(),
450/// counts: Counts { files: 2, lines: 30, words: 40, chars: 300, tests: 3 },
451/// test_targets: 1,
452/// bench_targets: 0,
453/// modules: vec![],
454/// }],
455/// };
456/// let text = render_text(&survey, Depth::Crate);
457/// assert!(text.starts_with("crate demo"));
458/// assert!(text.contains("tests 3~"));
459/// assert!(text.contains("test-targets 1"));
460/// ```
461pub fn render_text(survey: &Survey, depth: Depth) -> String {
462 let mut out = String::new();
463 match survey.group {
464 GroupKind::CargoWorkspace => out.push_str(&format!(
465 "workspace {} — {} crate(s) [grouping: authoritative via cargo metadata]\n",
466 survey.name,
467 survey.crates.len()
468 )),
469 GroupKind::CargoCrate => out.push_str(&format!(
470 "crate {} [grouping: authoritative via cargo metadata]\n",
471 survey.name
472 )),
473 }
474 for c in &survey.crates {
475 out.push_str(&format!(
476 " {} v{} files {} lines {} tests {}~ test-targets {} benches {}\n",
477 c.name,
478 c.version,
479 c.counts.files,
480 c.counts.lines,
481 c.counts.tests,
482 c.test_targets,
483 c.bench_targets
484 ));
485 if depth == Depth::Module {
486 for m in &c.modules {
487 out.push_str(&format!(
488 " {} files {} lines {} tests {}~\n",
489 m.name, m.counts.files, m.counts.lines, m.counts.tests
490 ));
491 }
492 }
493 }
494 let (tot, test_targets, bench_targets) = totals(survey);
495 out.push_str(&format!(
496 "totals files {} lines {} tests {}~ test-targets {} benches {}\n",
497 tot.files, tot.lines, tot.tests, test_targets, bench_targets
498 ));
499 out.push_str(
500 "(~ = heuristic; file/line counts exact; grouping and target counts authoritative)\n",
501 );
502 out
503}
504
505/// The survey as a structured JSON value, each metric block tagged with the
506/// honesty class it belongs to (so an exact line count is never read as a
507/// heuristic test count).
508pub fn to_json(survey: &Survey) -> Value {
509 let (tot, test_targets, bench_targets) = totals(survey);
510 let crates: Vec<Value> = survey
511 .crates
512 .iter()
513 .map(|c| {
514 let modules: Vec<Value> = c
515 .modules
516 .iter()
517 .map(|m| {
518 json!({
519 "name": m.name,
520 "files": m.counts.files,
521 "lines": m.counts.lines,
522 "words": m.counts.words,
523 "chars": m.counts.chars,
524 "tests": m.counts.tests,
525 })
526 })
527 .collect();
528 json!({
529 "name": c.name,
530 "version": c.version,
531 "files": c.counts.files,
532 "lines": c.counts.lines,
533 "words": c.counts.words,
534 "chars": c.counts.chars,
535 "tests": c.counts.tests,
536 "test_targets": c.test_targets,
537 "bench_targets": c.bench_targets,
538 "modules": modules,
539 })
540 })
541 .collect();
542 json!({
543 "tool": "ct-survey",
544 "group": survey.group.label(),
545 "name": survey.name,
546 "root": survey.root,
547 "honesty": {
548 "grouping": "authoritative",
549 "counts": "exact",
550 "tests": "heuristic",
551 "test_targets": "authoritative",
552 "modules": "heuristic",
553 },
554 "crates": crates,
555 "totals": {
556 "crates": survey.crates.len(),
557 "files": tot.files,
558 "lines": tot.lines,
559 "words": tot.words,
560 "chars": tot.chars,
561 "tests": tot.tests,
562 "test_targets": test_targets,
563 "bench_targets": bench_targets,
564 },
565 })
566}
567
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata fixture: a single package `app` exposing one lib, one bin, one
    /// test, and one bench target, which is also the sole workspace member.
    fn sample() -> &'static str {
        r#"{
            "packages": [
                {"id": "app 0.1.0 (path+file:///w/app)", "name": "app", "version": "0.1.0",
                 "manifest_path": "/w/app/Cargo.toml",
                 "targets": [
                     {"kind": ["lib"], "src_path": "/w/app/src/lib.rs"},
                     {"kind": ["bin"], "src_path": "/w/app/src/bin/tool.rs"},
                     {"kind": ["test"], "src_path": "/w/app/tests/it.rs"},
                     {"kind": ["bench"], "src_path": "/w/app/benches/b.rs"}
                 ]}
            ],
            "workspace_members": ["app 0.1.0 (path+file:///w/app)"],
            "workspace_root": "/w"
        }"#
    }

    #[test]
    fn parses_packages_members_and_targets() {
        let meta = parse_metadata(sample()).unwrap();
        assert_eq!(meta.members.len(), 1);
        assert_eq!(meta.workspace_root, "/w");

        let pkg = meta.packages.values().next().unwrap();
        assert_eq!(pkg.name, "app");
        assert_eq!(pkg.version, "0.1.0");
        assert_eq!(pkg.test_targets(), 1);
        assert_eq!(pkg.bench_targets(), 1);
        assert_eq!(pkg.dir(), Path::new("/w/app"));
        // The source root follows the lib target even though a bin exists.
        assert_eq!(pkg.src_root().unwrap(), Path::new("/w/app/src"));
    }

    #[test]
    fn malformed_or_incomplete_metadata_errors() {
        // Not JSON at all, then valid JSON lacking the required arrays.
        for bad in ["{ not json", "{}"] {
            assert!(parse_metadata(bad).is_err());
        }
    }

    #[test]
    fn test_scan_counts_attributes_not_cfg_gates() {
        // Two test attributes behind one `#[cfg(test)]` gate: only the two count.
        let src =
            "#[cfg(test)]\nmod t {\n #[test]\n fn a() {}\n #[tokio::test]\n async fn b() {}\n}";
        assert_eq!(count_tests(src), 2);
    }

    #[test]
    fn sort_orders_crates_and_breaks_ties_by_name() {
        /// A crate node that differs from its siblings only by name and file count.
        fn node(name: &str, files: u64) -> CrateNode {
            CrateNode {
                name: name.into(),
                version: "0".into(),
                counts: Counts {
                    files,
                    ..Counts::default()
                },
                test_targets: 0,
                bench_targets: 0,
                modules: vec![],
            }
        }
        /// The crate names in their current order.
        fn names(s: &Survey) -> Vec<&str> {
            s.crates.iter().map(|c| c.name.as_str()).collect()
        }

        let mut survey = Survey {
            group: GroupKind::CargoWorkspace,
            name: "w".into(),
            root: "/w".into(),
            crates: vec![node("b", 1), node("a", 3), node("c", 3)],
        };

        // Largest file count first; `a` and `c` tie at 3 and fall back to name.
        survey.sort(SortKey::Files);
        assert_eq!(names(&survey), ["a", "c", "b"]);

        survey.sort(SortKey::Name);
        assert_eq!(names(&survey), ["a", "b", "c"]);
    }
}
645}