code_ranker_plugin_rust/
lib.rs

1use anyhow::Result;
2use code_ranker_plugin_api::{
3    attrs::ValueType,
4    default_cycle_kinds, default_node_kinds,
5    graph::Graph,
6    level::{AttributeSpec, Direction, EdgeKindSpec, Grouping, Level, Thresholds},
7    log,
8    node::Node,
9    plugin::{LanguagePlugin, PluginInput, Preset},
10};
11use std::collections::BTreeMap;
12use std::path::Path;
13
14use cargo_metadata::MetadataCommand;
15
16mod collapse;
17mod crate_graph;
18mod ids;
19mod internal;
20mod module_graph;
21mod rust_ts;
22
23use collapse::collapse_to_files;
24use internal::GraphBuilder;
25
/// The Rust language plugin.
///
/// A stateless unit struct: all behaviour lives in its `LanguagePlugin`
/// implementation, which reports the plugin name `"rust"` and treats a
/// directory containing a `Cargo.toml` as a Rust workspace.
pub struct RustPlugin;
27
/// One Rust-only metric-lens preset: (id, title, sort_metric, connections,
/// doc_slug, prompt body). Same shape as the generic catalog in
/// `code-ranker-cli/src/presets.rs`, but these rank modules by a single
/// coupling/size metric rather than a design principle. Slugs resolve to
/// `principles/rust/<slug>.md`.
///
/// The tuple is positional; `RustPlugin::presets` destructures it in exactly
/// this order when it builds the `Preset` values.
type MetricPreset = (
    // id — also reused as the preset's `label`
    &'static str,
    // title
    &'static str,
    // sort_metric — name of the metric the lens sorts by
    &'static str,
    // connections — copied into `Preset::connections` as owned strings
    &'static [&'static str],
    // doc_slug — file stem appended to the docs base directory (`<slug>.md`)
    &'static str,
    // prompt body
    &'static str,
);
41
/// The Rust-only metric lenses appended to the generic preset catalog by
/// `RustPlugin::presets`: Henry-Kafura coupling (`hk`), module size (`sloc`),
/// fan-in (`fan_in`) and fan-out (`fan_out`).
///
/// Each entry follows the [`MetricPreset`] field order:
/// (id, title, sort_metric, connections, doc_slug, prompt body).
const RUST_METRIC_PRESETS: &[MetricPreset] = &[
    (
        "HK",
        "HK — Henry-Kafura Coupling",
        "hk",
        &["in", "out"],
        "henry-kafura-coupling",
        "These modules carry heavy Henry-Kafura coupling — HK = sloc × (fan_in × fan_out)²,\n\
         where sloc is the module's source lines of code (real code lines, excluding blanks\n\
         and comment-only lines), fan_in is how many modules depend on it, and fan_out is how\n\
         many it depends on.\n\
         A high score is a large module sitting on a busy crossroads of incoming and outgoing\n\
         dependencies, so any change here ripples widely.\n\n\
         For each module below, lower the factor that dominates its HK: shrink the module by\n\
         extracting cohesive pieces, or cut fan-in/fan-out by narrowing its public surface and\n\
         depending on fewer collaborators (introduce an abstraction, move a responsibility).\n\
         Keep existing API contracts intact.",
    ),
    (
        "SLOC",
        "SLOC — Module Size",
        "sloc",
        // No connections are listed for the size lens.
        &[],
        "module-size",
        "These are the largest modules by source lines of code. Size alone is not a defect, but\n\
         oversized files usually bundle several responsibilities and are hard to read, test and\n\
         review.\n\n\
         For each module below, identify the distinct responsibilities it holds and propose how\n\
         to split it into smaller, cohesive modules — each with a single clear purpose — without\n\
         changing external behaviour.",
    ),
    (
        "FANIN",
        "Fan-in — Afferent Coupling",
        "fan_in",
        &["in"],
        "fan-in-afferent-coupling",
        "These modules have high fan-in: many other modules depend on them. They are\n\
         load-bearing — a change here forces changes (or re-review) across every dependant, and\n\
         a bug here is widely felt.\n\n\
         For each module below, confirm its public surface is a stable, minimal contract. Narrow\n\
         the API to what callers actually need, split it if different callers use disjoint parts\n\
         (see Interface Segregation), and stabilise the abstractions the rest of the codebase\n\
         leans on.",
    ),
    (
        "FANOUT",
        "Fan-out — Efferent Coupling",
        "fan_out",
        &["out"],
        "fan-out-efferent-coupling",
        "These modules have high fan-out: they depend on many other modules. High efferent\n\
         coupling makes a module fragile (it breaks when any dependency changes) and hard to\n\
         test or reuse in isolation.\n\n\
         For each module below, reduce its direct dependencies: depend on abstractions rather\n\
         than concretes (see Dependency Inversion), collapse several fine-grained collaborators\n\
         behind one focused interface, and move logic that pulls in unrelated dependencies into\n\
         a more appropriate module.",
    ),
];
102
103impl LanguagePlugin for RustPlugin {
104    fn name(&self) -> &str {
105        "rust"
106    }
107
108    fn detect(&self, workspace: &Path, _input: &PluginInput) -> bool {
109        workspace.join("Cargo.toml").exists()
110    }
111
112    fn levels(&self) -> Vec<Level> {
113        let mut edge_kinds: BTreeMap<String, EdgeKindSpec> = BTreeMap::new();
114        edge_kinds.insert(
115            "uses".into(),
116            EdgeKindSpec {
117                flow: true,
118                label: Some("uses".into()),
119                description: Some(
120                    "Code dependency — this file references an item the target file defines.<br>\
121                     Captured from `use path::Item;`, a qualified path (`crate::a::Item`, \
122                     `other_crate::Item`), or a derive (`#[derive(serde::Serialize)]`).<br>\
123                     The path resolves to the file that defines the item (following `pub use` \
124                     re-exports), so the edge points at the definition, not a re-export hub.<br>\
125                     This is the real dependency: it counts toward fan-in / fan-out, \
126                     Henry-Kafura coupling and cycles."
127                        .into(),
128                ),
129            },
130        );
131        edge_kinds.insert(
132            "contains".into(),
133            EdgeKindSpec {
134                flow: false,
135                label: Some("contains".into()),
136                description: Some(
137                    "Module ownership — the parent declares the child module \
138                     (`mod foo;` / `pub mod foo;`), so `foo.rs` (or `foo/mod.rs`) belongs to it.<br>\
139                     This is the Rust module tree: structure, not a code dependency.<br>\
140                     Kept in the data but not drawn on the main map, and excluded from \
141                     fan-in / fan-out / HK / cycles."
142                        .into(),
143                ),
144            },
145        );
146        edge_kinds.insert(
147            "reexports".into(),
148            EdgeKindSpec {
149                flow: false,
150                label: Some("reexport".into()),
151                description: Some(
152                    "Re-export (`pub use foo::Item;`) — re-publishes another file's item as part of \
153                     this file's public API (the crate-root / prelude facade, e.g. `lib.rs` doing \
154                     `pub use access_scope::AccessScope;`).<br>\
155                     A facade, not a dependency: excluded from fan-in / fan-out / HK / cycles and \
156                     not drawn on the main map, like `contains`.<br>\
157                     A consumer's `use this_crate::Item` is attributed to the file that defines \
158                     `Item`, so re-export hubs (`lib.rs` / `mod.rs`) collect no false coupling — the \
159                     `pub use` is still recorded here so you can see what a file exposes."
160                        .into(),
161                ),
162            },
163        );
164        edge_kinds.insert(
165            "super".into(),
166            EdgeKindSpec {
167                flow: false,
168                label: Some("super".into()),
169                description: Some(
170                    "Namespace pull from an enclosing module — a glob `use` that reaches \
171                     *up* the module tree (`use super::*`, `use crate::<ancestor>::*`), \
172                     bringing the parent's items into the child's scope.<br>\
173                     Usually structural scope-sugar (a module split across files referring \
174                     back to itself). But if the child actually uses a parent item brought \
175                     in by the glob, it IS a real back-dependency — technically a cycle. \
176                     code-ranker can't tell the two apart without name resolution, so it \
177                     treats `super` as a **low-priority** cycle and leaves it non-flow: \
178                     deprioritized next to obvious cross-module cycles.<br>\
179                     Kept in the data but not drawn on the main map, and excluded from \
180                     fan-in / fan-out / HK / cycles — like `contains`."
181                        .into(),
182                ),
183            },
184        );
185
186        let aspec = AttributeSpec::new;
187
188        let mut node_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
189        node_attributes.insert("path".into(), aspec(ValueType::Str, "Path"));
190        node_attributes.insert("crate".into(), aspec(ValueType::Str, "Crate"));
191        node_attributes.insert("loc".into(), aspec(ValueType::Int, "Lines"));
192        node_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
193        node_attributes.insert("external".into(), aspec(ValueType::Bool, "External"));
194        node_attributes.insert("version".into(), aspec(ValueType::Str, "Version"));
195        node_attributes.insert("items".into(), aspec(ValueType::Int, "Items"));
196        let mut unsafe_spec = aspec(ValueType::Int, "Unsafe");
197        unsafe_spec.short = Some("Unsafe".into());
198        unsafe_spec.description = Some(
199            "Count of `unsafe` blocks and `unsafe fn`/`impl`/`trait` declarations \
200             in production code (test items are excluded). Syntactic count: \
201             `unsafe` inside a macro body is not seen, and the figure is not \
202             type-checked."
203                .into(),
204        );
205        unsafe_spec.direction = Direction::LowerBetter;
206        node_attributes.insert("unsafe".into(), unsafe_spec);
207
208        let mut edge_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
209        edge_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
210
211        vec![Level {
212            name: "files".into(),
213            edge_kinds,
214            node_attributes,
215            edge_attributes,
216            attribute_groups: BTreeMap::new(),
217            node_kinds: default_node_kinds(),
218            cycle_kinds: default_cycle_kinds(),
219            // Cluster the diagram by the owning crate (compilation unit), not by
220            // the source folder. Falls back to `dir` if `crate` is ever absent.
221            grouping: Some(Grouping {
222                key: Some("crate".into()),
223                function: None,
224            }),
225        }]
226    }
227
228    fn thresholds(&self) -> BTreeMap<String, Thresholds> {
229        // Calibrated on 21 Rust crates (≥2K SLOC). ~50% of projects breach
230        // `info`, ~10% breach `warning`.
231        BTreeMap::from([
232            (
233                "hk".into(),
234                Thresholds {
235                    info: 150_000.0,
236                    warning: 10_000_000.0,
237                },
238            ),
239            (
240                "sloc".into(),
241                Thresholds {
242                    info: 800.0,
243                    warning: 3_000.0,
244                },
245            ),
246            (
247                "fan_out".into(),
248                Thresholds {
249                    info: 8.0,
250                    warning: 18.0,
251                },
252            ),
253            (
254                "items".into(),
255                Thresholds {
256                    info: 20.0,
257                    warning: 50.0,
258                },
259            ),
260        ])
261    }
262
263    fn presets(&self, mut defaults: Vec<Preset>, _input: &PluginInput) -> Vec<Preset> {
264        // Append Rust-only metric lenses to the generic catalog. Their doc links
265        // reuse the principles base directory derived from an existing default's
266        // `doc_url`, so they resolve to `principles/rust/<slug>.md` without
267        // duplicating the host/base constant that lives in the CLI crate.
268        let base_dir = defaults
269            .iter()
270            .find_map(|p| p.doc_url.as_deref())
271            .and_then(|u| u.rsplit_once('/').map(|(dir, _)| dir.to_string()));
272        for &(id, title, sort_metric, connections, slug, prompt) in RUST_METRIC_PRESETS {
273            defaults.push(Preset {
274                id: id.to_string(),
275                label: id.to_string(),
276                title: title.to_string(),
277                prompt: prompt.to_string(),
278                doc_url: base_dir.as_ref().map(|d| format!("{d}/{slug}.md")),
279                sort_metric: sort_metric.to_string(),
280                connections: connections.iter().map(|s| (*s).to_string()).collect(),
281            });
282        }
283        defaults
284    }
285
286    fn analyze(&self, workspace: &Path, _level: &str, input: &PluginInput) -> Result<Graph> {
287        let mut builder = GraphBuilder::new();
288        syn_analyze(workspace, input.ignore_tests, &mut builder)?;
289        let internal = builder.build();
290        Ok(collapse_to_files(internal))
291    }
292
293    fn metrics(&self, graph: &mut Graph) -> usize {
294        // Each `.rs` file node is re-read (by its absolute-path `id`) and measured
295        // by our `tree-sitter-rust` engine; `#[cfg(test)]` / `#[test]` items are
296        // stripped first so metrics reflect production code only (their lines
297        // become `tloc`).
298        let mut annotated = 0;
299        for node in &mut graph.nodes {
300            if node.kind != "file" {
301                continue;
302            }
303            let Ok(src) = std::fs::read(&node.id) else {
304                continue;
305            };
306            if rust_file_metrics(node, &src) {
307                annotated += 1;
308            }
309        }
310        annotated
311    }
312
313    fn is_test_path(&self, rel_path: &str) -> bool {
314        // Cargo's integration-test / bench targets live under top-level
315        // `tests/` and `benches/` dirs. (Inline `#[cfg(test)]` modules are a
316        // separate, attribute-based notion handled during the syn walk.)
317        matches!(rel_path.split('/').next(), Some("tests") | Some("benches"))
318    }
319
320    fn versions(&self, _workspace: &Path, _input: &PluginInput) -> Vec<(String, String)> {
321        version_string()
322            .map(|rv| vec![("rustc".to_string(), rv)])
323            .unwrap_or_default()
324    }
325
326    fn roots(&self, _workspace: &Path) -> Vec<(String, String)> {
327        rust_toolchain_roots()
328    }
329
330    fn metric_specs(
331        &self,
332        mut defaults: BTreeMap<String, AttributeSpec>,
333    ) -> BTreeMap<String, AttributeSpec> {
334        // Rust strips inline `#[cfg(test)]` / `#[test]` / `#[bench]` items before
335        // measuring, so the LOC metrics count production code only — a nuance the
336        // language-neutral default descriptions omit. Refine them for Rust.
337        let rust_loc_note: &[(&str, &str)] = &[
338            (
339                "sloc",
340                "Source lines of code — lines with at least one non-whitespace, non-comment character. Blank and comment-only lines are not counted. In Rust, lines inside `#[cfg(test)]` / `#[test]` items are excluded too, so this counts production code only (unlike `loc`, the raw file line count).",
341            ),
342            (
343                "lloc",
344                "Logical lines — counts statements, not physical lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
345            ),
346            (
347                "cloc",
348                "Comment-only lines (inline comments on code lines are not counted). In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
349            ),
350            (
351                "blank",
352                "Empty or whitespace-only lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
353            ),
354        ];
355        for (key, desc) in rust_loc_note {
356            if let Some(spec) = defaults.get_mut(*key) {
357                spec.description = Some((*desc).to_string());
358            }
359        }
360        defaults
361    }
362}
363
364/// The Rust/Cargo toolchain path roots used to shorten external node ids in the
365/// snapshot: `cargo` (`$CARGO_HOME`), `registry` (the crates.io source dir),
366/// `rustup` (`$RUSTUP_HOME`), and `rust-src` (the stdlib source under the active
367/// sysroot). These are Rust-specific, so they live here in the Rust plugin rather
368/// than in the language-agnostic orchestrator.
369fn rust_toolchain_roots() -> Vec<(String, String)> {
370    let mut roots = Vec::new();
371    let home = std::env::var("HOME").unwrap_or_default();
372
373    let cargo = std::env::var("CARGO_HOME").unwrap_or_else(|_| format!("{home}/.cargo"));
374    let rustup = std::env::var("RUSTUP_HOME").unwrap_or_else(|_| format!("{home}/.rustup"));
375
376    if !cargo.is_empty() {
377        // Auto-detect crates.io registry hash dir (e.g. index.crates.io-<hash>).
378        let registry_src = format!("{cargo}/registry/src");
379        if let Ok(entries) = std::fs::read_dir(&registry_src) {
380            for entry in entries.flatten() {
381                let name = entry.file_name().to_string_lossy().to_string();
382                if name.starts_with("index.crates.io") {
383                    roots.push(("registry".to_string(), format!("{registry_src}/{name}")));
384                    break;
385                }
386            }
387        }
388        roots.push(("cargo".to_string(), cargo));
389    }
390    if !rustup.is_empty() {
391        // Add rust-src root: sysroot/lib/rustlib/src/rust/library — shortens stdlib
392        // paths from {rustup}/toolchains/.../library/... to {rust-src}/...
393        if which::which("rustc").is_ok()
394            && let Ok(out) = log::timed("rustc --print sysroot", || {
395                std::process::Command::new("rustc")
396                    .args(["--print", "sysroot"])
397                    .output()
398            })
399            && out.status.success()
400        {
401            let sysroot = String::from_utf8_lossy(&out.stdout).trim().to_string();
402            let rust_lib = format!("{sysroot}/lib/rustlib/src/rust/library");
403            if std::path::Path::new(&rust_lib).exists() {
404                roots.push(("rust-src".to_string(), rust_lib));
405            }
406        }
407        roots.push(("rustup".to_string(), rustup));
408    }
409    roots
410}
411
412/// Syntactic stage: resolve the workspace via `cargo metadata` and build the
413/// internal crate + module/use graphs.
414fn syn_analyze(workspace: &Path, ignore_tests: bool, builder: &mut GraphBuilder) -> Result<()> {
415    let manifest = workspace.join("Cargo.toml");
416    // code-ranker is an offline tool: it never fetches from the network. See the
417    // comment in the original lib.rs for the research notes on --offline vs
418    // --no-deps vs full. Short version: --offline keeps external/cross-crate
419    // edges AND never goes to the network; the cache must be warm.
420    let metadata = log::timed("cargo metadata --offline", || {
421        MetadataCommand::new()
422            .manifest_path(&manifest)
423            .other_options(vec!["--offline".to_string()])
424            .exec()
425    })
426    .map_err(|err| offline_metadata_error(&manifest, err))?;
427
428    crate_graph::contribute(&metadata, builder);
429    module_graph::contribute(&metadata, ignore_tests, builder)?;
430    Ok(())
431}
432
/// Builds the user-facing error for a failed `cargo metadata --offline` run.
///
/// The message names the manifest, explains that code-ranker never downloads
/// dependencies and reads the dependency graph from cargo's local cache, shows
/// the command that warms the cache, and ends with the underlying cargo error.
///
/// * `manifest` — path of the `Cargo.toml` that was queried; rendered with
///   `Path::display`.
/// * `err` — the error returned by `cargo_metadata`; appended verbatim.
fn offline_metadata_error(manifest: &Path, err: cargo_metadata::Error) -> anyhow::Error {
    anyhow::anyhow!(
        "cargo metadata (offline) failed for {manifest}\n\n\
         code-ranker is an offline tool — it never downloads dependencies. It reads \
         the dependency graph from cargo's local cache, which must already be \
         populated for this project.\n\n\
         Warm the cache once (with network), then re-run code-ranker:\n    \
         cargo metadata --manifest-path {manifest} >/dev/null\n\
         (a prior `cargo build` / `cargo fetch` works too).\n\n\
         In CI: run code-ranker on the same image/cache as your build or test jobs, \
         where the cache is already warm.\n\n\
         Underlying cargo error: {err}",
        // `{err}` above is captured implicitly; `manifest` needs `.display()`.
        manifest = manifest.display(),
    )
}
448
449fn version_string() -> Option<String> {
450    which::which("rustc").ok()?;
451    let out = log::timed("rustc --version", || {
452        std::process::Command::new("rustc")
453            .arg("--version")
454            .output()
455    })
456    .ok()?;
457    if out.status.success() {
458        Some(
459            String::from_utf8_lossy(&out.stdout)
460                .split_whitespace()
461                .nth(1)
462                .unwrap_or("unknown")
463                .to_string(),
464        )
465    } else {
466        None
467    }
468}
469
470// ─────────────────────────────────────────────────────────────────────────────
471// Complexity: strip inline tests, run the tree-sitter-rust engine, write metrics
472// ─────────────────────────────────────────────────────────────────────────────
473
474/// Compute and write Rust complexity metrics for one file node from its source
475/// bytes. `#[cfg(test)]` / `#[test]` / `#[bench]` items are stripped first (their
476/// lines become `tloc`), then the in-tree `rust_ts` engine runs. Returns `true`
477/// if metrics were written (`false` if the source did not parse).
478fn rust_file_metrics(node: &mut Node, src: &[u8]) -> bool {
479    let (prod, tloc) = strip_cfg_test(src);
480    let Some(mut m) = rust_ts::compute(&prod) else {
481        return false;
482    };
483    m.tloc = tloc as f64;
484    code_ranker_graph::write_metrics(node, &m);
485    true
486}
487
488/// True if any attribute gates an item to tests: `#[test]`, `#[bench]`, or
489/// `#[cfg(test)]` / `#[cfg(all(test, …))]` / `#[cfg(any(test, …))]`. A `test`
490/// **identifier** inside `cfg(...)` is what matches — `cfg(feature = "test")`
491/// (a string literal) does not.
492fn is_test_attr(attr: &syn::Attribute) -> bool {
493    if attr.path().is_ident("test") || attr.path().is_ident("bench") {
494        return true;
495    }
496    if attr.path().is_ident("cfg")
497        && let syn::Meta::List(list) = &attr.meta
498    {
499        return tokens_have_test_ident(list.tokens.clone());
500    }
501    false
502}
503
504/// Recursively scan a token stream for a bare `test` identifier (descends into
505/// `all(...)` / `any(...)` groups).
506fn tokens_have_test_ident(ts: proc_macro2::TokenStream) -> bool {
507    ts.into_iter().any(|t| match t {
508        proc_macro2::TokenTree::Ident(i) => i == "test",
509        proc_macro2::TokenTree::Group(g) => tokens_have_test_ident(g.stream()),
510        _ => false,
511    })
512}
513
/// Visitor collecting the 1-based, inclusive line ranges of test-only items
/// (`#[cfg(test)]` modules, `#[test]`/`#[cfg(test)]` fns), attribute line
/// included. It recurses into ordinary modules to catch nested test modules but
/// not into a test item it already captured.
///
/// Only module and free-function items are inspected; the bodies of
/// non-test functions are not descended into.
#[derive(Default)]
struct TestSpans {
    /// `(first_line, last_line)` of each captured test item, in visit order.
    /// Ranges are not merged or deduplicated here.
    ranges: Vec<(usize, usize)>,
}
522
523impl TestSpans {
524    fn record(&mut self, attrs: &[syn::Attribute], span: proc_macro2::Span) {
525        use syn::spanned::Spanned;
526        let start = attrs
527            .iter()
528            .map(|a| a.span().start().line)
529            .chain(std::iter::once(span.start().line))
530            .min()
531            .unwrap_or(0);
532        self.ranges.push((start, span.end().line));
533    }
534}
535
536impl<'ast> syn::visit::Visit<'ast> for TestSpans {
537    fn visit_item_mod(&mut self, m: &'ast syn::ItemMod) {
538        use syn::spanned::Spanned;
539        if m.attrs.iter().any(is_test_attr) {
540            self.record(&m.attrs, m.span());
541        } else {
542            syn::visit::visit_item_mod(self, m);
543        }
544    }
545    fn visit_item_fn(&mut self, f: &'ast syn::ItemFn) {
546        use syn::spanned::Spanned;
547        if f.attrs.iter().any(is_test_attr) {
548            self.record(&f.attrs, f.span());
549        }
550    }
551}
552
553/// Step 1 of the Rust line accounting: remove `#[cfg(test)]` / `#[test]` /
554/// `#[bench]` items so the production metrics (`sloc` / `cloc` / `blank` / `hk` /
555/// complexity) are then measured on production code only. Returns the production
556/// source **and** `tloc` — the number of test lines removed (the whole test
557/// region: attribute, body, braces). Parse failures or no test items return the
558/// source unchanged with `tloc = 0`.
559fn strip_cfg_test(src: &[u8]) -> (Vec<u8>, usize) {
560    use syn::visit::Visit;
561    let Ok(text) = std::str::from_utf8(src) else {
562        return (src.to_vec(), 0);
563    };
564    let Ok(file) = syn::parse_file(text) else {
565        return (src.to_vec(), 0);
566    };
567    let mut spans = TestSpans::default();
568    spans.visit_file(&file);
569    if spans.ranges.is_empty() {
570        return (src.to_vec(), 0);
571    }
572    let drop: std::collections::HashSet<usize> =
573        spans.ranges.iter().flat_map(|&(s, e)| s..=e).collect();
574    let tloc = drop.len();
575    let mut out: String = text
576        .lines()
577        .enumerate()
578        .filter(|(i, _)| !drop.contains(&(i + 1)))
579        .map(|(_, l)| l)
580        .collect::<Vec<_>>()
581        .join("\n");
582    out.push('\n');
583    (out.into_bytes(), tloc)
584}
585
586#[cfg(test)]
587mod tests {
588    use super::*;
589    use code_ranker_plugin_api::attrs::AttrValue;
590    use internal::NodeKind;
591
592    fn strip(src: &str) -> String {
593        String::from_utf8(strip_cfg_test(src.as_bytes()).0).unwrap()
594    }
595
596    /// Build a `Module` internal node for one file, with structural attrs.
597    /// `line` distinguishes an inline module (`Some`) from a file-backed one
598    /// (`None`); `collapse_to_files` lets the file-backed node win.
599    #[allow(clippy::too_many_arguments)]
600    fn module_node(
601        id: &str,
602        path: &str,
603        line: Option<u32>,
604        visibility: internal::Visibility,
605        loc: u32,
606        items: u32,
607        unsafe_count: u32,
608        krate: &str,
609    ) -> internal::Node {
610        internal::Node {
611            id: id.into(),
612            kind: NodeKind::Module,
613            name: id.into(),
614            path: path.into(),
615            parent: None,
616            external: None,
617            version: None,
618            visibility: Some(visibility),
619            loc: Some(loc),
620            line,
621            item_count: Some(items),
622            unsafe_count: Some(unsafe_count),
623            crate_label: Some(krate.into()),
624        }
625    }
626
627    #[test]
628    fn collapse_lets_the_file_backed_module_overwrite_structural_attrs() {
629        // Two modules map to one file id (same `path`): an inline module
630        // (`line = Some`) is seen first and seeds the file node, then the
631        // file-backed module (`line = None`) is the source of truth and must
632        // overwrite every structural attr (visibility / loc / items / unsafe /
633        // crate). This exercises the Occupied-entry update branch of
634        // `collapse_to_files`.
635        let mut builder = GraphBuilder::new();
636        builder.add_node(module_node(
637            "inline",
638            "/x/foo.rs",
639            Some(5),
640            internal::Visibility::Private,
641            1,
642            1,
643            0,
644            "wrong-crate",
645        ));
646        builder.add_node(module_node(
647            "file",
648            "/x/foo.rs",
649            None,
650            internal::Visibility::Public,
651            42,
652            7,
653            3,
654            "mycrate",
655        ));
656
657        let graph = collapse_to_files(builder.build());
658
659        let file = graph
660            .nodes
661            .iter()
662            .find(|n| n.id == "/x/foo.rs")
663            .expect("the two modules collapsed into one file node");
664        assert_eq!(file.kind, "file");
665        assert_eq!(
666            file.attrs.get("visibility"),
667            Some(&AttrValue::Str("public".into())),
668            "file-backed visibility wins"
669        );
670        assert_eq!(
671            file.attrs.get("loc"),
672            Some(&AttrValue::Int(42)),
673            "file-backed loc wins"
674        );
675        assert_eq!(
676            file.attrs.get("items"),
677            Some(&AttrValue::Int(7)),
678            "file-backed item count wins"
679        );
680        assert_eq!(
681            file.attrs.get("unsafe"),
682            Some(&AttrValue::Int(3)),
683            "file-backed unsafe count wins (and is non-zero so it is kept)"
684        );
685        assert_eq!(
686            file.attrs.get("crate"),
687            Some(&AttrValue::Str("mycrate".into())),
688            "file-backed crate label wins"
689        );
690    }
691
692    #[test]
693    fn strips_cfg_test_module_with_its_attribute() {
694        let out = strip(
695            "pub fn prod() -> i32 {\n    1\n}\n\n\
696             #[cfg(test)]\nmod tests {\n    use super::*;\n    #[test]\n    fn t() { assert_eq!(prod(), 1); }\n}\n",
697        );
698        assert!(out.contains("pub fn prod"), "production kept: {out}");
699        assert!(!out.contains("mod tests"), "test mod removed: {out}");
700        assert!(
701            !out.contains("#[cfg(test)]"),
702            "the cfg attr line removed too: {out}"
703        );
704        assert!(!out.contains("fn t()"), "test fn removed: {out}");
705    }
706
707    #[test]
708    fn strips_standalone_test_and_bench_fns() {
709        let out = strip("fn prod() {}\n#[test]\nfn it_works() {}\n#[bench]\nfn b(_: &mut ()) {}\n");
710        assert!(out.contains("fn prod"));
711        assert!(
712            !out.contains("it_works") && !out.contains("fn b("),
713            "test/bench fns removed: {out}"
714        );
715    }
716
717    #[test]
718    fn keeps_non_test_cfg_and_similarly_named_items() {
719        // `cfg(feature = "test")` is a string literal, not a `test` ident; a
720        // `mod tests_data` is not gated. Both stay.
721        let out = strip("#[cfg(feature = \"test\")]\npub mod gated {}\npub mod tests_data {}\n");
722        assert!(out.contains("pub mod gated"), "feature-cfg kept: {out}");
723        assert!(
724            out.contains("tests_data"),
725            "non-gated lookalike kept: {out}"
726        );
727    }
728
729    #[test]
730    fn strips_cfg_all_test_combinations() {
731        let out = strip("fn p() {}\n#[cfg(all(test, feature = \"x\"))]\nmod t {}\n");
732        assert!(out.contains("fn p"));
733        assert!(!out.contains("mod t"), "cfg(all(test,…)) removed: {out}");
734    }
735
736    #[test]
737    fn unchanged_without_tests_or_on_parse_error() {
738        let prod = "pub fn a() {}\n";
739        assert_eq!(
740            strip_cfg_test(prod.as_bytes()),
741            (prod.as_bytes().to_vec(), 0)
742        );
743        let broken = "@@@ not rust @@@";
744        assert_eq!(
745            strip_cfg_test(broken.as_bytes()),
746            (broken.as_bytes().to_vec(), 0)
747        );
748    }
749
750    #[test]
751    fn tloc_counts_the_whole_removed_test_region() {
752        // 4 lines removed: the #[cfg(test)] attr, `mod tests {`, the body line,
753        // and the closing `}`.
754        let src = "pub fn p() {}\n#[cfg(test)]\nmod tests {\n    fn t() {}\n}\n";
755        let (_prod, tloc) = strip_cfg_test(src.as_bytes());
756        assert_eq!(tloc, 4);
757    }
758
759    fn metric(node: &code_ranker_plugin_api::node::Node, key: &str) -> Option<f64> {
760        match node.attrs.get(key) {
761            Some(code_ranker_plugin_api::attrs::AttrValue::Int(v)) => Some(*v as f64),
762            Some(code_ranker_plugin_api::attrs::AttrValue::Float(v)) => Some(*v),
763            _ => None,
764        }
765    }
766
767    /// Strip inline tests from `src`, run the in-tree Rust engine, write the
768    /// metrics onto a fresh file node, and read one metric — the in-process
769    /// building block for the metamorphic tests below. Handles `.rs` only.
770    fn metric_of(_path: &str, src: &str, key: &str) -> Option<f64> {
771        let (prod, tloc) = strip_cfg_test(src.as_bytes());
772        let mut m = rust_ts::compute(&prod)?;
773        m.tloc = tloc as f64;
774        let mut node = code_ranker_plugin_api::node::Node {
775            id: "t.rs".into(),
776            kind: "file".into(),
777            name: "t.rs".into(),
778            parent: None,
779            attrs: Default::default(),
780        };
781        code_ranker_graph::write_metrics(&mut node, &m);
782        metric(&node, key)
783    }
784
785    // ---- Layer 1: metamorphic FP / FN matrix (see docs/metric-correctness.md) --
786    //
787    // Asserts the AST-Accurate principle across `metric × language × lexical
788    // position × direction`: a control-flow / exit keyword appearing only as a
789    // look-alike must NOT move the per-function metrics (no false positive); every
790    // real construct form MUST be counted (no false negative). Pure in-process
791    // parses — ~0 cost against the 20s budget. (LOC / Halstead are intentionally
792    // NOT in the keyword-invariance set: a real comment line legitimately changes
793    // `cloc`, a string legitimately adds Halstead operands — that is not an FP.)
794
795    /// A Rust function carrying real branching (so all five per-function metrics
796    /// are non-zero), with an optional doc-comment prefix and an optional
797    /// statement injected into the body. Used to build FP-matrix variants.
798    fn rs_src(doc: &str, body_inject: &str) -> String {
799        format!(
800            "{doc}fn f(a: i32, b: i32) -> i32 {{\n\
801             {body_inject}    let g = |x: i32| x + 1;\n\
802                 if a > 0 {{ return g(b); }}\n\
803                 a + b\n\
804             }}\n"
805        )
806    }
807
808    // Per-language keyword look-alike guard set — the construct keywords/operators
809    // a complexity (or `unsafe`) metric can key on. The FP matrix injects these
810    // *only* as look-alikes and asserts no metric moves. This mirrors the
811    // "Keyword look-alike guard set" in principles/rust/metrics.md, and
812    // `rust_trigger_set_documented_in_spec` asserts the spec documents every entry
813    // — so the two cannot drift. A superset of the analyzer's real triggers is
814    // fine.
815    const RUST_TRIGGERS: &[&str] = &[
816        "if", "else", "match", "while", "for", "loop", "return", "unsafe", "&&", "||", "?",
817    ];
818
819    #[test]
820    fn rust_complexity_fp_matrix() {
821        // Every lexical position that could smuggle a keyword in as text. None may
822        // change cyclomatic / cognitive / exits / args / closures vs the base.
823        let base = rs_src("", "");
824        let kw = RUST_TRIGGERS.join(" ");
825        let positions: &[(&str, String)] = &[
826            (
827                "line comment",
828                rs_src("", &format!("    // {kw} && || ?\n")),
829            ),
830            (
831                "block comment",
832                rs_src("", &format!("    /* {kw} && || ? */\n")),
833            ),
834            ("doc comment", rs_src(&format!("/// {kw}\n"), "")),
835            (
836                "string",
837                rs_src("", &format!("    let _s = \"{kw} && || ?\";\n")),
838            ),
839            (
840                "raw string",
841                rs_src("", &format!("    let _r = r#\"{kw} && ||\"#;\n")),
842            ),
843            (
844                "identifier",
845                rs_src(
846                    "",
847                    "    let if_match_return_loop = 0; let _ = if_match_return_loop;\n",
848                ),
849            ),
850            (
851                "format string",
852                rs_src("", "    let _f = format!(\"if {} while\", a);\n"),
853            ),
854            (
855                "macro body",
856                rs_src("", "    let _m = vec![\"if\", \"match\", \"while\"];\n"),
857            ),
858            (
859                "raw identifier",
860                rs_src("", "    let r#match = 1; let _ = r#match;\n"),
861            ),
862        ];
863        for key in ["cyclomatic", "cognitive", "exits", "args", "closures"] {
864            let want = metric_of("t.rs", &base, key);
865            for (pos, src) in positions {
866                assert_eq!(
867                    metric_of("t.rs", src, key),
868                    want,
869                    "metric `{key}` moved when a keyword appeared only in: {pos}"
870                );
871            }
872        }
873    }
874
875    #[test]
876    fn cyclomatic_counts_every_branch_form() {
877        // FN guard: every branch form the analyzer recognizes must raise
878        // cyclomatic above a branch-free baseline. (Exact per-form increments are
879        // the analyzer's rule — layer 4; here we only assert "detected".)
880        let baseline =
881            metric_of("t.rs", "fn f() -> i32 { 0 }\n", "cyclomatic").expect("baseline cyclomatic");
882        let forms: &[(&str, &str)] = &[
883            ("if", "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n"),
884            (
885                "else-if",
886                "fn f(a: i32) -> i32 { if a > 0 { 1 } else if a < 0 { 2 } else { 3 } }\n",
887            ),
888            (
889                "match",
890                "fn f(a: i32) -> i32 { match a { 0 => 1, _ => 2 } }\n",
891            ),
892            (
893                "while",
894                "fn f(mut a: i32) -> i32 { while a > 0 { a -= 1; } a }\n",
895            ),
896            (
897                "for",
898                "fn f(a: i32) -> i32 { let mut s = 0; for i in 0..a { s += i; } s }\n",
899            ),
900            ("loop", "fn f() -> i32 { loop { break; } 0 }\n"),
901            (
902                "&&",
903                "fn f(a: i32, b: i32) -> i32 { let _ = a > 0 && b > 0; 0 }\n",
904            ),
905            (
906                "||",
907                "fn f(a: i32, b: i32) -> i32 { let _ = a > 0 || b > 0; 0 }\n",
908            ),
909            ("?", "fn f() -> Option<i32> { let x = Some(1)?; Some(x) }\n"),
910            (
911                "if let",
912                "fn f() -> i32 { if let Some(x) = Some(1) { x } else { 0 } }\n",
913            ),
914            (
915                "while let",
916                "fn f() -> i32 { let mut it = [1].into_iter(); let mut n = 0; while let Some(_) = it.next() { n += 1; } n }\n",
917            ),
918        ];
919        for (name, src) in forms {
920            let c = metric_of("t.rs", src, "cyclomatic")
921                .unwrap_or_else(|| panic!("cyclomatic missing for `{name}`"));
922            assert!(
923                c > baseline,
924                "branch form `{name}` not counted (cyclomatic {c} <= baseline {baseline})"
925            );
926        }
927        // Magnitude anchor: one extra `if` adds exactly 1.
928        let one = metric_of(
929            "t.rs",
930            "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n",
931            "cyclomatic",
932        )
933        .unwrap();
934        let two = metric_of(
935            "t.rs",
936            "fn f(a: i32) -> i32 { if a > 0 { 1 } else if a < 0 { 2 } else { 3 } }\n",
937            "cyclomatic",
938        )
939        .unwrap();
940        assert_eq!(two - one, 1.0, "one extra real `if` must add exactly 1");
941    }
942
943    #[test]
944    fn rust_complexity_fn_per_metric() {
945        // FN guard for the non-cyclomatic per-function metrics: a real construct
946        // must surface the metric.
947        let cognitive = metric_of(
948            "t.rs",
949            "fn f(a: i32, b: i32) -> i32 { if a > 0 { if b > 0 { 1 } else { 2 } } else { 3 } }\n",
950            "cognitive",
951        )
952        .expect("cognitive present");
953        assert!(cognitive > 0.0, "nested branches must raise cognitive");
954
955        let exits = metric_of("t.rs", "fn f(a: i32) -> i32 { return a; }\n", "exits")
956            .expect("exits present");
957        assert!(exits >= 1.0, "a real `return` must be counted as an exit");
958
959        let args = metric_of(
960            "t.rs",
961            "fn f(a: i32, b: i32, c: i32) -> i32 { a + b + c }\n",
962            "args",
963        )
964        .expect("args present");
965        assert!(
966            args >= 3.0,
967            "three parameters must count as >=3 args, got {args}"
968        );
969
970        let closures = metric_of(
971            "t.rs",
972            "fn f() -> i32 { let g = |x: i32| x + 1; g(1) }\n",
973            "closures",
974        )
975        .expect("closures present");
976        assert!(closures >= 1.0, "a real closure must be counted");
977    }
978
979    #[test]
980    fn rust_only_complexity_fp_matrix() {
981        // FP invariance for cyclomatic / cognitive, driven by Rust's documented
982        // trigger set injected into comment / string positions.
983        let check = |path: &str, base: &str, traps: &[String]| {
984            for key in ["cyclomatic", "cognitive"] {
985                let want = metric_of(path, base, key);
986                for trap in traps {
987                    assert_eq!(
988                        metric_of(path, trap, key),
989                        want,
990                        "{path} metric `{key}` moved on a keyword look-alike"
991                    );
992                }
993            }
994        };
995
996        let kw = RUST_TRIGGERS.join(" ");
997        let base = "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n";
998        check(
999            "t.rs",
1000            base,
1001            &[
1002                format!("// {kw}\n{base}"),
1003                format!(
1004                    "fn f(a: i32) -> i32 {{ let _ = \"{kw}\"; if a > 0 {{ 1 }} else {{ 2 }} }}\n"
1005                ),
1006            ],
1007        );
1008    }
1009
1010    #[test]
1011    fn rust_trigger_set_documented_in_spec() {
1012        // Lock-step guard: every keyword the FP matrix injects must be documented
1013        // in Rust's metrics spec, so the trigger list and the spec's "Keyword
1014        // look-alike guard set" cannot drift apart.
1015        let root = concat!(env!("CARGO_MANIFEST_DIR"), "/../..");
1016        let path = format!("{root}/principles/rust/metrics.md");
1017        let spec = std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {path}: {e}"));
1018        for kw in RUST_TRIGGERS {
1019            assert!(
1020                spec.contains(&format!("`{kw}`")),
1021                "trigger `{kw}` is not documented in principles/rust/metrics.md — spec and FP test drifted"
1022            );
1023        }
1024    }
1025
1026    // ---- Layer 2: generative tests (see docs/metric-correctness.md) ------------
1027    //
1028    // Generate programs with a KNOWN construct count, then assert the metric
1029    // equals ground truth across a combinatorial grid. Deterministic (no random
1030    // dependency, no flakiness) — proptest-style randomized fuzz is a later
1031    // nightly extension. Still pure in-process parses; the whole grid is ~ms.
1032
1033    /// A Rust function with `noise` keyword-laden look-alike lines (a comment plus
1034    /// a string binding, neither a real construct) followed by `branches` real,
1035    /// independent `if` statements (each adds exactly 1 to cyclomatic).
1036    fn gen_rs(branches: usize, noise: usize) -> String {
1037        let mut body = String::new();
1038        for i in 0..noise {
1039            body.push_str(&format!(
1040                "    // if match while for loop return && || ? noise {i}\n"
1041            ));
1042            body.push_str(&format!(
1043                "    let _n{i} = \"if match while return && ||\";\n"
1044            ));
1045        }
1046        for i in 0..branches {
1047            body.push_str(&format!("    if x > {i} {{ let _ = {i}; }}\n"));
1048        }
1049        format!("fn f(x: i32) -> i32 {{\n{body}    0\n}}\n")
1050    }
1051
1052    #[test]
1053    fn generative_cyclomatic_counts_branches_not_noise() {
1054        // Ground truth by construction: cyclomatic = baseline + (real `if` count),
1055        // independent of how many keyword look-alike lines surround it. Sweeps an
1056        // 8×8 grid of (branches, noise) — 64 generated programs.
1057        for noise in 0..8 {
1058            let base =
1059                metric_of("t.rs", &gen_rs(0, noise), "cyclomatic").expect("cyclomatic present");
1060            for branches in 0..8 {
1061                let cyc = metric_of("t.rs", &gen_rs(branches, noise), "cyclomatic")
1062                    .expect("cyclomatic present");
1063                assert_eq!(
1064                    cyc,
1065                    base + branches as f64,
1066                    "cyclomatic must add exactly 1 per real `if` and 0 per noise line \
1067                     (branches={branches}, noise={noise})"
1068                );
1069            }
1070        }
1071    }
1072
1073    #[test]
1074    fn generative_complexity_invariant_to_noise() {
1075        // A fixed real structure (2 args, a closure, a branch, a `return`) with a
1076        // growing pile of keyword look-alikes around it. Every per-function metric
1077        // must stay exactly at its noise-free value — no false positive at any
1078        // noise level.
1079        let mk = |noise: usize| -> String {
1080            let mut body = String::new();
1081            for i in 0..noise {
1082                body.push_str(&format!("    // if match return unsafe && || {i}\n"));
1083                body.push_str(&format!("    let _n{i} = \"if match return && ||\";\n"));
1084            }
1085            format!(
1086                "fn f(a: i32, b: i32) -> i32 {{\n\
1087                 {body}    let g = |x: i32| x + 1;\n\
1088                     if a > 0 {{ return g(b); }}\n\
1089                     a + b\n\
1090                 }}\n"
1091            )
1092        };
1093        for key in ["cyclomatic", "cognitive", "exits", "args", "closures"] {
1094            let want = metric_of("t.rs", &mk(0), key);
1095            for noise in 1..10 {
1096                assert_eq!(
1097                    metric_of("t.rs", &mk(noise), key),
1098                    want,
1099                    "metric `{key}` moved at noise={noise} — keyword look-alikes leaked in"
1100                );
1101            }
1102        }
1103    }
1104
1105    #[test]
1106    fn per_function_metrics_aggregate_over_child_functions() {
1107        // Regression for the whole "root-vs-sum" class: `write_metrics` once read
1108        // the ROOT space value for `cyclomatic` / `cognitive` / `exits` / `args` /
1109        // `closures`, which for a file is the vacuous root count (0, or 1 for
1110        // cyclomatic) — every file looked identical. The real signal lives in the
1111        // child function spaces, so each must be the SUM over them.
1112        //
1113        // `a` takes 2 args, nests two `if`s, and `return`s; `b` defines a 1-arg
1114        // closure. So the file must surface: cyclomatic (summed branches), a
1115        // non-zero cognitive (nesting), exits (the `return`), args (2 fn + 1
1116        // closure = 3), and closures (1).
1117        let src = "fn a(x: i32, y: i32) -> i32 { if x > 0 { if x > 1 { return x; } y } else { 3 } }\n\
1118                   fn b() -> i32 { let f = |z: i32| z + 1; f(2) }\n";
1119        // Each is summed over the child functions — well above the vacuous root
1120        // value, proving aggregation rather than a root-only read.
1121        let cyc = metric_of("t.rs", src, "cyclomatic").expect("cyclomatic present");
1122        assert!(cyc > 1.0, "cyclomatic should be summed, got {cyc}");
1123        let cog = metric_of("t.rs", src, "cognitive").expect("cognitive present");
1124        assert!(cog > 0.0, "cognitive should be summed, got {cog}");
1125        let exits = metric_of("t.rs", src, "exits").expect("exits present");
1126        assert!(exits >= 1.0, "exits should count the `return`, got {exits}");
1127        let args = metric_of("t.rs", src, "args").expect("args present");
1128        assert!(
1129            args >= 3.0,
1130            "args should sum fn (2) + closure (1), got {args}"
1131        );
1132        let closures = metric_of("t.rs", src, "closures").expect("closures present");
1133        assert!(
1134            closures >= 1.0,
1135            "closures should count the closure, got {closures}"
1136        );
1137    }
1138
1139    // ---- Layer 3: asserted anchors (see docs/metric-correctness.md) -----------
1140    //
1141    // Layers 1 & 2 prove RELATIVE behaviour (noise-invariance, +1 per construct)
1142    // but never pin an ABSOLUTE value, so a uniform offset/scale bug (every count
1143    // shifted by +1, or doubled) would pass green. These anchors pin exact values
1144    // hand-derived from principles/rust/metrics.md, catching that scale class.
1145
1146    #[test]
1147    fn complexity_absolute_anchors_hand_derived() {
1148        // Integer counting metrics, pinned to EXACT file-level values, hand-derived
1149        // from the spec's rules (metrics.md §cyclomatic / §exits,args,closures).
1150        //
1151        // These pin the analyzer-of-record's whole-file values (what we emit):
1152        //   • `cyclomatic` = the file unit's base path (1) + Σ over functions of
1153        //     (1 + branch points). Per-function McCabe (`V(G)=E−N+2P` = Σ over
1154        //     functions) is the theory; the analyzer adds the file unit on top and
1155        //     we emit it verbatim (it is also the value `mi` is computed from).
1156        //     `classify` = file 1 + fn 4 (base1+if+else-if+||) = 5.
1157        //   • `exits` = Σ over functions of (a value-returning `-> T` exit +
1158        //     explicit return/?). "Exit points" has no canonical theory, so the
1159        //     analyzer's rule is the source of truth (metrics.md §exits). The
1160        //     `-> i32` snippets below read 2 (the explicit return + the `-> T` exit).
1161        //   • `args` / `closures` / `cognitive` have no file-unit offset.
1162        // All pinned so any drift from the analyzer's output is caught.
1163        let classify = "fn classify(n: i32) -> &'static str {\n\
1164            \x20   if n < 0 { \"neg\" } else if n == 0 || n == 1 { \"small\" } else { \"big\" }\n\
1165            }\n";
1166        let two_closures =
1167            "fn f() { let g = |x: i32| x + 1; let h = |y: i32| y; let _ = (g, h); }\n";
1168        // (label, path, src, key, exact_expected)
1169        let cases: &[(&str, &str, &str, &str, f64)] = &[
1170            // file unit 1 + fn(base1 + if + else-if + ||) = 1 + 4 = 5.
1171            ("classify", "t.rs", classify, "cyclomatic", 5.0),
1172            // file unit 1 + fn(base1 + 1 if) = 1 + 2 = 3 (else is free).
1173            (
1174                "single if",
1175                "t.rs",
1176                "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n",
1177                "cyclomatic",
1178                3.0,
1179            ),
1180            // 1 explicit return + 1 value-returning exit (`-> i32`) → 2.
1181            (
1182                "one return",
1183                "t.rs",
1184                "fn f() -> i32 { return 1; }\n",
1185                "exits",
1186                2.0,
1187            ),
1188            // 1 `?` + 1 value-returning exit (`-> Option`) → 2.
1189            (
1190                "one try op",
1191                "t.rs",
1192                "fn f() -> Option<i32> { let x = Some(1)?; Some(x) }\n",
1193                "exits",
1194                2.0,
1195            ),
1196            (
1197                "three params",
1198                "t.rs",
1199                "fn f(a: i32, b: i32, c: i32) -> i32 { a + b + c }\n",
1200                "args",
1201                3.0,
1202            ),
1203            ("two closures", "t.rs", two_closures, "closures", 2.0),
1204            ("two closure args", "t.rs", two_closures, "args", 2.0),
1205        ];
1206        let mut fails = Vec::new();
1207        for (label, path, src, key, want) in cases {
1208            match metric_of(path, src, key) {
1209                Some(got) if got == *want => {}
1210                other => fails.push(format!("{label}: {key} want {want}, got {other:?}")),
1211            }
1212        }
1213        assert!(
1214            fails.is_empty(),
1215            "failing integer anchors:\n{}",
1216            fails.join("\n")
1217        );
1218    }
1219
1220    #[test]
1221    fn complexity_frozen_scale_anchors() {
1222        // Algorithm-specific metrics (cognitive nesting weights, Halstead
1223        // dictionaries, MI) cannot be hand-derived reliably, so they are FROZEN
1224        // anchors: values produced by `rust-code-analysis` for one fixed snippet,
1225        // verified once. Their job is to catch a uniform offset/scale regression
1226        // (a library bump that doubles `volume`, an MI formula edit) — not to
1227        // claim an independent ground truth. They change only when the underlying
1228        // algorithm changes, and that change should be deliberate.
1229        let classify = "fn classify(n: i32) -> &'static str {\n\
1230            \x20   if n < 0 { \"neg\" } else if n == 0 || n == 1 { \"small\" } else { \"big\" }\n\
1231            }\n";
1232        // (key, expected, abs_tolerance)
1233        let cases: &[(&str, f64, f64)] = &[
1234            ("cognitive", 4.0, 0.0),   // exact integer
1235            ("vocabulary", 18.0, 0.0), // η₁ + η₂, exact integer
1236            ("length", 28.0, 0.0),     // N₁ + N₂, exact integer
1237            ("volume", 116.757, 0.01), // length × log₂(vocabulary)
1238            ("effort", 875.684, 0.01), // difficulty × volume
1239            ("mi", 127.299, 0.01),     // maintainability index
1240            ("mi_sei", 108.463, 0.01), // SEI variant
1241        ];
1242        let mut fails = Vec::new();
1243        for (key, want, tol) in cases {
1244            match metric_of("t.rs", classify, key) {
1245                Some(got) if (got - *want).abs() <= *tol => {}
1246                other => fails.push(format!("{key}: want {want} (±{tol}), got {other:?}")),
1247            }
1248        }
1249        assert!(
1250            fails.is_empty(),
1251            "failing scale anchors:\n{}",
1252            fails.join("\n")
1253        );
1254    }
1255
1256    #[test]
1257    fn declaration_only_file_emits_no_complexity() {
1258        // No functions → only the file unit space → cyclomatic is a vacuous 1 and
1259        // cognitive is 0. Both must be dropped (not shown as a meaningless "1"),
1260        // matching how `put` already drops cognitive's 0. Mirrors real files like
1261        // a clap CLI model or a type-definitions module.
1262        let src = "pub struct Cli { pub verbose: bool }\n\
1263                   pub enum Mode { A, B }\n";
1264        assert_eq!(
1265            metric_of("t.rs", src, "cyclomatic"),
1266            None,
1267            "a declaration-only file must not emit a vacuous cyclomatic"
1268        );
1269        assert_eq!(
1270            metric_of("t.rs", src, "cognitive"),
1271            None,
1272            "a declaration-only file must not emit cognitive"
1273        );
1274    }
1275
1276    #[test]
1277    fn metric_specs_override_adds_rust_cfg_test_note() {
1278        // The neutral default descriptions carry no language nuance; the Rust
1279        // plugin re-adds the `#[cfg(test)]` LOC-exclusion note for sloc/lloc/
1280        // cloc/blank — so it appears only in Rust snapshots, never in py/js/ts.
1281        let defaults = code_ranker_graph::metric_specs().0;
1282        // sanity: the shared default is language-neutral
1283        assert!(
1284            !defaults["blank"]
1285                .description
1286                .as_deref()
1287                .unwrap_or("")
1288                .contains("#[cfg(test)]"),
1289            "the shared default must stay language-neutral"
1290        );
1291
1292        let refined = RustPlugin.metric_specs(defaults);
1293        for key in ["sloc", "lloc", "cloc", "blank"] {
1294            let desc = refined[key].description.as_deref().unwrap_or("");
1295            assert!(
1296                desc.contains("#[cfg(test)]"),
1297                "Rust `{key}` description should note the cfg(test) exclusion"
1298            );
1299        }
1300    }
1301}
code_ranker_plugin_rust/lib.rs

code_ranker_plugin_rust/
lib.rs