Skip to main content

code_ranker_plugin_rust/
lib.rs

1use anyhow::Result;
2use code_ranker_plugin_api::{
3    attrs::{AttrValue, ValueType},
4    default_cycle_kinds, default_node_kinds,
5    edge::Edge,
6    graph::Graph,
7    level::{AttributeSpec, Direction, EdgeKindSpec, Grouping, Level, Thresholds},
8    log,
9    node::Node,
10    plugin::{LanguagePlugin, PluginInput, Preset},
11};
12use std::collections::hash_map::Entry;
13use std::collections::{BTreeMap, HashMap, HashSet};
14use std::path::Path;
15
16use cargo_metadata::MetadataCommand;
17
18mod crate_graph;
19mod ids;
20mod internal;
21mod module_graph;
22mod rust_ts;
23
24use internal::{EdgeKind, GraphBuilder, InternalGraph, NodeKind};
25
/// Language plugin for Rust / Cargo workspaces.
///
/// Stateless marker type: all behaviour lives in its `LanguagePlugin`
/// implementation.
pub struct RustPlugin;
27
/// A single Rust-only metric-lens preset, stored as a positional tuple.
///
/// The shape mirrors the generic catalog in `code-ranker-cli/src/presets.rs`,
/// but each entry ranks modules by one coupling/size metric instead of a
/// design principle. Doc slugs resolve to `principles/rust/<slug>.md`.
type MetricPreset = (
    // id (also used as the preset label)
    &'static str,
    // human-readable title
    &'static str,
    // sort_metric: name of the metric the preset ranks by
    &'static str,
    // connections: which edge directions to show
    &'static [&'static str],
    // doc_slug: file stem of the documentation page
    &'static str,
    // prompt body
    &'static str,
);
41
42const RUST_METRIC_PRESETS: &[MetricPreset] = &[
43    (
44        "HK",
45        "HK — Henry-Kafura Coupling",
46        "hk",
47        &["in", "out"],
48        "henry-kafura-coupling",
49        "These modules carry heavy Henry-Kafura coupling — HK = sloc × (fan_in × fan_out)²,\n\
50         where sloc is the module's source lines of code (real code lines, excluding blanks\n\
51         and comment-only lines), fan_in is how many modules depend on it, and fan_out is how\n\
52         many it depends on.\n\
53         A high score is a large module sitting on a busy crossroads of incoming and outgoing\n\
54         dependencies, so any change here ripples widely.\n\n\
55         For each module below, lower the factor that dominates its HK: shrink the module by\n\
56         extracting cohesive pieces, or cut fan-in/fan-out by narrowing its public surface and\n\
57         depending on fewer collaborators (introduce an abstraction, move a responsibility).\n\
58         Keep existing API contracts intact.",
59    ),
60    (
61        "SLOC",
62        "SLOC — Module Size",
63        "sloc",
64        &[],
65        "module-size",
66        "These are the largest modules by source lines of code. Size alone is not a defect, but\n\
67         oversized files usually bundle several responsibilities and are hard to read, test and\n\
68         review.\n\n\
69         For each module below, identify the distinct responsibilities it holds and propose how\n\
70         to split it into smaller, cohesive modules — each with a single clear purpose — without\n\
71         changing external behaviour.",
72    ),
73    (
74        "FANIN",
75        "Fan-in — Afferent Coupling",
76        "fan_in",
77        &["in"],
78        "fan-in-afferent-coupling",
79        "These modules have high fan-in: many other modules depend on them. They are\n\
80         load-bearing — a change here forces changes (or re-review) across every dependant, and\n\
81         a bug here is widely felt.\n\n\
82         For each module below, confirm its public surface is a stable, minimal contract. Narrow\n\
83         the API to what callers actually need, split it if different callers use disjoint parts\n\
84         (see Interface Segregation), and stabilise the abstractions the rest of the codebase\n\
85         leans on.",
86    ),
87    (
88        "FANOUT",
89        "Fan-out — Efferent Coupling",
90        "fan_out",
91        &["out"],
92        "fan-out-efferent-coupling",
93        "These modules have high fan-out: they depend on many other modules. High efferent\n\
94         coupling makes a module fragile (it breaks when any dependency changes) and hard to\n\
95         test or reuse in isolation.\n\n\
96         For each module below, reduce its direct dependencies: depend on abstractions rather\n\
97         than concretes (see Dependency Inversion), collapse several fine-grained collaborators\n\
98         behind one focused interface, and move logic that pulls in unrelated dependencies into\n\
99         a more appropriate module.",
100    ),
101];
102
103impl LanguagePlugin for RustPlugin {
104    fn name(&self) -> &str {
105        "rust"
106    }
107
108    fn detect(&self, workspace: &Path, _input: &PluginInput) -> bool {
109        workspace.join("Cargo.toml").exists()
110    }
111
112    fn levels(&self) -> Vec<Level> {
113        let mut edge_kinds: BTreeMap<String, EdgeKindSpec> = BTreeMap::new();
114        edge_kinds.insert(
115            "uses".into(),
116            EdgeKindSpec {
117                flow: true,
118                label: Some("uses".into()),
119                description: Some(
120                    "Code dependency — this file references an item the target file defines.<br>\
121                     Captured from `use path::Item;`, a qualified path (`crate::a::Item`, \
122                     `other_crate::Item`), or a derive (`#[derive(serde::Serialize)]`).<br>\
123                     The path resolves to the file that defines the item (following `pub use` \
124                     re-exports), so the edge points at the definition, not a re-export hub.<br>\
125                     This is the real dependency: it counts toward fan-in / fan-out, \
126                     Henry-Kafura coupling and cycles."
127                        .into(),
128                ),
129            },
130        );
131        edge_kinds.insert(
132            "contains".into(),
133            EdgeKindSpec {
134                flow: false,
135                label: Some("contains".into()),
136                description: Some(
137                    "Module ownership — the parent declares the child module \
138                     (`mod foo;` / `pub mod foo;`), so `foo.rs` (or `foo/mod.rs`) belongs to it.<br>\
139                     This is the Rust module tree: structure, not a code dependency.<br>\
140                     Kept in the data but not drawn on the main map, and excluded from \
141                     fan-in / fan-out / HK / cycles."
142                        .into(),
143                ),
144            },
145        );
146        edge_kinds.insert(
147            "reexports".into(),
148            EdgeKindSpec {
149                flow: false,
150                label: Some("reexport".into()),
151                description: Some(
152                    "Re-export (`pub use foo::Item;`) — re-publishes another file's item as part of \
153                     this file's public API (the crate-root / prelude facade, e.g. `lib.rs` doing \
154                     `pub use access_scope::AccessScope;`).<br>\
155                     A facade, not a dependency: excluded from fan-in / fan-out / HK / cycles and \
156                     not drawn on the main map, like `contains`.<br>\
157                     A consumer's `use this_crate::Item` is attributed to the file that defines \
158                     `Item`, so re-export hubs (`lib.rs` / `mod.rs`) collect no false coupling — the \
159                     `pub use` is still recorded here so you can see what a file exposes."
160                        .into(),
161                ),
162            },
163        );
164        edge_kinds.insert(
165            "super".into(),
166            EdgeKindSpec {
167                flow: false,
168                label: Some("super".into()),
169                description: Some(
170                    "Namespace pull from an enclosing module — a glob `use` that reaches \
171                     *up* the module tree (`use super::*`, `use crate::<ancestor>::*`), \
172                     bringing the parent's items into the child's scope.<br>\
173                     Usually structural scope-sugar (a module split across files referring \
174                     back to itself). But if the child actually uses a parent item brought \
175                     in by the glob, it IS a real back-dependency — technically a cycle. \
176                     code-ranker can't tell the two apart without name resolution, so it \
177                     treats `super` as a **low-priority** cycle and leaves it non-flow: \
178                     deprioritized next to obvious cross-module cycles.<br>\
179                     Kept in the data but not drawn on the main map, and excluded from \
180                     fan-in / fan-out / HK / cycles — like `contains`."
181                        .into(),
182                ),
183            },
184        );
185
186        let aspec = AttributeSpec::new;
187
188        let mut node_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
189        node_attributes.insert("path".into(), aspec(ValueType::Str, "Path"));
190        node_attributes.insert("crate".into(), aspec(ValueType::Str, "Crate"));
191        node_attributes.insert("loc".into(), aspec(ValueType::Int, "Lines"));
192        node_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
193        node_attributes.insert("external".into(), aspec(ValueType::Bool, "External"));
194        node_attributes.insert("version".into(), aspec(ValueType::Str, "Version"));
195        node_attributes.insert("items".into(), aspec(ValueType::Int, "Items"));
196        let mut unsafe_spec = aspec(ValueType::Int, "Unsafe");
197        unsafe_spec.short = Some("Unsafe".into());
198        unsafe_spec.description = Some(
199            "Count of `unsafe` blocks and `unsafe fn`/`impl`/`trait` declarations \
200             in production code (test items are excluded). Syntactic count: \
201             `unsafe` inside a macro body is not seen, and the figure is not \
202             type-checked."
203                .into(),
204        );
205        unsafe_spec.direction = Direction::LowerBetter;
206        node_attributes.insert("unsafe".into(), unsafe_spec);
207
208        let mut edge_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
209        edge_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
210
211        vec![Level {
212            name: "files".into(),
213            edge_kinds,
214            node_attributes,
215            edge_attributes,
216            attribute_groups: BTreeMap::new(),
217            node_kinds: default_node_kinds(),
218            cycle_kinds: default_cycle_kinds(),
219            // Cluster the diagram by the owning crate (compilation unit), not by
220            // the source folder. Falls back to `dir` if `crate` is ever absent.
221            grouping: Some(Grouping {
222                key: Some("crate".into()),
223                function: None,
224            }),
225        }]
226    }
227
228    fn thresholds(&self) -> BTreeMap<String, Thresholds> {
229        // Calibrated on 21 Rust crates (≥2K SLOC). ~50% of projects breach
230        // `info`, ~10% breach `warning`.
231        BTreeMap::from([
232            (
233                "hk".into(),
234                Thresholds {
235                    info: 150_000.0,
236                    warning: 10_000_000.0,
237                },
238            ),
239            (
240                "sloc".into(),
241                Thresholds {
242                    info: 800.0,
243                    warning: 3_000.0,
244                },
245            ),
246            (
247                "fan_out".into(),
248                Thresholds {
249                    info: 8.0,
250                    warning: 18.0,
251                },
252            ),
253            (
254                "items".into(),
255                Thresholds {
256                    info: 20.0,
257                    warning: 50.0,
258                },
259            ),
260        ])
261    }
262
263    fn presets(&self, mut defaults: Vec<Preset>, _input: &PluginInput) -> Vec<Preset> {
264        // Append Rust-only metric lenses to the generic catalog. Their doc links
265        // reuse the principles base directory derived from an existing default's
266        // `doc_url`, so they resolve to `principles/rust/<slug>.md` without
267        // duplicating the host/base constant that lives in the CLI crate.
268        let base_dir = defaults
269            .iter()
270            .find_map(|p| p.doc_url.as_deref())
271            .and_then(|u| u.rsplit_once('/').map(|(dir, _)| dir.to_string()));
272        for &(id, title, sort_metric, connections, slug, prompt) in RUST_METRIC_PRESETS {
273            defaults.push(Preset {
274                id: id.to_string(),
275                label: id.to_string(),
276                title: title.to_string(),
277                prompt: prompt.to_string(),
278                doc_url: base_dir.as_ref().map(|d| format!("{d}/{slug}.md")),
279                sort_metric: sort_metric.to_string(),
280                connections: connections.iter().map(|s| (*s).to_string()).collect(),
281            });
282        }
283        defaults
284    }
285
286    fn analyze(&self, workspace: &Path, _level: &str, input: &PluginInput) -> Result<Graph> {
287        let mut builder = GraphBuilder::new();
288        syn_analyze(workspace, input.ignore_tests, &mut builder)?;
289        let internal = builder.build();
290        Ok(collapse_to_files(internal))
291    }
292
293    fn metrics(&self, graph: &mut Graph) -> usize {
294        // Each `.rs` file node is re-read (by its absolute-path `id`) and measured
295        // by our `tree-sitter-rust` engine; `#[cfg(test)]` / `#[test]` items are
296        // stripped first so metrics reflect production code only (their lines
297        // become `tloc`).
298        let mut annotated = 0;
299        for node in &mut graph.nodes {
300            if node.kind != "file" {
301                continue;
302            }
303            let Ok(src) = std::fs::read(&node.id) else {
304                continue;
305            };
306            if rust_file_metrics(node, &src) {
307                annotated += 1;
308            }
309        }
310        annotated
311    }
312
313    fn is_test_path(&self, rel_path: &str) -> bool {
314        // Cargo's integration-test / bench targets live under top-level
315        // `tests/` and `benches/` dirs. (Inline `#[cfg(test)]` modules are a
316        // separate, attribute-based notion handled during the syn walk.)
317        matches!(rel_path.split('/').next(), Some("tests") | Some("benches"))
318    }
319
320    fn versions(&self, _workspace: &Path, _input: &PluginInput) -> Vec<(String, String)> {
321        version_string()
322            .map(|rv| vec![("rustc".to_string(), rv)])
323            .unwrap_or_default()
324    }
325
326    fn roots(&self, _workspace: &Path) -> Vec<(String, String)> {
327        rust_toolchain_roots()
328    }
329
330    fn metric_specs(
331        &self,
332        mut defaults: BTreeMap<String, AttributeSpec>,
333    ) -> BTreeMap<String, AttributeSpec> {
334        // Rust strips inline `#[cfg(test)]` / `#[test]` / `#[bench]` items before
335        // measuring, so the LOC metrics count production code only — a nuance the
336        // language-neutral default descriptions omit. Refine them for Rust.
337        let rust_loc_note: &[(&str, &str)] = &[
338            (
339                "sloc",
340                "Source lines of code — lines with at least one non-whitespace, non-comment character. Blank and comment-only lines are not counted. In Rust, lines inside `#[cfg(test)]` / `#[test]` items are excluded too, so this counts production code only (unlike `loc`, the raw file line count).",
341            ),
342            (
343                "lloc",
344                "Logical lines — counts statements, not physical lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
345            ),
346            (
347                "cloc",
348                "Comment-only lines (inline comments on code lines are not counted). In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
349            ),
350            (
351                "blank",
352                "Empty or whitespace-only lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
353            ),
354        ];
355        for (key, desc) in rust_loc_note {
356            if let Some(spec) = defaults.get_mut(*key) {
357                spec.description = Some((*desc).to_string());
358            }
359        }
360        defaults
361    }
362}
363
364/// The Rust/Cargo toolchain path roots used to shorten external node ids in the
365/// snapshot: `cargo` (`$CARGO_HOME`), `registry` (the crates.io source dir),
366/// `rustup` (`$RUSTUP_HOME`), and `rust-src` (the stdlib source under the active
367/// sysroot). These are Rust-specific, so they live here in the Rust plugin rather
368/// than in the language-agnostic orchestrator.
369fn rust_toolchain_roots() -> Vec<(String, String)> {
370    let mut roots = Vec::new();
371    let home = std::env::var("HOME").unwrap_or_default();
372
373    let cargo = std::env::var("CARGO_HOME").unwrap_or_else(|_| format!("{home}/.cargo"));
374    let rustup = std::env::var("RUSTUP_HOME").unwrap_or_else(|_| format!("{home}/.rustup"));
375
376    if !cargo.is_empty() {
377        // Auto-detect crates.io registry hash dir (e.g. index.crates.io-<hash>).
378        let registry_src = format!("{cargo}/registry/src");
379        if let Ok(entries) = std::fs::read_dir(&registry_src) {
380            for entry in entries.flatten() {
381                let name = entry.file_name().to_string_lossy().to_string();
382                if name.starts_with("index.crates.io") {
383                    roots.push(("registry".to_string(), format!("{registry_src}/{name}")));
384                    break;
385                }
386            }
387        }
388        roots.push(("cargo".to_string(), cargo));
389    }
390    if !rustup.is_empty() {
391        // Add rust-src root: sysroot/lib/rustlib/src/rust/library — shortens stdlib
392        // paths from {rustup}/toolchains/.../library/... to {rust-src}/...
393        if which::which("rustc").is_ok()
394            && let Ok(out) = log::timed("rustc --print sysroot", || {
395                std::process::Command::new("rustc")
396                    .args(["--print", "sysroot"])
397                    .output()
398            })
399            && out.status.success()
400        {
401            let sysroot = String::from_utf8_lossy(&out.stdout).trim().to_string();
402            let rust_lib = format!("{sysroot}/lib/rustlib/src/rust/library");
403            if std::path::Path::new(&rust_lib).exists() {
404                roots.push(("rust-src".to_string(), rust_lib));
405            }
406        }
407        roots.push(("rustup".to_string(), rustup));
408    }
409    roots
410}
411
412/// Syntactic stage: resolve the workspace via `cargo metadata` and build the
413/// internal crate + module/use graphs.
414fn syn_analyze(workspace: &Path, ignore_tests: bool, builder: &mut GraphBuilder) -> Result<()> {
415    let manifest = workspace.join("Cargo.toml");
416    // code-ranker is an offline tool: it never fetches from the network. See the
417    // comment in the original lib.rs for the research notes on --offline vs
418    // --no-deps vs full. Short version: --offline keeps external/cross-crate
419    // edges AND never goes to the network; the cache must be warm.
420    let metadata = log::timed("cargo metadata --offline", || {
421        MetadataCommand::new()
422            .manifest_path(&manifest)
423            .other_options(vec!["--offline".to_string()])
424            .exec()
425    })
426    .map_err(|err| offline_metadata_error(&manifest, err))?;
427
428    crate_graph::contribute(&metadata, builder);
429    module_graph::contribute(&metadata, ignore_tests, builder)?;
430    Ok(())
431}
432
433fn offline_metadata_error(manifest: &Path, err: cargo_metadata::Error) -> anyhow::Error {
434    anyhow::anyhow!(
435        "cargo metadata (offline) failed for {manifest}\n\n\
436         code-ranker is an offline tool — it never downloads dependencies. It reads \
437         the dependency graph from cargo's local cache, which must already be \
438         populated for this project.\n\n\
439         Warm the cache once (with network), then re-run code-ranker:\n    \
440         cargo metadata --manifest-path {manifest} >/dev/null\n\
441         (a prior `cargo build` / `cargo fetch` works too).\n\n\
442         In CI: run code-ranker on the same image/cache as your build or test jobs, \
443         where the cache is already warm.\n\n\
444         Underlying cargo error: {err}",
445        manifest = manifest.display(),
446    )
447}
448
449fn version_string() -> Option<String> {
450    which::which("rustc").ok()?;
451    let out = log::timed("rustc --version", || {
452        std::process::Command::new("rustc")
453            .arg("--version")
454            .output()
455    })
456    .ok()?;
457    if out.status.success() {
458        Some(
459            String::from_utf8_lossy(&out.stdout)
460                .split_whitespace()
461                .nth(1)
462                .unwrap_or("unknown")
463                .to_string(),
464        )
465    } else {
466        None
467    }
468}
469
470/// Collapse the internal module graph into a file-level `api::Graph`.
471///
472/// - Every `Module` node maps to a `file` node keyed by its ABSOLUTE source
473///   path (no `file:` prefix). Inline modules collapse into the file they live
474///   in. The file-backed module (line == None) is the source of truth for
475///   structural attrs.
476/// - External crate nodes become one `external` node each (id `ext:{name}`).
477/// - `use`/`pub use` edges are re-pointed to files; self-edges (within the same
478///   file) are dropped.
479/// - Crate→crate dependency edges (metadata-level) are dropped; precise
480///   file→file edges come from `use` statements.
481fn collapse_to_files(full: InternalGraph) -> Graph {
482    let mut id_map: HashMap<String, String> = HashMap::new();
483    let mut file_nodes: HashMap<String, Node> = HashMap::new();
484    let mut ext_nodes: HashMap<String, Node> = HashMap::new();
485
486    // Pre-pass: map each LOCAL crate node to its crate-root source file
487    // (lib.rs / main.rs) via the crate→root-module Contains edge. This lets
488    // cross-crate `use other_crate::…` become file→file edges.
489    let node_by_id: HashMap<&str, &internal::Node> =
490        full.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
491    let crate_ids: HashSet<&str> = full
492        .nodes
493        .iter()
494        .filter(|n| n.kind == NodeKind::Crate)
495        .map(|n| n.id.as_str())
496        .collect();
497    let mut crate_root_file: HashMap<String, String> = HashMap::new();
498    for e in &full.edges {
499        if e.kind != EdgeKind::Contains {
500            continue;
501        }
502        let (Some(from), Some(to)) = (
503            node_by_id.get(e.from.as_str()),
504            node_by_id.get(e.to.as_str()),
505        ) else {
506            continue;
507        };
508        if from.kind == NodeKind::Crate && to.kind == NodeKind::Module && !to.path.is_empty() {
509            let file = to.path.clone(); // ABSOLUTE path, no prefix
510            match crate_root_file.entry(e.from.clone()) {
511                Entry::Vacant(v) => {
512                    v.insert(file);
513                }
514                Entry::Occupied(mut o) if to.path.ends_with("lib.rs") => {
515                    *o.get_mut() = file;
516                }
517                Entry::Occupied(_) => {}
518            }
519        }
520    }
521
522    for node in &full.nodes {
523        match node.kind {
524            NodeKind::Module => {
525                let fid = node.path.clone(); // ABSOLUTE path
526                id_map.insert(node.id.clone(), fid.clone());
527                let name = Path::new(&node.path)
528                    .file_name()
529                    .map(|s| s.to_string_lossy().into_owned())
530                    .unwrap_or_else(|| node.name.clone());
531                match file_nodes.entry(fid.clone()) {
532                    Entry::Vacant(v) => {
533                        let mut attrs = BTreeMap::new();
534                        if let Some(vis) = &node.visibility {
535                            attrs.insert(
536                                "visibility".to_string(),
537                                AttrValue::Str(vis.as_str().to_string()),
538                            );
539                        }
540                        if let Some(loc) = node.loc {
541                            attrs.insert("loc".to_string(), AttrValue::Int(loc as i64));
542                        }
543                        if let Some(items) = node.item_count {
544                            attrs.insert("items".to_string(), AttrValue::Int(items as i64));
545                        }
546                        // Omit when zero, like other metrics — files with no
547                        // `unsafe` simply carry no key.
548                        if let Some(u) = node.unsafe_count
549                            && u > 0
550                        {
551                            attrs.insert("unsafe".to_string(), AttrValue::Int(u as i64));
552                        }
553                        if let Some(krate) = &node.crate_label {
554                            attrs.insert("crate".to_string(), AttrValue::Str(krate.clone()));
555                        }
556                        v.insert(Node {
557                            id: fid,
558                            kind: "file".into(),
559                            name,
560                            parent: None,
561                            attrs,
562                        });
563                    }
564                    Entry::Occupied(mut o) => {
565                        // The file-backed module (line == None) is the source
566                        // of truth for the file's structural attrs.
567                        if node.line.is_none() {
568                            let n = o.get_mut();
569                            if let Some(vis) = &node.visibility {
570                                n.attrs.insert(
571                                    "visibility".to_string(),
572                                    AttrValue::Str(vis.as_str().to_string()),
573                                );
574                            }
575                            if let Some(loc) = node.loc {
576                                n.attrs
577                                    .insert("loc".to_string(), AttrValue::Int(loc as i64));
578                            }
579                            if let Some(items) = node.item_count {
580                                n.attrs
581                                    .insert("items".to_string(), AttrValue::Int(items as i64));
582                            }
583                            if let Some(u) = node.unsafe_count
584                                && u > 0
585                            {
586                                n.attrs
587                                    .insert("unsafe".to_string(), AttrValue::Int(u as i64));
588                            }
589                            if let Some(krate) = &node.crate_label {
590                                n.attrs
591                                    .insert("crate".to_string(), AttrValue::Str(krate.clone()));
592                            }
593                        }
594                    }
595                }
596            }
597            NodeKind::Crate if node.external.unwrap_or(false) => {
598                let eid = format!("ext:{}", node.name);
599                id_map.insert(node.id.clone(), eid.clone());
600                // The on-disk directory of this dependency (parent of its
601                // Cargo.toml), e.g. `…/registry/src/…/serde-1.0.228`.
602                let lib_path = Path::new(&node.path)
603                    .parent()
604                    .map(|p| p.to_string_lossy().into_owned())
605                    .unwrap_or_default();
606                ext_nodes.entry(eid.clone()).or_insert_with(|| {
607                    let mut attrs = BTreeMap::new();
608                    attrs.insert("external".to_string(), AttrValue::Bool(true));
609                    if let Some(v) = &node.version {
610                        attrs.insert("version".to_string(), AttrValue::Str(v.clone()));
611                    }
612                    if !lib_path.is_empty() {
613                        attrs.insert("path".to_string(), AttrValue::Str(lib_path));
614                    }
615                    Node {
616                        id: eid,
617                        kind: "external".into(),
618                        name: node.name.clone(),
619                        parent: None,
620                        attrs,
621                    }
622                });
623            }
624            // A local workspace crate maps to its root file.
625            NodeKind::Crate => {
626                if let Some(file) = crate_root_file.get(&node.id) {
627                    id_map.insert(node.id.clone(), file.clone());
628                }
629            }
630        }
631    }
632
633    // Re-point edges to file/external granularity.
634    let mut seen: HashSet<(String, String, String)> = HashSet::new();
635    let mut edges: Vec<Edge> = Vec::new();
636    for e in &full.edges {
637        // Drop crate→crate dependency edges; precise file→file edges come from
638        // `use` statements.
639        if crate_ids.contains(e.from.as_str()) && crate_ids.contains(e.to.as_str()) {
640            continue;
641        }
642        let (Some(from), Some(to)) = (id_map.get(&e.from), id_map.get(&e.to)) else {
643            continue;
644        };
645        if from == to {
646            continue; // within the same file — not a connection
647        }
648        let kind_str = match e.kind {
649            EdgeKind::Contains => "contains",
650            EdgeKind::Uses => "uses",
651            EdgeKind::Reexports => "reexports",
652            EdgeKind::Super => "super",
653        };
654        if !seen.insert((from.clone(), to.clone(), kind_str.to_string())) {
655            continue;
656        }
657        let mut attrs = BTreeMap::new();
658        if e.kind == EdgeKind::Reexports
659            && let Some(vis) = &e.visibility
660        {
661            attrs.insert(
662                "visibility".to_string(),
663                AttrValue::Str(vis.as_str().to_string()),
664            );
665        }
666        edges.push(Edge {
667            source: from.clone(),
668            target: to.clone(),
669            kind: kind_str.to_string(),
670            line: e.line,
671            attrs,
672        });
673    }
674
675    // Assemble nodes: all files + only the libraries actually referenced.
676    let referenced_ext: HashSet<&str> = edges
677        .iter()
678        .filter(|e| ext_nodes.contains_key(&e.target))
679        .map(|e| e.target.as_str())
680        .collect();
681    let mut nodes: Vec<Node> = file_nodes.into_values().collect();
682    nodes.extend(
683        ext_nodes
684            .into_iter()
685            .filter(|(id, _)| referenced_ext.contains(id.as_str()))
686            .map(|(_, n)| n),
687    );
688
689    // Deterministic output ordering.
690    nodes.sort_by(|a, b| a.id.cmp(&b.id));
691    edges.sort_by(|a, b| {
692        a.source
693            .cmp(&b.source)
694            .then(a.target.cmp(&b.target))
695            .then(a.kind.cmp(&b.kind))
696    });
697
698    Graph { nodes, edges }
699}
700
701// ─────────────────────────────────────────────────────────────────────────────
702// Complexity: strip inline tests, run the tree-sitter-rust engine, write metrics
703// ─────────────────────────────────────────────────────────────────────────────
704
705/// Compute and write Rust complexity metrics for one file node from its source
706/// bytes. `#[cfg(test)]` / `#[test]` / `#[bench]` items are stripped first (their
707/// lines become `tloc`), then the in-tree `rust_ts` engine runs. Returns `true`
708/// if metrics were written (`false` if the source did not parse).
709fn rust_file_metrics(node: &mut Node, src: &[u8]) -> bool {
710    let (prod, tloc) = strip_cfg_test(src);
711    let Some(mut m) = rust_ts::compute(&prod) else {
712        return false;
713    };
714    m.tloc = tloc as f64;
715    code_ranker_graph::write_metrics(node, &m);
716    true
717}
718
719/// True if any attribute gates an item to tests: `#[test]`, `#[bench]`, or
720/// `#[cfg(test)]` / `#[cfg(all(test, …))]` / `#[cfg(any(test, …))]`. A `test`
721/// **identifier** inside `cfg(...)` is what matches — `cfg(feature = "test")`
722/// (a string literal) does not.
723fn is_test_attr(attr: &syn::Attribute) -> bool {
724    if attr.path().is_ident("test") || attr.path().is_ident("bench") {
725        return true;
726    }
727    if attr.path().is_ident("cfg")
728        && let syn::Meta::List(list) = &attr.meta
729    {
730        return tokens_have_test_ident(list.tokens.clone());
731    }
732    false
733}
734
735/// Recursively scan a token stream for a bare `test` identifier (descends into
736/// `all(...)` / `any(...)` groups).
737fn tokens_have_test_ident(ts: proc_macro2::TokenStream) -> bool {
738    ts.into_iter().any(|t| match t {
739        proc_macro2::TokenTree::Ident(i) => i == "test",
740        proc_macro2::TokenTree::Group(g) => tokens_have_test_ident(g.stream()),
741        _ => false,
742    })
743}
744
745/// Visitor collecting the 1-based, inclusive line ranges of test-only items
746/// (`#[cfg(test)]` modules, `#[test]`/`#[cfg(test)]` fns), attribute line
747/// included. It recurses into ordinary modules to catch nested test modules but
748/// not into a test item it already captured.
749#[derive(Default)]
750struct TestSpans {
751    ranges: Vec<(usize, usize)>,
752}
753
754impl TestSpans {
755    fn record(&mut self, attrs: &[syn::Attribute], span: proc_macro2::Span) {
756        use syn::spanned::Spanned;
757        let start = attrs
758            .iter()
759            .map(|a| a.span().start().line)
760            .chain(std::iter::once(span.start().line))
761            .min()
762            .unwrap_or(0);
763        self.ranges.push((start, span.end().line));
764    }
765}
766
767impl<'ast> syn::visit::Visit<'ast> for TestSpans {
768    fn visit_item_mod(&mut self, m: &'ast syn::ItemMod) {
769        use syn::spanned::Spanned;
770        if m.attrs.iter().any(is_test_attr) {
771            self.record(&m.attrs, m.span());
772        } else {
773            syn::visit::visit_item_mod(self, m);
774        }
775    }
776    fn visit_item_fn(&mut self, f: &'ast syn::ItemFn) {
777        use syn::spanned::Spanned;
778        if f.attrs.iter().any(is_test_attr) {
779            self.record(&f.attrs, f.span());
780        }
781    }
782}
783
784/// Step 1 of the Rust line accounting: remove `#[cfg(test)]` / `#[test]` /
785/// `#[bench]` items so the production metrics (`sloc` / `cloc` / `blank` / `hk` /
786/// complexity) are then measured on production code only. Returns the production
787/// source **and** `tloc` — the number of test lines removed (the whole test
788/// region: attribute, body, braces). Parse failures or no test items return the
789/// source unchanged with `tloc = 0`.
790fn strip_cfg_test(src: &[u8]) -> (Vec<u8>, usize) {
791    use syn::visit::Visit;
792    let Ok(text) = std::str::from_utf8(src) else {
793        return (src.to_vec(), 0);
794    };
795    let Ok(file) = syn::parse_file(text) else {
796        return (src.to_vec(), 0);
797    };
798    let mut spans = TestSpans::default();
799    spans.visit_file(&file);
800    if spans.ranges.is_empty() {
801        return (src.to_vec(), 0);
802    }
803    let drop: std::collections::HashSet<usize> =
804        spans.ranges.iter().flat_map(|&(s, e)| s..=e).collect();
805    let tloc = drop.len();
806    let mut out: String = text
807        .lines()
808        .enumerate()
809        .filter(|(i, _)| !drop.contains(&(i + 1)))
810        .map(|(_, l)| l)
811        .collect::<Vec<_>>()
812        .join("\n");
813    out.push('\n');
814    (out.into_bytes(), tloc)
815}
816
817#[cfg(test)]
818mod tests {
819    use super::*;
820
821    fn strip(src: &str) -> String {
822        String::from_utf8(strip_cfg_test(src.as_bytes()).0).unwrap()
823    }
824
825    /// Build a `Module` internal node for one file, with structural attrs.
826    /// `line` distinguishes an inline module (`Some`) from a file-backed one
827    /// (`None`); `collapse_to_files` lets the file-backed node win.
828    #[allow(clippy::too_many_arguments)]
829    fn module_node(
830        id: &str,
831        path: &str,
832        line: Option<u32>,
833        visibility: internal::Visibility,
834        loc: u32,
835        items: u32,
836        unsafe_count: u32,
837        krate: &str,
838    ) -> internal::Node {
839        internal::Node {
840            id: id.into(),
841            kind: NodeKind::Module,
842            name: id.into(),
843            path: path.into(),
844            parent: None,
845            external: None,
846            version: None,
847            visibility: Some(visibility),
848            loc: Some(loc),
849            line,
850            item_count: Some(items),
851            unsafe_count: Some(unsafe_count),
852            crate_label: Some(krate.into()),
853        }
854    }
855
#[test]
fn collapse_lets_the_file_backed_module_overwrite_structural_attrs() {
    // Both modules share one `path`, so they map to the same file id. The
    // inline module (`line = Some`) is added first and seeds the file node;
    // the file-backed module (`line = None`) is the source of truth and must
    // then overwrite every structural attr (visibility / loc / items / unsafe /
    // crate). This drives the Occupied-entry update branch of
    // `collapse_to_files`.
    let inline_module = module_node(
        "inline",
        "/x/foo.rs",
        Some(5),
        internal::Visibility::Private,
        1,
        1,
        0,
        "wrong-crate",
    );
    let file_module = module_node(
        "file",
        "/x/foo.rs",
        None,
        internal::Visibility::Public,
        42,
        7,
        3,
        "mycrate",
    );

    let mut builder = GraphBuilder::new();
    builder.add_node(inline_module);
    builder.add_node(file_module);
    let graph = collapse_to_files(builder.build());

    let file = graph
        .nodes
        .iter()
        .find(|n| n.id == "/x/foo.rs")
        .expect("the two modules collapsed into one file node");
    assert_eq!(file.kind, "file");

    // (attribute key, value the file-backed module supplied, failure note)
    let expected = [
        (
            "visibility",
            AttrValue::Str("public".into()),
            "file-backed visibility wins",
        ),
        ("loc", AttrValue::Int(42), "file-backed loc wins"),
        ("items", AttrValue::Int(7), "file-backed item count wins"),
        (
            "unsafe",
            AttrValue::Int(3),
            "file-backed unsafe count wins (and is non-zero so it is kept)",
        ),
        (
            "crate",
            AttrValue::Str("mycrate".into()),
            "file-backed crate label wins",
        ),
    ];
    for (key, want, why) in &expected {
        assert_eq!(file.attrs.get(*key), Some(want), "{why}");
    }
}
920
#[test]
fn strips_cfg_test_module_with_its_attribute() {
    // A production fn followed by a `#[cfg(test)]` module: the module, its
    // attribute line and the test fn inside must all disappear.
    let src = concat!(
        "pub fn prod() -> i32 {\n    1\n}\n\n",
        "#[cfg(test)]\nmod tests {\n    use super::*;\n    #[test]\n    fn t() { assert_eq!(prod(), 1); }\n}\n",
    );
    let out = strip(src);

    assert!(out.contains("pub fn prod"), "production kept: {out}");
    let must_be_gone = [
        ("mod tests", "test mod removed"),
        ("#[cfg(test)]", "the cfg attr line removed too"),
        ("fn t()", "test fn removed"),
    ];
    for (needle, why) in must_be_gone {
        assert!(!out.contains(needle), "{why}: {out}");
    }
}
935
#[test]
fn strips_standalone_test_and_bench_fns() {
    // Free `#[test]` / `#[bench]` functions outside any test module are
    // removed as well; the production fn stays.
    let src = concat!(
        "fn prod() {}\n",
        "#[test]\nfn it_works() {}\n",
        "#[bench]\nfn b(_: &mut ()) {}\n",
    );
    let out = strip(src);
    assert!(out.contains("fn prod"));
    let leaked = ["it_works", "fn b("]
        .into_iter()
        .any(|needle| out.contains(needle));
    assert!(!leaked, "test/bench fns removed: {out}");
}
945
#[test]
fn keeps_non_test_cfg_and_similarly_named_items() {
    // Neither of these is test-gated: `cfg(feature = "test")` carries `test`
    // only as a string literal, and `mod tests_data` merely looks like a test
    // module. Both must survive stripping.
    let src = concat!(
        "#[cfg(feature = \"test\")]\npub mod gated {}\n",
        "pub mod tests_data {}\n",
    );
    let out = strip(src);
    let must_stay = [
        ("pub mod gated", "feature-cfg kept"),
        ("tests_data", "non-gated lookalike kept"),
    ];
    for (needle, why) in must_stay {
        assert!(out.contains(needle), "{why}: {out}");
    }
}
957
#[test]
fn strips_cfg_all_test_combinations() {
    // `test` nested inside `all(...)` still gates the module to tests.
    let src = "fn p() {}\n#[cfg(all(test, feature = \"x\"))]\nmod t {}\n";
    let out = strip(src);
    assert!(out.contains("fn p"));
    let test_mod_survived = out.contains("mod t");
    assert!(!test_mod_survived, "cfg(all(test,…)) removed: {out}");
}
964
#[test]
fn unchanged_without_tests_or_on_parse_error() {
    // First input: valid Rust with no test items. Second input: text that does
    // not parse as Rust. Both must come back byte-for-byte with `tloc = 0`.
    for src in ["pub fn a() {}\n", "@@@ not rust @@@"] {
        let bytes = src.as_bytes();
        assert_eq!(strip_cfg_test(bytes), (bytes.to_vec(), 0));
    }
}
978
#[test]
fn tloc_counts_the_whole_removed_test_region() {
    // Four lines belong to the test region and are removed: the
    // `#[cfg(test)]` attribute, `mod tests {`, the single body line, and the
    // closing `}`.
    let src = concat!(
        "pub fn p() {}\n",
        "#[cfg(test)]\n",
        "mod tests {\n",
        "    fn t() {}\n",
        "}\n",
    );
    let tloc = strip_cfg_test(src.as_bytes()).1;
    assert_eq!(tloc, 4);
}
987
988    fn metric(node: &code_ranker_plugin_api::node::Node, key: &str) -> Option<f64> {
989        match node.attrs.get(key) {
990            Some(code_ranker_plugin_api::attrs::AttrValue::Int(v)) => Some(*v as f64),
991            Some(code_ranker_plugin_api::attrs::AttrValue::Float(v)) => Some(*v),
992            _ => None,
993        }
994    }
995
996    /// Strip inline tests from `src`, run the in-tree Rust engine, write the
997    /// metrics onto a fresh file node, and read one metric — the in-process
998    /// building block for the metamorphic tests below. Handles `.rs` only.
999    fn metric_of(_path: &str, src: &str, key: &str) -> Option<f64> {
1000        let (prod, tloc) = strip_cfg_test(src.as_bytes());
1001        let mut m = rust_ts::compute(&prod)?;
1002        m.tloc = tloc as f64;
1003        let mut node = code_ranker_plugin_api::node::Node {
1004            id: "t.rs".into(),
1005            kind: "file".into(),
1006            name: "t.rs".into(),
1007            parent: None,
1008            attrs: Default::default(),
1009        };
1010        code_ranker_graph::write_metrics(&mut node, &m);
1011        metric(&node, key)
1012    }
1013
1014    // ---- Layer 1: metamorphic FP / FN matrix (see docs/metric-correctness.md) --
1015    //
1016    // Asserts the AST-Accurate principle across `metric × language × lexical
1017    // position × direction`: a control-flow / exit keyword appearing only as a
1018    // look-alike must NOT move the per-function metrics (no false positive); every
1019    // real construct form MUST be counted (no false negative). Pure in-process
1020    // parses — ~0 cost against the 20s budget. (LOC / Halstead are intentionally
1021    // NOT in the keyword-invariance set: a real comment line legitimately changes
1022    // `cloc`, a string legitimately adds Halstead operands — that is not an FP.)
1023
/// Source text of a Rust function `f` that contains real branching (a
/// closure, an `if` with an early `return`, and a tail expression), so the
/// per-function metrics have something to count. `doc` is placed verbatim in
/// front of the function and `body_inject` verbatim at the top of its body;
/// either may be empty. The FP matrix builds its variants from this.
fn rs_src(doc: &str, body_inject: &str) -> String {
    let pieces = [
        doc,
        "fn f(a: i32, b: i32) -> i32 {\n",
        body_inject,
        "    let g = |x: i32| x + 1;\n",
        "if a > 0 { return g(b); }\n",
        "a + b\n",
        "}\n",
    ];
    pieces.concat()
}
1036
// Keyword look-alike guard set for Rust: the construct keywords and operators
// that a complexity (or `unsafe`) metric could key on. The FP matrix injects
// them *only* as look-alikes and asserts that no metric moves. The list mirrors
// the "Keyword look-alike guard set" in principles/rust/metrics.md, and
// `rust_trigger_set_documented_in_spec` asserts that the spec documents every
// entry, so the two cannot drift apart. Listing more than the analyzer's real
// triggers is harmless.
const RUST_TRIGGERS: &[&str] = &[
    // keywords
    "if", "else", "match", "while", "for", "loop", "return", "unsafe",
    // operators
    "&&", "||", "?",
];
1047
#[test]
fn rust_complexity_fp_matrix() {
    // Each variant smuggles trigger keywords into one lexical position where
    // they are plain text, not constructs. None of them may change cyclomatic /
    // cognitive / exits / args / closures relative to the untouched base.
    let kw = RUST_TRIGGERS.join(" ");
    let in_body = |stmt: &str| rs_src("", stmt);

    let base = rs_src("", "");
    let positions: Vec<(&str, String)> = vec![
        ("line comment", in_body(&format!("    // {kw} && || ?\n"))),
        ("block comment", in_body(&format!("    /* {kw} && || ? */\n"))),
        ("doc comment", rs_src(&format!("/// {kw}\n"), "")),
        ("string", in_body(&format!("    let _s = \"{kw} && || ?\";\n"))),
        (
            "raw string",
            in_body(&format!("    let _r = r#\"{kw} && ||\"#;\n")),
        ),
        (
            "identifier",
            in_body("    let if_match_return_loop = 0; let _ = if_match_return_loop;\n"),
        ),
        (
            "format string",
            in_body("    let _f = format!(\"if {} while\", a);\n"),
        ),
        (
            "macro body",
            in_body("    let _m = vec![\"if\", \"match\", \"while\"];\n"),
        ),
        (
            "raw identifier",
            in_body("    let r#match = 1; let _ = r#match;\n"),
        ),
    ];

    for key in ["cyclomatic", "cognitive", "exits", "args", "closures"] {
        let want = metric_of("t.rs", &base, key);
        for (pos, src) in &positions {
            let got = metric_of("t.rs", src, key);
            assert_eq!(
                got, want,
                "metric `{key}` moved when a keyword appeared only in: {pos}"
            );
        }
    }
}
1103
#[test]
fn cyclomatic_counts_every_branch_form() {
    // FN guard: each branch form the analyzer recognizes has to push
    // cyclomatic above a branch-free baseline. Only "detected" is asserted
    // here; the exact per-form increments are the analyzer's rule (layer 4).
    const IF_ELSE: &str = "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n";
    const ELSE_IF: &str =
        "fn f(a: i32) -> i32 { if a > 0 { 1 } else if a < 0 { 2 } else { 3 } }\n";
    let cyclomatic = |src: &str| metric_of("t.rs", src, "cyclomatic");

    let baseline = cyclomatic("fn f() -> i32 { 0 }\n").expect("baseline cyclomatic");
    let forms = [
        ("if", IF_ELSE),
        ("else-if", ELSE_IF),
        (
            "match",
            "fn f(a: i32) -> i32 { match a { 0 => 1, _ => 2 } }\n",
        ),
        (
            "while",
            "fn f(mut a: i32) -> i32 { while a > 0 { a -= 1; } a }\n",
        ),
        (
            "for",
            "fn f(a: i32) -> i32 { let mut s = 0; for i in 0..a { s += i; } s }\n",
        ),
        ("loop", "fn f() -> i32 { loop { break; } 0 }\n"),
        (
            "&&",
            "fn f(a: i32, b: i32) -> i32 { let _ = a > 0 && b > 0; 0 }\n",
        ),
        (
            "||",
            "fn f(a: i32, b: i32) -> i32 { let _ = a > 0 || b > 0; 0 }\n",
        ),
        ("?", "fn f() -> Option<i32> { let x = Some(1)?; Some(x) }\n"),
        (
            "if let",
            "fn f() -> i32 { if let Some(x) = Some(1) { x } else { 0 } }\n",
        ),
        (
            "while let",
            "fn f() -> i32 { let mut it = [1].into_iter(); let mut n = 0; while let Some(_) = it.next() { n += 1; } n }\n",
        ),
    ];
    for (name, src) in forms {
        let Some(c) = cyclomatic(src) else {
            panic!("cyclomatic missing for `{name}`");
        };
        assert!(
            c > baseline,
            "branch form `{name}` not counted (cyclomatic {c} <= baseline {baseline})"
        );
    }

    // Magnitude anchor: going from `if/else` to `if/else if/else` is one more
    // real `if`, which must add exactly 1.
    let one = cyclomatic(IF_ELSE).unwrap();
    let two = cyclomatic(ELSE_IF).unwrap();
    assert_eq!(two - one, 1.0, "one extra real `if` must add exactly 1");
}
1171
#[test]
fn rust_complexity_fn_per_metric() {
    // FN guard for the per-function metrics other than cyclomatic: a snippet
    // that really contains the construct must make the metric show up.
    let read = |src: &str, key: &str| -> f64 {
        metric_of("t.rs", src, key).unwrap_or_else(|| panic!("{key} present"))
    };

    let cognitive = read(
        "fn f(a: i32, b: i32) -> i32 { if a > 0 { if b > 0 { 1 } else { 2 } } else { 3 } }\n",
        "cognitive",
    );
    assert!(cognitive > 0.0, "nested branches must raise cognitive");

    let exits = read("fn f(a: i32) -> i32 { return a; }\n", "exits");
    assert!(exits >= 1.0, "a real `return` must be counted as an exit");

    let args = read(
        "fn f(a: i32, b: i32, c: i32) -> i32 { a + b + c }\n",
        "args",
    );
    assert!(
        args >= 3.0,
        "three parameters must count as >=3 args, got {args}"
    );

    let closures = read(
        "fn f() -> i32 { let g = |x: i32| x + 1; g(1) }\n",
        "closures",
    );
    assert!(closures >= 1.0, "a real closure must be counted");
}
1207
#[test]
fn rust_only_complexity_fp_matrix() {
    // FP invariance for cyclomatic / cognitive: Rust's documented trigger set
    // is injected once as a leading comment and once as a string literal, and
    // neither variant may move the metrics away from the base snippet.
    let path = "t.rs";
    let kw = RUST_TRIGGERS.join(" ");
    let base = "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n";
    let traps = [
        format!("// {kw}\n{base}"),
        format!("fn f(a: i32) -> i32 {{ let _ = \"{kw}\"; if a > 0 {{ 1 }} else {{ 2 }} }}\n"),
    ];

    for key in ["cyclomatic", "cognitive"] {
        let want = metric_of(path, base, key);
        for trap in &traps {
            let got = metric_of(path, trap, key);
            assert_eq!(
                got, want,
                "{path} metric `{key}` moved on a keyword look-alike"
            );
        }
    }
}
1238
#[test]
fn rust_trigger_set_documented_in_spec() {
    // Lock-step guard: each keyword the FP matrix injects has to appear,
    // back-quoted, in Rust's metrics spec. That keeps `RUST_TRIGGERS` and the
    // spec's "Keyword look-alike guard set" from drifting apart.
    let path = format!(
        "{}/../../principles/rust/metrics.md",
        env!("CARGO_MANIFEST_DIR")
    );
    let spec = match std::fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => panic!("read {path}: {e}"),
    };
    for kw in RUST_TRIGGERS {
        let quoted = format!("`{kw}`");
        assert!(
            spec.contains(quoted.as_str()),
            "trigger `{kw}` is not documented in principles/rust/metrics.md — spec and FP test drifted"
        );
    }
}
1254
1255    // ---- Layer 2: generative tests (see docs/metric-correctness.md) ------------
1256    //
1257    // Generate programs with a KNOWN construct count, then assert the metric
1258    // equals ground truth across a combinatorial grid. Deterministic (no random
1259    // dependency, no flakiness) — proptest-style randomized fuzz is a later
1260    // nightly extension. Still pure in-process parses; the whole grid is ~ms.
1261
/// Generate a Rust function whose body holds `noise` pairs of keyword-laden
/// look-alike lines (one comment plus one string binding — neither is a real
/// construct), then `branches` real, independent `if` statements (each worth
/// exactly 1 cyclomatic), then the tail expression `0`.
fn gen_rs(branches: usize, noise: usize) -> String {
    let noise_lines = (0..noise).map(|i| {
        format!(
            "    // if match while for loop return && || ? noise {i}\n    \
             let _n{i} = \"if match while return && ||\";\n"
        )
    });
    let branch_lines = (0..branches).map(|i| format!("    if x > {i} {{ let _ = {i}; }}\n"));
    let body: String = noise_lines.chain(branch_lines).collect();
    format!("fn f(x: i32) -> i32 {{\n{body}    0\n}}\n")
}
1280
#[test]
fn generative_cyclomatic_counts_branches_not_noise() {
    // Ground truth by construction: for a fixed noise level, cyclomatic equals
    // the zero-branch value plus the number of real `if`s — regardless of how
    // many keyword look-alike lines surround them. The sweep covers an 8×8 grid
    // of (branches, noise), i.e. 64 generated programs.
    let cyclomatic = |branches: usize, noise: usize| -> f64 {
        metric_of("t.rs", &gen_rs(branches, noise), "cyclomatic").expect("cyclomatic present")
    };
    for noise in 0..8 {
        let base = cyclomatic(0, noise);
        for branches in 0..8 {
            let cyc = cyclomatic(branches, noise);
            assert_eq!(
                cyc,
                base + branches as f64,
                "cyclomatic must add exactly 1 per real `if` \
                 and 0 per noise line (branches={branches}, noise={noise})"
            );
        }
    }
}
1301
#[test]
fn generative_complexity_invariant_to_noise() {
    // The real structure is fixed (2 args, a closure, a branch, a `return`)
    // while the pile of keyword look-alikes in front of it grows. Each
    // per-function metric has to stay exactly at its noise-free value, i.e. no
    // false positive at any noise level.
    let program = |noise: usize| -> String {
        let noise_lines: String = (0..noise)
            .map(|i| {
                format!(
                    "    // if match return unsafe && || {i}\n    \
                     let _n{i} = \"if match return && ||\";\n"
                )
            })
            .collect();
        format!(
            "fn f(a: i32, b: i32) -> i32 {{\n{noise_lines}    let g = |x: i32| x + 1;\nif a > 0 {{ return g(b); }}\na + b\n}}\n"
        )
    };

    for key in ["cyclomatic", "cognitive", "exits", "args", "closures"] {
        let want = metric_of("t.rs", &program(0), key);
        for noise in 1..10 {
            let got = metric_of("t.rs", &program(noise), key);
            assert_eq!(
                got, want,
                "metric `{key}` moved at noise={noise} — keyword look-alikes leaked in"
            );
        }
    }
}
1333
#[test]
fn per_function_metrics_aggregate_over_child_functions() {
    // Regression for the "root-vs-sum" class of bug: `write_metrics` used to
    // read the ROOT space value for `cyclomatic` / `cognitive` / `exits` /
    // `args` / `closures`. For a file that is the vacuous root count (0, or 1
    // for cyclomatic), so every file looked the same. The signal lives in the
    // child function spaces, and each metric must be the SUM over them.
    //
    // Fixture: `a` takes 2 args, nests two `if`s and `return`s; `b` defines a
    // 1-arg closure. The file must therefore show summed branches in
    // cyclomatic, a non-zero cognitive (nesting), the `return` in exits,
    // args = 2 (fn) + 1 (closure) = 3, and one closure.
    let src = concat!(
        "fn a(x: i32, y: i32) -> i32 { if x > 0 { if x > 1 { return x; } y } else { 3 } }\n",
        "fn b() -> i32 { let f = |z: i32| z + 1; f(2) }\n",
    );
    let read = |key: &str| -> f64 {
        metric_of("t.rs", src, key).unwrap_or_else(|| panic!("{key} present"))
    };

    // Every value sits well above the vacuous root value, which shows the
    // metric was aggregated rather than read from the root only.
    let cyc = read("cyclomatic");
    assert!(cyc > 1.0, "cyclomatic should be summed, got {cyc}");

    let cog = read("cognitive");
    assert!(cog > 0.0, "cognitive should be summed, got {cog}");

    let exits = read("exits");
    assert!(exits >= 1.0, "exits should count the `return`, got {exits}");

    let args = read("args");
    assert!(
        args >= 3.0,
        "args should sum fn (2) + closure (1), got {args}"
    );

    let closures = read("closures");
    assert!(
        closures >= 1.0,
        "closures should count the closure, got {closures}"
    );
}
1367
1368    // ---- Layer 3: asserted anchors (see docs/metric-correctness.md) -----------
1369    //
1370    // Layers 1 & 2 prove RELATIVE behaviour (noise-invariance, +1 per construct)
1371    // but never pin an ABSOLUTE value, so a uniform offset/scale bug (every count
1372    // shifted by +1, or doubled) would pass green. These anchors pin exact values
1373    // hand-derived from principles/rust/metrics.md, catching that scale class.
1374
#[test]
fn complexity_absolute_anchors_hand_derived() {
    // Integer counting metrics pinned to EXACT file-level values, derived by
    // hand from the spec's rules (metrics.md §cyclomatic / §exits,args,closures).
    //
    // What is pinned is the analyzer-of-record's whole-file output, which is
    // what we emit:
    //   • `cyclomatic` = 1 for the file unit + Σ over functions of
    //     (1 + branch points). Per-function McCabe (`V(G)=E−N+2P` = Σ over
    //     functions) is the theory; the analyzer adds the file unit on top and
    //     we emit that verbatim (`mi` is computed from the same value).
    //     For `classify`: file 1 + fn 4 (base 1 + if + else-if + ||) = 5.
    //   • `exits` = Σ over functions of (one exit for a value-returning `-> T`
    //     signature + explicit return/?). "Exit points" has no canonical
    //     theory, so the analyzer's rule is authoritative (metrics.md §exits);
    //     the `-> i32` / `-> Option` snippets below therefore read 2.
    //   • `args` / `closures` / `cognitive` carry no file-unit offset.
    // Pinning all of them catches any drift from the analyzer's output.
    let classify = concat!(
        "fn classify(n: i32) -> &'static str {\n",
        "    if n < 0 { \"neg\" } else if n == 0 || n == 1 { \"small\" } else { \"big\" }\n",
        "}\n",
    );
    let two_closures = "fn f() { let g = |x: i32| x + 1; let h = |y: i32| y; let _ = (g, h); }\n";

    // (label, path, src, key, exact_expected)
    let cases: &[(&str, &str, &str, &str, f64)] = &[
        // file unit 1 + fn(base 1 + if + else-if + ||) = 1 + 4 = 5.
        ("classify", "t.rs", classify, "cyclomatic", 5.0),
        // file unit 1 + fn(base 1 + one if) = 1 + 2 = 3 (else is free).
        (
            "single if",
            "t.rs",
            "fn f(a: i32) -> i32 { if a > 0 { 1 } else { 2 } }\n",
            "cyclomatic",
            3.0,
        ),
        // one explicit return + one value-returning exit (`-> i32`) → 2.
        (
            "one return",
            "t.rs",
            "fn f() -> i32 { return 1; }\n",
            "exits",
            2.0,
        ),
        // one `?` + one value-returning exit (`-> Option`) → 2.
        (
            "one try op",
            "t.rs",
            "fn f() -> Option<i32> { let x = Some(1)?; Some(x) }\n",
            "exits",
            2.0,
        ),
        (
            "three params",
            "t.rs",
            "fn f(a: i32, b: i32, c: i32) -> i32 { a + b + c }\n",
            "args",
            3.0,
        ),
        ("two closures", "t.rs", two_closures, "closures", 2.0),
        ("two closure args", "t.rs", two_closures, "args", 2.0),
    ];

    // Collect every mismatch so that one run reports all drifting anchors.
    let fails: Vec<String> = cases
        .iter()
        .filter_map(|&(label, path, src, key, want)| {
            let got = metric_of(path, src, key);
            (got != Some(want)).then(|| format!("{label}: {key} want {want}, got {got:?}"))
        })
        .collect();
    assert!(
        fails.is_empty(),
        "failing integer anchors:\n{}",
        fails.join("\n")
    );
}
1448
1449    #[test]
1450    fn complexity_frozen_scale_anchors() {
1451        // Algorithm-specific metrics (cognitive nesting weights, Halstead
1452        // dictionaries, MI) cannot be hand-derived reliably, so they are FROZEN
1453        // anchors: values produced by `rust-code-analysis` for one fixed snippet,
1454        // verified once. Their job is to catch a uniform offset/scale regression
1455        // (a library bump that doubles `volume`, an MI formula edit) — not to
1456        // claim an independent ground truth. They change only when the underlying
1457        // algorithm changes, and that change should be deliberate.
1458        let classify = "fn classify(n: i32) -> &'static str {\n\
1459            \x20   if n < 0 { \"neg\" } else if n == 0 || n == 1 { \"small\" } else { \"big\" }\n\
1460            }\n";
1461        // (key, expected, abs_tolerance)
1462        let cases: &[(&str, f64, f64)] = &[
1463            ("cognitive", 4.0, 0.0),   // exact integer
1464            ("vocabulary", 18.0, 0.0), // η₁ + η₂, exact integer
1465            ("length", 28.0, 0.0),     // N₁ + N₂, exact integer
1466            ("volume", 116.757, 0.01), // length × log₂(vocabulary)
1467            ("effort", 875.684, 0.01), // difficulty × volume
1468            ("mi", 127.299, 0.01),     // maintainability index
1469            ("mi_sei", 108.463, 0.01), // SEI variant
1470        ];
1471        let mut fails = Vec::new();
1472        for (key, want, tol) in cases {
1473            match metric_of("t.rs", classify, key) {
1474                Some(got) if (got - *want).abs() <= *tol => {}
1475                other => fails.push(format!("{key}: want {want} (±{tol}), got {other:?}")),
1476            }
1477        }
1478        assert!(
1479            fails.is_empty(),
1480            "failing scale anchors:\n{}",
1481            fails.join("\n")
1482        );
1483    }
1484
1485    #[test]
1486    fn declaration_only_file_emits_no_complexity() {
1487        // No functions → only the file unit space → cyclomatic is a vacuous 1 and
1488        // cognitive is 0. Both must be dropped (not shown as a meaningless "1"),
1489        // matching how `put` already drops cognitive's 0. Mirrors real files like
1490        // a clap CLI model or a type-definitions module.
1491        let src = "pub struct Cli { pub verbose: bool }\n\
1492                   pub enum Mode { A, B }\n";
1493        assert_eq!(
1494            metric_of("t.rs", src, "cyclomatic"),
1495            None,
1496            "a declaration-only file must not emit a vacuous cyclomatic"
1497        );
1498        assert_eq!(
1499            metric_of("t.rs", src, "cognitive"),
1500            None,
1501            "a declaration-only file must not emit cognitive"
1502        );
1503    }
1504
1505    #[test]
1506    fn metric_specs_override_adds_rust_cfg_test_note() {
1507        // The neutral default descriptions carry no language nuance; the Rust
1508        // plugin re-adds the `#[cfg(test)]` LOC-exclusion note for sloc/lloc/
1509        // cloc/blank — so it appears only in Rust snapshots, never in py/js/ts.
1510        let defaults = code_ranker_graph::metric_specs().0;
1511        // sanity: the shared default is language-neutral
1512        assert!(
1513            !defaults["blank"]
1514                .description
1515                .as_deref()
1516                .unwrap_or("")
1517                .contains("#[cfg(test)]"),
1518            "the shared default must stay language-neutral"
1519        );
1520
1521        let refined = RustPlugin.metric_specs(defaults);
1522        for key in ["sloc", "lloc", "cloc", "blank"] {
1523            let desc = refined[key].description.as_deref().unwrap_or("");
1524            assert!(
1525                desc.contains("#[cfg(test)]"),
1526                "Rust `{key}` description should note the cfg(test) exclusion"
1527            );
1528        }
1529    }
1530}