Skip to main content

code_ranker_plugin_rust/
lib.rs

1use anyhow::Result;
2use code_ranker_plugin_api::{
3    attrs::{AttrValue, ValueType},
4    default_cycle_kinds, default_node_kinds,
5    edge::Edge,
6    graph::Graph,
7    level::{AttributeSpec, EdgeKindSpec, Grouping, Level, Thresholds},
8    log,
9    node::Node,
10    plugin::{LanguagePlugin, PluginInput, Preset},
11};
12use std::collections::hash_map::Entry;
13use std::collections::{BTreeMap, HashMap, HashSet};
14use std::path::Path;
15
16use cargo_metadata::MetadataCommand;
17
18mod crate_graph;
19mod ids;
20mod internal;
21mod module_graph;
22
23use internal::{EdgeKind, GraphBuilder, InternalGraph, NodeKind};
24
/// The Rust language plugin.
///
/// A field-less unit struct: it holds no state of its own and exists to carry
/// the [`LanguagePlugin`] implementation defined in this file.
pub struct RustPlugin;
26
/// One Rust-only metric-lens preset: (id, title, sort_metric, connections,
/// doc_slug, prompt body). Same shape as the generic catalog in
/// `code-ranker-cli/src/presets.rs`, but these rank modules by a single
/// coupling/size metric rather than a design principle. Slugs resolve to
/// `principles/rust/<slug>.md`.
///
/// Tuple positions, as consumed by `RustPlugin::presets`:
/// 0. id — copied into both the preset's `id` and its `label`;
/// 1. title — human-readable heading;
/// 2. sort_metric — name of the metric the preset sorts by;
/// 3. connections — list of connection names, copied into the preset as-is;
/// 4. doc_slug — file stem of the doc page; `.md` is appended and the result
///    is joined onto the directory of an existing default preset's `doc_url`;
/// 5. prompt body — the prompt text stored on the preset.
type MetricPreset = (
    &'static str,
    &'static str,
    &'static str,
    &'static [&'static str],
    &'static str,
    &'static str,
);
40
/// The Rust-only metric presets that `RustPlugin::presets` appends after the
/// defaults it receives, in this order: `HK`, `SLOC`, `FANIN`, `FANOUT`.
///
/// Each entry is a [`MetricPreset`] tuple. The prompt bodies are runtime text
/// handed to the preset verbatim; the `\n\` line endings embed a newline and
/// then skip the source indentation of the following line.
const RUST_METRIC_PRESETS: &[MetricPreset] = &[
    (
        "HK",
        "HK — Henry-Kafura Coupling",
        "hk",
        &["in", "out"],
        "henry-kafura-coupling",
        "These modules carry heavy Henry-Kafura coupling — HK = sloc × (fan_in × fan_out)²,\n\
         where sloc is the module's source lines of code (real code lines, excluding blanks\n\
         and comment-only lines), fan_in is how many modules depend on it, and fan_out is how\n\
         many it depends on.\n\
         A high score is a large module sitting on a busy crossroads of incoming and outgoing\n\
         dependencies, so any change here ripples widely.\n\n\
         For each module below, lower the factor that dominates its HK: shrink the module by\n\
         extracting cohesive pieces, or cut fan-in/fan-out by narrowing its public surface and\n\
         depending on fewer collaborators (introduce an abstraction, move a responsibility).\n\
         Keep existing API contracts intact.",
    ),
    (
        "SLOC",
        "SLOC — Module Size",
        "sloc",
        &[],
        "module-size",
        "These are the largest modules by source lines of code. Size alone is not a defect, but\n\
         oversized files usually bundle several responsibilities and are hard to read, test and\n\
         review.\n\n\
         For each module below, identify the distinct responsibilities it holds and propose how\n\
         to split it into smaller, cohesive modules — each with a single clear purpose — without\n\
         changing external behaviour.",
    ),
    (
        "FANIN",
        "Fan-in — Afferent Coupling",
        "fan_in",
        &["in"],
        "fan-in-afferent-coupling",
        "These modules have high fan-in: many other modules depend on them. They are\n\
         load-bearing — a change here forces changes (or re-review) across every dependant, and\n\
         a bug here is widely felt.\n\n\
         For each module below, confirm its public surface is a stable, minimal contract. Narrow\n\
         the API to what callers actually need, split it if different callers use disjoint parts\n\
         (see Interface Segregation), and stabilise the abstractions the rest of the codebase\n\
         leans on.",
    ),
    (
        "FANOUT",
        "Fan-out — Efferent Coupling",
        "fan_out",
        &["out"],
        "fan-out-efferent-coupling",
        "These modules have high fan-out: they depend on many other modules. High efferent\n\
         coupling makes a module fragile (it breaks when any dependency changes) and hard to\n\
         test or reuse in isolation.\n\n\
         For each module below, reduce its direct dependencies: depend on abstractions rather\n\
         than concretes (see Dependency Inversion), collapse several fine-grained collaborators\n\
         behind one focused interface, and move logic that pulls in unrelated dependencies into\n\
         a more appropriate module.",
    ),
];
101
102impl LanguagePlugin for RustPlugin {
103    fn name(&self) -> &str {
104        "rust"
105    }
106
107    fn detect(&self, workspace: &Path, _input: &PluginInput) -> bool {
108        workspace.join("Cargo.toml").exists()
109    }
110
111    fn levels(&self) -> Vec<Level> {
112        let mut edge_kinds: BTreeMap<String, EdgeKindSpec> = BTreeMap::new();
113        edge_kinds.insert(
114            "uses".into(),
115            EdgeKindSpec {
116                flow: true,
117                label: Some("uses".into()),
118                description: Some(
119                    "Code dependency — this file references an item the target file defines.<br>\
120                     Captured from `use path::Item;`, a qualified path (`crate::a::Item`, \
121                     `other_crate::Item`), or a derive (`#[derive(serde::Serialize)]`).<br>\
122                     The path resolves to the file that defines the item (following `pub use` \
123                     re-exports), so the edge points at the definition, not a re-export hub.<br>\
124                     This is the real dependency: it counts toward fan-in / fan-out, \
125                     Henry-Kafura coupling and cycles."
126                        .into(),
127                ),
128            },
129        );
130        edge_kinds.insert(
131            "contains".into(),
132            EdgeKindSpec {
133                flow: false,
134                label: Some("contains".into()),
135                description: Some(
136                    "Module ownership — the parent declares the child module \
137                     (`mod foo;` / `pub mod foo;`), so `foo.rs` (or `foo/mod.rs`) belongs to it.<br>\
138                     This is the Rust module tree: structure, not a code dependency.<br>\
139                     Kept in the data but not drawn on the main map, and excluded from \
140                     fan-in / fan-out / HK / cycles."
141                        .into(),
142                ),
143            },
144        );
145        edge_kinds.insert(
146            "reexports".into(),
147            EdgeKindSpec {
148                flow: false,
149                label: Some("reexport".into()),
150                description: Some(
151                    "Re-export (`pub use foo::Item;`) — re-publishes another file's item as part of \
152                     this file's public API (the crate-root / prelude facade, e.g. `lib.rs` doing \
153                     `pub use access_scope::AccessScope;`).<br>\
154                     A facade, not a dependency: excluded from fan-in / fan-out / HK / cycles and \
155                     not drawn on the main map, like `contains`.<br>\
156                     A consumer's `use this_crate::Item` is attributed to the file that defines \
157                     `Item`, so re-export hubs (`lib.rs` / `mod.rs`) collect no false coupling — the \
158                     `pub use` is still recorded here so you can see what a file exposes."
159                        .into(),
160                ),
161            },
162        );
163        edge_kinds.insert(
164            "super".into(),
165            EdgeKindSpec {
166                flow: false,
167                label: Some("super".into()),
168                description: Some(
169                    "Namespace pull from an enclosing module — a glob `use` that reaches \
170                     *up* the module tree (`use super::*`, `use crate::<ancestor>::*`), \
171                     bringing the parent's items into the child's scope.<br>\
172                     Usually structural scope-sugar (a module split across files referring \
173                     back to itself). But if the child actually uses a parent item brought \
174                     in by the glob, it IS a real back-dependency — technically a cycle. \
175                     code-ranker can't tell the two apart without name resolution, so it \
176                     treats `super` as a **low-priority** cycle and leaves it non-flow: \
177                     deprioritized next to obvious cross-module cycles.<br>\
178                     Kept in the data but not drawn on the main map, and excluded from \
179                     fan-in / fan-out / HK / cycles — like `contains`."
180                        .into(),
181                ),
182            },
183        );
184
185        let aspec = AttributeSpec::new;
186
187        let mut node_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
188        node_attributes.insert("path".into(), aspec(ValueType::Str, "Path"));
189        node_attributes.insert("crate".into(), aspec(ValueType::Str, "Crate"));
190        node_attributes.insert("loc".into(), aspec(ValueType::Int, "Lines"));
191        node_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
192        node_attributes.insert("external".into(), aspec(ValueType::Bool, "External"));
193        node_attributes.insert("version".into(), aspec(ValueType::Str, "Version"));
194        node_attributes.insert("items".into(), aspec(ValueType::Int, "Items"));
195
196        let mut edge_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
197        edge_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
198
199        vec![Level {
200            name: "files".into(),
201            edge_kinds,
202            node_attributes,
203            edge_attributes,
204            attribute_groups: BTreeMap::new(),
205            node_kinds: default_node_kinds(),
206            cycle_kinds: default_cycle_kinds(),
207            // Cluster the diagram by the owning crate (compilation unit), not by
208            // the source folder. Falls back to `dir` if `crate` is ever absent.
209            grouping: Some(Grouping {
210                key: Some("crate".into()),
211                function: None,
212            }),
213        }]
214    }
215
216    fn thresholds(&self) -> BTreeMap<String, Thresholds> {
217        // Calibrated on 21 Rust crates (≥2K SLOC). ~50% of projects breach
218        // `info`, ~10% breach `warning`.
219        BTreeMap::from([
220            (
221                "hk".into(),
222                Thresholds {
223                    info: 150_000.0,
224                    warning: 10_000_000.0,
225                },
226            ),
227            (
228                "sloc".into(),
229                Thresholds {
230                    info: 800.0,
231                    warning: 3_000.0,
232                },
233            ),
234            (
235                "fan_out".into(),
236                Thresholds {
237                    info: 8.0,
238                    warning: 18.0,
239                },
240            ),
241            (
242                "items".into(),
243                Thresholds {
244                    info: 20.0,
245                    warning: 50.0,
246                },
247            ),
248        ])
249    }
250
251    fn presets(&self, mut defaults: Vec<Preset>, _input: &PluginInput) -> Vec<Preset> {
252        // Append Rust-only metric lenses to the generic catalog. Their doc links
253        // reuse the principles base directory derived from an existing default's
254        // `doc_url`, so they resolve to `principles/rust/<slug>.md` without
255        // duplicating the host/base constant that lives in the CLI crate.
256        let base_dir = defaults
257            .iter()
258            .find_map(|p| p.doc_url.as_deref())
259            .and_then(|u| u.rsplit_once('/').map(|(dir, _)| dir.to_string()));
260        for &(id, title, sort_metric, connections, slug, prompt) in RUST_METRIC_PRESETS {
261            defaults.push(Preset {
262                id: id.to_string(),
263                label: id.to_string(),
264                title: title.to_string(),
265                prompt: prompt.to_string(),
266                doc_url: base_dir.as_ref().map(|d| format!("{d}/{slug}.md")),
267                sort_metric: sort_metric.to_string(),
268                connections: connections.iter().map(|s| (*s).to_string()).collect(),
269            });
270        }
271        defaults
272    }
273
274    fn analyze(&self, workspace: &Path, _level: &str, input: &PluginInput) -> Result<Graph> {
275        let mut builder = GraphBuilder::new();
276        syn_analyze(workspace, input.ignore_tests, &mut builder)?;
277        let internal = builder.build();
278        Ok(collapse_to_files(internal))
279    }
280
281    fn is_test_path(&self, rel_path: &str) -> bool {
282        // Cargo's integration-test / bench targets live under top-level
283        // `tests/` and `benches/` dirs. (Inline `#[cfg(test)]` modules are a
284        // separate, attribute-based notion handled during the syn walk.)
285        matches!(rel_path.split('/').next(), Some("tests") | Some("benches"))
286    }
287
288    fn versions(&self, _workspace: &Path, _input: &PluginInput) -> Vec<(String, String)> {
289        version_string()
290            .map(|rv| vec![("rustc".to_string(), rv)])
291            .unwrap_or_default()
292    }
293}
294
295/// Syntactic stage: resolve the workspace via `cargo metadata` and build the
296/// internal crate + module/use graphs.
297fn syn_analyze(workspace: &Path, ignore_tests: bool, builder: &mut GraphBuilder) -> Result<()> {
298    let manifest = workspace.join("Cargo.toml");
299    // code-ranker is an offline tool: it never fetches from the network. See the
300    // comment in the original lib.rs for the research notes on --offline vs
301    // --no-deps vs full. Short version: --offline keeps external/cross-crate
302    // edges AND never goes to the network; the cache must be warm.
303    let metadata = log::timed("cargo metadata --offline", || {
304        MetadataCommand::new()
305            .manifest_path(&manifest)
306            .other_options(vec!["--offline".to_string()])
307            .exec()
308    })
309    .map_err(|err| offline_metadata_error(&manifest, err))?;
310
311    crate_graph::contribute(&metadata, builder);
312    module_graph::contribute(&metadata, ignore_tests, builder)?;
313    Ok(())
314}
315
316fn offline_metadata_error(manifest: &Path, err: cargo_metadata::Error) -> anyhow::Error {
317    anyhow::anyhow!(
318        "cargo metadata (offline) failed for {manifest}\n\n\
319         code-ranker is an offline tool — it never downloads dependencies. It reads \
320         the dependency graph from cargo's local cache, which must already be \
321         populated for this project.\n\n\
322         Warm the cache once (with network), then re-run code-ranker:\n    \
323         cargo metadata --manifest-path {manifest} >/dev/null\n\
324         (a prior `cargo build` / `cargo fetch` works too).\n\n\
325         In CI: run code-ranker on the same image/cache as your build or test jobs, \
326         where the cache is already warm.\n\n\
327         Underlying cargo error: {err}",
328        manifest = manifest.display(),
329    )
330}
331
332fn version_string() -> Option<String> {
333    which::which("rustc").ok()?;
334    let out = log::timed("rustc --version", || {
335        std::process::Command::new("rustc")
336            .arg("--version")
337            .output()
338    })
339    .ok()?;
340    if out.status.success() {
341        Some(
342            String::from_utf8_lossy(&out.stdout)
343                .split_whitespace()
344                .nth(1)
345                .unwrap_or("unknown")
346                .to_string(),
347        )
348    } else {
349        None
350    }
351}
352
353/// Collapse the internal module graph into a file-level `api::Graph`.
354///
355/// - Every `Module` node maps to a `file` node keyed by its ABSOLUTE source
356///   path (no `file:` prefix). Inline modules collapse into the file they live
357///   in. The file-backed module (line == None) is the source of truth for
358///   structural attrs.
359/// - External crate nodes become one `external` node each (id `ext:{name}`).
360/// - `use`/`pub use` edges are re-pointed to files; self-edges (within the same
361///   file) are dropped.
362/// - Crate→crate dependency edges (metadata-level) are dropped; precise
363///   file→file edges come from `use` statements.
364fn collapse_to_files(full: InternalGraph) -> Graph {
365    let mut id_map: HashMap<String, String> = HashMap::new();
366    let mut file_nodes: HashMap<String, Node> = HashMap::new();
367    let mut ext_nodes: HashMap<String, Node> = HashMap::new();
368
369    // Pre-pass: map each LOCAL crate node to its crate-root source file
370    // (lib.rs / main.rs) via the crate→root-module Contains edge. This lets
371    // cross-crate `use other_crate::…` become file→file edges.
372    let node_by_id: HashMap<&str, &internal::Node> =
373        full.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
374    let crate_ids: HashSet<&str> = full
375        .nodes
376        .iter()
377        .filter(|n| n.kind == NodeKind::Crate)
378        .map(|n| n.id.as_str())
379        .collect();
380    let mut crate_root_file: HashMap<String, String> = HashMap::new();
381    for e in &full.edges {
382        if e.kind != EdgeKind::Contains {
383            continue;
384        }
385        let (Some(from), Some(to)) = (
386            node_by_id.get(e.from.as_str()),
387            node_by_id.get(e.to.as_str()),
388        ) else {
389            continue;
390        };
391        if from.kind == NodeKind::Crate && to.kind == NodeKind::Module && !to.path.is_empty() {
392            let file = to.path.clone(); // ABSOLUTE path, no prefix
393            match crate_root_file.entry(e.from.clone()) {
394                Entry::Vacant(v) => {
395                    v.insert(file);
396                }
397                Entry::Occupied(mut o) if to.path.ends_with("lib.rs") => {
398                    *o.get_mut() = file;
399                }
400                Entry::Occupied(_) => {}
401            }
402        }
403    }
404
405    for node in &full.nodes {
406        match node.kind {
407            NodeKind::Module => {
408                let fid = node.path.clone(); // ABSOLUTE path
409                id_map.insert(node.id.clone(), fid.clone());
410                let name = Path::new(&node.path)
411                    .file_name()
412                    .map(|s| s.to_string_lossy().into_owned())
413                    .unwrap_or_else(|| node.name.clone());
414                match file_nodes.entry(fid.clone()) {
415                    Entry::Vacant(v) => {
416                        let mut attrs = BTreeMap::new();
417                        if let Some(vis) = &node.visibility {
418                            attrs.insert(
419                                "visibility".to_string(),
420                                AttrValue::Str(vis.as_str().to_string()),
421                            );
422                        }
423                        if let Some(loc) = node.loc {
424                            attrs.insert("loc".to_string(), AttrValue::Int(loc as i64));
425                        }
426                        if let Some(items) = node.item_count {
427                            attrs.insert("items".to_string(), AttrValue::Int(items as i64));
428                        }
429                        if let Some(krate) = &node.crate_label {
430                            attrs.insert("crate".to_string(), AttrValue::Str(krate.clone()));
431                        }
432                        v.insert(Node {
433                            id: fid,
434                            kind: "file".into(),
435                            name,
436                            parent: None,
437                            attrs,
438                        });
439                    }
440                    Entry::Occupied(mut o) => {
441                        // The file-backed module (line == None) is the source
442                        // of truth for the file's structural attrs.
443                        if node.line.is_none() {
444                            let n = o.get_mut();
445                            if let Some(vis) = &node.visibility {
446                                n.attrs.insert(
447                                    "visibility".to_string(),
448                                    AttrValue::Str(vis.as_str().to_string()),
449                                );
450                            }
451                            if let Some(loc) = node.loc {
452                                n.attrs
453                                    .insert("loc".to_string(), AttrValue::Int(loc as i64));
454                            }
455                            if let Some(items) = node.item_count {
456                                n.attrs
457                                    .insert("items".to_string(), AttrValue::Int(items as i64));
458                            }
459                            if let Some(krate) = &node.crate_label {
460                                n.attrs
461                                    .insert("crate".to_string(), AttrValue::Str(krate.clone()));
462                            }
463                        }
464                    }
465                }
466            }
467            NodeKind::Crate if node.external.unwrap_or(false) => {
468                let eid = format!("ext:{}", node.name);
469                id_map.insert(node.id.clone(), eid.clone());
470                // The on-disk directory of this dependency (parent of its
471                // Cargo.toml), e.g. `…/registry/src/…/serde-1.0.228`.
472                let lib_path = Path::new(&node.path)
473                    .parent()
474                    .map(|p| p.to_string_lossy().into_owned())
475                    .unwrap_or_default();
476                ext_nodes.entry(eid.clone()).or_insert_with(|| {
477                    let mut attrs = BTreeMap::new();
478                    attrs.insert("external".to_string(), AttrValue::Bool(true));
479                    if let Some(v) = &node.version {
480                        attrs.insert("version".to_string(), AttrValue::Str(v.clone()));
481                    }
482                    if !lib_path.is_empty() {
483                        attrs.insert("path".to_string(), AttrValue::Str(lib_path));
484                    }
485                    Node {
486                        id: eid,
487                        kind: "external".into(),
488                        name: node.name.clone(),
489                        parent: None,
490                        attrs,
491                    }
492                });
493            }
494            // A local workspace crate maps to its root file.
495            NodeKind::Crate => {
496                if let Some(file) = crate_root_file.get(&node.id) {
497                    id_map.insert(node.id.clone(), file.clone());
498                }
499            }
500        }
501    }
502
503    // Re-point edges to file/external granularity.
504    let mut seen: HashSet<(String, String, String)> = HashSet::new();
505    let mut edges: Vec<Edge> = Vec::new();
506    for e in &full.edges {
507        // Drop crate→crate dependency edges; precise file→file edges come from
508        // `use` statements.
509        if crate_ids.contains(e.from.as_str()) && crate_ids.contains(e.to.as_str()) {
510            continue;
511        }
512        let (Some(from), Some(to)) = (id_map.get(&e.from), id_map.get(&e.to)) else {
513            continue;
514        };
515        if from == to {
516            continue; // within the same file — not a connection
517        }
518        let kind_str = match e.kind {
519            EdgeKind::Contains => "contains",
520            EdgeKind::Uses => "uses",
521            EdgeKind::Reexports => "reexports",
522            EdgeKind::Super => "super",
523        };
524        if !seen.insert((from.clone(), to.clone(), kind_str.to_string())) {
525            continue;
526        }
527        let mut attrs = BTreeMap::new();
528        if e.kind == EdgeKind::Reexports
529            && let Some(vis) = &e.visibility
530        {
531            attrs.insert(
532                "visibility".to_string(),
533                AttrValue::Str(vis.as_str().to_string()),
534            );
535        }
536        edges.push(Edge {
537            source: from.clone(),
538            target: to.clone(),
539            kind: kind_str.to_string(),
540            line: e.line,
541            attrs,
542        });
543    }
544
545    // Assemble nodes: all files + only the libraries actually referenced.
546    let referenced_ext: HashSet<&str> = edges
547        .iter()
548        .filter(|e| ext_nodes.contains_key(&e.target))
549        .map(|e| e.target.as_str())
550        .collect();
551    let mut nodes: Vec<Node> = file_nodes.into_values().collect();
552    nodes.extend(
553        ext_nodes
554            .into_iter()
555            .filter(|(id, _)| referenced_ext.contains(id.as_str()))
556            .map(|(_, n)| n),
557    );
558
559    // Deterministic output ordering.
560    nodes.sort_by(|a, b| a.id.cmp(&b.id));
561    edges.sort_by(|a, b| {
562        a.source
563            .cmp(&b.source)
564            .then(a.target.cmp(&b.target))
565            .then(a.kind.cmp(&b.kind))
566    });
567
568    Graph { nodes, edges }
569}