1use anyhow::Result;
2use code_ranker_plugin_api::{
3 attrs::{AttrValue, ValueType},
4 default_cycle_kinds, default_node_kinds,
5 edge::Edge,
6 graph::Graph,
7 level::{AttributeSpec, EdgeKindSpec, Grouping, Level, Thresholds},
8 log,
9 node::Node,
10 plugin::{LanguagePlugin, PluginInput, Preset},
11};
12use std::collections::hash_map::Entry;
13use std::collections::{BTreeMap, HashMap, HashSet};
14use std::path::Path;
15
16use cargo_metadata::MetadataCommand;
17
18mod crate_graph;
19mod ids;
20mod internal;
21mod module_graph;
22
23use internal::{EdgeKind, GraphBuilder, InternalGraph, NodeKind};
24
/// Language plugin for Rust (Cargo) workspaces.
///
/// A stateless unit struct: all behaviour lives in its `LanguagePlugin` implementation.
pub struct RustPlugin;
26
/// One row of `RUST_METRIC_PRESETS`.
///
/// The fields are positional and are destructured by `RustPlugin::presets` as
/// `(id, title, sort_metric, connections, slug, prompt)`:
///
/// 1. `id` — becomes both `Preset::id` and `Preset::label`.
/// 2. `title` — becomes `Preset::title`.
/// 3. `sort_metric` — becomes `Preset::sort_metric`.
/// 4. `connections` — copied element by element into `Preset::connections`.
/// 5. `slug` — file stem of the preset's `.md` page, joined onto the docs base directory.
/// 6. `prompt` — becomes `Preset::prompt`.
type MetricPreset = (
    &'static str,
    &'static str,
    &'static str,
    &'static [&'static str],
    &'static str,
    &'static str,
);
40
/// Rust-specific metric presets that `RustPlugin::presets` appends to the defaults it is
/// handed. Each tuple follows the `MetricPreset` layout:
/// `(id, title, sort_metric, connections, slug, prompt)`.
const RUST_METRIC_PRESETS: &[MetricPreset] = &[
    // Henry-Kafura coupling, sorted by the `hk` metric.
    (
        "HK",
        "HK — Henry-Kafura Coupling",
        "hk",
        &["in", "out"],
        "henry-kafura-coupling",
        "These modules carry heavy Henry-Kafura coupling — HK = sloc × (fan_in × fan_out)²,\n\
         where sloc is the module's source lines of code (real code lines, excluding blanks\n\
         and comment-only lines), fan_in is how many modules depend on it, and fan_out is how\n\
         many it depends on.\n\
         A high score is a large module sitting on a busy crossroads of incoming and outgoing\n\
         dependencies, so any change here ripples widely.\n\n\
         For each module below, lower the factor that dominates its HK: shrink the module by\n\
         extracting cohesive pieces, or cut fan-in/fan-out by narrowing its public surface and\n\
         depending on fewer collaborators (introduce an abstraction, move a responsibility).\n\
         Keep existing API contracts intact.",
    ),
    // Module size, sorted by the `sloc` metric; no connections listed.
    (
        "SLOC",
        "SLOC — Module Size",
        "sloc",
        &[],
        "module-size",
        "These are the largest modules by source lines of code. Size alone is not a defect, but\n\
         oversized files usually bundle several responsibilities and are hard to read, test and\n\
         review.\n\n\
         For each module below, identify the distinct responsibilities it holds and propose how\n\
         to split it into smaller, cohesive modules — each with a single clear purpose — without\n\
         changing external behaviour.",
    ),
    // Afferent coupling, sorted by the `fan_in` metric.
    (
        "FANIN",
        "Fan-in — Afferent Coupling",
        "fan_in",
        &["in"],
        "fan-in-afferent-coupling",
        "These modules have high fan-in: many other modules depend on them. They are\n\
         load-bearing — a change here forces changes (or re-review) across every dependant, and\n\
         a bug here is widely felt.\n\n\
         For each module below, confirm its public surface is a stable, minimal contract. Narrow\n\
         the API to what callers actually need, split it if different callers use disjoint parts\n\
         (see Interface Segregation), and stabilise the abstractions the rest of the codebase\n\
         leans on.",
    ),
    // Efferent coupling, sorted by the `fan_out` metric.
    (
        "FANOUT",
        "Fan-out — Efferent Coupling",
        "fan_out",
        &["out"],
        "fan-out-efferent-coupling",
        "These modules have high fan-out: they depend on many other modules. High efferent\n\
         coupling makes a module fragile (it breaks when any dependency changes) and hard to\n\
         test or reuse in isolation.\n\n\
         For each module below, reduce its direct dependencies: depend on abstractions rather\n\
         than concretes (see Dependency Inversion), collapse several fine-grained collaborators\n\
         behind one focused interface, and move logic that pulls in unrelated dependencies into\n\
         a more appropriate module.",
    ),
];
101
102impl LanguagePlugin for RustPlugin {
103 fn name(&self) -> &str {
104 "rust"
105 }
106
107 fn detect(&self, workspace: &Path, _input: &PluginInput) -> bool {
108 workspace.join("Cargo.toml").exists()
109 }
110
111 fn levels(&self) -> Vec<Level> {
112 let mut edge_kinds: BTreeMap<String, EdgeKindSpec> = BTreeMap::new();
113 edge_kinds.insert(
114 "uses".into(),
115 EdgeKindSpec {
116 flow: true,
117 label: Some("uses".into()),
118 description: Some(
119 "Code dependency — this file references an item the target file defines.<br>\
120 Captured from `use path::Item;`, a qualified path (`crate::a::Item`, \
121 `other_crate::Item`), or a derive (`#[derive(serde::Serialize)]`).<br>\
122 The path resolves to the file that defines the item (following `pub use` \
123 re-exports), so the edge points at the definition, not a re-export hub.<br>\
124 This is the real dependency: it counts toward fan-in / fan-out, \
125 Henry-Kafura coupling and cycles."
126 .into(),
127 ),
128 },
129 );
130 edge_kinds.insert(
131 "contains".into(),
132 EdgeKindSpec {
133 flow: false,
134 label: Some("contains".into()),
135 description: Some(
136 "Module ownership — the parent declares the child module \
137 (`mod foo;` / `pub mod foo;`), so `foo.rs` (or `foo/mod.rs`) belongs to it.<br>\
138 This is the Rust module tree: structure, not a code dependency.<br>\
139 Kept in the data but not drawn on the main map, and excluded from \
140 fan-in / fan-out / HK / cycles."
141 .into(),
142 ),
143 },
144 );
145 edge_kinds.insert(
146 "reexports".into(),
147 EdgeKindSpec {
148 flow: false,
149 label: Some("reexport".into()),
150 description: Some(
151 "Re-export (`pub use foo::Item;`) — re-publishes another file's item as part of \
152 this file's public API (the crate-root / prelude facade, e.g. `lib.rs` doing \
153 `pub use access_scope::AccessScope;`).<br>\
154 A facade, not a dependency: excluded from fan-in / fan-out / HK / cycles and \
155 not drawn on the main map, like `contains`.<br>\
156 A consumer's `use this_crate::Item` is attributed to the file that defines \
157 `Item`, so re-export hubs (`lib.rs` / `mod.rs`) collect no false coupling — the \
158 `pub use` is still recorded here so you can see what a file exposes."
159 .into(),
160 ),
161 },
162 );
163 edge_kinds.insert(
164 "super".into(),
165 EdgeKindSpec {
166 flow: false,
167 label: Some("super".into()),
168 description: Some(
169 "Namespace pull from an enclosing module — a glob `use` that reaches \
170 *up* the module tree (`use super::*`, `use crate::<ancestor>::*`), \
171 bringing the parent's items into the child's scope.<br>\
172 Usually structural scope-sugar (a module split across files referring \
173 back to itself). But if the child actually uses a parent item brought \
174 in by the glob, it IS a real back-dependency — technically a cycle. \
175 code-ranker can't tell the two apart without name resolution, so it \
176 treats `super` as a **low-priority** cycle and leaves it non-flow: \
177 deprioritized next to obvious cross-module cycles.<br>\
178 Kept in the data but not drawn on the main map, and excluded from \
179 fan-in / fan-out / HK / cycles — like `contains`."
180 .into(),
181 ),
182 },
183 );
184
185 let aspec = AttributeSpec::new;
186
187 let mut node_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
188 node_attributes.insert("path".into(), aspec(ValueType::Str, "Path"));
189 node_attributes.insert("crate".into(), aspec(ValueType::Str, "Crate"));
190 node_attributes.insert("loc".into(), aspec(ValueType::Int, "Lines"));
191 node_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
192 node_attributes.insert("external".into(), aspec(ValueType::Bool, "External"));
193 node_attributes.insert("version".into(), aspec(ValueType::Str, "Version"));
194 node_attributes.insert("items".into(), aspec(ValueType::Int, "Items"));
195
196 let mut edge_attributes: BTreeMap<String, AttributeSpec> = BTreeMap::new();
197 edge_attributes.insert("visibility".into(), aspec(ValueType::Str, "Visibility"));
198
199 vec![Level {
200 name: "files".into(),
201 edge_kinds,
202 node_attributes,
203 edge_attributes,
204 attribute_groups: BTreeMap::new(),
205 node_kinds: default_node_kinds(),
206 cycle_kinds: default_cycle_kinds(),
207 grouping: Some(Grouping {
210 key: Some("crate".into()),
211 function: None,
212 }),
213 }]
214 }
215
216 fn thresholds(&self) -> BTreeMap<String, Thresholds> {
217 BTreeMap::from([
220 (
221 "hk".into(),
222 Thresholds {
223 info: 150_000.0,
224 warning: 10_000_000.0,
225 },
226 ),
227 (
228 "sloc".into(),
229 Thresholds {
230 info: 800.0,
231 warning: 3_000.0,
232 },
233 ),
234 (
235 "fan_out".into(),
236 Thresholds {
237 info: 8.0,
238 warning: 18.0,
239 },
240 ),
241 (
242 "items".into(),
243 Thresholds {
244 info: 20.0,
245 warning: 50.0,
246 },
247 ),
248 ])
249 }
250
251 fn presets(&self, mut defaults: Vec<Preset>, _input: &PluginInput) -> Vec<Preset> {
252 let base_dir = defaults
257 .iter()
258 .find_map(|p| p.doc_url.as_deref())
259 .and_then(|u| u.rsplit_once('/').map(|(dir, _)| dir.to_string()));
260 for &(id, title, sort_metric, connections, slug, prompt) in RUST_METRIC_PRESETS {
261 defaults.push(Preset {
262 id: id.to_string(),
263 label: id.to_string(),
264 title: title.to_string(),
265 prompt: prompt.to_string(),
266 doc_url: base_dir.as_ref().map(|d| format!("{d}/{slug}.md")),
267 sort_metric: sort_metric.to_string(),
268 connections: connections.iter().map(|s| (*s).to_string()).collect(),
269 });
270 }
271 defaults
272 }
273
274 fn analyze(&self, workspace: &Path, _level: &str, input: &PluginInput) -> Result<Graph> {
275 let mut builder = GraphBuilder::new();
276 syn_analyze(workspace, input.ignore_tests, &mut builder)?;
277 let internal = builder.build();
278 Ok(collapse_to_files(internal))
279 }
280
281 fn is_test_path(&self, rel_path: &str) -> bool {
282 matches!(rel_path.split('/').next(), Some("tests") | Some("benches"))
286 }
287
288 fn versions(&self, _workspace: &Path, _input: &PluginInput) -> Vec<(String, String)> {
289 version_string()
290 .map(|rv| vec![("rustc".to_string(), rv)])
291 .unwrap_or_default()
292 }
293}
294
295fn syn_analyze(workspace: &Path, ignore_tests: bool, builder: &mut GraphBuilder) -> Result<()> {
298 let manifest = workspace.join("Cargo.toml");
299 let metadata = log::timed("cargo metadata --offline", || {
304 MetadataCommand::new()
305 .manifest_path(&manifest)
306 .other_options(vec!["--offline".to_string()])
307 .exec()
308 })
309 .map_err(|err| offline_metadata_error(&manifest, err))?;
310
311 crate_graph::contribute(&metadata, builder);
312 module_graph::contribute(&metadata, ignore_tests, builder)?;
313 Ok(())
314}
315
316fn offline_metadata_error(manifest: &Path, err: cargo_metadata::Error) -> anyhow::Error {
317 anyhow::anyhow!(
318 "cargo metadata (offline) failed for {manifest}\n\n\
319 code-ranker is an offline tool — it never downloads dependencies. It reads \
320 the dependency graph from cargo's local cache, which must already be \
321 populated for this project.\n\n\
322 Warm the cache once (with network), then re-run code-ranker:\n \
323 cargo metadata --manifest-path {manifest} >/dev/null\n\
324 (a prior `cargo build` / `cargo fetch` works too).\n\n\
325 In CI: run code-ranker on the same image/cache as your build or test jobs, \
326 where the cache is already warm.\n\n\
327 Underlying cargo error: {err}",
328 manifest = manifest.display(),
329 )
330}
331
332fn version_string() -> Option<String> {
333 which::which("rustc").ok()?;
334 let out = log::timed("rustc --version", || {
335 std::process::Command::new("rustc")
336 .arg("--version")
337 .output()
338 })
339 .ok()?;
340 if out.status.success() {
341 Some(
342 String::from_utf8_lossy(&out.stdout)
343 .split_whitespace()
344 .nth(1)
345 .unwrap_or("unknown")
346 .to_string(),
347 )
348 } else {
349 None
350 }
351}
352
353fn collapse_to_files(full: InternalGraph) -> Graph {
365 let mut id_map: HashMap<String, String> = HashMap::new();
366 let mut file_nodes: HashMap<String, Node> = HashMap::new();
367 let mut ext_nodes: HashMap<String, Node> = HashMap::new();
368
369 let node_by_id: HashMap<&str, &internal::Node> =
373 full.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
374 let crate_ids: HashSet<&str> = full
375 .nodes
376 .iter()
377 .filter(|n| n.kind == NodeKind::Crate)
378 .map(|n| n.id.as_str())
379 .collect();
380 let mut crate_root_file: HashMap<String, String> = HashMap::new();
381 for e in &full.edges {
382 if e.kind != EdgeKind::Contains {
383 continue;
384 }
385 let (Some(from), Some(to)) = (
386 node_by_id.get(e.from.as_str()),
387 node_by_id.get(e.to.as_str()),
388 ) else {
389 continue;
390 };
391 if from.kind == NodeKind::Crate && to.kind == NodeKind::Module && !to.path.is_empty() {
392 let file = to.path.clone(); match crate_root_file.entry(e.from.clone()) {
394 Entry::Vacant(v) => {
395 v.insert(file);
396 }
397 Entry::Occupied(mut o) if to.path.ends_with("lib.rs") => {
398 *o.get_mut() = file;
399 }
400 Entry::Occupied(_) => {}
401 }
402 }
403 }
404
405 for node in &full.nodes {
406 match node.kind {
407 NodeKind::Module => {
408 let fid = node.path.clone(); id_map.insert(node.id.clone(), fid.clone());
410 let name = Path::new(&node.path)
411 .file_name()
412 .map(|s| s.to_string_lossy().into_owned())
413 .unwrap_or_else(|| node.name.clone());
414 match file_nodes.entry(fid.clone()) {
415 Entry::Vacant(v) => {
416 let mut attrs = BTreeMap::new();
417 if let Some(vis) = &node.visibility {
418 attrs.insert(
419 "visibility".to_string(),
420 AttrValue::Str(vis.as_str().to_string()),
421 );
422 }
423 if let Some(loc) = node.loc {
424 attrs.insert("loc".to_string(), AttrValue::Int(loc as i64));
425 }
426 if let Some(items) = node.item_count {
427 attrs.insert("items".to_string(), AttrValue::Int(items as i64));
428 }
429 if let Some(krate) = &node.crate_label {
430 attrs.insert("crate".to_string(), AttrValue::Str(krate.clone()));
431 }
432 v.insert(Node {
433 id: fid,
434 kind: "file".into(),
435 name,
436 parent: None,
437 attrs,
438 });
439 }
440 Entry::Occupied(mut o) => {
441 if node.line.is_none() {
444 let n = o.get_mut();
445 if let Some(vis) = &node.visibility {
446 n.attrs.insert(
447 "visibility".to_string(),
448 AttrValue::Str(vis.as_str().to_string()),
449 );
450 }
451 if let Some(loc) = node.loc {
452 n.attrs
453 .insert("loc".to_string(), AttrValue::Int(loc as i64));
454 }
455 if let Some(items) = node.item_count {
456 n.attrs
457 .insert("items".to_string(), AttrValue::Int(items as i64));
458 }
459 if let Some(krate) = &node.crate_label {
460 n.attrs
461 .insert("crate".to_string(), AttrValue::Str(krate.clone()));
462 }
463 }
464 }
465 }
466 }
467 NodeKind::Crate if node.external.unwrap_or(false) => {
468 let eid = format!("ext:{}", node.name);
469 id_map.insert(node.id.clone(), eid.clone());
470 let lib_path = Path::new(&node.path)
473 .parent()
474 .map(|p| p.to_string_lossy().into_owned())
475 .unwrap_or_default();
476 ext_nodes.entry(eid.clone()).or_insert_with(|| {
477 let mut attrs = BTreeMap::new();
478 attrs.insert("external".to_string(), AttrValue::Bool(true));
479 if let Some(v) = &node.version {
480 attrs.insert("version".to_string(), AttrValue::Str(v.clone()));
481 }
482 if !lib_path.is_empty() {
483 attrs.insert("path".to_string(), AttrValue::Str(lib_path));
484 }
485 Node {
486 id: eid,
487 kind: "external".into(),
488 name: node.name.clone(),
489 parent: None,
490 attrs,
491 }
492 });
493 }
494 NodeKind::Crate => {
496 if let Some(file) = crate_root_file.get(&node.id) {
497 id_map.insert(node.id.clone(), file.clone());
498 }
499 }
500 }
501 }
502
503 let mut seen: HashSet<(String, String, String)> = HashSet::new();
505 let mut edges: Vec<Edge> = Vec::new();
506 for e in &full.edges {
507 if crate_ids.contains(e.from.as_str()) && crate_ids.contains(e.to.as_str()) {
510 continue;
511 }
512 let (Some(from), Some(to)) = (id_map.get(&e.from), id_map.get(&e.to)) else {
513 continue;
514 };
515 if from == to {
516 continue; }
518 let kind_str = match e.kind {
519 EdgeKind::Contains => "contains",
520 EdgeKind::Uses => "uses",
521 EdgeKind::Reexports => "reexports",
522 EdgeKind::Super => "super",
523 };
524 if !seen.insert((from.clone(), to.clone(), kind_str.to_string())) {
525 continue;
526 }
527 let mut attrs = BTreeMap::new();
528 if e.kind == EdgeKind::Reexports
529 && let Some(vis) = &e.visibility
530 {
531 attrs.insert(
532 "visibility".to_string(),
533 AttrValue::Str(vis.as_str().to_string()),
534 );
535 }
536 edges.push(Edge {
537 source: from.clone(),
538 target: to.clone(),
539 kind: kind_str.to_string(),
540 line: e.line,
541 attrs,
542 });
543 }
544
545 let referenced_ext: HashSet<&str> = edges
547 .iter()
548 .filter(|e| ext_nodes.contains_key(&e.target))
549 .map(|e| e.target.as_str())
550 .collect();
551 let mut nodes: Vec<Node> = file_nodes.into_values().collect();
552 nodes.extend(
553 ext_nodes
554 .into_iter()
555 .filter(|(id, _)| referenced_ext.contains(id.as_str()))
556 .map(|(_, n)| n),
557 );
558
559 nodes.sort_by(|a, b| a.id.cmp(&b.id));
561 edges.sort_by(|a, b| {
562 a.source
563 .cmp(&b.source)
564 .then(a.target.cmp(&b.target))
565 .then(a.kind.cmp(&b.kind))
566 });
567
568 Graph { nodes, edges }
569}