Skip to main content

code_ranker_graph/
cycles.rs

1//! Cycle detection over information-flow edges (Kosaraju SCC). Edges count iff
2//! their kind is in `flow_kinds` (derived from `EdgeKindSpec.flow`); structural
3//! kinds like `contains` are excluded, so a `mod foo;` parent/child pair is not
4//! flagged as a false cycle.
5
6use crate::level_graph::CycleGroup;
7use code_ranker_plugin_api::{attrs::AttrValue, graph::Graph};
8use std::collections::HashMap;
9use std::collections::HashSet;
10
11/// Detect SCCs (≥ 2 members) over flow edges, write a `cycle` attribute on each
12/// participating node (`"mutual"` for a 2-node SCC, `"chain"` for 3+), and
13/// return the cycle groups.
14pub fn annotate_cycles(graph: &mut Graph, flow_kinds: &HashSet<String>) -> Vec<CycleGroup> {
15    let n = graph.nodes.len();
16    if n == 0 {
17        return Vec::new();
18    }
19
20    let id_to_idx: HashMap<&str, usize> = graph
21        .nodes
22        .iter()
23        .enumerate()
24        .map(|(i, node)| (node.id.as_str(), i))
25        .collect();
26
27    let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
28    for edge in &graph.edges {
29        if !flow_kinds.contains(&edge.kind) {
30            continue;
31        }
32        if let (Some(&fi), Some(&ti)) = (
33            id_to_idx.get(edge.source.as_str()),
34            id_to_idx.get(edge.target.as_str()),
35        ) && fi != ti
36        {
37            adj[fi].push(ti);
38        }
39    }
40
41    let sccs = kosaraju_sccs(n, &adj);
42
43    let mut node_kind: Vec<Option<&'static str>> = vec![None; n];
44    let mut groups: Vec<CycleGroup> = Vec::new();
45    for scc in &sccs {
46        if scc.len() < 2 {
47            continue;
48        }
49        // Rust forbids circular dependencies between crates, so an SCC whose
50        // members span more than one crate cannot be a real cycle — it is an
51        // artifact of imprecise path resolution. Drop it.
52        if spans_multiple_crates(scc, graph) {
53            continue;
54        }
55        let kind = classify_scc(scc);
56        for &idx in scc {
57            node_kind[idx] = Some(kind);
58        }
59        groups.push(CycleGroup {
60            kind: kind.to_string(),
61            nodes: scc.iter().map(|&i| graph.nodes[i].id.clone()).collect(),
62        });
63    }
64
65    for (i, node) in graph.nodes.iter_mut().enumerate() {
66        match node_kind[i] {
67            Some(k) => {
68                node.attrs
69                    .insert("cycle".to_string(), AttrValue::Str(k.to_string()));
70            }
71            None => {
72                node.attrs.remove("cycle");
73            }
74        }
75    }
76    groups
77}
78
79/// The crate a node belongs to. Prefers the plugin-provided `crate` attribute
80/// (the precise per-target compilation unit from `cargo metadata`); falls back
81/// to deriving it from the id as everything before the last `/src/` segment for
82/// nodes/plugins that don't set it. Returns `None` when neither is available, so
83/// callers can stay conservative.
84fn crate_of(node: &code_ranker_plugin_api::node::Node) -> Option<&str> {
85    if let Some(AttrValue::Str(c)) = node.attrs.get("crate") {
86        return Some(c.as_str());
87    }
88    node.id.rfind("/src/").map(|i| &node.id[..i])
89}
90
91/// True only when every member has a determinable crate and at least two crates
92/// are present. Unknown-crate nodes make this `false` (conservative: keep the
93/// cycle) so non-crate id schemes (tests, other plugins) are never mis-dropped.
94fn spans_multiple_crates(scc: &[usize], graph: &Graph) -> bool {
95    let mut crates = Vec::with_capacity(scc.len());
96    for &i in scc {
97        match crate_of(&graph.nodes[i]) {
98            Some(c) => crates.push(c),
99            None => return false,
100        }
101    }
102    crates.iter().any(|c| *c != crates[0])
103}
104
105fn classify_scc(scc: &[usize]) -> &'static str {
106    if scc.len() == 2 { "mutual" } else { "chain" }
107}
108
109// ── Kosaraju's SCC (iterative, O(V+E)) ─────────────────────────────────────
110
111fn kosaraju_sccs(n: usize, adj: &[Vec<usize>]) -> Vec<Vec<usize>> {
112    let mut visited = vec![false; n];
113    let mut finish_order = Vec::with_capacity(n);
114    for i in 0..n {
115        if !visited[i] {
116            dfs_finish(i, adj, &mut visited, &mut finish_order);
117        }
118    }
119    let mut radj: Vec<Vec<usize>> = vec![Vec::new(); n];
120    for (u, neighbors) in adj.iter().enumerate() {
121        for &v in neighbors {
122            radj[v].push(u);
123        }
124    }
125    let mut visited2 = vec![false; n];
126    let mut sccs: Vec<Vec<usize>> = Vec::new();
127    for &start in finish_order.iter().rev() {
128        if !visited2[start] {
129            let mut scc = Vec::new();
130            dfs_collect(start, &radj, &mut visited2, &mut scc);
131            sccs.push(scc);
132        }
133    }
134    sccs
135}
136
137fn dfs_finish(start: usize, adj: &[Vec<usize>], visited: &mut [bool], order: &mut Vec<usize>) {
138    let mut stack: Vec<(usize, usize)> = vec![(start, 0)];
139    visited[start] = true;
140    while let Some((u, ni)) = stack.last_mut() {
141        let u = *u;
142        if *ni < adj[u].len() {
143            let v = adj[u][*ni];
144            *ni += 1;
145            if !visited[v] {
146                visited[v] = true;
147                stack.push((v, 0));
148            }
149        } else {
150            stack.pop();
151            order.push(u);
152        }
153    }
154}
155
156fn dfs_collect(start: usize, adj: &[Vec<usize>], visited: &mut [bool], scc: &mut Vec<usize>) {
157    let mut stack = vec![start];
158    visited[start] = true;
159    while let Some(u) = stack.pop() {
160        scc.push(u);
161        for &v in &adj[u] {
162            if !visited[v] {
163                visited[v] = true;
164                stack.push(v);
165            }
166        }
167    }
168}
169
170#[cfg(test)]
171mod tests {
172    use super::*;
173    use code_ranker_plugin_api::{edge::Edge, node::Node};
174
175    fn node(id: &str, name: &str) -> Node {
176        Node {
177            id: id.into(),
178            kind: "file".into(),
179            name: name.into(),
180            parent: None,
181            attrs: Default::default(),
182        }
183    }
184    fn edge(from: &str, to: &str, kind: &str) -> Edge {
185        Edge {
186            source: from.into(),
187            target: to.into(),
188            kind: kind.into(),
189            line: None,
190            attrs: Default::default(),
191        }
192    }
193    fn flow() -> HashSet<String> {
194        HashSet::from(["uses".to_string(), "reexports".to_string()])
195    }
196    fn node_crate(id: &str, name: &str, krate: &str) -> Node {
197        let mut n = node(id, name);
198        n.attrs.insert("crate".into(), AttrValue::Str(krate.into()));
199        n
200    }
201
202    #[test]
203    fn cross_crate_via_attr_is_dropped() {
204        // deno-style ids (no `/src/`): crate identity comes from the attribute.
205        let mut g = Graph {
206            nodes: vec![
207                node_crate("{t}/cli/a.rs", "a", "deno"),
208                node_crate("{t}/runtime/b.rs", "b", "deno_runtime"),
209            ],
210            edges: vec![
211                edge("{t}/cli/a.rs", "{t}/runtime/b.rs", "uses"),
212                edge("{t}/runtime/b.rs", "{t}/cli/a.rs", "uses"),
213            ],
214        };
215        assert!(annotate_cycles(&mut g, &flow()).is_empty());
216    }
217
218    #[test]
219    fn same_crate_via_attr_is_kept() {
220        // Same crate attr despite different subdirs and no `/src/` in the ids.
221        let mut g = Graph {
222            nodes: vec![
223                node_crate("{t}/cli/a.rs", "a", "deno"),
224                node_crate("{t}/cli/sub/b.rs", "b", "deno"),
225            ],
226            edges: vec![
227                edge("{t}/cli/a.rs", "{t}/cli/sub/b.rs", "uses"),
228                edge("{t}/cli/sub/b.rs", "{t}/cli/a.rs", "uses"),
229            ],
230        };
231        assert_eq!(annotate_cycles(&mut g, &flow()).len(), 1);
232    }
233
234    #[test]
235    fn two_node_cycle_is_mutual() {
236        let mut g = Graph {
237            nodes: vec![node("a", "a"), node("b", "b")],
238            edges: vec![edge("a", "b", "uses"), edge("b", "a", "uses")],
239        };
240        let groups = annotate_cycles(&mut g, &flow());
241        assert_eq!(groups.len(), 1);
242        assert_eq!(groups[0].kind, "mutual");
243        assert_eq!(
244            g.nodes[0].attrs.get("cycle"),
245            Some(&AttrValue::Str("mutual".into()))
246        );
247    }
248
249    #[test]
250    fn contains_edge_excluded_from_cycles() {
251        let mut g = Graph {
252            nodes: vec![node("m", "m"), node("c", "c")],
253            edges: vec![edge("m", "c", "contains"), edge("c", "m", "uses")],
254        };
255        let groups = annotate_cycles(&mut g, &flow());
256        assert!(groups.is_empty(), "contains is structural, not flow");
257    }
258
259    #[test]
260    fn cross_crate_scc_is_dropped() {
261        // A 2-cycle whose files live in different crates is impossible in Rust.
262        let mut g = Graph {
263            nodes: vec![
264                node("{t}/crateA/src/a.rs", "a"),
265                node("{t}/crateB/src/b.rs", "b"),
266            ],
267            edges: vec![
268                edge("{t}/crateA/src/a.rs", "{t}/crateB/src/b.rs", "uses"),
269                edge("{t}/crateB/src/b.rs", "{t}/crateA/src/a.rs", "uses"),
270            ],
271        };
272        let groups = annotate_cycles(&mut g, &flow());
273        assert!(groups.is_empty(), "cross-crate cycle must be dropped");
274    }
275
276    #[test]
277    fn intra_crate_scc_is_kept() {
278        let mut g = Graph {
279            nodes: vec![
280                node("{t}/crateA/src/a.rs", "a"),
281                node("{t}/crateA/src/b.rs", "b"),
282            ],
283            edges: vec![
284                edge("{t}/crateA/src/a.rs", "{t}/crateA/src/b.rs", "uses"),
285                edge("{t}/crateA/src/b.rs", "{t}/crateA/src/a.rs", "uses"),
286            ],
287        };
288        let groups = annotate_cycles(&mut g, &flow());
289        assert_eq!(groups.len(), 1);
290        assert_eq!(groups[0].kind, "mutual");
291    }
292
293    #[test]
294    fn three_node_scc_is_chain() {
295        let mut g = Graph {
296            nodes: vec![node("a", "a"), node("b", "b"), node("c", "c")],
297            edges: vec![
298                edge("a", "b", "uses"),
299                edge("b", "c", "uses"),
300                edge("c", "a", "uses"),
301            ],
302        };
303        let groups = annotate_cycles(&mut g, &flow());
304        assert_eq!(groups.len(), 1);
305        assert_eq!(groups[0].kind, "chain");
306    }
307
308    #[test]
309    fn test_named_node_no_longer_special_cased() {
310        // A test-named file in an SCC is classified purely by size now (`mutual`),
311        // not the removed `test_embed` kind.
312        let mut g = Graph {
313            nodes: vec![node("a", "a"), node("b", "foo_tests")],
314            edges: vec![edge("a", "b", "uses"), edge("b", "a", "uses")],
315        };
316        let groups = annotate_cycles(&mut g, &flow());
317        assert_eq!(groups[0].kind, "mutual");
318    }
319}