Skip to main content

bones_triage/graph/
cycles.rs

1//! Incremental and full-cycle detection helpers for dependency graphs.
2//!
3//! # Edge Direction
4//!
5//! The triage graph uses edge direction `blocker → blocked`.
6//! Adding a new edge `from → to` would create a cycle if `to` is already
7//! reachable from `from` through existing edges.
8
9#![allow(clippy::module_name_repetitions)]
10
11use std::collections::{HashMap, HashSet, VecDeque};
12
13use petgraph::algo::tarjan_scc;
14use petgraph::graph::{DiGraph, NodeIndex};
15use petgraph::visit::EdgeRef;
16
17/// Check whether adding `from -> to` would introduce a dependency cycle.
18///
19/// Returns a concrete cycle path when a cycle would be created, formatted as:
20/// `from -> to -> ... -> from`.
21///
22/// If the edge already exists, this returns `None` (no *new* cycle is created).
23#[must_use]
24pub fn would_create_cycle(
25    graph: &DiGraph<String, ()>,
26    from: NodeIndex,
27    to: NodeIndex,
28) -> Option<Vec<String>> {
29    if from == to {
30        let id = node_id(graph, from);
31        return Some(vec![id.clone(), id]);
32    }
33
34    if graph.contains_edge(from, to) {
35        return None;
36    }
37
38    // BFS from `to` looking for `from`.
39    // If reachable, then adding `from -> to` closes a cycle.
40    let mut queue: VecDeque<NodeIndex> = VecDeque::from([to]);
41    let mut visited: HashSet<NodeIndex> = HashSet::from([to]);
42    let mut parent: HashMap<NodeIndex, NodeIndex> = HashMap::new();
43
44    while let Some(current) = queue.pop_front() {
45        if current == from {
46            return Some(reconstruct_cycle_path(graph, from, to, &parent));
47        }
48
49        for edge in graph.edges(current) {
50            let next = edge.target();
51            if visited.insert(next) {
52                parent.insert(next, current);
53                queue.push_back(next);
54            }
55        }
56    }
57
58    None
59}
60
61/// Find all cycles currently present in `graph`.
62///
63/// Each entry is a sorted list of item IDs in one strongly connected
64/// component (SCC). Self-loops are reported as a one-element cycle.
65#[must_use]
66pub fn find_all_cycles(graph: &DiGraph<String, ()>) -> Vec<Vec<String>> {
67    let mut cycles: Vec<Vec<String>> = tarjan_scc(graph)
68        .into_iter()
69        .filter(|component| {
70            component.len() > 1
71                || component
72                    .first()
73                    .is_some_and(|node| has_self_loop(graph, *node))
74        })
75        .map(|component| {
76            let mut ids: Vec<String> = component
77                .into_iter()
78                .map(|idx| node_id(graph, idx))
79                .collect();
80            ids.sort_unstable();
81            ids
82        })
83        .collect();
84
85    cycles.sort_unstable();
86    cycles
87}
88
89#[must_use]
90fn has_self_loop(graph: &DiGraph<String, ()>, node: NodeIndex) -> bool {
91    graph.find_edge(node, node).is_some()
92}
93
94fn reconstruct_cycle_path(
95    graph: &DiGraph<String, ()>,
96    from: NodeIndex,
97    to: NodeIndex,
98    parent: &HashMap<NodeIndex, NodeIndex>,
99) -> Vec<String> {
100    // Parent links represent a path: to -> ... -> from.
101    // Rebuild that path and then prepend `from` to represent the newly added
102    // edge `from -> to` that closes the cycle.
103    let mut to_to_from: Vec<NodeIndex> = vec![from];
104    let mut cursor = from;
105
106    while cursor != to {
107        if let Some(next) = parent.get(&cursor) {
108            cursor = *next;
109            to_to_from.push(cursor);
110        } else {
111            break;
112        }
113    }
114
115    to_to_from.reverse();
116
117    let mut cycle: Vec<String> = Vec::with_capacity(to_to_from.len() + 1);
118    cycle.push(node_id(graph, from));
119    cycle.extend(to_to_from.into_iter().map(|idx| node_id(graph, idx)));
120    cycle
121}
122
123fn node_id(graph: &DiGraph<String, ()>, idx: NodeIndex) -> String {
124    graph
125        .node_weight(idx)
126        .cloned()
127        .unwrap_or_else(|| format!("#{}", idx.index()))
128}
129
130// ---------------------------------------------------------------------------
131// Cycle break suggestions
132// ---------------------------------------------------------------------------
133
134/// A detected dependency cycle with suggested edges to remove to break it.
135#[derive(Debug, Clone, PartialEq, Eq)]
136pub struct CycleReport {
137    /// Sorted item IDs that form this cycle (members of the SCC).
138    pub members: Vec<String>,
139    /// Suggested `(blocker, blocked)` edges to remove to break the cycle.
140    ///
141    /// These are back-edges identified by DFS within the SCC.  Removing all
142    /// suggested edges will make the SCC acyclic.  In most cases a single
143    /// edge is sufficient.
144    pub suggested_breaks: Vec<(String, String)>,
145}
146
147/// Detect all cycles and, for each, suggest edges to remove to break them.
148///
149/// Internally uses Tarjan's SCC to identify cycle members, then runs a DFS
150/// within each SCC sub-graph to collect back-edges.  Back-edges are the
151/// canonical set to remove to make an SCC acyclic.
152///
153/// Self-loops are reported with the single edge `(id, id)` as the break.
154#[must_use]
155pub fn report_cycles_with_breaks(graph: &DiGraph<String, ()>) -> Vec<CycleReport> {
156    let scc_list = tarjan_scc(graph);
157
158    let mut reports: Vec<CycleReport> = scc_list
159        .into_iter()
160        .filter(|component| {
161            component.len() > 1
162                || component
163                    .first()
164                    .is_some_and(|node| has_self_loop(graph, *node))
165        })
166        .map(|component| {
167            let mut members: Vec<String> =
168                component.iter().map(|&idx| node_id(graph, idx)).collect();
169            members.sort_unstable();
170
171            // Self-loop: single node with an edge to itself.
172            if component.len() == 1 {
173                let id = members[0].clone();
174                return CycleReport {
175                    members,
176                    suggested_breaks: vec![(id.clone(), id)],
177                };
178            }
179
180            // Find back-edges within this SCC via DFS.
181            let member_set: HashSet<NodeIndex> = component.iter().copied().collect();
182            let suggested_breaks = find_back_edges_in_scc(graph, &component, &member_set);
183
184            CycleReport {
185                members,
186                suggested_breaks,
187            }
188        })
189        .collect();
190
191    reports.sort_unstable_by(|a, b| a.members.cmp(&b.members));
192    reports
193}
194
195/// Run DFS within the nodes of an SCC and collect back-edges.
196///
197/// A back-edge `(u, v)` is an edge to an ancestor in the DFS tree — removing
198/// it eliminates the cycle it closes.
199///
200/// Uses iterative DFS with an explicit ancestor set for correctness on large
201/// cycles without stack overflow risk.
202fn find_back_edges_in_scc(
203    graph: &DiGraph<String, ()>,
204    component: &[NodeIndex],
205    member_set: &HashSet<NodeIndex>,
206) -> Vec<(String, String)> {
207    // Start DFS from the lexicographically smallest node for determinism.
208    let start = component
209        .iter()
210        .min_by_key(|&&idx| node_id(graph, idx))
211        .copied()
212        .expect("component non-empty");
213
214    let mut visited: HashSet<NodeIndex> = HashSet::new();
215    let mut ancestor_set: HashSet<NodeIndex> = HashSet::new(); // for O(1) ancestor lookup
216    let mut back_edges: Vec<(String, String)> = Vec::new();
217
218    // Each stack entry: (node, index into its neighbor list).
219    let mut call_stack: Vec<(NodeIndex, Vec<NodeIndex>, usize)> = Vec::new();
220
221    // Bootstrap: push the start node.
222    if visited.insert(start) {
223        ancestor_set.insert(start);
224        let neighbors: Vec<NodeIndex> = graph
225            .neighbors_directed(start, petgraph::Direction::Outgoing)
226            .filter(|n| member_set.contains(n))
227            .collect();
228        call_stack.push((start, neighbors, 0));
229    }
230
231    // Run DFS over all SCC nodes (handles disconnected sub-components).
232    let mut all_starts: Vec<NodeIndex> = component.to_vec();
233    all_starts.sort_unstable_by_key(|&idx| node_id(graph, idx));
234    let mut extra_starts = all_starts.into_iter();
235
236    loop {
237        if let Some(frame) = call_stack.last_mut() {
238            let current = frame.0;
239            let neighbors = &frame.1;
240            let idx = &mut frame.2;
241
242            if *idx < neighbors.len() {
243                let neighbor = neighbors[*idx];
244                *idx += 1;
245
246                if ancestor_set.contains(&neighbor) {
247                    // Back-edge: current → neighbor (neighbor is an ancestor).
248                    back_edges.push((node_id(graph, current), node_id(graph, neighbor)));
249                } else if visited.insert(neighbor) {
250                    ancestor_set.insert(neighbor);
251                    let next_neighbors: Vec<NodeIndex> = graph
252                        .neighbors_directed(neighbor, petgraph::Direction::Outgoing)
253                        .filter(|n| member_set.contains(n))
254                        .collect();
255                    call_stack.push((neighbor, next_neighbors, 0));
256                }
257            } else {
258                // Pop this frame.
259                call_stack.pop();
260                ancestor_set.remove(&current);
261            }
262        } else {
263            // call_stack empty — try next unvisited SCC node (for disconnected SCCs).
264            let Some(next) = extra_starts.next() else {
265                break;
266            };
267            if visited.insert(next) {
268                ancestor_set.insert(next);
269                let neighbors: Vec<NodeIndex> = graph
270                    .neighbors_directed(next, petgraph::Direction::Outgoing)
271                    .filter(|n| member_set.contains(n))
272                    .collect();
273                call_stack.push((next, neighbors, 0));
274            }
275        }
276    }
277
278    back_edges.sort_unstable();
279    back_edges
280}
281
282#[cfg(test)]
283mod tests {
284    use std::collections::HashMap;
285
286    use super::*;
287
288    fn graph_with_nodes_and_edges(
289        nodes: &[&str],
290        edges: &[(&str, &str)],
291    ) -> (DiGraph<String, ()>, HashMap<String, NodeIndex>) {
292        let mut graph = DiGraph::<String, ()>::new();
293        let mut map: HashMap<String, NodeIndex> = HashMap::new();
294
295        for &node in nodes {
296            let idx = graph.add_node(node.to_string());
297            map.insert(node.to_string(), idx);
298        }
299
300        for &(from, to) in edges {
301            let from_idx = *map
302                .entry(from.to_string())
303                .or_insert_with(|| graph.add_node(from.to_string()));
304            let to_idx = *map
305                .entry(to.to_string())
306                .or_insert_with(|| graph.add_node(to.to_string()));
307            graph.add_edge(from_idx, to_idx, ());
308        }
309
310        (graph, map)
311    }
312
313    #[test]
314    fn would_create_cycle_detects_self_loop() {
315        let (graph, nodes) = graph_with_nodes_and_edges(&["A"], &[]);
316        let a = nodes["A"];
317
318        let cycle = would_create_cycle(&graph, a, a);
319
320        assert_eq!(cycle, Some(vec!["A".to_string(), "A".to_string()]));
321    }
322
323    #[test]
324    fn would_create_cycle_detects_three_node_loop() {
325        // Existing: A -> B -> C
326        // New edge: C -> A (creates C -> A -> B -> C)
327        let (graph, nodes) =
328            graph_with_nodes_and_edges(&["A", "B", "C"], &[("A", "B"), ("B", "C")]);
329
330        let cycle = would_create_cycle(&graph, nodes["C"], nodes["A"])
331            .unwrap_or_else(|| panic!("expected cycle"));
332
333        assert_eq!(cycle, vec!["C", "A", "B", "C"]);
334    }
335
336    #[test]
337    fn would_create_cycle_returns_none_for_safe_edge() {
338        // Existing: A -> B -> C
339        // New edge: A -> C (no cycle)
340        let (graph, nodes) =
341            graph_with_nodes_and_edges(&["A", "B", "C"], &[("A", "B"), ("B", "C")]);
342
343        assert!(would_create_cycle(&graph, nodes["A"], nodes["C"]).is_none());
344    }
345
346    #[test]
347    fn would_create_cycle_returns_none_for_duplicate_edge() {
348        let (graph, nodes) = graph_with_nodes_and_edges(&["A", "B"], &[("A", "B")]);
349
350        assert!(would_create_cycle(&graph, nodes["A"], nodes["B"]).is_none());
351    }
352
353    #[test]
354    fn find_all_cycles_reports_sccs_and_self_loops() {
355        // SCC1: A <-> B
356        // SCC2: C -> D -> E -> C
357        // SCC3: F -> F
358        let (graph, _) = graph_with_nodes_and_edges(
359            &["A", "B", "C", "D", "E", "F", "G"],
360            &[
361                ("A", "B"),
362                ("B", "A"),
363                ("C", "D"),
364                ("D", "E"),
365                ("E", "C"),
366                ("F", "F"),
367            ],
368        );
369
370        let cycles = find_all_cycles(&graph);
371
372        assert_eq!(
373            cycles,
374            vec![
375                vec!["A".to_string(), "B".to_string()],
376                vec!["C".to_string(), "D".to_string(), "E".to_string()],
377                vec!["F".to_string()]
378            ]
379        );
380    }
381
382    #[test]
383    fn find_all_cycles_empty_graph() {
384        let (graph, _) = graph_with_nodes_and_edges(&[], &[]);
385        assert!(find_all_cycles(&graph).is_empty());
386    }
387
388    // -----------------------------------------------------------------------
389    // report_cycles_with_breaks tests
390    // -----------------------------------------------------------------------
391
392    #[test]
393    fn report_cycles_empty_graph() {
394        let (graph, _) = graph_with_nodes_and_edges(&[], &[]);
395        let reports = report_cycles_with_breaks(&graph);
396        assert!(reports.is_empty(), "no cycles in empty graph");
397    }
398
399    #[test]
400    fn report_cycles_acyclic_graph() {
401        let (graph, _) = graph_with_nodes_and_edges(&["A", "B", "C"], &[("A", "B"), ("B", "C")]);
402        let reports = report_cycles_with_breaks(&graph);
403        assert!(reports.is_empty(), "no cycles in acyclic graph");
404    }
405
406    #[test]
407    fn report_cycles_self_loop() {
408        let (graph, _) = graph_with_nodes_and_edges(&["A"], &[("A", "A")]);
409        let reports = report_cycles_with_breaks(&graph);
410
411        assert_eq!(reports.len(), 1);
412        let r = &reports[0];
413        assert_eq!(r.members, vec!["A".to_string()]);
414        assert_eq!(r.suggested_breaks, vec![("A".to_string(), "A".to_string())]);
415    }
416
417    #[test]
418    fn report_cycles_two_node_cycle() {
419        // A ⇄ B: back-edge is B→A (DFS enters A first, then B, B→A is back-edge).
420        let (graph, _) = graph_with_nodes_and_edges(&["A", "B"], &[("A", "B"), ("B", "A")]);
421        let reports = report_cycles_with_breaks(&graph);
422
423        assert_eq!(reports.len(), 1);
424        let r = &reports[0];
425        assert_eq!(r.members, vec!["A".to_string(), "B".to_string()]);
426
427        // Exactly one back-edge should be suggested.
428        assert_eq!(r.suggested_breaks.len(), 1, "one break needed for 2-cycle");
429
430        // The suggested break should be a valid edge in the graph.
431        let (from, to) = &r.suggested_breaks[0];
432        // It must be either A→B or B→A.
433        let is_valid = (from == "B" && to == "A") || (from == "A" && to == "B");
434        assert!(is_valid, "break must be an existing edge, got {from}→{to}");
435    }
436
437    #[test]
438    fn report_cycles_three_node_cycle_one_break() {
439        // A → B → C → A: one back-edge breaks the cycle.
440        let (graph, _) =
441            graph_with_nodes_and_edges(&["A", "B", "C"], &[("A", "B"), ("B", "C"), ("C", "A")]);
442        let reports = report_cycles_with_breaks(&graph);
443
444        assert_eq!(reports.len(), 1);
445        let r = &reports[0];
446        assert_eq!(
447            r.members,
448            vec!["A".to_string(), "B".to_string(), "C".to_string()]
449        );
450        // One back-edge (C→A) should be detected.
451        assert_eq!(r.suggested_breaks.len(), 1);
452        assert_eq!(
453            r.suggested_breaks[0],
454            ("C".to_string(), "A".to_string()),
455            "back-edge in A→B→C→A is C→A"
456        );
457    }
458
459    #[test]
460    fn report_cycles_multiple_independent_cycles() {
461        // Cycle 1: A ⇄ B
462        // Cycle 2: C → D → E → C
463        // Cycle 3: F → F (self-loop)
464        let (graph, _) = graph_with_nodes_and_edges(
465            &["A", "B", "C", "D", "E", "F", "G"],
466            &[
467                ("A", "B"),
468                ("B", "A"),
469                ("C", "D"),
470                ("D", "E"),
471                ("E", "C"),
472                ("F", "F"),
473            ],
474        );
475        let reports = report_cycles_with_breaks(&graph);
476
477        assert_eq!(reports.len(), 3, "three separate cycles");
478
479        // Check members (reports sorted by members).
480        assert_eq!(reports[0].members, vec!["A", "B"]);
481        assert_eq!(reports[1].members, vec!["C", "D", "E"]);
482        assert_eq!(reports[2].members, vec!["F"]);
483
484        // Each cycle should have at least one suggested break.
485        assert!(!reports[0].suggested_breaks.is_empty());
486        assert!(!reports[1].suggested_breaks.is_empty());
487        assert!(!reports[2].suggested_breaks.is_empty());
488
489        // Self-loop break should be F→F.
490        assert_eq!(
491            reports[2].suggested_breaks,
492            vec![("F".to_string(), "F".to_string())]
493        );
494    }
495
496    #[test]
497    fn report_cycles_break_is_valid_existing_edge() {
498        // For each suggested break, verify it is an actual edge in the graph.
499        let (graph, _) =
500            graph_with_nodes_and_edges(&["A", "B", "C"], &[("A", "B"), ("B", "C"), ("C", "A")]);
501        let reports = report_cycles_with_breaks(&graph);
502
503        for report in &reports {
504            for (from, to) in &report.suggested_breaks {
505                let from_idx = graph
506                    .node_indices()
507                    .find(|&i| graph.node_weight(i).map(String::as_str) == Some(from.as_str()))
508                    .expect("from node exists");
509                let to_idx = graph
510                    .node_indices()
511                    .find(|&i| graph.node_weight(i).map(String::as_str) == Some(to.as_str()))
512                    .expect("to node exists");
513                assert!(
514                    graph.contains_edge(from_idx, to_idx),
515                    "suggested break {from}→{to} must be an existing edge"
516                );
517            }
518        }
519    }
520}