Skip to main content

bones_triage/graph/
normalize.rs

1//! SCC condensation and transitive reduction for the dependency graph.
2//!
3//! # Overview
4//!
5//! The raw blocking graph may contain cycles (e.g., two items that block each
6//! other due to concurrent link events). This module provides two normalization
7//! steps that produce a stable, acyclic graph suitable for centrality metrics:
8//!
9//! 1. **SCC Condensation** — Collapses strongly connected components (SCCs)
10//!    into single nodes. Items in the same SCC form a "dependency cycle" and
11//!    are treated as an atomic unit for scheduling purposes.
12//!
13//! 2. **Transitive Reduction** — Removes redundant edges from the condensed
14//!    DAG. An edge `A → C` is redundant if there is already a path `A → B → C`.
15//!    Removing such edges gives the minimal graph with the same reachability.
16//!
17//! # Output
18//!
19//! [`NormalizedGraph`] exposes both the condensed DAG (nodes = SCCs) and
20//! the transitively-reduced DAG alongside the original [`RawGraph`]. Callers
21//! can use the reduced graph for centrality metrics and the original for
22//! display purposes.
23
24#![allow(clippy::module_name_repetitions)]
25
26use std::collections::{HashMap, HashSet};
27
28use petgraph::{
29    Direction,
30    algo::condensation,
31    graph::{DiGraph, NodeIndex},
32    visit::{EdgeRef, IntoNodeIdentifiers},
33};
34use tracing::instrument;
35
36use crate::graph::build::RawGraph;
37
38// ---------------------------------------------------------------------------
39// NormalizedGraph
40// ---------------------------------------------------------------------------
41
42/// A node in the condensed dependency graph.
43///
44/// Each node represents one SCC from the original graph. Most nodes will
45/// contain a single item ID; nodes with multiple IDs represent dependency
46/// cycles.
47#[derive(Debug, Clone)]
48pub struct SccNode {
49    /// Item IDs in this SCC (sorted for deterministic output).
50    pub members: Vec<String>,
51}
52
53impl SccNode {
54    /// Return `true` if this SCC contains more than one item (i.e., a cycle).
55    #[must_use]
56    pub const fn is_cycle(&self) -> bool {
57        self.members.len() > 1
58    }
59
60    /// Return the primary representative of this SCC.
61    ///
62    /// For single-item SCCs this is the item ID. For cycles, it is the
63    /// lexicographically smallest member (deterministic).
64    #[must_use]
65    pub fn representative(&self) -> &str {
66        self.members.first().map(String::as_str).unwrap_or_default()
67    }
68}
69
70/// The fully normalized dependency graph.
71///
72/// Contains three views of the same data:
73/// - `raw`: the original [`RawGraph`] with all nodes and edges as-is.
74/// - `condensed`: condensed DAG where each node is an [`SccNode`].
75/// - `reduced`: transitively-reduced condensed DAG (minimum edges).
76#[derive(Debug)]
77pub struct NormalizedGraph {
78    /// Original graph from `SQLite` (may contain cycles).
79    pub raw: RawGraph,
80    /// Condensed DAG: SCCs collapsed to single nodes.
81    pub condensed: DiGraph<SccNode, ()>,
82    /// Transitively-reduced condensed DAG (minimum edge set).
83    pub reduced: DiGraph<SccNode, ()>,
84    /// Mapping from item ID to condensed node index.
85    pub item_to_scc: HashMap<String, NodeIndex>,
86}
87
88impl NormalizedGraph {
89    /// Build a [`NormalizedGraph`] from a [`RawGraph`].
90    ///
91    /// Steps:
92    /// 1. Run petgraph's SCC condensation (Tarjan's algorithm internally).
93    /// 2. Build [`SccNode`] labels from the raw node weights.
94    /// 3. Compute transitive reduction of the condensed DAG.
95    ///
96    /// # Panics
97    ///
98    /// Does not panic — all indices are verified.
99    #[must_use]
100    #[instrument(skip(raw))]
101    pub fn from_raw(raw: RawGraph) -> Self {
102        // petgraph::condensation collapses SCCs and returns a DiGraph where
103        // each node weight is a Vec of the original node weights.
104        // make_acyclic=true removes intra-SCC edges (back-edges in the SCCs).
105        let condensed_raw: DiGraph<Vec<String>, ()> =
106            condensation(raw.graph.clone(), /* make_acyclic */ true);
107
108        // Build SccNode labels (sorted members for determinism).
109        let condensed: DiGraph<SccNode, ()> = condensed_raw.map(
110            |_, members| {
111                let mut sorted = members.clone();
112                sorted.sort_unstable();
113                SccNode { members: sorted }
114            },
115            |_, ()| (),
116        );
117
118        // Build item_to_scc mapping.
119        let item_to_scc = build_item_to_scc_map(&condensed);
120
121        // Compute transitive reduction of the condensed DAG.
122        let reduced = transitive_reduction(&condensed);
123
124        Self {
125            raw,
126            condensed,
127            reduced,
128            item_to_scc,
129        }
130    }
131
132    /// Return the number of SCCs in the condensed graph.
133    #[must_use]
134    pub fn scc_count(&self) -> usize {
135        self.condensed.node_count()
136    }
137
138    /// Return the number of cycle SCCs (SCCs with more than one member).
139    #[must_use]
140    pub fn cycle_count(&self) -> usize {
141        self.condensed
142            .node_weights()
143            .filter(|n| n.is_cycle())
144            .count()
145    }
146
147    /// Return all items that are members of a dependency cycle.
148    #[must_use]
149    pub fn cyclic_items(&self) -> Vec<&str> {
150        self.condensed
151            .node_weights()
152            .filter(|n| n.is_cycle())
153            .flat_map(|n| n.members.iter().map(String::as_str))
154            .collect()
155    }
156
157    /// Return the SCC node index for a given item ID.
158    #[must_use]
159    pub fn scc_of(&self, item_id: &str) -> Option<NodeIndex> {
160        self.item_to_scc.get(item_id).copied()
161    }
162
163    /// Return the content hash from the underlying raw graph.
164    ///
165    /// Used for cache invalidation — if this changes, rebuild.
166    #[must_use]
167    pub fn content_hash(&self) -> &str {
168        &self.raw.content_hash
169    }
170}
171
172// ---------------------------------------------------------------------------
173// Transitive reduction
174// ---------------------------------------------------------------------------
175
176/// Compute the transitive reduction of a DAG.
177///
178/// Returns a new graph with the same nodes but only the minimal edges needed
179/// to preserve reachability. An edge `(u, v)` is removed if there exists
180/// another path `u → ... → v` of length ≥ 2.
181///
182/// # Algorithm
183///
184/// Process nodes in reverse topological order (sinks first). For each node
185/// `u`, compute the set of all nodes reachable from `u` via paths of length
186/// ≥ 2 through its direct successors. Any edge `(u, v)` where `v` is in
187/// that set is redundant.
188///
189/// # Panics
190///
191/// Panics if `g` contains a cycle (input must be a DAG).
192#[must_use]
193pub fn transitive_reduction<N: Clone>(g: &DiGraph<N, ()>) -> DiGraph<N, ()> {
194    use petgraph::algo::toposort;
195
196    // Get topological order (errors if cyclic).
197    let topo = toposort(g, None).unwrap_or_else(|_| {
198        // Fallback: return graph as-is if it contains a cycle.
199        // The condensed graph should always be a DAG, but we defend
200        // against bugs here rather than panicking.
201        g.node_identifiers().collect()
202    });
203
204    // For each node, compute the set of all nodes reachable in 2+ steps.
205    // We process in reverse topological order (sinks first) so that when
206    // we process node u, all successors already have their reachable sets.
207    let n = g.node_count();
208    let mut reachable: HashMap<NodeIndex, HashSet<NodeIndex>> = HashMap::with_capacity(n);
209
210    for &u in topo.iter().rev() {
211        let mut reach_u: HashSet<NodeIndex> = HashSet::new();
212        for v in g.neighbors_directed(u, Direction::Outgoing) {
213            // All nodes reachable from v (including v itself) are reachable
214            // from u in 2+ steps.
215            reach_u.insert(v);
216            if let Some(rv) = reachable.get(&v) {
217                reach_u.extend(rv.iter().copied());
218            }
219        }
220        reachable.insert(u, reach_u);
221    }
222
223    // Build the reduced graph: keep edge (u, v) only if v is NOT reachable
224    // from any other successor of u (i.e., v is not reachable from u via
225    // a path that doesn't use the direct edge u→v).
226    let mut reduced = g.map(|_, w| w.clone(), |_, ()| ());
227
228    // Collect edges to remove (can't mutate while iterating).
229    let to_remove: Vec<_> = g
230        .edge_references()
231        .filter(|e| {
232            let u = e.source();
233            let v = e.target();
234            // Check if v is reachable from any other successor of u.
235            // "Reachable from another successor" means v ∈ reachable(w)
236            // for some other direct successor w ≠ v of u.
237            g.neighbors_directed(u, Direction::Outgoing)
238                .filter(|&w| w != v)
239                .any(|w| reachable.get(&w).is_some_and(|rw| rw.contains(&v)))
240        })
241        .map(|e| (e.source(), e.target()))
242        .collect();
243
244    for (src, tgt) in to_remove {
245        if let Some(edge_idx) = reduced.find_edge(src, tgt) {
246            reduced.remove_edge(edge_idx);
247        }
248    }
249
250    reduced
251}
252
253// ---------------------------------------------------------------------------
254// Internal helpers
255// ---------------------------------------------------------------------------
256
257fn build_item_to_scc_map(condensed: &DiGraph<SccNode, ()>) -> HashMap<String, NodeIndex> {
258    let mut map = HashMap::new();
259    for idx in condensed.node_identifiers() {
260        if let Some(node) = condensed.node_weight(idx) {
261            for member in &node.members {
262                map.insert(member.clone(), idx);
263            }
264        }
265    }
266    map
267}
268
269// ---------------------------------------------------------------------------
270// Tests
271// ---------------------------------------------------------------------------
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276    use petgraph::graph::DiGraph;
277
278    fn make_raw_with_edges(edges: &[(&str, &str)]) -> RawGraph {
279        let mut graph = DiGraph::<String, ()>::new();
280        let mut node_map = std::collections::HashMap::new();
281
282        let all_ids: std::collections::BTreeSet<&str> =
283            edges.iter().flat_map(|(a, b)| [*a, *b]).collect();
284
285        for id in all_ids {
286            let idx = graph.add_node(id.to_string());
287            node_map.insert(id.to_string(), idx);
288        }
289
290        for (a, b) in edges {
291            let ia = node_map[*a];
292            let ib = node_map[*b];
293            graph.add_edge(ia, ib, ());
294        }
295
296        RawGraph {
297            graph,
298            node_map,
299            content_hash: "blake3:test".to_string(),
300        }
301    }
302
303    // -----------------------------------------------------------------------
304    // SCC condensation
305    // -----------------------------------------------------------------------
306
307    #[test]
308    fn linear_chain_each_node_is_own_scc() {
309        // A → B → C (no cycles)
310        let raw = make_raw_with_edges(&[("A", "B"), ("B", "C")]);
311        let ng = NormalizedGraph::from_raw(raw);
312
313        assert_eq!(ng.scc_count(), 3, "3 SCCs for acyclic chain");
314        assert_eq!(ng.cycle_count(), 0, "no cycles");
315    }
316
317    #[test]
318    fn simple_cycle_condensed_to_one_scc() {
319        // A → B → A (cycle)
320        let raw = make_raw_with_edges(&[("A", "B"), ("B", "A")]);
321        let ng = NormalizedGraph::from_raw(raw);
322
323        assert_eq!(ng.scc_count(), 1, "cycle condensed to 1 SCC");
324        assert_eq!(ng.cycle_count(), 1, "one cycle SCC");
325
326        let cyclic = ng.cyclic_items();
327        assert!(cyclic.contains(&"A"), "A in cyclic items");
328        assert!(cyclic.contains(&"B"), "B in cyclic items");
329    }
330
331    #[test]
332    fn mixed_cycle_and_acyclic() {
333        // A → B → A → C (A and B cycle; C is downstream)
334        let raw = make_raw_with_edges(&[("A", "B"), ("B", "A"), ("A", "C")]);
335        let ng = NormalizedGraph::from_raw(raw);
336
337        // SCCs: {A, B} and {C}
338        assert_eq!(ng.scc_count(), 2, "2 SCCs: the cycle and C");
339        assert_eq!(ng.cycle_count(), 1);
340
341        let cyclic = ng.cyclic_items();
342        assert!(cyclic.contains(&"A"));
343        assert!(cyclic.contains(&"B"));
344        assert!(!cyclic.contains(&"C"));
345    }
346
347    #[test]
348    fn item_to_scc_mapping_correct() {
349        let raw = make_raw_with_edges(&[("A", "B"), ("B", "A"), ("A", "C")]);
350        let ng = NormalizedGraph::from_raw(raw);
351
352        let scc_ab_a = ng.scc_of("A");
353        let scc_ab_b = ng.scc_of("B");
354        let scc_c = ng.scc_of("C");
355
356        assert!(scc_ab_a.is_some(), "A has SCC");
357        assert!(scc_ab_b.is_some(), "B has SCC");
358        assert!(scc_c.is_some(), "C has SCC");
359
360        // A and B should be in the same SCC
361        assert_eq!(scc_ab_a, scc_ab_b, "A and B in same SCC");
362        // C is in a different SCC
363        assert_ne!(scc_ab_a, scc_c, "C in different SCC from A");
364    }
365
366    // -----------------------------------------------------------------------
367    // Transitive reduction
368    // -----------------------------------------------------------------------
369
370    #[test]
371    fn transitive_reduction_removes_redundant_edge() {
372        // A → B → C and A → C (redundant)
373        let mut g: DiGraph<String, ()> = DiGraph::new();
374        let a = g.add_node("A".to_string());
375        let b = g.add_node("B".to_string());
376        let c = g.add_node("C".to_string());
377        g.add_edge(a, b, ());
378        g.add_edge(b, c, ());
379        g.add_edge(a, c, ()); // redundant
380
381        let reduced = transitive_reduction(&g);
382        assert_eq!(reduced.edge_count(), 2, "A→C removed");
383        assert!(!reduced.contains_edge(a, c), "redundant edge removed");
384        assert!(reduced.contains_edge(a, b), "direct edge kept");
385        assert!(reduced.contains_edge(b, c), "direct edge kept");
386    }
387
388    #[test]
389    fn transitive_reduction_preserves_minimal_graph() {
390        // A → B → C (no redundant edges)
391        let mut g: DiGraph<String, ()> = DiGraph::new();
392        let a = g.add_node("A".to_string());
393        let b = g.add_node("B".to_string());
394        let c = g.add_node("C".to_string());
395        g.add_edge(a, b, ());
396        g.add_edge(b, c, ());
397
398        let reduced = transitive_reduction(&g);
399        assert_eq!(reduced.edge_count(), 2, "no edges removed");
400    }
401
402    #[test]
403    fn transitive_reduction_diamond_removes_diagonal() {
404        // Diamond: A → B → D, A → C → D, A → D (redundant)
405        let mut g: DiGraph<String, ()> = DiGraph::new();
406        let a = g.add_node("A".to_string());
407        let b = g.add_node("B".to_string());
408        let c = g.add_node("C".to_string());
409        let d = g.add_node("D".to_string());
410        g.add_edge(a, b, ());
411        g.add_edge(a, c, ());
412        g.add_edge(b, d, ());
413        g.add_edge(c, d, ());
414        g.add_edge(a, d, ()); // redundant via A→B→D or A→C→D
415
416        let reduced = transitive_reduction(&g);
417        assert_eq!(reduced.edge_count(), 4, "A→D removed");
418        assert!(!reduced.contains_edge(a, d), "redundant edge removed");
419    }
420
421    #[test]
422    fn scc_representative_is_lexicographically_first() {
423        let raw = make_raw_with_edges(&[("bn-z", "bn-a"), ("bn-a", "bn-z")]);
424        let ng = NormalizedGraph::from_raw(raw);
425
426        assert_eq!(ng.scc_count(), 1);
427        let scc_idx = ng.condensed.node_identifiers().next().unwrap();
428        let scc = &ng.condensed[scc_idx];
429        // Members should be sorted, so bn-a comes before bn-z
430        assert_eq!(scc.members[0], "bn-a");
431        assert_eq!(scc.representative(), "bn-a");
432    }
433}