bones_triage/graph/normalize.rs
1//! SCC condensation and transitive reduction for the dependency graph.
2//!
3//! # Overview
4//!
5//! The raw blocking graph may contain cycles (e.g., two items that block each
6//! other due to concurrent link events). This module provides two normalization
7//! steps that produce a stable, acyclic graph suitable for centrality metrics:
8//!
9//! 1. **SCC Condensation** — Collapses strongly connected components (SCCs)
10//! into single nodes. Items in the same SCC form a "dependency cycle" and
11//! are treated as an atomic unit for scheduling purposes.
12//!
13//! 2. **Transitive Reduction** — Removes redundant edges from the condensed
14//! DAG. An edge `A → C` is redundant if there is already a path `A → B → C`.
15//! Removing such edges gives the minimal graph with the same reachability.
16//!
17//! # Output
18//!
19//! [`NormalizedGraph`] exposes both the condensed DAG (nodes = SCCs) and
20//! the transitively-reduced DAG alongside the original [`RawGraph`]. Callers
21//! can use the reduced graph for centrality metrics and the original for
22//! display purposes.
23
24#![allow(clippy::module_name_repetitions)]
25
26use std::collections::{HashMap, HashSet};
27
28use petgraph::{
29 Direction,
30 algo::condensation,
31 graph::{DiGraph, NodeIndex},
32 visit::{EdgeRef, IntoNodeIdentifiers},
33};
34use tracing::instrument;
35
36use crate::graph::build::RawGraph;
37
38// ---------------------------------------------------------------------------
39// NormalizedGraph
40// ---------------------------------------------------------------------------
41
42/// A node in the condensed dependency graph.
43///
44/// Each node represents one SCC from the original graph. Most nodes will
45/// contain a single item ID; nodes with multiple IDs represent dependency
46/// cycles.
47#[derive(Debug, Clone)]
48pub struct SccNode {
49 /// Item IDs in this SCC (sorted for deterministic output).
50 pub members: Vec<String>,
51}
52
53impl SccNode {
54 /// Return `true` if this SCC contains more than one item (i.e., a cycle).
55 #[must_use]
56 pub const fn is_cycle(&self) -> bool {
57 self.members.len() > 1
58 }
59
60 /// Return the primary representative of this SCC.
61 ///
62 /// For single-item SCCs this is the item ID. For cycles, it is the
63 /// lexicographically smallest member (deterministic).
64 #[must_use]
65 pub fn representative(&self) -> &str {
66 self.members.first().map(String::as_str).unwrap_or_default()
67 }
68}
69
70/// The fully normalized dependency graph.
71///
72/// Contains three views of the same data:
73/// - `raw`: the original [`RawGraph`] with all nodes and edges as-is.
74/// - `condensed`: condensed DAG where each node is an [`SccNode`].
75/// - `reduced`: transitively-reduced condensed DAG (minimum edges).
76#[derive(Debug)]
77pub struct NormalizedGraph {
78 /// Original graph from `SQLite` (may contain cycles).
79 pub raw: RawGraph,
80 /// Condensed DAG: SCCs collapsed to single nodes.
81 pub condensed: DiGraph<SccNode, ()>,
82 /// Transitively-reduced condensed DAG (minimum edge set).
83 pub reduced: DiGraph<SccNode, ()>,
84 /// Mapping from item ID to condensed node index.
85 pub item_to_scc: HashMap<String, NodeIndex>,
86}
87
88impl NormalizedGraph {
89 /// Build a [`NormalizedGraph`] from a [`RawGraph`].
90 ///
91 /// Steps:
92 /// 1. Run petgraph's SCC condensation (Tarjan's algorithm internally).
93 /// 2. Build [`SccNode`] labels from the raw node weights.
94 /// 3. Compute transitive reduction of the condensed DAG.
95 ///
96 /// # Panics
97 ///
98 /// Does not panic — all indices are verified.
99 #[must_use]
100 #[instrument(skip(raw))]
101 pub fn from_raw(raw: RawGraph) -> Self {
102 // petgraph::condensation collapses SCCs and returns a DiGraph where
103 // each node weight is a Vec of the original node weights.
104 // make_acyclic=true removes intra-SCC edges (back-edges in the SCCs).
105 let condensed_raw: DiGraph<Vec<String>, ()> =
106 condensation(raw.graph.clone(), /* make_acyclic */ true);
107
108 // Build SccNode labels (sorted members for determinism).
109 let condensed: DiGraph<SccNode, ()> = condensed_raw.map(
110 |_, members| {
111 let mut sorted = members.clone();
112 sorted.sort_unstable();
113 SccNode { members: sorted }
114 },
115 |_, ()| (),
116 );
117
118 // Build item_to_scc mapping.
119 let item_to_scc = build_item_to_scc_map(&condensed);
120
121 // Compute transitive reduction of the condensed DAG.
122 let reduced = transitive_reduction(&condensed);
123
124 Self {
125 raw,
126 condensed,
127 reduced,
128 item_to_scc,
129 }
130 }
131
132 /// Return the number of SCCs in the condensed graph.
133 #[must_use]
134 pub fn scc_count(&self) -> usize {
135 self.condensed.node_count()
136 }
137
138 /// Return the number of cycle SCCs (SCCs with more than one member).
139 #[must_use]
140 pub fn cycle_count(&self) -> usize {
141 self.condensed
142 .node_weights()
143 .filter(|n| n.is_cycle())
144 .count()
145 }
146
147 /// Return all items that are members of a dependency cycle.
148 #[must_use]
149 pub fn cyclic_items(&self) -> Vec<&str> {
150 self.condensed
151 .node_weights()
152 .filter(|n| n.is_cycle())
153 .flat_map(|n| n.members.iter().map(String::as_str))
154 .collect()
155 }
156
157 /// Return the SCC node index for a given item ID.
158 #[must_use]
159 pub fn scc_of(&self, item_id: &str) -> Option<NodeIndex> {
160 self.item_to_scc.get(item_id).copied()
161 }
162
163 /// Return the content hash from the underlying raw graph.
164 ///
165 /// Used for cache invalidation — if this changes, rebuild.
166 #[must_use]
167 pub fn content_hash(&self) -> &str {
168 &self.raw.content_hash
169 }
170}
171
172// ---------------------------------------------------------------------------
173// Transitive reduction
174// ---------------------------------------------------------------------------
175
176/// Compute the transitive reduction of a DAG.
177///
178/// Returns a new graph with the same nodes but only the minimal edges needed
179/// to preserve reachability. An edge `(u, v)` is removed if there exists
180/// another path `u → ... → v` of length ≥ 2.
181///
182/// # Algorithm
183///
184/// Process nodes in reverse topological order (sinks first). For each node
185/// `u`, compute the set of all nodes reachable from `u` via paths of length
186/// ≥ 2 through its direct successors. Any edge `(u, v)` where `v` is in
187/// that set is redundant.
188///
189/// # Panics
190///
191/// Panics if `g` contains a cycle (input must be a DAG).
192#[must_use]
193pub fn transitive_reduction<N: Clone>(g: &DiGraph<N, ()>) -> DiGraph<N, ()> {
194 use petgraph::algo::toposort;
195
196 // Get topological order (errors if cyclic).
197 let topo = toposort(g, None).unwrap_or_else(|_| {
198 // Fallback: return graph as-is if it contains a cycle.
199 // The condensed graph should always be a DAG, but we defend
200 // against bugs here rather than panicking.
201 g.node_identifiers().collect()
202 });
203
204 // For each node, compute the set of all nodes reachable in 2+ steps.
205 // We process in reverse topological order (sinks first) so that when
206 // we process node u, all successors already have their reachable sets.
207 let n = g.node_count();
208 let mut reachable: HashMap<NodeIndex, HashSet<NodeIndex>> = HashMap::with_capacity(n);
209
210 for &u in topo.iter().rev() {
211 let mut reach_u: HashSet<NodeIndex> = HashSet::new();
212 for v in g.neighbors_directed(u, Direction::Outgoing) {
213 // All nodes reachable from v (including v itself) are reachable
214 // from u in 2+ steps.
215 reach_u.insert(v);
216 if let Some(rv) = reachable.get(&v) {
217 reach_u.extend(rv.iter().copied());
218 }
219 }
220 reachable.insert(u, reach_u);
221 }
222
223 // Build the reduced graph: keep edge (u, v) only if v is NOT reachable
224 // from any other successor of u (i.e., v is not reachable from u via
225 // a path that doesn't use the direct edge u→v).
226 let mut reduced = g.map(|_, w| w.clone(), |_, ()| ());
227
228 // Collect edges to remove (can't mutate while iterating).
229 let to_remove: Vec<_> = g
230 .edge_references()
231 .filter(|e| {
232 let u = e.source();
233 let v = e.target();
234 // Check if v is reachable from any other successor of u.
235 // "Reachable from another successor" means v ∈ reachable(w)
236 // for some other direct successor w ≠ v of u.
237 g.neighbors_directed(u, Direction::Outgoing)
238 .filter(|&w| w != v)
239 .any(|w| reachable.get(&w).is_some_and(|rw| rw.contains(&v)))
240 })
241 .map(|e| (e.source(), e.target()))
242 .collect();
243
244 for (src, tgt) in to_remove {
245 if let Some(edge_idx) = reduced.find_edge(src, tgt) {
246 reduced.remove_edge(edge_idx);
247 }
248 }
249
250 reduced
251}
252
253// ---------------------------------------------------------------------------
254// Internal helpers
255// ---------------------------------------------------------------------------
256
257fn build_item_to_scc_map(condensed: &DiGraph<SccNode, ()>) -> HashMap<String, NodeIndex> {
258 let mut map = HashMap::new();
259 for idx in condensed.node_identifiers() {
260 if let Some(node) = condensed.node_weight(idx) {
261 for member in &node.members {
262 map.insert(member.clone(), idx);
263 }
264 }
265 }
266 map
267}
268
269// ---------------------------------------------------------------------------
270// Tests
271// ---------------------------------------------------------------------------
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276 use petgraph::graph::DiGraph;
277
278 fn make_raw_with_edges(edges: &[(&str, &str)]) -> RawGraph {
279 let mut graph = DiGraph::<String, ()>::new();
280 let mut node_map = std::collections::HashMap::new();
281
282 let all_ids: std::collections::BTreeSet<&str> =
283 edges.iter().flat_map(|(a, b)| [*a, *b]).collect();
284
285 for id in all_ids {
286 let idx = graph.add_node(id.to_string());
287 node_map.insert(id.to_string(), idx);
288 }
289
290 for (a, b) in edges {
291 let ia = node_map[*a];
292 let ib = node_map[*b];
293 graph.add_edge(ia, ib, ());
294 }
295
296 RawGraph {
297 graph,
298 node_map,
299 content_hash: "blake3:test".to_string(),
300 }
301 }
302
303 // -----------------------------------------------------------------------
304 // SCC condensation
305 // -----------------------------------------------------------------------
306
307 #[test]
308 fn linear_chain_each_node_is_own_scc() {
309 // A → B → C (no cycles)
310 let raw = make_raw_with_edges(&[("A", "B"), ("B", "C")]);
311 let ng = NormalizedGraph::from_raw(raw);
312
313 assert_eq!(ng.scc_count(), 3, "3 SCCs for acyclic chain");
314 assert_eq!(ng.cycle_count(), 0, "no cycles");
315 }
316
317 #[test]
318 fn simple_cycle_condensed_to_one_scc() {
319 // A → B → A (cycle)
320 let raw = make_raw_with_edges(&[("A", "B"), ("B", "A")]);
321 let ng = NormalizedGraph::from_raw(raw);
322
323 assert_eq!(ng.scc_count(), 1, "cycle condensed to 1 SCC");
324 assert_eq!(ng.cycle_count(), 1, "one cycle SCC");
325
326 let cyclic = ng.cyclic_items();
327 assert!(cyclic.contains(&"A"), "A in cyclic items");
328 assert!(cyclic.contains(&"B"), "B in cyclic items");
329 }
330
331 #[test]
332 fn mixed_cycle_and_acyclic() {
333 // A → B → A → C (A and B cycle; C is downstream)
334 let raw = make_raw_with_edges(&[("A", "B"), ("B", "A"), ("A", "C")]);
335 let ng = NormalizedGraph::from_raw(raw);
336
337 // SCCs: {A, B} and {C}
338 assert_eq!(ng.scc_count(), 2, "2 SCCs: the cycle and C");
339 assert_eq!(ng.cycle_count(), 1);
340
341 let cyclic = ng.cyclic_items();
342 assert!(cyclic.contains(&"A"));
343 assert!(cyclic.contains(&"B"));
344 assert!(!cyclic.contains(&"C"));
345 }
346
347 #[test]
348 fn item_to_scc_mapping_correct() {
349 let raw = make_raw_with_edges(&[("A", "B"), ("B", "A"), ("A", "C")]);
350 let ng = NormalizedGraph::from_raw(raw);
351
352 let scc_ab_a = ng.scc_of("A");
353 let scc_ab_b = ng.scc_of("B");
354 let scc_c = ng.scc_of("C");
355
356 assert!(scc_ab_a.is_some(), "A has SCC");
357 assert!(scc_ab_b.is_some(), "B has SCC");
358 assert!(scc_c.is_some(), "C has SCC");
359
360 // A and B should be in the same SCC
361 assert_eq!(scc_ab_a, scc_ab_b, "A and B in same SCC");
362 // C is in a different SCC
363 assert_ne!(scc_ab_a, scc_c, "C in different SCC from A");
364 }
365
366 // -----------------------------------------------------------------------
367 // Transitive reduction
368 // -----------------------------------------------------------------------
369
370 #[test]
371 fn transitive_reduction_removes_redundant_edge() {
372 // A → B → C and A → C (redundant)
373 let mut g: DiGraph<String, ()> = DiGraph::new();
374 let a = g.add_node("A".to_string());
375 let b = g.add_node("B".to_string());
376 let c = g.add_node("C".to_string());
377 g.add_edge(a, b, ());
378 g.add_edge(b, c, ());
379 g.add_edge(a, c, ()); // redundant
380
381 let reduced = transitive_reduction(&g);
382 assert_eq!(reduced.edge_count(), 2, "A→C removed");
383 assert!(!reduced.contains_edge(a, c), "redundant edge removed");
384 assert!(reduced.contains_edge(a, b), "direct edge kept");
385 assert!(reduced.contains_edge(b, c), "direct edge kept");
386 }
387
388 #[test]
389 fn transitive_reduction_preserves_minimal_graph() {
390 // A → B → C (no redundant edges)
391 let mut g: DiGraph<String, ()> = DiGraph::new();
392 let a = g.add_node("A".to_string());
393 let b = g.add_node("B".to_string());
394 let c = g.add_node("C".to_string());
395 g.add_edge(a, b, ());
396 g.add_edge(b, c, ());
397
398 let reduced = transitive_reduction(&g);
399 assert_eq!(reduced.edge_count(), 2, "no edges removed");
400 }
401
402 #[test]
403 fn transitive_reduction_diamond_removes_diagonal() {
404 // Diamond: A → B → D, A → C → D, A → D (redundant)
405 let mut g: DiGraph<String, ()> = DiGraph::new();
406 let a = g.add_node("A".to_string());
407 let b = g.add_node("B".to_string());
408 let c = g.add_node("C".to_string());
409 let d = g.add_node("D".to_string());
410 g.add_edge(a, b, ());
411 g.add_edge(a, c, ());
412 g.add_edge(b, d, ());
413 g.add_edge(c, d, ());
414 g.add_edge(a, d, ()); // redundant via A→B→D or A→C→D
415
416 let reduced = transitive_reduction(&g);
417 assert_eq!(reduced.edge_count(), 4, "A→D removed");
418 assert!(!reduced.contains_edge(a, d), "redundant edge removed");
419 }
420
421 #[test]
422 fn scc_representative_is_lexicographically_first() {
423 let raw = make_raw_with_edges(&[("bn-z", "bn-a"), ("bn-a", "bn-z")]);
424 let ng = NormalizedGraph::from_raw(raw);
425
426 assert_eq!(ng.scc_count(), 1);
427 let scc_idx = ng.condensed.node_identifiers().next().unwrap();
428 let scc = &ng.condensed[scc_idx];
429 // Members should be sorted, so bn-a comes before bn-z
430 assert_eq!(scc.members[0], "bn-a");
431 assert_eq!(scc.representative(), "bn-a");
432 }
433}