Skip to main content

bones_triage/metrics/
basic.rs

1//! Static graph metrics: degree centrality, topological sort, and density.
2//!
3//! # Overview
4//!
5//! These are Phase 1 metrics — synchronous, < 20ms on typical graphs.
6//! They are computed on every triage invocation and provide baseline
7//! statistics about the dependency structure.
8//!
9//! All metrics operate on the condensed DAG from [`NormalizedGraph`].
10//! Items sharing an SCC receive identical scores.
11
12use std::collections::HashMap;
13
14use petgraph::{Direction, algo::toposort, visit::IntoNodeIdentifiers};
15
16use crate::graph::normalize::NormalizedGraph;
17
18// ---------------------------------------------------------------------------
19// Degree Centrality
20// ---------------------------------------------------------------------------
21
22/// Per-item degree centrality scores.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct DegreeCentrality {
25    /// In-degree per item ID (how many things block this item).
26    pub in_degree: HashMap<String, usize>,
27    /// Out-degree per item ID (how many things this item blocks).
28    pub out_degree: HashMap<String, usize>,
29    /// Total degree per item ID (in + out).
30    pub total_degree: HashMap<String, usize>,
31}
32
33/// Compute degree centrality for all items in the condensed graph.
34///
35/// Items in the same SCC share the same degree (the SCC node's degree).
36///
37/// # Returns
38///
39/// A [`DegreeCentrality`] with in-degree, out-degree, and total-degree
40/// maps indexed by original item ID.
41#[must_use]
42pub fn degree_centrality(ng: &NormalizedGraph) -> DegreeCentrality {
43    let mut in_degree = HashMap::new();
44    let mut out_degree = HashMap::new();
45    let mut total_degree = HashMap::new();
46
47    for idx in ng.condensed.node_identifiers() {
48        let in_d = ng
49            .condensed
50            .neighbors_directed(idx, Direction::Incoming)
51            .count();
52        let out_d = ng
53            .condensed
54            .neighbors_directed(idx, Direction::Outgoing)
55            .count();
56        let total = in_d + out_d;
57
58        // Distribute to all items in this SCC
59        if let Some(scc) = ng.condensed.node_weight(idx) {
60            for member in &scc.members {
61                in_degree.insert(member.clone(), in_d);
62                out_degree.insert(member.clone(), out_d);
63                total_degree.insert(member.clone(), total);
64            }
65        }
66    }
67
68    DegreeCentrality {
69        in_degree,
70        out_degree,
71        total_degree,
72    }
73}
74
75// ---------------------------------------------------------------------------
76// Topological Sort
77// ---------------------------------------------------------------------------
78
79/// Compute a topological ordering of items on the condensed DAG.
80///
81/// Items are returned in dependency order: if A blocks B, A appears before B.
82/// Items within the same SCC are grouped together (their internal order
83/// is lexicographic for determinism).
84///
85/// # Returns
86///
87/// A `Vec<String>` of item IDs in topological order, or `None` if the
88/// condensed graph unexpectedly contains a cycle (should not happen).
89#[must_use]
90pub fn topological_order(ng: &NormalizedGraph) -> Option<Vec<String>> {
91    let topo = toposort(&ng.condensed, None).ok()?;
92
93    let mut result = Vec::with_capacity(ng.raw.node_count());
94    for idx in topo {
95        if let Some(scc) = ng.condensed.node_weight(idx) {
96            // Members are already sorted (from NormalizedGraph::from_raw)
97            result.extend(scc.members.iter().cloned());
98        }
99    }
100
101    Some(result)
102}
103
104// ---------------------------------------------------------------------------
105// Graph Density
106// ---------------------------------------------------------------------------
107
108/// Compute graph density of the condensed DAG.
109///
110/// Density = edges / (nodes * (nodes - 1)) for a directed graph.
111/// Returns 0.0 for graphs with fewer than 2 nodes.
112#[must_use]
113#[allow(clippy::cast_precision_loss)]
114pub fn condensed_density(ng: &NormalizedGraph) -> f64 {
115    let n = ng.condensed.node_count();
116    if n < 2 {
117        return 0.0;
118    }
119    let max_edges = (n * (n - 1)) as f64;
120    ng.condensed.edge_count() as f64 / max_edges
121}
122
123// ---------------------------------------------------------------------------
124// Component Analysis
125// ---------------------------------------------------------------------------
126
127/// Information about weakly connected components in the condensed graph.
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct ComponentInfo {
130    /// Number of weakly connected components.
131    pub count: usize,
132    /// Sizes (node counts) of each component, sorted descending.
133    pub sizes: Vec<usize>,
134}
135
136/// Compute weakly connected component information for the condensed DAG.
137///
138/// Uses a simple BFS/DFS approach treating edges as undirected.
139#[must_use]
140pub fn component_info(ng: &NormalizedGraph) -> ComponentInfo {
141    let node_count = ng.condensed.node_count();
142    if node_count == 0 {
143        return ComponentInfo {
144            count: 0,
145            sizes: vec![],
146        };
147    }
148
149    let mut visited = vec![false; node_count];
150    let mut sizes = Vec::new();
151
152    for start in ng.condensed.node_identifiers() {
153        if visited[start.index()] {
154            continue;
155        }
156
157        // BFS from start, treating edges as undirected
158        let mut stack = vec![start];
159        let mut component_size = 0usize;
160
161        while let Some(node) = stack.pop() {
162            if visited[node.index()] {
163                continue;
164            }
165            visited[node.index()] = true;
166            component_size += 1;
167
168            // Follow outgoing edges
169            for neighbor in ng.condensed.neighbors_directed(node, Direction::Outgoing) {
170                if !visited[neighbor.index()] {
171                    stack.push(neighbor);
172                }
173            }
174            // Follow incoming edges (treat as undirected)
175            for neighbor in ng.condensed.neighbors_directed(node, Direction::Incoming) {
176                if !visited[neighbor.index()] {
177                    stack.push(neighbor);
178                }
179            }
180        }
181
182        sizes.push(component_size);
183    }
184
185    sizes.sort_unstable_by(|a, b| b.cmp(a)); // descending
186
187    ComponentInfo {
188        count: sizes.len(),
189        sizes,
190    }
191}
192
193// ---------------------------------------------------------------------------
194// Source / Sink identification
195// ---------------------------------------------------------------------------
196
197/// Return item IDs that are sources in the condensed DAG (no incoming edges).
198///
199/// These are items (or SCC groups) with no blockers — ready to start.
200#[must_use]
201pub fn source_items(ng: &NormalizedGraph) -> Vec<String> {
202    let mut sources = Vec::new();
203    for idx in ng.condensed.node_identifiers() {
204        if ng
205            .condensed
206            .neighbors_directed(idx, Direction::Incoming)
207            .next()
208            .is_none()
209            && let Some(scc) = ng.condensed.node_weight(idx)
210        {
211            sources.extend(scc.members.iter().cloned());
212        }
213    }
214    sources.sort_unstable();
215    sources
216}
217
218/// Return item IDs that are sinks in the condensed DAG (no outgoing edges).
219///
220/// These are items (or SCC groups) that nothing else depends on.
221#[must_use]
222pub fn sink_items(ng: &NormalizedGraph) -> Vec<String> {
223    let mut sinks = Vec::new();
224    for idx in ng.condensed.node_identifiers() {
225        if ng
226            .condensed
227            .neighbors_directed(idx, Direction::Outgoing)
228            .next()
229            .is_none()
230            && let Some(scc) = ng.condensed.node_weight(idx)
231        {
232            sinks.extend(scc.members.iter().cloned());
233        }
234    }
235    sinks.sort_unstable();
236    sinks
237}
238
239// ---------------------------------------------------------------------------
240// Tests
241// ---------------------------------------------------------------------------
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246    use crate::graph::build::RawGraph;
247    use crate::graph::normalize::NormalizedGraph;
248    use petgraph::graph::DiGraph;
249    use std::collections::HashMap;
250
251    fn make_normalized(edges: &[(&str, &str)]) -> NormalizedGraph {
252        let mut graph = DiGraph::<String, ()>::new();
253        let mut node_map = HashMap::new();
254
255        let all_ids: std::collections::BTreeSet<&str> =
256            edges.iter().flat_map(|(a, b)| [*a, *b]).collect();
257
258        for id in all_ids {
259            let idx = graph.add_node(id.to_string());
260            node_map.insert(id.to_string(), idx);
261        }
262
263        for (a, b) in edges {
264            let ia = node_map[*a];
265            let ib = node_map[*b];
266            graph.add_edge(ia, ib, ());
267        }
268
269        let raw = RawGraph {
270            graph,
271            node_map,
272            content_hash: "blake3:test".to_string(),
273        };
274
275        NormalizedGraph::from_raw(raw)
276    }
277
278    fn make_normalized_nodes(nodes: &[&str], edges: &[(&str, &str)]) -> NormalizedGraph {
279        let mut graph = DiGraph::<String, ()>::new();
280        let mut node_map = HashMap::new();
281
282        for id in nodes {
283            let idx = graph.add_node((*id).to_string());
284            node_map.insert((*id).to_string(), idx);
285        }
286
287        for (a, b) in edges {
288            let ia = node_map[*a];
289            let ib = node_map[*b];
290            graph.add_edge(ia, ib, ());
291        }
292
293        let raw = RawGraph {
294            graph,
295            node_map,
296            content_hash: "blake3:test".to_string(),
297        };
298
299        NormalizedGraph::from_raw(raw)
300    }
301
302    // -----------------------------------------------------------------------
303    // Degree centrality
304    // -----------------------------------------------------------------------
305
306    #[test]
307    fn degree_centrality_empty_graph() {
308        let ng = make_normalized_nodes(&[], &[]);
309        let dc = degree_centrality(&ng);
310        assert!(dc.in_degree.is_empty());
311        assert!(dc.out_degree.is_empty());
312        assert!(dc.total_degree.is_empty());
313    }
314
315    #[test]
316    fn degree_centrality_single_node() {
317        let ng = make_normalized_nodes(&["A"], &[]);
318        let dc = degree_centrality(&ng);
319        assert_eq!(dc.in_degree["A"], 0);
320        assert_eq!(dc.out_degree["A"], 0);
321        assert_eq!(dc.total_degree["A"], 0);
322    }
323
324    #[test]
325    fn degree_centrality_linear_chain() {
326        // A → B → C
327        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
328        let dc = degree_centrality(&ng);
329
330        assert_eq!(dc.in_degree["A"], 0);
331        assert_eq!(dc.out_degree["A"], 1);
332        assert_eq!(dc.total_degree["A"], 1);
333
334        assert_eq!(dc.in_degree["B"], 1);
335        assert_eq!(dc.out_degree["B"], 1);
336        assert_eq!(dc.total_degree["B"], 2);
337
338        assert_eq!(dc.in_degree["C"], 1);
339        assert_eq!(dc.out_degree["C"], 0);
340        assert_eq!(dc.total_degree["C"], 1);
341    }
342
343    #[test]
344    fn degree_centrality_star_topology() {
345        // Hub: A→B, A→C, A→D
346        let ng = make_normalized(&[("A", "B"), ("A", "C"), ("A", "D")]);
347        let dc = degree_centrality(&ng);
348
349        assert_eq!(dc.out_degree["A"], 3);
350        assert_eq!(dc.in_degree["A"], 0);
351
352        for leaf in ["B", "C", "D"] {
353            assert_eq!(dc.in_degree[leaf], 1);
354            assert_eq!(dc.out_degree[leaf], 0);
355        }
356    }
357
358    #[test]
359    fn degree_centrality_cycle_members_share_score() {
360        // A → B → A (cycle) → C
361        let ng = make_normalized(&[("A", "B"), ("B", "A"), ("A", "C")]);
362        let dc = degree_centrality(&ng);
363
364        // A and B are in the same SCC, which has out-degree 1 (to C's SCC)
365        assert_eq!(dc.out_degree["A"], dc.out_degree["B"]);
366        assert_eq!(dc.in_degree["A"], dc.in_degree["B"]);
367    }
368
369    // -----------------------------------------------------------------------
370    // Topological sort
371    // -----------------------------------------------------------------------
372
373    #[test]
374    fn topological_order_empty() {
375        let ng = make_normalized_nodes(&[], &[]);
376        let order = topological_order(&ng);
377        assert_eq!(order, Some(vec![]));
378    }
379
380    #[test]
381    fn topological_order_single_node() {
382        let ng = make_normalized_nodes(&["A"], &[]);
383        let order = topological_order(&ng);
384        assert_eq!(order, Some(vec!["A".to_string()]));
385    }
386
387    #[test]
388    fn topological_order_chain() {
389        // A → B → C: A must come before B, B before C
390        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
391        let order = topological_order(&ng).expect("should succeed");
392
393        let pos_a = order.iter().position(|x| x == "A").expect("A present");
394        let pos_b = order.iter().position(|x| x == "B").expect("B present");
395        let pos_c = order.iter().position(|x| x == "C").expect("C present");
396
397        assert!(pos_a < pos_b, "A before B");
398        assert!(pos_b < pos_c, "B before C");
399    }
400
401    #[test]
402    fn topological_order_diamond() {
403        // A → B → D, A → C → D
404        let ng = make_normalized(&[("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")]);
405        let order = topological_order(&ng).expect("should succeed");
406
407        let pos_a = order.iter().position(|x| x == "A").expect("A present");
408        let pos_b = order.iter().position(|x| x == "B").expect("B present");
409        let pos_c = order.iter().position(|x| x == "C").expect("C present");
410        let pos_d = order.iter().position(|x| x == "D").expect("D present");
411
412        assert!(pos_a < pos_b, "A before B");
413        assert!(pos_a < pos_c, "A before C");
414        assert!(pos_b < pos_d, "B before D");
415        assert!(pos_c < pos_d, "C before D");
416    }
417
418    #[test]
419    fn topological_order_cycle_grouped() {
420        // A → B → A → C: SCC{A,B} should appear before C
421        let ng = make_normalized(&[("A", "B"), ("B", "A"), ("A", "C")]);
422        let order = topological_order(&ng).expect("should succeed");
423
424        let pos_a = order.iter().position(|x| x == "A").expect("A present");
425        let pos_b = order.iter().position(|x| x == "B").expect("B present");
426        let pos_c = order.iter().position(|x| x == "C").expect("C present");
427
428        // A and B should both come before C
429        assert!(pos_a < pos_c, "A before C");
430        assert!(pos_b < pos_c, "B before C");
431    }
432
433    #[test]
434    fn topological_order_all_items_present() {
435        let ng = make_normalized(&[("A", "B"), ("C", "D")]);
436        let order = topological_order(&ng).expect("should succeed");
437        assert_eq!(order.len(), 4);
438        assert!(order.contains(&"A".to_string()));
439        assert!(order.contains(&"B".to_string()));
440        assert!(order.contains(&"C".to_string()));
441        assert!(order.contains(&"D".to_string()));
442    }
443
444    // -----------------------------------------------------------------------
445    // Condensed density
446    // -----------------------------------------------------------------------
447
448    #[test]
449    fn condensed_density_empty() {
450        let ng = make_normalized_nodes(&[], &[]);
451        assert!((condensed_density(&ng) - 0.0).abs() < f64::EPSILON);
452    }
453
454    #[test]
455    fn condensed_density_single_node() {
456        let ng = make_normalized_nodes(&["A"], &[]);
457        assert!((condensed_density(&ng) - 0.0).abs() < f64::EPSILON);
458    }
459
460    #[test]
461    fn condensed_density_two_nodes_one_edge() {
462        // A → B: density = 1 / (2*1) = 0.5
463        let ng = make_normalized(&[("A", "B")]);
464        assert!((condensed_density(&ng) - 0.5).abs() < 1e-10);
465    }
466
467    #[test]
468    fn condensed_density_cycle_collapses() {
469        // A → B → A: condensed to 1 node, density = 0.0
470        let ng = make_normalized(&[("A", "B"), ("B", "A")]);
471        assert!((condensed_density(&ng) - 0.0).abs() < f64::EPSILON);
472    }
473
474    // -----------------------------------------------------------------------
475    // Component info
476    // -----------------------------------------------------------------------
477
478    #[test]
479    fn component_info_empty() {
480        let ng = make_normalized_nodes(&[], &[]);
481        let ci = component_info(&ng);
482        assert_eq!(ci.count, 0);
483        assert!(ci.sizes.is_empty());
484    }
485
486    #[test]
487    fn component_info_single_component() {
488        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
489        let ci = component_info(&ng);
490        assert_eq!(ci.count, 1);
491        assert_eq!(ci.sizes, vec![3]);
492    }
493
494    #[test]
495    fn component_info_disjoint() {
496        let ng = make_normalized(&[("A", "B"), ("C", "D")]);
497        let ci = component_info(&ng);
498        assert_eq!(ci.count, 2);
499        assert_eq!(ci.sizes, vec![2, 2]); // sorted descending
500    }
501
502    #[test]
503    fn component_info_mixed_sizes() {
504        // Chain A→B→C and isolated D
505        let ng = make_normalized_nodes(&["A", "B", "C", "D"], &[("A", "B"), ("B", "C")]);
506        let ci = component_info(&ng);
507        assert_eq!(ci.count, 2);
508        assert_eq!(ci.sizes, vec![3, 1]); // larger first
509    }
510
511    // -----------------------------------------------------------------------
512    // Source / Sink items
513    // -----------------------------------------------------------------------
514
515    #[test]
516    fn source_items_chain() {
517        // A → B → C: A is the only source
518        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
519        assert_eq!(source_items(&ng), vec!["A".to_string()]);
520    }
521
522    #[test]
523    fn sink_items_chain() {
524        // A → B → C: C is the only sink
525        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
526        assert_eq!(sink_items(&ng), vec!["C".to_string()]);
527    }
528
529    #[test]
530    fn source_items_diamond() {
531        // A → B → D, A → C → D
532        let ng = make_normalized(&[("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")]);
533        assert_eq!(source_items(&ng), vec!["A".to_string()]);
534    }
535
536    #[test]
537    fn sink_items_diamond() {
538        let ng = make_normalized(&[("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")]);
539        assert_eq!(sink_items(&ng), vec!["D".to_string()]);
540    }
541
542    #[test]
543    fn source_sink_isolated_nodes() {
544        let ng = make_normalized_nodes(&["A", "B", "C"], &[]);
545        let sources = source_items(&ng);
546        let sinks = sink_items(&ng);
547        // Isolated nodes are both sources and sinks
548        assert_eq!(sources.len(), 3);
549        assert_eq!(sinks.len(), 3);
550    }
551
552    #[test]
553    fn source_sink_disjoint_chains() {
554        // A → B, C → D: sources are A and C, sinks are B and D
555        let ng = make_normalized(&[("A", "B"), ("C", "D")]);
556        assert_eq!(source_items(&ng), vec!["A".to_string(), "C".to_string()]);
557        assert_eq!(sink_items(&ng), vec!["B".to_string(), "D".to_string()]);
558    }
559}