Skip to main content

bones_triage/graph/
stats.rs

1//! Basic graph statistics for the dependency graph.
2//!
3//! # Statistics Provided
4//!
5//! - **`node_count`**: Total number of items (nodes) in the original graph.
6//! - **`edge_count`**: Total number of blocking edges in the original graph.
7//! - **density**: Ratio of actual edges to maximum possible edges for a
8//!   directed graph: `density = edge_count / (node_count * (node_count - 1))`.
9//!   A fully connected directed graph has density 1.0. An empty or
10//!   single-node graph has density 0.0.
11//! - **`scc_count`**: Number of strongly connected components (= number of nodes
12//!   in the condensed graph). In an acyclic graph this equals `node_count`.
13//! - **`cycle_count`**: Number of SCCs with more than one member (dependency
14//!   cycles that need resolution).
15//! - **`weakly_connected_component_count`**: Number of weakly connected
16//!   components. A value greater than 1 means the dependency graph is split
17//!   into disjoint subgraphs with no edges between them.
18//! - **`isolated_node_count`**: Nodes with no edges at all (neither in-edges
19//!   nor out-edges). These items have no dependencies and no dependents.
20//! - **`max_in_degree`**: Highest in-degree (most blocked-by dependencies)
21//!   in the original graph.
22//! - **`max_out_degree`**: Highest out-degree (most items blocked) in the
23//!   original graph.
24
25use petgraph::{Direction, algo::connected_components, visit::IntoNodeIdentifiers};
26
27use crate::graph::normalize::NormalizedGraph;
28
29// ---------------------------------------------------------------------------
30// GraphStats
31// ---------------------------------------------------------------------------
32
33/// Summary statistics for a dependency graph.
34///
35/// Computed from a [`NormalizedGraph`] by [`GraphStats::from_normalized`].
36/// All counts refer to the original (non-condensed) graph unless otherwise
37/// noted.
38#[derive(Debug, Clone, PartialEq)]
39pub struct GraphStats {
40    /// Number of items (nodes) in the graph.
41    pub node_count: usize,
42    /// Number of blocking edges in the original graph.
43    pub edge_count: usize,
44    /// Graph density: `edge_count / (node_count * (node_count - 1))`.
45    /// Ranges from 0.0 (no edges) to 1.0 (all possible edges present).
46    /// Zero for graphs with 0 or 1 node.
47    pub density: f64,
48    /// Number of strongly connected components.
49    ///
50    /// Equals `node_count` in a fully acyclic graph.
51    pub scc_count: usize,
52    /// Number of SCCs with more than one member (dependency cycles).
53    pub cycle_count: usize,
54    /// Number of weakly connected components (disjoint subgraphs).
55    pub weakly_connected_component_count: usize,
56    /// Number of nodes with no in-edges and no out-edges.
57    pub isolated_node_count: usize,
58    /// Maximum in-degree (most incoming blocking edges on one node).
59    pub max_in_degree: usize,
60    /// Maximum out-degree (most outgoing blocking edges from one node).
61    pub max_out_degree: usize,
62    /// Number of nodes in the condensed (reduced) graph.
63    ///
64    /// Equals `scc_count` — provided for convenience.
65    pub reduced_node_count: usize,
66    /// Number of edges in the transitively-reduced condensed graph.
67    pub reduced_edge_count: usize,
68}
69
70impl GraphStats {
71    /// Compute statistics from a [`NormalizedGraph`].
72    #[must_use]
73    pub fn from_normalized(ng: &NormalizedGraph) -> Self {
74        let node_count = ng.raw.node_count();
75        let edge_count = ng.raw.edge_count();
76
77        let density = compute_density(node_count, edge_count);
78
79        let scc_count = ng.scc_count();
80        let cycle_count = ng.cycle_count();
81
82        // Weakly connected components: treat the directed graph as undirected
83        // and count connected components.
84        let wcc = connected_components(&ng.raw.graph);
85
86        // Isolated nodes: degree 0 (no in or out edges).
87        let isolated_node_count = ng
88            .raw
89            .graph
90            .node_identifiers()
91            .filter(|&idx| {
92                ng.raw
93                    .graph
94                    .neighbors_directed(idx, Direction::Incoming)
95                    .next()
96                    .is_none()
97                    && ng
98                        .raw
99                        .graph
100                        .neighbors_directed(idx, Direction::Outgoing)
101                        .next()
102                        .is_none()
103            })
104            .count();
105
106        // Max in/out degree over all nodes in the original graph.
107        let max_in_degree = ng
108            .raw
109            .graph
110            .node_identifiers()
111            .map(|idx| {
112                ng.raw
113                    .graph
114                    .neighbors_directed(idx, Direction::Incoming)
115                    .count()
116            })
117            .max()
118            .unwrap_or(0);
119
120        let max_out_degree = ng
121            .raw
122            .graph
123            .node_identifiers()
124            .map(|idx| {
125                ng.raw
126                    .graph
127                    .neighbors_directed(idx, Direction::Outgoing)
128                    .count()
129            })
130            .max()
131            .unwrap_or(0);
132
133        let reduced_node_count = ng.reduced.node_count();
134        let reduced_edge_count = ng.reduced.edge_count();
135
136        Self {
137            node_count,
138            edge_count,
139            density,
140            scc_count,
141            cycle_count,
142            weakly_connected_component_count: wcc,
143            isolated_node_count,
144            max_in_degree,
145            max_out_degree,
146            reduced_node_count,
147            reduced_edge_count,
148        }
149    }
150
151    /// Return `true` if the graph has no blocking edges.
152    #[must_use]
153    pub const fn is_flat(&self) -> bool {
154        self.edge_count == 0
155    }
156
157    /// Return `true` if the graph contains at least one dependency cycle.
158    #[must_use]
159    pub const fn has_cycles(&self) -> bool {
160        self.cycle_count > 0
161    }
162
163    /// Return the reduction ratio: how many edges were removed by transitive
164    /// reduction vs the raw edge count.
165    ///
166    /// Returns 0.0 if there are no edges in the original graph.
167    #[must_use]
168    pub fn reduction_ratio(&self) -> f64 {
169        compute_ratio(self.edge_count, self.reduced_edge_count)
170    }
171}
172
173// ---------------------------------------------------------------------------
174// Internal helpers (cast precision suppressed at function scope)
175// ---------------------------------------------------------------------------
176
177#[allow(clippy::cast_precision_loss)]
178fn compute_density(node_count: usize, edge_count: usize) -> f64 {
179    if node_count < 2 {
180        return 0.0_f64;
181    }
182    let max_edges = (node_count * (node_count - 1)) as f64;
183    edge_count as f64 / max_edges
184}
185
186#[allow(clippy::cast_precision_loss)]
187fn compute_ratio(raw: usize, reduced: usize) -> f64 {
188    if raw == 0 {
189        return 0.0_f64;
190    }
191    let removed = (raw as f64 - reduced as f64).max(0.0_f64);
192    removed / raw as f64
193}
194
195// ---------------------------------------------------------------------------
196// Tests
197// ---------------------------------------------------------------------------
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202    use crate::graph::build::RawGraph;
203    use crate::graph::normalize::NormalizedGraph;
204    use petgraph::graph::DiGraph;
205    use std::collections::HashMap;
206
207    fn make_normalized(edges: &[(&str, &str)]) -> NormalizedGraph {
208        let mut graph = DiGraph::<String, ()>::new();
209        let mut node_map = HashMap::new();
210
211        let all_ids: std::collections::BTreeSet<&str> =
212            edges.iter().flat_map(|(a, b)| [*a, *b]).collect();
213
214        for id in all_ids {
215            let idx = graph.add_node(id.to_string());
216            node_map.insert(id.to_string(), idx);
217        }
218
219        for (a, b) in edges {
220            let ia = node_map[*a];
221            let ib = node_map[*b];
222            graph.add_edge(ia, ib, ());
223        }
224
225        let raw = RawGraph {
226            graph,
227            node_map,
228            content_hash: "blake3:test".to_string(),
229        };
230
231        NormalizedGraph::from_raw(raw)
232    }
233
234    fn make_normalized_nodes(nodes: &[&str], edges: &[(&str, &str)]) -> NormalizedGraph {
235        let mut graph = DiGraph::<String, ()>::new();
236        let mut node_map = HashMap::new();
237
238        for id in nodes {
239            let idx = graph.add_node((*id).to_string());
240            node_map.insert((*id).to_string(), idx);
241        }
242
243        for (a, b) in edges {
244            let ia = node_map[*a];
245            let ib = node_map[*b];
246            graph.add_edge(ia, ib, ());
247        }
248
249        let raw = RawGraph {
250            graph,
251            node_map,
252            content_hash: "blake3:test".to_string(),
253        };
254
255        NormalizedGraph::from_raw(raw)
256    }
257
258    #[test]
259    fn empty_graph_stats() {
260        let ng = make_normalized_nodes(&[], &[]);
261        let stats = GraphStats::from_normalized(&ng);
262
263        assert_eq!(stats.node_count, 0);
264        assert_eq!(stats.edge_count, 0);
265        assert!((stats.density - 0.0).abs() < f64::EPSILON);
266        assert_eq!(stats.scc_count, 0);
267        assert_eq!(stats.cycle_count, 0);
268        assert_eq!(stats.weakly_connected_component_count, 0);
269        assert_eq!(stats.isolated_node_count, 0);
270        assert_eq!(stats.max_in_degree, 0);
271        assert_eq!(stats.max_out_degree, 0);
272        assert!(stats.is_flat());
273        assert!(!stats.has_cycles());
274    }
275
276    #[test]
277    fn single_node_no_edges() {
278        let ng = make_normalized_nodes(&["bn-001"], &[]);
279        let stats = GraphStats::from_normalized(&ng);
280
281        assert_eq!(stats.node_count, 1);
282        assert_eq!(stats.edge_count, 0);
283        assert!((stats.density - 0.0).abs() < f64::EPSILON);
284        assert_eq!(stats.isolated_node_count, 1);
285        assert_eq!(stats.weakly_connected_component_count, 1);
286    }
287
288    #[test]
289    fn linear_chain_stats() {
290        // A → B → C
291        let ng = make_normalized(&[("A", "B"), ("B", "C")]);
292        let stats = GraphStats::from_normalized(&ng);
293
294        assert_eq!(stats.node_count, 3);
295        assert_eq!(stats.edge_count, 2);
296        assert_eq!(stats.scc_count, 3);
297        assert_eq!(stats.cycle_count, 0);
298        assert!(!stats.has_cycles());
299        assert!(!stats.is_flat());
300        assert_eq!(stats.max_in_degree, 1);
301        assert_eq!(stats.max_out_degree, 1);
302    }
303
304    #[test]
305    fn cycle_detection_in_stats() {
306        // A → B → A (cycle)
307        let ng = make_normalized(&[("A", "B"), ("B", "A")]);
308        let stats = GraphStats::from_normalized(&ng);
309
310        assert_eq!(stats.node_count, 2);
311        assert_eq!(stats.edge_count, 2);
312        assert_eq!(stats.scc_count, 1, "one condensed SCC");
313        assert_eq!(stats.cycle_count, 1);
314        assert!(stats.has_cycles());
315    }
316
317    #[test]
318    fn density_two_node_one_edge() {
319        // A → B: density = 1 / (2 * 1) = 0.5
320        let ng = make_normalized(&[("A", "B")]);
321        let stats = GraphStats::from_normalized(&ng);
322
323        assert!((stats.density - 0.5).abs() < 1e-10, "density = 0.5");
324    }
325
326    #[test]
327    fn density_complete_directed_graph() {
328        // A → B, B → A: density = 2 / (2 * 1) = 1.0
329        let ng = make_normalized(&[("A", "B"), ("B", "A")]);
330        let stats = GraphStats::from_normalized(&ng);
331
332        assert!((stats.density - 1.0).abs() < 1e-10, "density = 1.0");
333    }
334
335    #[test]
336    fn disjoint_components() {
337        // Two disconnected chains: A→B and C→D
338        let ng = make_normalized(&[("A", "B"), ("C", "D")]);
339        let stats = GraphStats::from_normalized(&ng);
340
341        assert_eq!(stats.weakly_connected_component_count, 2);
342        assert_eq!(stats.isolated_node_count, 0);
343    }
344
345    #[test]
346    fn isolated_nodes_counted() {
347        // Three nodes, no edges
348        let ng = make_normalized_nodes(&["A", "B", "C"], &[]);
349        let stats = GraphStats::from_normalized(&ng);
350
351        assert_eq!(stats.isolated_node_count, 3);
352        assert_eq!(stats.weakly_connected_component_count, 3);
353    }
354
355    #[test]
356    fn max_degree_correct() {
357        // Hub: A→C, B→C, D→C, C→E
358        let ng = make_normalized(&[("A", "C"), ("B", "C"), ("D", "C"), ("C", "E")]);
359        let stats = GraphStats::from_normalized(&ng);
360
361        assert_eq!(stats.max_in_degree, 3, "C has 3 in-edges");
362        assert_eq!(stats.max_out_degree, 1, "each node has at most 1 out-edge");
363    }
364
365    #[test]
366    fn transitive_reduction_stats() {
367        // A→B→C and A→C (redundant)
368        let ng = make_normalized(&[("A", "B"), ("B", "C"), ("A", "C")]);
369        let stats = GraphStats::from_normalized(&ng);
370
371        assert_eq!(stats.edge_count, 3, "original has 3 edges");
372        assert_eq!(stats.reduced_edge_count, 2, "A→C removed in reduction");
373    }
374
375    #[test]
376    fn is_flat_no_edges() {
377        let ng = make_normalized_nodes(&["A", "B"], &[]);
378        let stats = GraphStats::from_normalized(&ng);
379        assert!(stats.is_flat());
380    }
381
382    #[test]
383    fn is_flat_with_edges() {
384        let ng = make_normalized(&[("A", "B")]);
385        let stats = GraphStats::from_normalized(&ng);
386        assert!(!stats.is_flat());
387    }
388}