Skip to main content

oximedia_graph/
graph_stats.rs

1//! Graph statistics and complexity analysis for the filter graph pipeline.
2//!
3//! Provides structural metrics for a processing graph: node count, edge count,
4//! average degree, maximum depth, and complexity classification.
5//!
6//! Additionally provides [`LatencyHistogram`] and [`NodeLatencyStats`] for
7//! per-node processing-time tracking using 32 logarithmic (power-of-two)
8//! nanosecond buckets.
9
10#![allow(dead_code)]
11
12use std::collections::{HashMap, HashSet, VecDeque};
13
14/// A qualitative classification of graph complexity.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum GraphComplexity {
17    /// Very few nodes and edges; trivial to schedule.
18    Trivial,
19    /// Moderate number of nodes; standard linear pipeline.
20    Simple,
21    /// Many nodes with branching; requires careful scheduling.
22    Moderate,
23    /// Dense graph with many cross-edges; potentially expensive.
24    Complex,
25}
26
27impl GraphComplexity {
28    /// Returns a human-readable description of the complexity level.
29    pub fn description(&self) -> &'static str {
30        match self {
31            GraphComplexity::Trivial => "trivial (<=2 nodes)",
32            GraphComplexity::Simple => "simple (3-8 nodes, linear)",
33            GraphComplexity::Moderate => "moderate (9-24 nodes, branching)",
34            GraphComplexity::Complex => "complex (>=25 nodes or dense edges)",
35        }
36    }
37
38    /// Returns `true` if the graph may require non-trivial scheduling.
39    pub fn requires_advanced_scheduling(&self) -> bool {
40        matches!(self, GraphComplexity::Moderate | GraphComplexity::Complex)
41    }
42}
43
44/// Structural statistics about a directed graph.
45#[derive(Debug, Clone)]
46pub struct GraphStats {
47    /// Number of nodes in the graph.
48    node_count: usize,
49    /// Number of directed edges.
50    edge_count: usize,
51    /// Maximum depth (longest path from any source node).
52    max_depth: usize,
53    /// Sum of all node in-degrees (equals `edge_count` for simple graphs).
54    total_degree: usize,
55}
56
57impl GraphStats {
58    /// Creates a `GraphStats` with the given raw values.
59    pub fn new(node_count: usize, edge_count: usize, max_depth: usize) -> Self {
60        Self {
61            node_count,
62            edge_count,
63            max_depth,
64            total_degree: edge_count,
65        }
66    }
67
68    /// Returns the number of nodes.
69    pub fn node_count(&self) -> usize {
70        self.node_count
71    }
72
73    /// Returns the number of directed edges.
74    pub fn edge_count(&self) -> usize {
75        self.edge_count
76    }
77
78    /// Returns the average out-degree across all nodes.
79    #[allow(clippy::cast_precision_loss)]
80    pub fn avg_degree(&self) -> f64 {
81        if self.node_count == 0 {
82            return 0.0;
83        }
84        self.edge_count as f64 / self.node_count as f64
85    }
86
87    /// Returns the maximum depth (length of the longest source-to-sink path).
88    pub fn depth(&self) -> usize {
89        self.max_depth
90    }
91
92    /// Classifies the graph by complexity.
93    pub fn complexity(&self) -> GraphComplexity {
94        if self.node_count <= 2 {
95            GraphComplexity::Trivial
96        } else if self.node_count <= 8 && self.avg_degree() <= 1.5 {
97            GraphComplexity::Simple
98        } else if self.node_count < 25 {
99            GraphComplexity::Moderate
100        } else {
101            GraphComplexity::Complex
102        }
103    }
104
105    /// Returns `true` if the graph has no edges.
106    pub fn is_disconnected(&self) -> bool {
107        self.edge_count == 0
108    }
109}
110
111// ─────────────────────────────────────────────────────────────────────────────
112// LatencyHistogram
113// ─────────────────────────────────────────────────────────────────────────────
114
115/// Per-node latency histogram with 32 power-of-two nanosecond buckets.
116///
117/// Bucket `k` covers the range `[2^(k-1) ns, 2^k ns)` for `k >= 1`.
118/// Bucket `0` covers `0 ns` exactly.
119/// Bucket `31` is a catch-all for values >= 2^30 ns (~1.07 s).
120///
121/// All operations are O(1) for record and O(32) for percentile queries.
122#[derive(Debug, Clone)]
123pub struct LatencyHistogram {
124    /// `buckets[k]` counts samples whose latency is in `[2^(k-1), 2^k)` ns.
125    buckets: [u64; 32],
126    /// Total number of recorded samples.
127    total_samples: u64,
128    /// Sum of all recorded latency values in nanoseconds.
129    sum_ns: u64,
130}
131
132impl Default for LatencyHistogram {
133    fn default() -> Self {
134        Self::new()
135    }
136}
137
138impl LatencyHistogram {
139    /// Creates a new, empty histogram.
140    pub fn new() -> Self {
141        Self {
142            buckets: [0u64; 32],
143            total_samples: 0,
144            sum_ns: 0,
145        }
146    }
147
148    /// Records a latency sample in nanoseconds.
149    pub fn record(&mut self, latency_ns: u64) {
150        self.total_samples += 1;
151        self.sum_ns = self.sum_ns.saturating_add(latency_ns);
152        let bucket = Self::bucket_for(latency_ns);
153        self.buckets[bucket] += 1;
154    }
155
156    /// Returns the mean latency in nanoseconds, or `0.0` if no samples recorded.
157    pub fn mean_ns(&self) -> f64 {
158        if self.total_samples == 0 {
159            return 0.0;
160        }
161        self.sum_ns as f64 / self.total_samples as f64
162    }
163
164    /// Returns the approximate latency at the `pct`-th percentile (0.0-100.0).
165    ///
166    /// Returns the lower bound of the bucket containing the `pct`-th sample.
167    pub fn percentile_ns(&self, pct: f64) -> u64 {
168        if self.total_samples == 0 {
169            return 0;
170        }
171        let target = ((pct / 100.0) * self.total_samples as f64).ceil() as u64;
172        let mut cumulative: u64 = 0;
173        for (k, &count) in self.buckets.iter().enumerate() {
174            cumulative += count;
175            if cumulative >= target {
176                return Self::bucket_lower_bound(k);
177            }
178        }
179        Self::bucket_lower_bound(31)
180    }
181
182    /// Returns the total number of recorded samples.
183    pub fn total_samples(&self) -> u64 {
184        self.total_samples
185    }
186
187    // ── helpers ───────────────────────────────────────────────────────────────
188
189    /// Maps a latency value to a bucket index 0-31.
190    fn bucket_for(latency_ns: u64) -> usize {
191        if latency_ns == 0 {
192            return 0;
193        }
194        let log2 = (u64::BITS - latency_ns.leading_zeros()) as usize;
195        log2.min(31)
196    }
197
198    /// Returns the lower bound (in ns) of bucket `k`.
199    fn bucket_lower_bound(k: usize) -> u64 {
200        if k == 0 {
201            0
202        } else {
203            1u64 << (k - 1)
204        }
205    }
206}
207
208// ─────────────────────────────────────────────────────────────────────────────
209// NodeLatencyStats
210// ─────────────────────────────────────────────────────────────────────────────
211
212/// Per-node latency statistics keyed by node identifier string.
213///
214/// Wraps a `HashMap` of [`LatencyHistogram`]s so callers can record and
215/// query the processing latency of individual graph nodes.
216#[derive(Debug, Clone, Default)]
217pub struct NodeLatencyStats {
218    /// Maps node identifier to latency histogram for that node.
219    pub histograms: HashMap<String, LatencyHistogram>,
220}
221
222impl NodeLatencyStats {
223    /// Creates an empty `NodeLatencyStats`.
224    pub fn new() -> Self {
225        Self::default()
226    }
227
228    /// Records a latency sample for the node identified by `node_id`.
229    ///
230    /// The histogram is created on the first call for each unique `node_id`.
231    pub fn record_node(&mut self, node_id: &str, latency_ns: u64) {
232        self.histograms
233            .entry(node_id.to_string())
234            .or_default()
235            .record(latency_ns);
236    }
237
238    /// Returns `(mean_ns, p50_ns, p99_ns)` for `node_id`, or `None` if no
239    /// samples have been recorded for that node.
240    pub fn summary(&self, node_id: &str) -> Option<(f64, u64, u64)> {
241        let hist = self.histograms.get(node_id)?;
242        if hist.total_samples() == 0 {
243            return None;
244        }
245        Some((
246            hist.mean_ns(),
247            hist.percentile_ns(50.0),
248            hist.percentile_ns(99.0),
249        ))
250    }
251}
252
253// ─────────────────────────────────────────────────────────────────────────────
254
255/// Analyzes a directed acyclic graph represented as an adjacency list.
256///
257/// Nodes are represented by `u64` IDs; edges as `(from, to)` pairs.
258#[derive(Debug, Clone, Default)]
259pub struct GraphAnalyzer {
260    /// Adjacency list: node -> list of successor nodes.
261    adjacency: HashMap<u64, Vec<u64>>,
262}
263
264impl GraphAnalyzer {
265    /// Creates an empty `GraphAnalyzer`.
266    pub fn new() -> Self {
267        Self {
268            adjacency: HashMap::new(),
269        }
270    }
271
272    /// Adds a node with no edges (idempotent).
273    pub fn add_node(&mut self, id: u64) {
274        self.adjacency.entry(id).or_default();
275    }
276
277    /// Adds a directed edge from `from` to `to`, implicitly adding both nodes.
278    pub fn add_edge(&mut self, from: u64, to: u64) {
279        self.adjacency.entry(from).or_default().push(to);
280        self.adjacency.entry(to).or_default();
281    }
282
283    /// Computes the maximum depth via BFS from all source nodes (in-degree 0).
284    fn max_depth(&self) -> usize {
285        let mut in_degree: HashMap<u64, usize> = self.adjacency.keys().map(|&k| (k, 0)).collect();
286        for successors in self.adjacency.values() {
287            for &succ in successors {
288                *in_degree.entry(succ).or_insert(0) += 1;
289            }
290        }
291
292        let mut depth_map: HashMap<u64, usize> = HashMap::new();
293        let mut queue: VecDeque<u64> = in_degree
294            .iter()
295            .filter_map(|(&n, &d)| if d == 0 { Some(n) } else { None })
296            .collect();
297
298        for &n in &queue {
299            depth_map.insert(n, 0);
300        }
301
302        let mut max_d = 0;
303        while let Some(node) = queue.pop_front() {
304            let cur_depth = *depth_map.get(&node).unwrap_or(&0);
305            if let Some(succs) = self.adjacency.get(&node) {
306                for &succ in succs {
307                    let new_depth = cur_depth + 1;
308                    let entry = depth_map.entry(succ).or_insert(0);
309                    if new_depth > *entry {
310                        *entry = new_depth;
311                        if new_depth > max_d {
312                            max_d = new_depth;
313                        }
314                        queue.push_back(succ);
315                    }
316                }
317            }
318        }
319        max_d
320    }
321
322    /// Returns the set of all unique node IDs.
323    pub fn nodes(&self) -> HashSet<u64> {
324        let mut nodes: HashSet<u64> = self.adjacency.keys().copied().collect();
325        for succs in self.adjacency.values() {
326            for &s in succs {
327                nodes.insert(s);
328            }
329        }
330        nodes
331    }
332
333    /// Returns the total number of directed edges.
334    pub fn edge_count(&self) -> usize {
335        self.adjacency.values().map(|v| v.len()).sum()
336    }
337
338    /// Computes and returns a `GraphStats` snapshot.
339    pub fn analyze(&self) -> GraphStats {
340        let node_count = self.nodes().len();
341        let edge_count = self.edge_count();
342        let max_depth = self.max_depth();
343        GraphStats::new(node_count, edge_count, max_depth)
344    }
345}
346
347#[cfg(test)]
348mod tests {
349    use super::*;
350
351    #[test]
352    fn test_complexity_description_trivial() {
353        assert!(GraphComplexity::Trivial.description().contains("trivial"));
354    }
355
356    #[test]
357    fn test_complexity_requires_advanced_trivial() {
358        assert!(!GraphComplexity::Trivial.requires_advanced_scheduling());
359    }
360
361    #[test]
362    fn test_complexity_requires_advanced_complex() {
363        assert!(GraphComplexity::Complex.requires_advanced_scheduling());
364    }
365
366    #[test]
367    fn test_graph_stats_avg_degree_zero_nodes() {
368        let s = GraphStats::new(0, 0, 0);
369        assert_eq!(s.avg_degree(), 0.0);
370    }
371
372    #[test]
373    fn test_graph_stats_avg_degree() {
374        let s = GraphStats::new(4, 4, 3);
375        assert!((s.avg_degree() - 1.0).abs() < 1e-9);
376    }
377
378    #[test]
379    fn test_graph_stats_complexity_trivial() {
380        let s = GraphStats::new(2, 1, 1);
381        assert_eq!(s.complexity(), GraphComplexity::Trivial);
382    }
383
384    #[test]
385    fn test_graph_stats_complexity_simple() {
386        let s = GraphStats::new(5, 4, 4);
387        assert_eq!(s.complexity(), GraphComplexity::Simple);
388    }
389
390    #[test]
391    fn test_graph_stats_complexity_moderate() {
392        let s = GraphStats::new(10, 12, 5);
393        assert_eq!(s.complexity(), GraphComplexity::Moderate);
394    }
395
396    #[test]
397    fn test_graph_stats_complexity_complex() {
398        let s = GraphStats::new(30, 60, 10);
399        assert_eq!(s.complexity(), GraphComplexity::Complex);
400    }
401
402    #[test]
403    fn test_graph_stats_is_disconnected() {
404        let s = GraphStats::new(3, 0, 0);
405        assert!(s.is_disconnected());
406    }
407
408    #[test]
409    fn test_analyzer_empty_graph() {
410        let a = GraphAnalyzer::new();
411        let stats = a.analyze();
412        assert_eq!(stats.node_count(), 0);
413        assert_eq!(stats.edge_count(), 0);
414        assert_eq!(stats.depth(), 0);
415    }
416
417    #[test]
418    fn test_analyzer_linear_chain() {
419        let mut a = GraphAnalyzer::new();
420        a.add_edge(0, 1);
421        a.add_edge(1, 2);
422        a.add_edge(2, 3);
423        let stats = a.analyze();
424        assert_eq!(stats.node_count(), 4);
425        assert_eq!(stats.edge_count(), 3);
426        assert_eq!(stats.depth(), 3);
427    }
428
429    #[test]
430    fn test_analyzer_branching_graph() {
431        let mut a = GraphAnalyzer::new();
432        a.add_edge(0, 1);
433        a.add_edge(0, 2);
434        a.add_edge(1, 3);
435        a.add_edge(2, 3);
436        let stats = a.analyze();
437        assert_eq!(stats.node_count(), 4);
438        assert_eq!(stats.edge_count(), 4);
439        assert_eq!(stats.depth(), 2);
440    }
441
442    #[test]
443    fn test_analyzer_isolated_nodes() {
444        let mut a = GraphAnalyzer::new();
445        a.add_node(0);
446        a.add_node(1);
447        let stats = a.analyze();
448        assert_eq!(stats.node_count(), 2);
449        assert_eq!(stats.edge_count(), 0);
450        assert_eq!(stats.depth(), 0);
451    }
452
453    #[test]
454    fn test_complexity_moderate_requires_advanced() {
455        assert!(GraphComplexity::Moderate.requires_advanced_scheduling());
456    }
457
458    // ── LatencyHistogram ──────────────────────────────────────────────────────
459
460    #[test]
461    fn test_histogram_record() {
462        let mut h = LatencyHistogram::new();
463        for i in 1u64..=100 {
464            h.record(i * 1_000);
465        }
466        assert_eq!(h.total_samples(), 100);
467        let mean = h.mean_ns();
468        assert!(
469            mean > 40_000.0 && mean < 60_000.0,
470            "mean {mean:.0} ns outside expected range"
471        );
472    }
473
474    #[test]
475    fn test_histogram_percentile() {
476        let mut h = LatencyHistogram::new();
477        for i in 0u64..100 {
478            h.record(i * 1_000);
479        }
480        let p50 = h.percentile_ns(50.0);
481        let p99 = h.percentile_ns(99.0);
482        assert!(
483            p50 >= 16_384 && p50 <= 65_536,
484            "p50={p50} outside expected bucket range"
485        );
486        assert!(p99 >= 32_768, "p99={p99} should be in high bucket");
487    }
488
489    #[test]
490    fn test_node_latency_stats() {
491        let mut stats = NodeLatencyStats::new();
492        for i in 0u64..50 {
493            stats.record_node("decoder", (i + 1) * 1_000);
494            stats.record_node("scaler", (i + 1) * 500);
495            stats.record_node("encoder", (i + 1) * 10_000);
496        }
497        let dec = stats
498            .summary("decoder")
499            .expect("decoder summary must be present");
500        let scl = stats
501            .summary("scaler")
502            .expect("scaler summary must be present");
503        let enc = stats
504            .summary("encoder")
505            .expect("encoder summary must be present");
506        assert!(dec.0 > 0.0, "decoder mean must be positive");
507        assert!(scl.0 < dec.0, "scaler should be faster than decoder");
508        assert!(enc.0 > dec.0, "encoder should be slower than decoder");
509        let (_, p50, p99) = dec;
510        assert!(p99 >= p50, "p99 must be >= p50");
511        assert!(stats.summary("nonexistent").is_none());
512    }
513}