Skip to main content

cbtop/latency_distribution/
histogram.rs

1//! Latency histogram with bucket statistics and entropy.
2
/// A single bucket of a latency histogram.
///
/// A bucket covers the half-open interval `[lower, upper)` and records how
/// many samples fell inside it.
#[derive(Debug, Clone)]
pub struct HistogramBucket {
    /// Lower bound of the bucket (inclusive).
    pub lower: f64,
    /// Upper bound of the bucket (exclusive).
    pub upper: f64,
    /// Number of samples that fell into this bucket.
    pub count: usize,
    /// Share of all samples that fell into this bucket, as a percentage
    /// (`count / total * 100`).
    pub percentage: f64,
}
15
16/// Latency histogram with statistical properties
17#[derive(Debug, Clone)]
18pub struct LatencyHistogram {
19    /// Histogram buckets
20    pub buckets: Vec<HistogramBucket>,
21    /// Total number of samples
22    pub total_samples: usize,
23    /// Shannon entropy (0-1 normalized)
24    pub entropy: f64,
25    /// Index of the mode bucket (most frequent)
26    pub mode_bucket: usize,
27    /// Number of buckets
28    pub bucket_count: usize,
29}
30
31impl LatencyHistogram {
32    /// Build histogram from samples with specified bucket count
33    pub fn build(samples: &[f64], bucket_count: usize) -> Self {
34        if samples.is_empty() || bucket_count == 0 {
35            return Self {
36                buckets: Vec::new(),
37                total_samples: 0,
38                entropy: 0.0,
39                mode_bucket: 0,
40                bucket_count: 0,
41            };
42        }
43
44        let min = samples.iter().cloned().fold(f64::INFINITY, f64::min);
45        let max = samples.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
46
47        let range = max - min;
48        let bucket_width = if range > 0.0 {
49            range / bucket_count as f64
50        } else {
51            1.0
52        };
53
54        let mut buckets: Vec<HistogramBucket> = (0..bucket_count)
55            .map(|i| {
56                let lower = min + i as f64 * bucket_width;
57                let upper = if i == bucket_count - 1 {
58                    max + f64::EPSILON
59                } else {
60                    min + (i + 1) as f64 * bucket_width
61                };
62                HistogramBucket {
63                    lower,
64                    upper,
65                    count: 0,
66                    percentage: 0.0,
67                }
68            })
69            .collect();
70
71        for &sample in samples {
72            let bucket_idx = if range > 0.0 {
73                ((sample - min) / bucket_width).floor() as usize
74            } else {
75                0
76            };
77            let idx = bucket_idx.min(bucket_count - 1);
78            buckets[idx].count += 1;
79        }
80
81        let total = samples.len();
82        for bucket in &mut buckets {
83            bucket.percentage = bucket.count as f64 / total as f64 * 100.0;
84        }
85
86        let mode_bucket = buckets
87            .iter()
88            .enumerate()
89            .max_by_key(|(_, b)| b.count)
90            .map(|(i, _)| i)
91            .unwrap_or(0);
92
93        let entropy = shannon_entropy(&buckets, total);
94
95        Self {
96            buckets,
97            total_samples: total,
98            entropy,
99            mode_bucket,
100            bucket_count,
101        }
102    }
103
104    /// Get the mode (most frequent) bucket
105    pub fn mode(&self) -> Option<&HistogramBucket> {
106        self.buckets.get(self.mode_bucket)
107    }
108
109    /// Verify bucket counts sum to total
110    pub fn verify_counts(&self) -> bool {
111        let sum: usize = self.buckets.iter().map(|b| b.count).sum();
112        sum == self.total_samples
113    }
114}
115
116/// Calculate Shannon entropy of histogram (normalized 0-1)
117fn shannon_entropy(buckets: &[HistogramBucket], total: usize) -> f64 {
118    if total == 0 || buckets.is_empty() {
119        return 0.0;
120    }
121
122    let mut entropy = 0.0;
123    for bucket in buckets {
124        if bucket.count > 0 {
125            let p = bucket.count as f64 / total as f64;
126            entropy -= p * p.ln();
127        }
128    }
129
130    let max_entropy = (buckets.len() as f64).ln();
131    if max_entropy > 0.0 {
132        entropy / max_entropy
133    } else {
134        0.0
135    }
136}