Skip to main content

dynomite/stats/
histogram.rs

1//! Estimated histogram with logarithmic bucket layout.
2//!
3//! The bucket layout follows Cassandra's `EstimatedHistogram`: 94 buckets
4//! whose offsets grow geometrically by a factor of 1.2, starting at 1.
5//! Recording a value finds the bucket whose offset is the largest value
6//! still less than or equal to the input.
7//!
8//! # Examples
9//!
10//! ```
11//! use dynomite::stats::Histogram;
12//!
13//! let mut h = Histogram::new();
14//! for v in 0..=100 {
15//!     h.record(v);
16//! }
17//! assert_eq!(h.count(), 101);
18//! assert!(h.percentile(0.5) <= h.percentile(0.95));
19//! ```
20
21use std::sync::OnceLock;
22
23use crate::stats::numeric::{floor_p_times_u64, u64_to_f64};
24
25/// Number of buckets in the estimated histogram.
26///
27/// # Examples
28///
29/// ```
30/// use dynomite::stats::BUCKET_COUNT;
31/// assert_eq!(BUCKET_COUNT, 94);
32/// ```
33pub const BUCKET_COUNT: usize = 94;
34
35/// Returns the cached bucket offset table.
36///
37/// The first bucket starts at `1`. Each subsequent offset is
38/// `floor(prev * 6 / 5)`, bumped by one if the multiplication did not
39/// advance. This is the integer-equivalent of the original `* 1.2`
40/// factor used by Cassandra's estimated histogram.
41fn bucket_offsets() -> &'static [u64; BUCKET_COUNT] {
42    static OFFSETS: OnceLock<[u64; BUCKET_COUNT]> = OnceLock::new();
43    OFFSETS.get_or_init(|| {
44        let mut offsets = [0u64; BUCKET_COUNT];
45        let mut last: u64 = 1;
46        offsets[0] = 1;
47        for slot in offsets.iter_mut().skip(1) {
48            let mut next = last.saturating_mul(6) / 5;
49            if next == last {
50                next += 1;
51            }
52            *slot = next;
53            last = next;
54        }
55        offsets
56    })
57}
58
59/// A fixed-bucket histogram for tracking latencies and payload sizes.
60///
61/// All operations are O(`BUCKET_COUNT`) and allocation free.
62#[derive(Clone, Debug)]
63pub struct Histogram {
64    buckets: [u64; BUCKET_COUNT],
65    val_max: u64,
66}
67
68impl Histogram {
69    /// Construct a fresh histogram with all bucket counts set to zero.
70    ///
71    /// # Examples
72    ///
73    /// ```
74    /// use dynomite::stats::Histogram;
75    /// let h = Histogram::new();
76    /// assert_eq!(h.count(), 0);
77    /// ```
78    pub fn new() -> Self {
79        Self {
80            buckets: [0; BUCKET_COUNT],
81            val_max: 0,
82        }
83    }
84
85    /// Record a single observation, placing it in the appropriate bucket.
86    ///
87    /// Values larger than the largest bucket offset land in the final
88    /// bucket and signal histogram overflow. Once the overflow bucket is
89    /// non-empty, [`Histogram::percentile`], [`Histogram::mean`], and
90    /// [`Histogram::max`] all return [`Histogram::OVERFLOW_SENTINEL`].
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// use dynomite::stats::Histogram;
96    /// let mut h = Histogram::new();
97    /// h.record(42);
98    /// assert_eq!(h.count(), 1);
99    /// assert_eq!(h.max(), 42);
100    /// ```
101    pub fn record(&mut self, val: u64) {
102        let offsets = bucket_offsets();
103        let next = offsets.partition_point(|&o| o <= val);
104        let bucket = next.saturating_sub(1).min(BUCKET_COUNT - 1);
105        self.buckets[bucket] = self.buckets[bucket].saturating_add(1);
106        if val > self.val_max {
107            self.val_max = val;
108        }
109    }
110
111    /// Sentinel value returned by [`Histogram::percentile`],
112    /// [`Histogram::mean`], and [`Histogram::max`] when the overflow
113    /// bucket is non-empty.
114    pub const OVERFLOW_SENTINEL: u64 = u64::MAX;
115
116    /// Returns `true` when the final (overflow) bucket is non-empty.
117    ///
118    /// The reference implementation logs an error and refuses to publish
119    /// quantiles in this case; the Rust port surfaces the same signal
120    /// through this method and through the [`Histogram::OVERFLOW_SENTINEL`]
121    /// returned from quantile accessors.
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use dynomite::stats::Histogram;
127    /// let mut h = Histogram::new();
128    /// h.record(10);
129    /// assert!(!h.is_overflowing());
130    /// ```
131    pub fn is_overflowing(&self) -> bool {
132        self.buckets[BUCKET_COUNT - 1] > 0
133    }
134
135    /// Total number of observations recorded.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use dynomite::stats::Histogram;
141    /// let mut h = Histogram::new();
142    /// h.record(1);
143    /// h.record(2);
144    /// assert_eq!(h.count(), 2);
145    /// ```
146    pub fn count(&self) -> u64 {
147        self.buckets.iter().copied().fold(0u64, u64::saturating_add)
148    }
149
150    /// Approximate percentile, where `p` is in the closed interval
151    /// `[0.0, 1.0]`. Inputs outside the range or NaN return `0`.
152    ///
153    /// The result is the offset of the first bucket whose cumulative
154    /// count meets or exceeds `floor(count * p)`. Returns
155    /// [`Histogram::OVERFLOW_SENTINEL`] if the histogram is
156    /// [`Histogram::is_overflowing`].
157    ///
158    /// # Examples
159    ///
160    /// ```
161    /// use dynomite::stats::Histogram;
162    /// let mut h = Histogram::new();
163    /// for v in 0..1_000 { h.record(v); }
164    /// assert!(h.percentile(0.95) >= h.percentile(0.5));
165    /// ```
166    pub fn percentile(&self, p: f64) -> u64 {
167        if !p.is_finite() || !(0.0..=1.0).contains(&p) {
168            return 0;
169        }
170        if self.is_overflowing() {
171            return Self::OVERFLOW_SENTINEL;
172        }
173        let total = self.count();
174        if total == 0 {
175            return 0;
176        }
177        let pcount = floor_p_times_u64(p, total);
178        if pcount == 0 {
179            return 0;
180        }
181        let offsets = bucket_offsets();
182        let mut elements: u64 = 0;
183        for (i, &offset) in offsets.iter().enumerate().take(BUCKET_COUNT - 1) {
184            elements = elements.saturating_add(self.buckets[i]);
185            if elements >= pcount {
186                return offset;
187            }
188        }
189        0
190    }
191
192    /// Arithmetic mean of all observations using bucket offsets as
193    /// representative values. Returns `0.0` for an empty histogram and
194    /// `f64::INFINITY` when the histogram is [`Histogram::is_overflowing`].
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// use dynomite::stats::Histogram;
200    /// let mut h = Histogram::new();
201    /// h.record(10);
202    /// assert!(h.mean() > 0.0);
203    /// ```
204    pub fn mean(&self) -> f64 {
205        if self.is_overflowing() {
206            return f64::INFINITY;
207        }
208        let offsets = bucket_offsets();
209        let mut sum: u64 = 0;
210        let mut elements: u64 = 0;
211        for (i, &offset) in offsets.iter().enumerate().take(BUCKET_COUNT - 1) {
212            elements = elements.saturating_add(self.buckets[i]);
213            sum = sum.saturating_add(self.buckets[i].saturating_mul(offset));
214        }
215        if elements == 0 {
216            return 0.0;
217        }
218        let quotient = sum / elements;
219        let remainder = sum % elements;
220        u64_to_f64(quotient) + u64_to_f64(remainder) / u64_to_f64(elements)
221    }
222
223    /// Maximum observation seen since the last [`Histogram::reset`].
224    /// Returns [`Histogram::OVERFLOW_SENTINEL`] when the histogram is
225    /// [`Histogram::is_overflowing`].
226    ///
227    /// # Examples
228    ///
229    /// ```
230    /// use dynomite::stats::Histogram;
231    /// let mut h = Histogram::new();
232    /// h.record(99);
233    /// assert_eq!(h.max(), 99);
234    /// ```
235    pub fn max(&self) -> u64 {
236        if self.is_overflowing() {
237            return Self::OVERFLOW_SENTINEL;
238        }
239        self.val_max
240    }
241
242    /// Reset all bucket counts and the recorded maximum to zero.
243    ///
244    /// # Examples
245    ///
246    /// ```
247    /// use dynomite::stats::Histogram;
248    /// let mut h = Histogram::new();
249    /// h.record(7);
250    /// h.reset();
251    /// assert_eq!(h.count(), 0);
252    /// ```
253    pub fn reset(&mut self) {
254        self.buckets = [0; BUCKET_COUNT];
255        self.val_max = 0;
256    }
257
258    /// Merge bucket counts from another histogram into this one.
259    ///
260    /// The maximum is updated to the larger of the two recorded maxima.
261    ///
262    /// # Examples
263    ///
264    /// ```
265    /// use dynomite::stats::Histogram;
266    /// let mut a = Histogram::new();
267    /// let mut b = Histogram::new();
268    /// a.record(10);
269    /// b.record(20);
270    /// a.merge(&b);
271    /// assert_eq!(a.count(), 2);
272    /// assert_eq!(a.max(), 20);
273    /// ```
274    pub fn merge(&mut self, other: &Self) {
275        for i in 0..BUCKET_COUNT {
276            self.buckets[i] = self.buckets[i].saturating_add(other.buckets[i]);
277        }
278        if other.val_max > self.val_max {
279            self.val_max = other.val_max;
280        }
281    }
282
283    /// Borrow the raw bucket counts.
284    ///
285    /// # Examples
286    ///
287    /// ```
288    /// use dynomite::stats::{Histogram, BUCKET_COUNT};
289    /// let h = Histogram::new();
290    /// assert_eq!(h.buckets().len(), BUCKET_COUNT);
291    /// ```
292    pub fn buckets(&self) -> &[u64; BUCKET_COUNT] {
293        &self.buckets
294    }
295}
296
297impl Default for Histogram {
298    fn default() -> Self {
299        Self::new()
300    }
301}
302
303#[cfg(test)]
304mod tests {
305    use super::*;
306
307    #[test]
308    fn offsets_are_strictly_increasing() {
309        let off = bucket_offsets();
310        for window in off.windows(2) {
311            assert!(window[0] < window[1], "offsets must be strictly monotonic");
312        }
313        assert_eq!(off[0], 1);
314    }
315
316    #[test]
317    fn empty_histogram_returns_zeros() {
318        let h = Histogram::new();
319        assert_eq!(h.count(), 0);
320        assert_eq!(h.max(), 0);
321        assert_eq!(h.percentile(0.5), 0);
322        assert!(h.mean().abs() < f64::EPSILON);
323    }
324
325    #[test]
326    fn record_updates_count_and_max() {
327        let mut h = Histogram::new();
328        h.record(0);
329        h.record(5);
330        h.record(1_000);
331        assert_eq!(h.count(), 3);
332        assert_eq!(h.max(), 1_000);
333    }
334
335    #[test]
336    fn percentile_is_monotone_non_decreasing() {
337        let mut h = Histogram::new();
338        for v in 0..1_000 {
339            h.record(v);
340        }
341        let mut last = 0u64;
342        let mut p = 0.0f64;
343        while p <= 1.0 {
344            let v = h.percentile(p);
345            assert!(v >= last, "percentile decreased at p={p}: {v} < {last}");
346            last = v;
347            p += 0.05;
348        }
349    }
350
351    #[test]
352    fn merge_sums_counts() {
353        let mut a = Histogram::new();
354        let mut b = Histogram::new();
355        for v in 0..100 {
356            a.record(v);
357            b.record(v + 50);
358        }
359        let total = a.count() + b.count();
360        a.merge(&b);
361        assert_eq!(a.count(), total);
362    }
363
364    #[test]
365    fn reset_clears_state() {
366        let mut h = Histogram::new();
367        h.record(123);
368        h.reset();
369        assert_eq!(h.count(), 0);
370        assert_eq!(h.max(), 0);
371    }
372
373    #[test]
374    fn percentile_outside_range_returns_zero() {
375        let mut h = Histogram::new();
376        h.record(10);
377        assert_eq!(h.percentile(-0.1), 0);
378        assert_eq!(h.percentile(1.1), 0);
379        assert_eq!(h.percentile(f64::NAN), 0);
380    }
381
382    #[test]
383    fn overflow_signals_quantile_callers() {
384        let mut h = Histogram::new();
385        // Record a value larger than every bucket offset.
386        let last_offset = *bucket_offsets().last().expect("non-empty offsets");
387        h.record(last_offset.saturating_add(1));
388        assert!(h.is_overflowing(), "expected overflow signal");
389        assert_eq!(h.percentile(0.5), Histogram::OVERFLOW_SENTINEL);
390        assert_eq!(h.max(), Histogram::OVERFLOW_SENTINEL);
391        assert!(h.mean().is_infinite());
392        // After reset the overflow signal clears.
393        h.reset();
394        assert!(!h.is_overflowing());
395    }
396
397    #[test]
398    fn mean_rough_match_for_uniform() {
399        let mut h = Histogram::new();
400        for v in 0..1000u64 {
401            h.record(v);
402        }
403        // True mean is 499.5; bucket-quantization makes the result
404        // approximate but within an order of magnitude.
405        let m = h.mean();
406        assert!((100.0..=1500.0).contains(&m), "mean was {m}");
407    }
408}