dynomite/stats/histogram.rs
1//! Estimated histogram with logarithmic bucket layout.
2//!
3//! The bucket layout follows Cassandra's `EstimatedHistogram`: 94 buckets
4//! whose offsets grow geometrically by a factor of 1.2, starting at 1.
5//! Recording a value finds the bucket whose offset is the largest value
6//! still less than or equal to the input.
7//!
8//! # Examples
9//!
10//! ```
11//! use dynomite::stats::Histogram;
12//!
13//! let mut h = Histogram::new();
14//! for v in 0..=100 {
15//! h.record(v);
16//! }
17//! assert_eq!(h.count(), 101);
18//! assert!(h.percentile(0.5) <= h.percentile(0.95));
19//! ```
20
21use std::sync::OnceLock;
22
23use crate::stats::numeric::{floor_p_times_u64, u64_to_f64};
24
25/// Number of buckets in the estimated histogram.
26///
27/// # Examples
28///
29/// ```
30/// use dynomite::stats::BUCKET_COUNT;
31/// assert_eq!(BUCKET_COUNT, 94);
32/// ```
33pub const BUCKET_COUNT: usize = 94;
34
35/// Returns the cached bucket offset table.
36///
37/// The first bucket starts at `1`. Each subsequent offset is
38/// `floor(prev * 6 / 5)`, bumped by one if the multiplication did not
39/// advance. This is the integer-equivalent of the original `* 1.2`
40/// factor used by Cassandra's estimated histogram.
41fn bucket_offsets() -> &'static [u64; BUCKET_COUNT] {
42 static OFFSETS: OnceLock<[u64; BUCKET_COUNT]> = OnceLock::new();
43 OFFSETS.get_or_init(|| {
44 let mut offsets = [0u64; BUCKET_COUNT];
45 let mut last: u64 = 1;
46 offsets[0] = 1;
47 for slot in offsets.iter_mut().skip(1) {
48 let mut next = last.saturating_mul(6) / 5;
49 if next == last {
50 next += 1;
51 }
52 *slot = next;
53 last = next;
54 }
55 offsets
56 })
57}
58
59/// A fixed-bucket histogram for tracking latencies and payload sizes.
60///
61/// All operations are O(`BUCKET_COUNT`) and allocation free.
62#[derive(Clone, Debug)]
63pub struct Histogram {
64 buckets: [u64; BUCKET_COUNT],
65 val_max: u64,
66}
67
68impl Histogram {
69 /// Construct a fresh histogram with all bucket counts set to zero.
70 ///
71 /// # Examples
72 ///
73 /// ```
74 /// use dynomite::stats::Histogram;
75 /// let h = Histogram::new();
76 /// assert_eq!(h.count(), 0);
77 /// ```
78 pub fn new() -> Self {
79 Self {
80 buckets: [0; BUCKET_COUNT],
81 val_max: 0,
82 }
83 }
84
85 /// Record a single observation, placing it in the appropriate bucket.
86 ///
87 /// Values larger than the largest bucket offset land in the final
88 /// bucket and signal histogram overflow. Once the overflow bucket is
89 /// non-empty, [`Histogram::percentile`], [`Histogram::mean`], and
90 /// [`Histogram::max`] all return [`Histogram::OVERFLOW_SENTINEL`].
91 ///
92 /// # Examples
93 ///
94 /// ```
95 /// use dynomite::stats::Histogram;
96 /// let mut h = Histogram::new();
97 /// h.record(42);
98 /// assert_eq!(h.count(), 1);
99 /// assert_eq!(h.max(), 42);
100 /// ```
101 pub fn record(&mut self, val: u64) {
102 let offsets = bucket_offsets();
103 let next = offsets.partition_point(|&o| o <= val);
104 let bucket = next.saturating_sub(1).min(BUCKET_COUNT - 1);
105 self.buckets[bucket] = self.buckets[bucket].saturating_add(1);
106 if val > self.val_max {
107 self.val_max = val;
108 }
109 }
110
111 /// Sentinel value returned by [`Histogram::percentile`],
112 /// [`Histogram::mean`], and [`Histogram::max`] when the overflow
113 /// bucket is non-empty.
114 pub const OVERFLOW_SENTINEL: u64 = u64::MAX;
115
116 /// Returns `true` when the final (overflow) bucket is non-empty.
117 ///
118 /// The reference implementation logs an error and refuses to publish
119 /// quantiles in this case; the Rust port surfaces the same signal
120 /// through this method and through the [`Histogram::OVERFLOW_SENTINEL`]
121 /// returned from quantile accessors.
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use dynomite::stats::Histogram;
127 /// let mut h = Histogram::new();
128 /// h.record(10);
129 /// assert!(!h.is_overflowing());
130 /// ```
131 pub fn is_overflowing(&self) -> bool {
132 self.buckets[BUCKET_COUNT - 1] > 0
133 }
134
135 /// Total number of observations recorded.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use dynomite::stats::Histogram;
141 /// let mut h = Histogram::new();
142 /// h.record(1);
143 /// h.record(2);
144 /// assert_eq!(h.count(), 2);
145 /// ```
146 pub fn count(&self) -> u64 {
147 self.buckets.iter().copied().fold(0u64, u64::saturating_add)
148 }
149
150 /// Approximate percentile, where `p` is in the closed interval
151 /// `[0.0, 1.0]`. Inputs outside the range or NaN return `0`.
152 ///
153 /// The result is the offset of the first bucket whose cumulative
154 /// count meets or exceeds `floor(count * p)`. Returns
155 /// [`Histogram::OVERFLOW_SENTINEL`] if the histogram is
156 /// [`Histogram::is_overflowing`].
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use dynomite::stats::Histogram;
162 /// let mut h = Histogram::new();
163 /// for v in 0..1_000 { h.record(v); }
164 /// assert!(h.percentile(0.95) >= h.percentile(0.5));
165 /// ```
166 pub fn percentile(&self, p: f64) -> u64 {
167 if !p.is_finite() || !(0.0..=1.0).contains(&p) {
168 return 0;
169 }
170 if self.is_overflowing() {
171 return Self::OVERFLOW_SENTINEL;
172 }
173 let total = self.count();
174 if total == 0 {
175 return 0;
176 }
177 let pcount = floor_p_times_u64(p, total);
178 if pcount == 0 {
179 return 0;
180 }
181 let offsets = bucket_offsets();
182 let mut elements: u64 = 0;
183 for (i, &offset) in offsets.iter().enumerate().take(BUCKET_COUNT - 1) {
184 elements = elements.saturating_add(self.buckets[i]);
185 if elements >= pcount {
186 return offset;
187 }
188 }
189 0
190 }
191
192 /// Arithmetic mean of all observations using bucket offsets as
193 /// representative values. Returns `0.0` for an empty histogram and
194 /// `f64::INFINITY` when the histogram is [`Histogram::is_overflowing`].
195 ///
196 /// # Examples
197 ///
198 /// ```
199 /// use dynomite::stats::Histogram;
200 /// let mut h = Histogram::new();
201 /// h.record(10);
202 /// assert!(h.mean() > 0.0);
203 /// ```
204 pub fn mean(&self) -> f64 {
205 if self.is_overflowing() {
206 return f64::INFINITY;
207 }
208 let offsets = bucket_offsets();
209 let mut sum: u64 = 0;
210 let mut elements: u64 = 0;
211 for (i, &offset) in offsets.iter().enumerate().take(BUCKET_COUNT - 1) {
212 elements = elements.saturating_add(self.buckets[i]);
213 sum = sum.saturating_add(self.buckets[i].saturating_mul(offset));
214 }
215 if elements == 0 {
216 return 0.0;
217 }
218 let quotient = sum / elements;
219 let remainder = sum % elements;
220 u64_to_f64(quotient) + u64_to_f64(remainder) / u64_to_f64(elements)
221 }
222
223 /// Maximum observation seen since the last [`Histogram::reset`].
224 /// Returns [`Histogram::OVERFLOW_SENTINEL`] when the histogram is
225 /// [`Histogram::is_overflowing`].
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// use dynomite::stats::Histogram;
231 /// let mut h = Histogram::new();
232 /// h.record(99);
233 /// assert_eq!(h.max(), 99);
234 /// ```
235 pub fn max(&self) -> u64 {
236 if self.is_overflowing() {
237 return Self::OVERFLOW_SENTINEL;
238 }
239 self.val_max
240 }
241
242 /// Reset all bucket counts and the recorded maximum to zero.
243 ///
244 /// # Examples
245 ///
246 /// ```
247 /// use dynomite::stats::Histogram;
248 /// let mut h = Histogram::new();
249 /// h.record(7);
250 /// h.reset();
251 /// assert_eq!(h.count(), 0);
252 /// ```
253 pub fn reset(&mut self) {
254 self.buckets = [0; BUCKET_COUNT];
255 self.val_max = 0;
256 }
257
258 /// Merge bucket counts from another histogram into this one.
259 ///
260 /// The maximum is updated to the larger of the two recorded maxima.
261 ///
262 /// # Examples
263 ///
264 /// ```
265 /// use dynomite::stats::Histogram;
266 /// let mut a = Histogram::new();
267 /// let mut b = Histogram::new();
268 /// a.record(10);
269 /// b.record(20);
270 /// a.merge(&b);
271 /// assert_eq!(a.count(), 2);
272 /// assert_eq!(a.max(), 20);
273 /// ```
274 pub fn merge(&mut self, other: &Self) {
275 for i in 0..BUCKET_COUNT {
276 self.buckets[i] = self.buckets[i].saturating_add(other.buckets[i]);
277 }
278 if other.val_max > self.val_max {
279 self.val_max = other.val_max;
280 }
281 }
282
283 /// Borrow the raw bucket counts.
284 ///
285 /// # Examples
286 ///
287 /// ```
288 /// use dynomite::stats::{Histogram, BUCKET_COUNT};
289 /// let h = Histogram::new();
290 /// assert_eq!(h.buckets().len(), BUCKET_COUNT);
291 /// ```
292 pub fn buckets(&self) -> &[u64; BUCKET_COUNT] {
293 &self.buckets
294 }
295}
296
297impl Default for Histogram {
298 fn default() -> Self {
299 Self::new()
300 }
301}
302
303#[cfg(test)]
304mod tests {
305 use super::*;
306
307 #[test]
308 fn offsets_are_strictly_increasing() {
309 let off = bucket_offsets();
310 for window in off.windows(2) {
311 assert!(window[0] < window[1], "offsets must be strictly monotonic");
312 }
313 assert_eq!(off[0], 1);
314 }
315
316 #[test]
317 fn empty_histogram_returns_zeros() {
318 let h = Histogram::new();
319 assert_eq!(h.count(), 0);
320 assert_eq!(h.max(), 0);
321 assert_eq!(h.percentile(0.5), 0);
322 assert!(h.mean().abs() < f64::EPSILON);
323 }
324
325 #[test]
326 fn record_updates_count_and_max() {
327 let mut h = Histogram::new();
328 h.record(0);
329 h.record(5);
330 h.record(1_000);
331 assert_eq!(h.count(), 3);
332 assert_eq!(h.max(), 1_000);
333 }
334
335 #[test]
336 fn percentile_is_monotone_non_decreasing() {
337 let mut h = Histogram::new();
338 for v in 0..1_000 {
339 h.record(v);
340 }
341 let mut last = 0u64;
342 let mut p = 0.0f64;
343 while p <= 1.0 {
344 let v = h.percentile(p);
345 assert!(v >= last, "percentile decreased at p={p}: {v} < {last}");
346 last = v;
347 p += 0.05;
348 }
349 }
350
351 #[test]
352 fn merge_sums_counts() {
353 let mut a = Histogram::new();
354 let mut b = Histogram::new();
355 for v in 0..100 {
356 a.record(v);
357 b.record(v + 50);
358 }
359 let total = a.count() + b.count();
360 a.merge(&b);
361 assert_eq!(a.count(), total);
362 }
363
364 #[test]
365 fn reset_clears_state() {
366 let mut h = Histogram::new();
367 h.record(123);
368 h.reset();
369 assert_eq!(h.count(), 0);
370 assert_eq!(h.max(), 0);
371 }
372
373 #[test]
374 fn percentile_outside_range_returns_zero() {
375 let mut h = Histogram::new();
376 h.record(10);
377 assert_eq!(h.percentile(-0.1), 0);
378 assert_eq!(h.percentile(1.1), 0);
379 assert_eq!(h.percentile(f64::NAN), 0);
380 }
381
382 #[test]
383 fn overflow_signals_quantile_callers() {
384 let mut h = Histogram::new();
385 // Record a value larger than every bucket offset.
386 let last_offset = *bucket_offsets().last().expect("non-empty offsets");
387 h.record(last_offset.saturating_add(1));
388 assert!(h.is_overflowing(), "expected overflow signal");
389 assert_eq!(h.percentile(0.5), Histogram::OVERFLOW_SENTINEL);
390 assert_eq!(h.max(), Histogram::OVERFLOW_SENTINEL);
391 assert!(h.mean().is_infinite());
392 // After reset the overflow signal clears.
393 h.reset();
394 assert!(!h.is_overflowing());
395 }
396
397 #[test]
398 fn mean_rough_match_for_uniform() {
399 let mut h = Histogram::new();
400 for v in 0..1000u64 {
401 h.record(v);
402 }
403 // True mean is 499.5; bucket-quantization makes the result
404 // approximate but within an order of magnitude.
405 let m = h.mean();
406 assert!((100.0..=1500.0).contains(&m), "mean was {m}");
407 }
408}