stretto/
histogram.rs

1use atomic::Atomic;
2use std::fmt::{Display, Formatter};
3use std::sync::atomic::{AtomicI64, Ordering};
4use std::sync::Arc;
5
6const MAXI64: i64 = i64::MAX;
7
8/// `Histogram` stores the information needed to represent the sizes of the keys and values
9/// as a histogram.
10///
11/// Histogram promises thread-safe.
12#[derive(Debug)]
13pub struct Histogram {
14    bounds: Arc<Vec<Atomic<f64>>>,
15    count: AtomicI64,
16    count_per_bucket: Arc<Vec<AtomicI64>>,
17    min: AtomicI64,
18    max: AtomicI64,
19    sum: AtomicI64,
20}
21
22impl Histogram {
23    /// Returns a new instance of HistogramData with properly initialized fields.
24    pub fn new(bounds: Vec<f64>) -> Self {
25        let bounds = bounds.into_iter().map(Atomic::new).collect::<Vec<_>>();
26
27        let mut cpb = init_cpb(bounds.len() + 1);
28        cpb.shrink_to_fit();
29        Histogram {
30            bounds: Arc::new(bounds),
31            count: AtomicI64::new(0),
32            count_per_bucket: Arc::new(cpb),
33            min: AtomicI64::new(MAXI64),
34            max: AtomicI64::new(0),
35            sum: AtomicI64::new(0),
36        }
37    }
38
39    /// Returns HistogramData base on the min exponent and max exponent. The bounds are powers of two of the form
40    /// [2^min_exponent, ..., 2^max_exponent].
41    // pub fn from_exponents(min_exp: u32, max_exp: u32) -> Self {
42    //     Self::new((min_exp..=max_exp).map(|idx| (1 << idx) as f64).collect())
43    // }
44
45    /// `update` changes the Min and Max fields if value is less than or greater than the current values.
46    pub fn update(&self, val: i64) {
47        let _ = self
48            .max
49            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |max| {
50                if val > max {
51                    Some(val)
52                } else {
53                    None
54                }
55            });
56
57        let _ = self
58            .min
59            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |min| {
60                if val < min {
61                    Some(val)
62                } else {
63                    None
64                }
65            });
66
67        self.sum.fetch_add(val, Ordering::SeqCst);
68        self.count.fetch_add(1, Ordering::SeqCst);
69
70        for idx in 0..=self.bounds.len() {
71            // Allocate value in the last buckets if we reached the end of the Bounds array.
72            if idx == self.bounds.len() {
73                self.count_per_bucket[idx].fetch_add(1, Ordering::SeqCst);
74                break;
75            }
76
77            if val < (self.bounds[idx].load(Ordering::SeqCst) as i64) {
78                self.count_per_bucket[idx].fetch_add(1, Ordering::SeqCst);
79                break;
80            }
81        }
82    }
83
84    /// `mean` returns the mean value for the histogram.
85    #[allow(dead_code)]
86    pub fn mean(&self) -> f64 {
87        if self.count.load(Ordering::SeqCst) == 0 {
88            0f64
89        } else {
90            (self.sum.load(Ordering::SeqCst) as f64) / (self.count.load(Ordering::SeqCst) as f64)
91        }
92    }
93
94    /// `percentile` returns the percentile value for the histogram.
95    /// value of p should be between [0.0-1.0]
96
97    pub fn percentile(&self, p: f64) -> f64 {
98        let count = self.count.load(Ordering::SeqCst);
99        if count == 0 {
100            // if no data return the minimum range
101            self.bounds[0].load(Ordering::SeqCst)
102        } else {
103            let mut pval = ((count as f64) * p) as i64;
104
105            for (idx, val) in self.count_per_bucket.iter().enumerate() {
106                pval -= val.load(Ordering::SeqCst);
107                if pval <= 0 {
108                    if idx == self.bounds.len() {
109                        break;
110                    }
111                    return self.bounds[idx].load(Ordering::SeqCst);
112                }
113            }
114            // default return should be the max range
115            self.bounds[self.bounds.len() - 1].load(Ordering::SeqCst)
116        }
117    }
118
119    /// `clear` reset the histogram. Helpful in situations where we need to reset the metrics
120    pub fn clear(&self) {
121        self.count.store(0, Ordering::SeqCst);
122        self.count_per_bucket
123            .iter()
124            .for_each(|val| val.store(0, Ordering::SeqCst));
125        self.sum.store(0, Ordering::SeqCst);
126        self.max.store(0, Ordering::SeqCst);
127        self.min.store(0, Ordering::SeqCst);
128    }
129}
130
131impl Display for Histogram {
132    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
133        let mut buf = Vec::<u8>::new();
134        buf.extend("\n -- Histogram:\n".as_bytes());
135        buf.extend(format!("Min value: {}\n", self.min.load(Ordering::SeqCst)).as_bytes());
136        buf.extend(format!("Max value: {}\n", self.max.load(Ordering::SeqCst)).as_bytes());
137        buf.extend(format!("Count: {}\n", self.count.load(Ordering::SeqCst)).as_bytes());
138        buf.extend(format!("50p: {}\n", self.percentile(0.5)).as_bytes());
139        buf.extend(format!("75p: {}\n", self.percentile(0.75)).as_bytes());
140        buf.extend(format!("90p: {}\n", self.percentile(0.9)).as_bytes());
141
142        let num_bounds = self.bounds.len();
143        let count = self.count.load(Ordering::SeqCst);
144        let mut cum = 0f64;
145        for (idx, ct) in self.count_per_bucket.iter().enumerate() {
146            let ct = ct.load(Ordering::SeqCst);
147            if ct == 0 {
148                continue;
149            }
150
151            // The last bucket represents the bucket that contains the range from
152            // the last bound up to infinity so it's processed differently than the
153            // other buckets.
154            if idx == self.count_per_bucket.len() - 1 {
155                let lb = self.bounds[num_bounds - 1].load(Ordering::SeqCst) as u64;
156                let page = (ct * 100) as f64 / (count as f64);
157                cum += page;
158                buf.extend(
159                    format!("[{}, {}) {} {:.2}% {:.2}%\n", lb, "infinity", ct, page, cum)
160                        .as_bytes(),
161                );
162                continue;
163            }
164
165            let ub = self.bounds[idx].load(Ordering::SeqCst) as u64;
166            let mut lb = 0u64;
167            if idx > 0 {
168                lb = self.bounds[idx - 1].load(Ordering::SeqCst) as u64;
169            }
170
171            let page = (ct * 100) as f64 / (count as f64);
172
173            cum += page;
174            buf.extend(format!("[{}, {}) {} {:.2}% {:.2}%\n", lb, ub, ct, page, cum).as_bytes())
175        }
176
177        buf.extend(" --\n".as_bytes());
178        write!(f, "{}", String::from_utf8(buf).unwrap())
179    }
180}
181
182impl Clone for Histogram {
183    fn clone(&self) -> Self {
184        Self {
185            bounds: self.bounds.clone(),
186            count: AtomicI64::new(self.count.load(Ordering::SeqCst)),
187            count_per_bucket: self.count_per_bucket.clone(),
188            min: AtomicI64::new(self.min.load(Ordering::SeqCst)),
189            max: AtomicI64::new(self.max.load(Ordering::SeqCst)),
190            sum: AtomicI64::new(self.sum.load(Ordering::SeqCst)),
191        }
192    }
193}
194
195fn init_cpb(num: usize) -> Vec<AtomicI64> {
196    vec![0; num]
197        .into_iter()
198        .map(AtomicI64::new)
199        .collect::<Vec<_>>()
200}
201
202#[cfg(test)]
203mod test {
204    use crate::histogram::Histogram;
205
206    struct PercentileTestCase {
207        upper_bound: i64,
208        lower_bound: i64,
209        step: i64,
210        loops: u64,
211        percent: f64,
212        expect: f64,
213    }
214
215    fn init_histogram(lb: f64, ub: f64, step: f64) -> Histogram {
216        let size = ((ub - lb) / step).ceil() as usize;
217        let mut bounds = vec![0f64; size + 1];
218
219        let mut prev = 0f64;
220        bounds.iter_mut().enumerate().for_each(|(idx, val)| {
221            if idx == 0 {
222                *val = lb;
223                prev = lb;
224            } else if idx == size {
225                *val = ub;
226            } else {
227                *val = prev + step;
228                prev = *val;
229            }
230        });
231        Histogram::new(bounds)
232    }
233
234    fn assert_histogram_percentiles(ps: Vec<PercentileTestCase>) {
235        let h = init_histogram(32.0, 514.0, 4.0);
236
237        ps.iter().for_each(|case| {
238            (case.lower_bound..=case.upper_bound)
239                .filter(|x| *x % case.step == 0)
240                .for_each(|v| {
241                    (0..case.loops).for_each(|_| {
242                        h.update(v);
243                    })
244                });
245
246            assert_eq!(
247                h.percentile(case.percent),
248                case.expect,
249                "bad: p: {}",
250                case.percent
251            );
252            h.clear();
253        });
254    }
255
256    #[test]
257    fn test_mean() {
258        let h = init_histogram(0.0, 16.0, 4.0);
259        (0..=16).filter(|x| *x % 4 == 0).for_each(|v| {
260            h.update(v);
261        });
262        assert_eq!(h.mean(), 40f64 / 5f64);
263    }
264
265    #[test]
266    fn test_percentile() {
267        let cases = vec![
268            PercentileTestCase {
269                upper_bound: 1024,
270                lower_bound: 0,
271                step: 4,
272                loops: 1000,
273                percent: 0.0,
274                expect: 32.0,
275            },
276            PercentileTestCase {
277                upper_bound: 512,
278                lower_bound: 0,
279                step: 4,
280                loops: 1000,
281                percent: 0.99,
282                expect: 512.0,
283            },
284            PercentileTestCase {
285                upper_bound: 1024,
286                lower_bound: 0,
287                step: 4,
288                loops: 1000,
289                percent: 1.0,
290                expect: 514.0,
291            },
292        ];
293        assert_histogram_percentiles(cases);
294    }
295
296    #[test]
297    fn test_fmt() {
298        let h = init_histogram(0.0, 16.0, 4.0);
299        (0..=16).filter(|x| *x % 4 == 0).for_each(|v| {
300            h.update(v);
301        });
302        let f = "\n -- Histogram:
303Min value: 0
304Max value: 16
305Count: 5
30650p: 8
30775p: 12
30890p: 16
309[0, 4) 1 20.00% 20.00%
310[4, 8) 1 20.00% 40.00%
311[8, 12) 1 20.00% 60.00%
312[12, 16) 1 20.00% 80.00%
313[16, infinity) 1 20.00% 100.00%
314 --\n";
315        assert_eq!(format!("{}", h), f)
316    }
317}