glean_core/histogram/
mod.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! A simple histogram implementation for exponential histograms.
6
7use std::any::TypeId;
8use std::collections::HashMap;
9
10use malloc_size_of_derive::MallocSizeOf;
11use once_cell::sync::OnceCell;
12use serde::{Deserialize, Serialize};
13
14use crate::error::{Error, ErrorKind};
15
16pub use exponential::PrecomputedExponential;
17pub use functional::Functional;
18pub use linear::PrecomputedLinear;
19
20mod exponential;
21mod functional;
22mod linear;
23
24/// Different kinds of histograms.
25#[derive(Debug, Clone, Copy, Serialize, Deserialize, MallocSizeOf)]
26#[serde(rename_all = "lowercase")]
27pub enum HistogramType {
28    /// A histogram with linear distributed buckets.
29    Linear,
30    /// A histogram with exponential distributed buckets.
31    Exponential,
32}
33
34impl TryFrom<i32> for HistogramType {
35    type Error = Error;
36
37    fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
38        match value {
39            0 => Ok(HistogramType::Linear),
40            1 => Ok(HistogramType::Exponential),
41            e => Err(ErrorKind::HistogramType(e).into()),
42        }
43    }
44}
45
46/// A histogram.
47///
48/// Stores the counts per bucket and tracks the count of added samples and the total sum.
49/// The bucketing algorithm can be changed.
50///
51/// ## Example
52///
53/// ```rust,ignore
54/// let mut hist = Histogram::exponential(1, 500, 10);
55///
56/// for i in 1..=10 {
57///     hist.accumulate(i);
58/// }
59///
60/// assert_eq!(10, hist.count());
61/// assert_eq!(55, hist.sum());
62/// ```
63#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, MallocSizeOf)]
64pub struct Histogram<B> {
65    /// Mapping bucket's minimum to sample count.
66    values: HashMap<u64, u64>,
67
68    /// The count of samples added.
69    count: u64,
70    /// The total sum of samples.
71    sum: u64,
72
73    /// The bucketing algorithm used.
74    bucketing: B,
75}
76
77/// A bucketing algorithm for histograms.
78///
79/// It's responsible to calculate the bucket a sample goes into.
80/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed.
81pub trait Bucketing {
82    /// Get the bucket's minimum value the sample falls into.
83    fn sample_to_bucket_minimum(&self, sample: u64) -> u64;
84
85    /// The computed bucket ranges for this bucketing algorithm.
86    fn ranges(&self) -> &[u64];
87}
88
89impl<B: Bucketing> Histogram<B> {
90    /// Gets the number of buckets in this histogram.
91    pub fn bucket_count(&self) -> usize {
92        self.values.len()
93    }
94
95    /// Adds a single value to this histogram.
96    pub fn accumulate(&mut self, sample: u64) {
97        let bucket_min = self.bucketing.sample_to_bucket_minimum(sample);
98        let entry = self.values.entry(bucket_min).or_insert(0);
99        *entry += 1;
100        self.sum = self.sum.saturating_add(sample);
101        self.count += 1;
102    }
103
104    /// Gets the total sum of values recorded in this histogram.
105    pub fn sum(&self) -> u64 {
106        self.sum
107    }
108
109    /// Gets the total count of values recorded in this histogram.
110    pub fn count(&self) -> u64 {
111        self.count
112    }
113
114    /// Gets the filled values.
115    pub fn values(&self) -> &HashMap<u64, u64> {
116        &self.values
117    }
118
119    /// Checks if this histogram recorded any values.
120    pub fn is_empty(&self) -> bool {
121        self.count() == 0
122    }
123
124    /// Gets a snapshot of all values from the first bucket until one past the last filled bucket,
125    /// filling in empty buckets with 0.
126    pub fn snapshot_values(&self) -> HashMap<u64, u64> {
127        let mut res = self.values.clone();
128
129        let max_bucket = self.values.keys().max().cloned().unwrap_or(0);
130
131        for &min_bucket in self.bucketing.ranges() {
132            // Fill in missing entries.
133            let _ = res.entry(min_bucket).or_insert(0);
134            // stop one after the last filled bucket
135            if min_bucket > max_bucket {
136                break;
137            }
138        }
139        res
140    }
141
142    /// Clear this histogram.
143    pub fn clear(&mut self) {
144        self.sum = 0;
145        self.count = 0;
146        self.values.clear();
147    }
148}
149
150/// Either linear or exponential histogram bucketing
151///
152/// This is to be used as a single type to avoid generic use in the buffered API.
153pub enum LinearOrExponential {
154    Linear(PrecomputedLinear),
155    Exponential(PrecomputedExponential),
156}
157
158impl Histogram<LinearOrExponential> {
159    /// A histogram using linear bucketing.
160    ///
161    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
162    /// This type is only used for the buffered API.
163    pub fn _linear(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
164        Histogram {
165            values: HashMap::new(),
166            count: 0,
167            sum: 0,
168            bucketing: LinearOrExponential::Linear(PrecomputedLinear {
169                bucket_ranges: OnceCell::new(),
170                min,
171                max,
172                bucket_count,
173            }),
174        }
175    }
176
177    /// A histogram using expontential bucketing.
178    ///
179    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
180    /// This type is only used for the buffered API.
181    pub fn _exponential(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
182        Histogram {
183            values: HashMap::new(),
184            count: 0,
185            sum: 0,
186            bucketing: LinearOrExponential::Exponential(PrecomputedExponential {
187                bucket_ranges: OnceCell::new(),
188                min,
189                max,
190                bucket_count,
191            }),
192        }
193    }
194}
195
196impl Bucketing for LinearOrExponential {
197    fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
198        use LinearOrExponential::*;
199        match self {
200            Linear(lin) => lin.sample_to_bucket_minimum(sample),
201            Exponential(exp) => exp.sample_to_bucket_minimum(sample),
202        }
203    }
204
205    fn ranges(&self) -> &[u64] {
206        use LinearOrExponential::*;
207        match self {
208            Linear(lin) => lin.ranges(),
209            Exponential(exp) => exp.ranges(),
210        }
211    }
212}
213
214impl<B> Histogram<B>
215where
216    B: Bucketing,
217    B: std::fmt::Debug,
218    B: PartialEq,
219{
220    /// Merges data from one histogram into the other.
221    ///
222    /// ## Panics
223    ///
224    /// Panics if the two histograms don't use the same bucketing.
225    pub fn merge(&mut self, other: &Self) {
226        assert_eq!(self.bucketing, other.bucketing);
227
228        self.sum = self.sum.saturating_add(other.sum);
229        self.count = self.count.saturating_add(other.count);
230        for (&bucket, &count) in &other.values {
231            let entry = self.values.entry(bucket).or_insert(0);
232            *entry = entry.saturating_add(count)
233        }
234    }
235}
236
237impl<B> Histogram<B>
238where
239    B: Bucketing + 'static,
240    B: std::fmt::Debug,
241    B: PartialEq,
242{
243    /// Merges data from one histogram into the other.
244    ///
245    /// ## Panics
246    ///
247    /// Panics if the two histograms don't use the same bucketing.
248    /// Note that the `other` side can be either linear or exponential
249    /// and we only merge if it matches `self`'s bucketing.
250    // _Note:_ Unfortunately this needs a separate name from the above, otherwise it's a conflicting
251    // method.
252    // We only use it internally for the buffered API, and can guarantee correct usage that way.
253    pub fn _merge(&mut self, other: &Histogram<LinearOrExponential>) {
254        #[rustfmt::skip]
255        assert!(
256            (
257                TypeId::of::<B>() == TypeId::of::<PrecomputedLinear>()
258                && matches!(other.bucketing, LinearOrExponential::Linear(_))
259            ) ||
260            (
261                TypeId::of::<B>() == TypeId::of::<PrecomputedExponential>()
262                && matches!(other.bucketing, LinearOrExponential::Exponential(_))
263            )
264        );
265        self.sum = self.sum.saturating_add(other.sum);
266        self.count = self.count.saturating_add(other.count);
267        for (&bucket, &count) in &other.values {
268            let entry = self.values.entry(bucket).or_insert(0);
269            *entry = entry.saturating_add(count);
270        }
271    }
272}