glean_core/histogram/
mod.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! A simple histogram implementation for exponential histograms.
6
7use std::any::TypeId;
8use std::collections::HashMap;
9
10use once_cell::sync::OnceCell;
11use serde::{Deserialize, Serialize};
12
13use crate::error::{Error, ErrorKind};
14
15pub use exponential::PrecomputedExponential;
16pub use functional::Functional;
17pub use linear::PrecomputedLinear;
18
19mod exponential;
20mod functional;
21mod linear;
22
23/// Different kinds of histograms.
24#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
25#[serde(rename_all = "lowercase")]
26pub enum HistogramType {
27    /// A histogram with linear distributed buckets.
28    Linear,
29    /// A histogram with exponential distributed buckets.
30    Exponential,
31}
32
33impl TryFrom<i32> for HistogramType {
34    type Error = Error;
35
36    fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
37        match value {
38            0 => Ok(HistogramType::Linear),
39            1 => Ok(HistogramType::Exponential),
40            e => Err(ErrorKind::HistogramType(e).into()),
41        }
42    }
43}
44
45/// A histogram.
46///
47/// Stores the counts per bucket and tracks the count of added samples and the total sum.
48/// The bucketing algorithm can be changed.
49///
50/// ## Example
51///
52/// ```rust,ignore
53/// let mut hist = Histogram::exponential(1, 500, 10);
54///
55/// for i in 1..=10 {
56///     hist.accumulate(i);
57/// }
58///
59/// assert_eq!(10, hist.count());
60/// assert_eq!(55, hist.sum());
61/// ```
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
63pub struct Histogram<B> {
64    /// Mapping bucket's minimum to sample count.
65    values: HashMap<u64, u64>,
66
67    /// The count of samples added.
68    count: u64,
69    /// The total sum of samples.
70    sum: u64,
71
72    /// The bucketing algorithm used.
73    bucketing: B,
74}
75
76/// A bucketing algorithm for histograms.
77///
78/// It's responsible to calculate the bucket a sample goes into.
79/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed.
80pub trait Bucketing {
81    /// Get the bucket's minimum value the sample falls into.
82    fn sample_to_bucket_minimum(&self, sample: u64) -> u64;
83
84    /// The computed bucket ranges for this bucketing algorithm.
85    fn ranges(&self) -> &[u64];
86}
87
88impl<B: Bucketing> Histogram<B> {
89    /// Gets the number of buckets in this histogram.
90    pub fn bucket_count(&self) -> usize {
91        self.values.len()
92    }
93
94    /// Adds a single value to this histogram.
95    pub fn accumulate(&mut self, sample: u64) {
96        let bucket_min = self.bucketing.sample_to_bucket_minimum(sample);
97        let entry = self.values.entry(bucket_min).or_insert(0);
98        *entry += 1;
99        self.sum = self.sum.saturating_add(sample);
100        self.count += 1;
101    }
102
103    /// Gets the total sum of values recorded in this histogram.
104    pub fn sum(&self) -> u64 {
105        self.sum
106    }
107
108    /// Gets the total count of values recorded in this histogram.
109    pub fn count(&self) -> u64 {
110        self.count
111    }
112
113    /// Gets the filled values.
114    pub fn values(&self) -> &HashMap<u64, u64> {
115        &self.values
116    }
117
118    /// Checks if this histogram recorded any values.
119    pub fn is_empty(&self) -> bool {
120        self.count() == 0
121    }
122
123    /// Gets a snapshot of all values from the first bucket until one past the last filled bucket,
124    /// filling in empty buckets with 0.
125    pub fn snapshot_values(&self) -> HashMap<u64, u64> {
126        let mut res = self.values.clone();
127
128        let max_bucket = self.values.keys().max().cloned().unwrap_or(0);
129
130        for &min_bucket in self.bucketing.ranges() {
131            // Fill in missing entries.
132            let _ = res.entry(min_bucket).or_insert(0);
133            // stop one after the last filled bucket
134            if min_bucket > max_bucket {
135                break;
136            }
137        }
138        res
139    }
140
141    /// Clear this histogram.
142    pub fn clear(&mut self) {
143        self.sum = 0;
144        self.count = 0;
145        self.values.clear();
146    }
147}
148
149/// Either linear or exponential histogram bucketing
150///
151/// This is to be used as a single type to avoid generic use in the buffered API.
152pub enum LinearOrExponential {
153    Linear(PrecomputedLinear),
154    Exponential(PrecomputedExponential),
155}
156
157impl Histogram<LinearOrExponential> {
158    /// A histogram using linear bucketing.
159    ///
160    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
161    /// This type is only used for the buffered API.
162    pub fn _linear(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
163        Histogram {
164            values: HashMap::new(),
165            count: 0,
166            sum: 0,
167            bucketing: LinearOrExponential::Linear(PrecomputedLinear {
168                bucket_ranges: OnceCell::new(),
169                min,
170                max,
171                bucket_count,
172            }),
173        }
174    }
175
176    /// A histogram using expontential bucketing.
177    ///
178    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
179    /// This type is only used for the buffered API.
180    pub fn _exponential(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
181        Histogram {
182            values: HashMap::new(),
183            count: 0,
184            sum: 0,
185            bucketing: LinearOrExponential::Exponential(PrecomputedExponential {
186                bucket_ranges: OnceCell::new(),
187                min,
188                max,
189                bucket_count,
190            }),
191        }
192    }
193}
194
195impl Bucketing for LinearOrExponential {
196    fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
197        use LinearOrExponential::*;
198        match self {
199            Linear(lin) => lin.sample_to_bucket_minimum(sample),
200            Exponential(exp) => exp.sample_to_bucket_minimum(sample),
201        }
202    }
203
204    fn ranges(&self) -> &[u64] {
205        use LinearOrExponential::*;
206        match self {
207            Linear(lin) => lin.ranges(),
208            Exponential(exp) => exp.ranges(),
209        }
210    }
211}
212
213impl<B> Histogram<B>
214where
215    B: Bucketing,
216    B: std::fmt::Debug,
217    B: PartialEq,
218{
219    /// Merges data from one histogram into the other.
220    ///
221    /// ## Panics
222    ///
223    /// Panics if the two histograms don't use the same bucketing.
224    pub fn merge(&mut self, other: &Self) {
225        assert_eq!(self.bucketing, other.bucketing);
226
227        self.sum += other.sum;
228        self.count += other.count;
229        for (&bucket, &count) in &other.values {
230            *self.values.entry(bucket).or_insert(0) += count;
231        }
232    }
233}
234
235impl<B> Histogram<B>
236where
237    B: Bucketing + 'static,
238    B: std::fmt::Debug,
239    B: PartialEq,
240{
241    /// Merges data from one histogram into the other.
242    ///
243    /// ## Panics
244    ///
245    /// Panics if the two histograms don't use the same bucketing.
246    /// Note that the `other` side can be either linear or exponential
247    /// and we only merge if it matches `self`'s bucketing.
248    // _Note:_ Unfortunately this needs a separate name from the above, otherwise it's a conflicting
249    // method.
250    // We only use it internally for the buffered API, and can guarantee correct usage that way.
251    pub fn _merge(&mut self, other: &Histogram<LinearOrExponential>) {
252        #[rustfmt::skip]
253        assert!(
254            (
255                TypeId::of::<B>() == TypeId::of::<PrecomputedLinear>()
256                && matches!(other.bucketing, LinearOrExponential::Linear(_))
257            ) ||
258            (
259                TypeId::of::<B>() == TypeId::of::<PrecomputedExponential>()
260                && matches!(other.bucketing, LinearOrExponential::Exponential(_))
261            )
262        );
263        self.sum += other.sum;
264        self.count += other.count;
265        for (&bucket, &count) in &other.values {
266            *self.values.entry(bucket).or_insert(0) += count;
267        }
268    }
269}