use std::collections::BTreeMap;
use crate::quantile::{Quantile, QuantilesResult, SampleQuantiles};
use crate::{Bucket, Config, Count, Error, SparseHistogram, SparseHistogram32};
macro_rules! define_histogram {
($name:ident, $iter:ident, $sparse:ident, $count:ty) => {
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct $name {
pub(crate) config: Config,
pub(crate) buckets: Box<[$count]>,
}
impl $name {
pub fn new(grouping_power: u8, max_value_power: u8) -> Result<Self, Error> {
let config = Config::new(grouping_power, max_value_power)?;
Ok(Self::with_config(&config))
}
pub fn with_config(config: &Config) -> Self {
let buckets: Box<[$count]> =
vec![<$count as Count>::ZERO; config.total_buckets()].into();
Self {
config: *config,
buckets,
}
}
pub fn from_buckets(
grouping_power: u8,
max_value_power: u8,
buckets: Vec<$count>,
) -> Result<Self, Error> {
let config = Config::new(grouping_power, max_value_power)?;
if config.total_buckets() != buckets.len() {
return Err(Error::IncompatibleParameters);
}
Ok(Self {
config,
buckets: buckets.into(),
})
}
pub fn increment(&mut self, value: u64) -> Result<(), Error> {
self.add(value, <$count as Count>::ONE)
}
pub fn add(&mut self, value: u64, count: $count) -> Result<(), Error> {
let index = self.config.value_to_index(value)?;
self.buckets[index] = self.buckets[index].wrapping_add(count);
Ok(())
}
pub fn as_slice(&self) -> &[$count] {
&self.buckets
}
pub fn as_mut_slice(&mut self) -> &mut [$count] {
&mut self.buckets
}
pub fn downsample(&self, grouping_power: u8) -> Result<Self, Error> {
if grouping_power >= self.config.grouping_power() {
return Err(Error::IncompatibleParameters);
}
let mut histogram = Self::new(grouping_power, self.config.max_value_power())?;
for (i, n) in self.as_slice().iter().enumerate() {
if *n != <$count as Count>::ZERO {
let val = self.config.index_to_lower_bound(i);
histogram.add(val, *n)?;
}
}
Ok(histogram)
}
pub fn checked_add(&self, other: &Self) -> Result<Self, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.checked_add(*other).ok_or(Error::Overflow)?;
}
Ok(result)
}
pub fn wrapping_add(&self, other: &Self) -> Result<Self, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.wrapping_add(*other);
}
Ok(result)
}
pub fn checked_sub(&self, other: &Self) -> Result<Self, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.checked_sub(*other).ok_or(Error::Underflow)?;
}
Ok(result)
}
pub fn wrapping_sub(&self, other: &Self) -> Result<Self, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.wrapping_sub(*other);
}
Ok(result)
}
pub fn iter(&self) -> $iter<'_> {
$iter {
index: 0,
histogram: self,
}
}
pub fn config(&self) -> Config {
self.config
}
pub fn quantiles(&self, quantiles: &[f64]) -> Result<Option<QuantilesResult>, Error> {
<Self as SampleQuantiles>::quantiles(self, quantiles)
}
pub fn quantile(&self, quantile: f64) -> Result<Option<QuantilesResult>, Error> {
<Self as SampleQuantiles>::quantile(self, quantile)
}
}
impl SampleQuantiles for $name {
fn quantiles(&self, quantiles: &[f64]) -> Result<Option<QuantilesResult>, Error> {
for q in quantiles {
if !(0.0..=1.0).contains(q) {
return Err(Error::InvalidQuantile);
}
}
let total_count: u128 = self.buckets.iter().map(|v| v.as_u128()).sum();
if total_count == 0 {
return Ok(None);
}
let mut sorted: Vec<Quantile> = quantiles
.iter()
.map(|&q| Quantile::new(q).unwrap())
.collect();
sorted.sort();
sorted.dedup();
let mut min_idx = None;
let mut max_idx = None;
for (i, count) in self.buckets.iter().enumerate() {
if *count != <$count as Count>::ZERO {
if min_idx.is_none() {
min_idx = Some(i);
}
max_idx = Some(i);
}
}
let min_idx = min_idx.unwrap();
let max_idx = max_idx.unwrap();
let min = Bucket {
count: self.buckets[min_idx].as_u128() as u64,
range: self.config.index_to_range(min_idx),
};
let max = Bucket {
count: self.buckets[max_idx].as_u128() as u64,
range: self.config.index_to_range(max_idx),
};
let mut bucket_idx = 0;
let mut partial_sum = self.buckets[bucket_idx].as_u128();
let mut entries = BTreeMap::new();
for quantile in &sorted {
let count =
std::cmp::max(1, (quantile.as_f64() * total_count as f64).ceil() as u128);
loop {
if partial_sum >= count {
entries.insert(
*quantile,
Bucket {
count: self.buckets[bucket_idx].as_u128() as u64,
range: self.config.index_to_range(bucket_idx),
},
);
break;
}
if bucket_idx == (self.buckets.len() - 1) {
break;
}
bucket_idx += 1;
partial_sum += self.buckets[bucket_idx].as_u128();
}
}
Ok(Some(QuantilesResult::new(entries, total_count, min, max)))
}
}
impl<'a> IntoIterator for &'a $name {
type Item = Bucket;
type IntoIter = $iter<'a>;
fn into_iter(self) -> Self::IntoIter {
$iter {
index: 0,
histogram: self,
}
}
}
pub struct $iter<'a> {
index: usize,
histogram: &'a $name,
}
impl Iterator for $iter<'_> {
type Item = Bucket;
fn next(&mut self) -> Option<Bucket> {
if self.index >= self.histogram.buckets.len() {
return None;
}
let bucket = Bucket {
count: self.histogram.buckets[self.index].as_u128() as u64,
range: self.histogram.config.index_to_range(self.index),
};
self.index += 1;
Some(bucket)
}
}
impl ExactSizeIterator for $iter<'_> {
fn len(&self) -> usize {
self.histogram.buckets.len() - self.index
}
}
impl std::iter::FusedIterator for $iter<'_> {}
impl From<&$sparse> for $name {
fn from(other: &$sparse) -> Self {
let mut histogram = $name::with_config(&other.config);
for (index, count) in other.index.iter().zip(other.count.iter()) {
histogram.buckets[*index as usize] = *count;
}
histogram
}
}
};
}
define_histogram!(Histogram, Iter, SparseHistogram, u64);
define_histogram!(Histogram32, Iter32, SparseHistogram32, u32);
impl Histogram {
#[deprecated(note = "Use the SampleQuantiles trait")]
#[allow(deprecated)]
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
Ok(SampleQuantiles::quantiles(self, percentiles)
.map_err(|e| match e {
Error::InvalidQuantile => Error::InvalidPercentile,
other => other,
})?
.map(|qr| {
qr.entries()
.iter()
.map(|(q, b)| (q.as_f64(), b.clone()))
.collect()
}))
}
#[deprecated(note = "Use the SampleQuantiles trait")]
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
#[allow(deprecated)]
self.percentiles(&[percentile])
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use rand::RngExt;
#[cfg(target_pointer_width = "64")]
#[test]
fn size() {
assert_eq!(std::mem::size_of::<Histogram>(), 48);
}
#[test]
fn quantiles() {
let mut histogram = Histogram::new(7, 64).unwrap();
assert_eq!(histogram.quantile(0.5).unwrap(), None);
assert_eq!(histogram.quantiles(&[0.5, 0.9, 0.99, 0.999]).unwrap(), None);
for i in 0..=100 {
let _ = histogram.increment(i);
let r = histogram.quantile(0.0).unwrap().unwrap();
assert_eq!(
r.get(&Quantile::new(0.0).unwrap()),
Some(&Bucket {
count: 1,
range: 0..=0,
})
);
let r = histogram.quantile(1.0).unwrap().unwrap();
assert_eq!(
r.get(&Quantile::new(1.0).unwrap()),
Some(&Bucket {
count: 1,
range: i..=i,
})
);
}
let r = histogram.quantile(0.0).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.0).unwrap()).unwrap().end(), 0);
let r = histogram.quantile(0.25).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.25).unwrap()).unwrap().end(), 25);
let r = histogram.quantile(0.50).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.50).unwrap()).unwrap().end(), 50);
let r = histogram.quantile(0.75).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.75).unwrap()).unwrap().end(), 75);
let r = histogram.quantile(0.90).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.90).unwrap()).unwrap().end(), 90);
let r = histogram.quantile(0.99).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.99).unwrap()).unwrap().end(), 99);
let r = histogram.quantile(0.999).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.999).unwrap()).unwrap().end(), 100);
assert_eq!(histogram.quantile(-1.0), Err(Error::InvalidQuantile));
assert_eq!(histogram.quantile(1.01), Err(Error::InvalidQuantile));
let _ = histogram.increment(1024);
let r = histogram.quantile(0.999).unwrap().unwrap();
assert_eq!(
r.get(&Quantile::new(0.999).unwrap()),
Some(&Bucket {
count: 1,
range: 1024..=1031,
})
);
}
#[test]
fn min() {
let mut histogram = Histogram::new(7, 64).unwrap();
assert_eq!(histogram.quantile(0.0).unwrap(), None);
let _ = histogram.increment(10);
let r = histogram.quantile(0.0).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.0).unwrap()).unwrap().end(), 10);
let _ = histogram.increment(4);
let r = histogram.quantile(0.0).unwrap().unwrap();
assert_eq!(r.get(&Quantile::new(0.0).unwrap()).unwrap().end(), 4);
}
#[test]
fn downsample() {
let mut histogram = Histogram::new(8, 32).unwrap();
let mut vals: Vec<u64> = Vec::with_capacity(10000);
use rand::SeedableRng;
let mut rng = rand::rngs::SmallRng::seed_from_u64(42);
for _ in 0..vals.capacity() {
let v: u64 = rng.random_range(1..2_u64.pow(histogram.config.max_value_power() as u32));
vals.push(v);
let _ = histogram.increment(v);
}
vals.sort();
let mut percentiles: Vec<f64> = Vec::with_capacity(109);
for i in 20..99 {
percentiles.push(i as f64 / 100.0);
}
let mut tail = vec![
0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.997, 0.998, 0.999, 0.9999, 1.0,
];
percentiles.append(&mut tail);
let h = histogram.clone();
let grouping_power = histogram.config.grouping_power();
for factor in 1..grouping_power {
let error = histogram.config.error();
for p in &percentiles {
let v = vals[((*p * (vals.len() as f64)) as usize) - 1];
let q = histogram.quantile(*p).unwrap().unwrap();
let vhist = q.get(&Quantile::new(*p).unwrap()).unwrap().end();
let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
assert!(e < error);
}
histogram = h.downsample(grouping_power - factor).unwrap();
}
}
fn build_histograms() -> (Histogram, Histogram, Histogram, Histogram) {
let mut h1 = Histogram::new(1, 3).unwrap();
let mut h2 = Histogram::new(1, 3).unwrap();
let mut h3 = Histogram::new(1, 3).unwrap();
let h4 = Histogram::new(7, 32).unwrap();
for i in 0..h1.config().total_buckets() {
h1.as_mut_slice()[i] = 1;
h2.as_mut_slice()[i] = 1;
h3.as_mut_slice()[i] = u64::MAX;
}
(h1, h2, h3, h4)
}
#[test]
fn checked_add() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.checked_add(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.checked_add(&h_good).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow));
}
#[test]
fn wrapping_add() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.wrapping_add(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.wrapping_add(&h_good).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
let r = h.wrapping_add(&h_overflow).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
}
#[test]
fn checked_sub() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.checked_sub(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.checked_sub(&h_good).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
assert_eq!(h.checked_sub(&h_overflow), Err(Error::Underflow));
}
#[test]
fn wrapping_sub() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.wrapping_sub(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.wrapping_sub(&h_good).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
let r = h.wrapping_sub(&h_overflow).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
}
#[test]
fn from_buckets() {
let mut histogram = Histogram::new(8, 32).unwrap();
for i in 0..=100 {
let _ = histogram.increment(i);
}
let buckets = histogram.as_slice();
let constructed = Histogram::from_buckets(8, 32, buckets.to_vec()).unwrap();
assert!(constructed == histogram);
}
#[cfg(target_pointer_width = "64")]
#[test]
fn size_u32() {
assert_eq!(std::mem::size_of::<Histogram32>(), 48);
}
#[test]
fn increment_u32() {
let mut h = Histogram32::new(7, 64).unwrap();
h.increment(5).unwrap();
h.increment(5).unwrap();
h.increment(5).unwrap();
let result = h.quantile(0.5).unwrap().unwrap();
let q = Quantile::new(0.5).unwrap();
assert_eq!(result.get(&q).unwrap().count(), 3u64);
}
#[test]
fn add_u32_wraps_at_max() {
let mut h = Histogram32::new(2, 4).unwrap();
h.add(1, u32::MAX).unwrap();
h.add(1, 1).unwrap();
assert_eq!(h.as_slice()[1], 0u32);
}
#[test]
fn checked_add_u32_overflow() {
let mut h1 = Histogram32::new(1, 3).unwrap();
let mut h2 = Histogram32::new(1, 3).unwrap();
h1.as_mut_slice()[0] = u32::MAX;
h2.as_mut_slice()[0] = 1;
assert_eq!(h1.checked_add(&h2), Err(Error::Overflow));
}
#[test]
fn iter_u32_widens_count_to_u64() {
let mut h = Histogram32::new(2, 4).unwrap();
h.add(1, 5u32).unwrap();
let bucket = h.iter().find(|b| b.count() > 0).unwrap();
let count: u64 = bucket.count();
assert_eq!(count, 5);
}
}