use std::collections::BTreeMap;
use crate::quantile::{Quantile, QuantilesResult, SampleQuantiles};
use crate::{Bucket, Config, Error, SparseHistogram};
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct Histogram {
pub(crate) config: Config,
pub(crate) buckets: Box<[u64]>,
}
impl Histogram {
pub fn new(grouping_power: u8, max_value_power: u8) -> Result<Self, Error> {
let config = Config::new(grouping_power, max_value_power)?;
Ok(Self::with_config(&config))
}
pub fn with_config(config: &Config) -> Self {
let buckets: Box<[u64]> = vec![0; config.total_buckets()].into();
Self {
config: *config,
buckets,
}
}
pub fn from_buckets(
grouping_power: u8,
max_value_power: u8,
buckets: Vec<u64>,
) -> Result<Self, Error> {
let config = Config::new(grouping_power, max_value_power)?;
if config.total_buckets() != buckets.len() {
return Err(Error::IncompatibleParameters);
}
Ok(Self {
config,
buckets: buckets.into(),
})
}
pub fn increment(&mut self, value: u64) -> Result<(), Error> {
self.add(value, 1)
}
pub fn add(&mut self, value: u64, count: u64) -> Result<(), Error> {
let index = self.config.value_to_index(value)?;
self.buckets[index] = self.buckets[index].wrapping_add(count);
Ok(())
}
pub fn as_slice(&self) -> &[u64] {
&self.buckets
}
pub fn as_mut_slice(&mut self) -> &mut [u64] {
&mut self.buckets
}
#[deprecated(note = "Use the SampleQuantiles trait")]
#[allow(deprecated)]
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
Ok(SampleQuantiles::quantiles(self, percentiles)
.map_err(|e| match e {
Error::InvalidQuantile => Error::InvalidPercentile,
other => other,
})?
.map(|qr| {
qr.entries()
.iter()
.map(|(q, b)| (q.as_f64(), b.clone()))
.collect()
}))
}
#[deprecated(note = "Use the SampleQuantiles trait")]
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
#[allow(deprecated)]
self.percentiles(&[percentile])
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}
pub fn downsample(&self, grouping_power: u8) -> Result<Histogram, Error> {
if grouping_power >= self.config.grouping_power() {
return Err(Error::IncompatibleParameters);
}
let mut histogram = Histogram::new(grouping_power, self.config.max_value_power())?;
for (i, n) in self.as_slice().iter().enumerate() {
if *n != 0 {
let val = self.config.index_to_lower_bound(i);
histogram.add(val, *n)?;
}
}
Ok(histogram)
}
pub fn checked_add(&self, other: &Histogram) -> Result<Histogram, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.checked_add(*other).ok_or(Error::Overflow)?;
}
Ok(result)
}
pub fn wrapping_add(&self, other: &Histogram) -> Result<Histogram, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.wrapping_add(*other);
}
Ok(result)
}
pub fn checked_sub(&self, other: &Histogram) -> Result<Histogram, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.checked_sub(*other).ok_or(Error::Underflow)?;
}
Ok(result)
}
pub fn wrapping_sub(&self, other: &Histogram) -> Result<Histogram, Error> {
if self.config != other.config {
return Err(Error::IncompatibleParameters);
}
let mut result = self.clone();
for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
*this = this.wrapping_sub(*other);
}
Ok(result)
}
pub fn iter(&self) -> Iter<'_> {
Iter {
index: 0,
histogram: self,
}
}
pub fn config(&self) -> Config {
self.config
}
}
impl SampleQuantiles for Histogram {
fn quantiles(&self, quantiles: &[f64]) -> Result<Option<QuantilesResult>, Error> {
for q in quantiles {
if !(0.0..=1.0).contains(q) {
return Err(Error::InvalidQuantile);
}
}
let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum();
if total_count == 0 {
return Ok(None);
}
let mut sorted: Vec<Quantile> = quantiles
.iter()
.map(|&q| Quantile::new(q).unwrap())
.collect();
sorted.sort();
sorted.dedup();
let mut min_idx = None;
let mut max_idx = None;
for (i, &count) in self.buckets.iter().enumerate() {
if count > 0 {
if min_idx.is_none() {
min_idx = Some(i);
}
max_idx = Some(i);
}
}
let min_idx = min_idx.unwrap(); let max_idx = max_idx.unwrap();
let min = Bucket {
count: self.buckets[min_idx],
range: self.config.index_to_range(min_idx),
};
let max = Bucket {
count: self.buckets[max_idx],
range: self.config.index_to_range(max_idx),
};
let mut bucket_idx = 0;
let mut partial_sum = self.buckets[bucket_idx] as u128;
let mut entries = BTreeMap::new();
for quantile in &sorted {
let count = std::cmp::max(1, (quantile.as_f64() * total_count as f64).ceil() as u128);
loop {
if partial_sum >= count {
entries.insert(
*quantile,
Bucket {
count: self.buckets[bucket_idx],
range: self.config.index_to_range(bucket_idx),
},
);
break;
}
if bucket_idx == (self.buckets.len() - 1) {
break;
}
bucket_idx += 1;
partial_sum += self.buckets[bucket_idx] as u128;
}
}
Ok(Some(QuantilesResult::new(entries, total_count, min, max)))
}
}
impl<'a> IntoIterator for &'a Histogram {
type Item = Bucket;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
Iter {
index: 0,
histogram: self,
}
}
}
pub struct Iter<'a> {
index: usize,
histogram: &'a Histogram,
}
impl Iterator for Iter<'_> {
type Item = Bucket;
fn next(&mut self) -> Option<<Self as std::iter::Iterator>::Item> {
if self.index >= self.histogram.buckets.len() {
return None;
}
let bucket = Bucket {
count: self.histogram.buckets[self.index],
range: self.histogram.config.index_to_range(self.index),
};
self.index += 1;
Some(bucket)
}
}
impl ExactSizeIterator for Iter<'_> {
fn len(&self) -> usize {
self.histogram.buckets.len() - self.index
}
}
impl std::iter::FusedIterator for Iter<'_> {}
impl From<&SparseHistogram> for Histogram {
fn from(other: &SparseHistogram) -> Self {
let mut histogram = Histogram::with_config(&other.config);
for (index, count) in other.index.iter().zip(other.count.iter()) {
histogram.buckets[*index as usize] = *count;
}
histogram
}
}
#[cfg(test)]
mod tests {
use super::*;
use rand::RngExt;
#[cfg(target_pointer_width = "64")]
#[test]
fn size() {
assert_eq!(std::mem::size_of::<Histogram>(), 48);
}
#[allow(deprecated)]
#[test]
fn percentiles() {
let mut histogram = Histogram::new(7, 64).unwrap();
assert_eq!(histogram.percentile(0.5).unwrap(), None);
assert_eq!(
histogram.percentiles(&[0.5, 0.9, 0.99, 0.999]).unwrap(),
None
);
for i in 0..=100 {
let _ = histogram.increment(i);
assert_eq!(
histogram.percentile(0.0),
Ok(Some(Bucket {
count: 1,
range: 0..=0,
}))
);
assert_eq!(
histogram.percentile(1.0),
Ok(Some(Bucket {
count: 1,
range: i..=i,
}))
);
}
assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(0));
assert_eq!(histogram.percentile(0.25).map(|b| b.unwrap().end()), Ok(25));
assert_eq!(histogram.percentile(0.50).map(|b| b.unwrap().end()), Ok(50));
assert_eq!(histogram.percentile(0.75).map(|b| b.unwrap().end()), Ok(75));
assert_eq!(histogram.percentile(0.90).map(|b| b.unwrap().end()), Ok(90));
assert_eq!(histogram.percentile(0.99).map(|b| b.unwrap().end()), Ok(99));
assert_eq!(
histogram.percentile(0.999).map(|b| b.unwrap().end()),
Ok(100)
);
assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile));
assert_eq!(histogram.percentile(1.01), Err(Error::InvalidPercentile));
let percentiles: Vec<(f64, u64)> = histogram
.percentiles(&[0.5, 0.9, 0.99, 0.999])
.unwrap()
.unwrap()
.iter()
.map(|(p, b)| (*p, b.end()))
.collect();
assert_eq!(
percentiles,
vec![(0.5, 50), (0.9, 90), (0.99, 99), (0.999, 100)]
);
let _ = histogram.increment(1024);
assert_eq!(
histogram.percentile(0.999),
Ok(Some(Bucket {
count: 1,
range: 1024..=1031,
}))
);
}
#[allow(deprecated)]
#[test]
fn min() {
let mut histogram = Histogram::new(7, 64).unwrap();
assert_eq!(histogram.percentile(0.0).unwrap(), None);
let _ = histogram.increment(10);
assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(10));
let _ = histogram.increment(4);
assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(4));
}
#[allow(deprecated)]
#[test]
fn percentile_rejects_old_scale() {
let mut histogram = Histogram::new(7, 64).unwrap();
let _ = histogram.increment(1);
assert_eq!(histogram.percentile(50.0), Err(Error::InvalidPercentile));
assert_eq!(histogram.percentile(99.9), Err(Error::InvalidPercentile));
}
#[allow(deprecated)]
#[test]
fn downsample() {
let mut histogram = Histogram::new(8, 32).unwrap();
let mut vals: Vec<u64> = Vec::with_capacity(10000);
use rand::SeedableRng;
let mut rng = rand::rngs::SmallRng::seed_from_u64(42);
for _ in 0..vals.capacity() {
let v: u64 = rng.random_range(1..2_u64.pow(histogram.config.max_value_power() as u32));
vals.push(v);
let _ = histogram.increment(v);
}
vals.sort();
let mut percentiles: Vec<f64> = Vec::with_capacity(109);
for i in 20..99 {
percentiles.push(i as f64 / 100.0);
}
let mut tail = vec![
0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.997, 0.998, 0.999, 0.9999, 1.0,
];
percentiles.append(&mut tail);
let h = histogram.clone();
let grouping_power = histogram.config.grouping_power();
for factor in 1..grouping_power {
let error = histogram.config.error();
for p in &percentiles {
let v = vals[((*p * (vals.len() as f64)) as usize) - 1];
let vhist = histogram.percentile(*p).unwrap().unwrap().end();
let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
assert!(e < error);
}
histogram = h.downsample(grouping_power - factor).unwrap();
}
}
fn build_histograms() -> (Histogram, Histogram, Histogram, Histogram) {
let mut h1 = Histogram::new(1, 3).unwrap();
let mut h2 = Histogram::new(1, 3).unwrap();
let mut h3 = Histogram::new(1, 3).unwrap();
let h4 = Histogram::new(7, 32).unwrap();
for i in 0..h1.config().total_buckets() {
h1.as_mut_slice()[i] = 1;
h2.as_mut_slice()[i] = 1;
h3.as_mut_slice()[i] = u64::MAX;
}
(h1, h2, h3, h4)
}
#[test]
fn checked_add() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.checked_add(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.checked_add(&h_good).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow));
}
#[test]
fn wrapping_add() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.wrapping_add(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.wrapping_add(&h_good).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
let r = h.wrapping_add(&h_overflow).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
}
#[test]
fn checked_sub() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.checked_sub(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.checked_sub(&h_good).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
assert_eq!(h.checked_sub(&h_overflow), Err(Error::Underflow));
}
#[test]
fn wrapping_sub() {
let (h, h_good, h_overflow, h_mismatch) = build_histograms();
assert_eq!(
h.wrapping_sub(&h_mismatch),
Err(Error::IncompatibleParameters)
);
let r = h.wrapping_sub(&h_good).unwrap();
assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
let r = h.wrapping_sub(&h_overflow).unwrap();
assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
}
#[test]
fn from_buckets() {
let mut histogram = Histogram::new(8, 32).unwrap();
for i in 0..=100 {
let _ = histogram.increment(i);
}
let buckets = histogram.as_slice();
let constructed = Histogram::from_buckets(8, 32, buckets.to_vec()).unwrap();
assert!(constructed == histogram);
}
}