use sketches_ddsketch::{Config, DDSketch};
use std::fmt;
/// A quantile summary backed by a [`DDSketch`].
///
/// Samples are recorded with [`Summary::add`], quantiles are read back with
/// [`Summary::quantile`], and two summaries can be combined with
/// [`Summary::merge`].
#[derive(Clone)]
pub struct Summary {
    /// The sketch every recorded sample is forwarded to.
    sketch: DDSketch,
}

impl Summary {
    /// Creates an empty summary.
    ///
    /// `alpha`, `max_buckets` and the absolute value of `min_value` are passed
    /// to [`Config::new`] in that order; see the `sketches_ddsketch`
    /// documentation for how the sketch interprets them.
    pub fn new(alpha: f64, max_buckets: u32, min_value: f64) -> Summary {
        // The sign of `min_value` is discarded before it reaches the sketch.
        let sketch = DDSketch::new(Config::new(alpha, max_buckets, min_value.abs()));
        Summary { sketch }
    }

    /// Creates an empty summary with `alpha = 0.0001`, `max_buckets = 32_768`
    /// and `min_value = 1.0e-9`.
    pub fn with_defaults() -> Summary {
        Self::new(0.0001, 32_768, 1.0e-9)
    }

    /// Records `value` in the summary.
    ///
    /// Positive and negative infinity are silently dropped. Every other value
    /// is forwarded to the sketch; note that NaN is *not* filtered here.
    pub fn add(&mut self, value: f64) {
        if !value.is_infinite() {
            self.sketch.add(value);
        }
    }

    /// Returns the sketch's estimate for quantile `q`.
    ///
    /// Yields `None` when `q` lies outside `0.0..=1.0` (NaN included) or when
    /// no samples have been recorded.
    ///
    /// # Panics
    ///
    /// Panics if the sketch returns an error for `q`; the range check above is
    /// expected to rule that out.
    pub fn quantile(&self, q: f64) -> Option<f64> {
        let answerable = (0.0..=1.0).contains(&q) && self.count() > 0;
        if answerable {
            self.sketch.quantile(q).expect("quantile should be valid at this point")
        } else {
            None
        }
    }

    /// Merges the samples of `other` into this summary.
    ///
    /// # Errors
    ///
    /// Returns [`MergeError`] whenever the underlying sketch merge fails; the
    /// original cause is discarded.
    pub fn merge(&mut self, other: &Summary) -> Result<(), MergeError> {
        match self.sketch.merge(&other.sketch) {
            Ok(_) => Ok(()),
            Err(_) => Err(MergeError {}),
        }
    }

    /// Returns the minimum reported by the sketch, or `f64::INFINITY` when the
    /// sketch has none to report (presumably because it is empty).
    pub fn min(&self) -> f64 {
        match self.sketch.min() {
            Some(lowest) => lowest,
            None => f64::INFINITY,
        }
    }

    /// Returns the maximum reported by the sketch, or `f64::NEG_INFINITY` when
    /// the sketch has none to report (presumably because it is empty).
    pub fn max(&self) -> f64 {
        match self.sketch.max() {
            Some(highest) => highest,
            None => f64::NEG_INFINITY,
        }
    }

    /// Returns `true` when [`Summary::count`] is zero.
    pub fn is_empty(&self) -> bool {
        self.count() == 0
    }

    /// Returns the sample count reported by the sketch.
    pub fn count(&self) -> usize {
        self.sketch.count()
    }

    /// Returns a rough size estimate in bytes.
    ///
    /// The estimate is the size of `Summary` itself plus 8 bytes for every unit
    /// reported by `DDSketch::length` (presumably one per bucket — confirm
    /// against the `sketches_ddsketch` documentation).
    pub fn estimated_size(&self) -> usize {
        let sketch_bytes = self.sketch.length() * 8;
        std::mem::size_of::<Self>() + sketch_bytes
    }
}
/// Error returned by [`Summary::merge`] when the underlying sketches could not
/// be merged.
///
/// The type is deliberately empty: it carries no information about the cause.
#[derive(Debug, Clone, Copy)]
pub struct MergeError {}

impl fmt::Display for MergeError {
    /// Writes the fixed message `merge error`, ignoring any width or fill
    /// requested by the caller.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("merge error")
    }
}

/// `MergeError` has no underlying source, so the default trait methods suffice.
impl std::error::Error for MergeError {}
#[cfg(test)]
mod tests {
    use super::Summary;
    use quickcheck_macros::quickcheck;

    // Only `assert_relative_eq` is used below; the other `approx` items are
    // kept imported, hence the lint allowance.
    #[allow(unused_imports)]
    use approx::{abs_diff_eq, assert_abs_diff_eq, assert_relative_eq, relative_eq};

    use ndarray::{Array1, Axis};
    use ndarray_stats::{interpolate::Linear, QuantileExt};
    use noisy_float::types::n64;
    use ordered_float::NotNan;
    use rand::{distributions::Distribution, thread_rng};
    use rand_distr::Uniform;

    /// Asserts the sample count and the reported minimum/maximum of `summary`.
    fn assert_extent(summary: &Summary, count: usize, min: f64, max: f64) {
        assert_eq!(summary.count(), count);
        assert_relative_eq!(summary.min(), min);
        assert_relative_eq!(summary.max(), max);
    }

    /// Asserts that quantile `q` exists and matches `expected` within a
    /// relative tolerance of `alpha`.
    fn assert_quantile(summary: &Summary, q: f64, expected: f64, alpha: f64) {
        assert_relative_eq!(
            summary.quantile(q).expect("value should exist"),
            expected,
            max_relative = alpha
        );
    }

    /// Draws 100_000 samples from `Uniform::new(low, high)`, records each one
    /// in a fresh summary, and returns that summary together with the sorted
    /// samples.
    fn sample_uniform(
        low: f64,
        high: f64,
        alpha: f64,
        max_bins: u32,
        min_value: f64,
    ) -> (Summary, Array1<NotNan<f64>>) {
        let mut rng = thread_rng();
        let dist = Uniform::new(low, high);
        let mut summary = Summary::new(alpha, max_bins, min_value);

        let mut samples = Vec::new();
        for _ in 0..100_000 {
            let drawn = dist.sample(&mut rng);
            samples.push(NotNan::new(drawn).unwrap());
            summary.add(drawn);
        }
        samples.sort();

        (summary, Array1::from(samples))
    }

    /// Walks a summary through one, two and three samples, checking count,
    /// min/max and a handful of quantiles after each insertion.
    #[test]
    fn test_basics() {
        let alpha = 0.0001;
        let mut summary = Summary::new(alpha, 32_768, 1.0e-9);
        assert!(summary.is_empty());

        // A single negative sample: every quantile is that sample.
        summary.add(-420.42);
        assert_extent(&summary, 1, -420.42, -420.42);
        for &q in &[0.1, 0.5, 0.9] {
            assert_quantile(&summary, q, -420.42, alpha);
        }

        // Add the mirrored positive sample.
        summary.add(420.42);
        assert_extent(&summary, 2, -420.42, 420.42);
        assert_quantile(&summary, 0.5, -420.42, alpha);
        assert_quantile(&summary, 0.51, -420.42, alpha);

        // Add a third sample that lies between the first two.
        summary.add(42.42);
        assert_extent(&summary, 3, -420.42, 420.42);
        let expectations = [
            (0.333333, -420.42),
            (0.333334, -420.42),
            (0.666666, 42.42),
            (0.666667, 42.42),
            (0.999999, 42.42),
        ];
        for &(q, expected) in &expectations {
            assert_quantile(&summary, q, expected, alpha);
        }
    }

    /// Compares summary quantiles against linearly interpolated quantiles of
    /// the raw samples for a uniform distribution over `0.0..100.0`.
    #[test]
    fn test_positive_uniform() {
        let alpha = 0.0001;
        let (summary, mut true_histogram) = sample_uniform(0.0, 100.0, alpha, 32_768, 1.0e-9);

        for &q in &[0.25, 0.5, 0.75, 0.99] {
            let exact = true_histogram
                .quantile_axis_mut(Axis(0), n64(q), &Linear)
                .expect("quantile should be in range");
            let aval = exact.get(()).expect("quantile value should be present").into_inner();
            let sval = summary.quantile(q).expect("quantile value should be present");

            // NOTE(review): `distance` already scales with `aval` and is then
            // used as a *relative* bound — confirm this looseness is intended.
            let distance = (aval * alpha) * 2.0;
            assert_relative_eq!(aval, sval, max_relative = distance);
        }
    }

    /// Same comparison as `test_positive_uniform`, but over `-100.0..100.0`
    /// and with a larger bucket limit.
    #[test]
    fn test_negative_positive_uniform() {
        let alpha = 0.0001;
        let (summary, mut true_histogram) = sample_uniform(-100.0, 100.0, alpha, 65_536, 1.0e-9);

        for &q in &[0.25, 0.47, 0.75, 0.99] {
            let exact = true_histogram
                .quantile_axis_mut(Axis(0), n64(q), &Linear)
                .expect("quantile should be in range");
            let aval = exact.get(()).expect("quantile value should be present").into_inner();
            let sval = summary.quantile(q).expect("quantile value should be present");

            // The exact value may be negative here, so the bound uses its
            // magnitude.
            let distance = (aval.abs() * alpha) * 2.0;
            assert_relative_eq!(aval, sval, max_relative = distance);
        }
    }

    /// A lone zero sample must come back as the median.
    #[test]
    fn test_zeroes() {
        let mut summary = Summary::with_defaults();
        summary.add(0.0);

        let median = summary.quantile(0.5);
        assert_eq!(median, Some(0.0));
    }

    /// Infinite samples are dropped, so the summary keeps reporting no median.
    #[test]
    fn test_infinities() {
        let mut summary = Summary::with_defaults();
        for &infinite in &[f64::INFINITY, f64::NEG_INFINITY] {
            summary.add(infinite);
            assert_eq!(summary.quantile(0.5), None);
        }
    }

    /// Property: a quantile is available exactly when at least one of the
    /// inputs was not infinite.
    #[quickcheck]
    fn quantile_validity(inputs: Vec<f64>) -> bool {
        let had_non_inf = inputs.iter().any(|input| !input.is_infinite());

        let mut summary = Summary::with_defaults();
        for &input in &inputs {
            summary.add(input);
        }

        for &q in &[0.0, 0.5, 0.9, 0.95, 0.99, 0.999, 1.0] {
            let result = summary.quantile(q);
            assert_eq!(result.is_some(), had_non_inf);
        }

        true
    }
}