#![no_std]
extern crate alloc;
use core::{
fmt::{self, Debug},
ops::AddAssign,
};
use num_traits::{cast::FromPrimitive, float::Float, identities::One, identities::Zero};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Streaming (single-pass) descriptive statistics: min, max, mean, sample
/// standard deviation, and observation count.
///
/// Values are folded in one at a time via `update` (Welford's online
/// algorithm) and two accumulators can be combined with `merge`.
///
/// Trait bounds are intentionally left off the struct itself — they live on
/// the `impl` blocks that actually need them, which is the idiomatic place
/// and keeps the type usable in contexts that only name it.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
pub struct Stats<T> {
    /// Smallest value observed so far (`+inf` before any update).
    pub min: T,
    /// Largest value observed so far (`-inf` before any update).
    pub max: T,
    /// Running arithmetic mean of all observed values.
    pub mean: T,
    /// Sample standard deviation (Bessel-corrected, divisor `n - 1`).
    pub std_dev: T,
    /// Number of values folded in so far.
    pub count: usize,
    // Welford's M2 accumulator: running sum of squared deviations from the
    // mean. Internal detail — std_dev is derived from it.
    mean2: T,
}
impl<T> fmt::Display for Stats<T>
where
    T: fmt::Display + Float + Zero + One + AddAssign + FromPrimitive + PartialEq + Debug,
{
    /// Writes a one-line human-readable summary of all five statistics.
    ///
    /// Honors a caller-supplied precision (e.g. `format!("{:.3}", stats)`);
    /// when none is given, two fractional digits are used.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let precision = match f.precision() {
            Some(p) => p,
            None => 2,
        };
        write!(f, "(avg: {:.precision$}, std_dev: {:.precision$}, min: {:.precision$}, max: {:.precision$}, count: {})", self.mean, self.std_dev, self.min, self.max, self.count, precision = precision)
    }
}
impl<T> Default for Stats<T>
where
    T: Float + Zero + One + AddAssign + FromPrimitive + PartialEq + Debug,
{
    /// An empty accumulator; identical to [`Stats::new`].
    fn default() -> Stats<T> {
        Self::new()
    }
}
impl<T> Stats<T>
where
    T: Float + Zero + One + AddAssign + FromPrimitive + PartialEq + Debug,
{
    /// Creates an empty accumulator.
    ///
    /// `min`/`max` start at +/- infinity so that the first `update` always
    /// replaces them; all other statistics start at zero.
    pub fn new() -> Stats<T> {
        Stats {
            count: 0,
            min: T::infinity(),
            max: T::neg_infinity(),
            mean: T::zero(),
            std_dev: T::zero(),
            mean2: T::zero(),
        }
    }

    /// Folds a single observation into the running statistics using
    /// Welford's numerically stable online algorithm.
    pub fn update(&mut self, value: T) {
        if value > self.max {
            self.max = value;
        }
        if value < self.min {
            self.min = value;
        }
        self.count += 1;
        // count >= 1 here; T::from only fails if the count is not
        // representable in T, which does not occur for float targets.
        let count = T::from(self.count).unwrap();
        let delta = value - self.mean;
        self.mean += delta / count;
        let delta2 = value - self.mean;
        // mean2 accumulates the sum of squared deviations (Welford's M2).
        self.mean2 += delta * delta2;
        if self.count > 1 {
            // Sample standard deviation with Bessel's correction (n - 1).
            self.std_dev = (self.mean2 / (count - T::one())).sqrt();
        }
    }

    /// Returns the statistics of the union of the two sample streams,
    /// combining the running moments with the parallel-variance formula of
    /// Chan et al. Neither input is modified.
    pub fn merge(&self, other: &Self) -> Self {
        // If either side is empty the merge degenerates to a clone of the
        // other side. This also covers the both-empty case: cloning an empty
        // accumulator is indistinguishable from `Stats::new()`, so the
        // original extra `count + count == 0` branch was redundant.
        if self.count == 0 {
            return other.clone();
        }
        if other.count == 0 {
            return self.clone();
        }
        let mut merged = Stats::<T>::new();
        merged.max = if other.max > self.max {
            other.max
        } else {
            self.max
        };
        merged.min = if other.min < self.min {
            other.min
        } else {
            self.min
        };
        merged.count = self.count + other.count;
        let merged_count = T::from(merged.count).unwrap();
        let self_count = T::from(self.count).unwrap();
        let other_count = T::from(other.count).unwrap();
        // Combine means weighted by sample counts, then pool the M2
        // accumulators plus a between-groups term (Chan et al.).
        let delta = other.mean - self.mean;
        merged.mean = (self.mean * self_count + other.mean * other_count) / merged_count;
        merged.mean2 =
            self.mean2 + other.mean2 + delta * delta * self_count * other_count / merged_count;
        // Both sides are non-empty here, so merged.count >= 2 and the
        // (n - 1) divisor is strictly positive.
        merged.std_dev = (merged.mean2 / (merged_count - T::one())).sqrt();
        merged
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;
    use alloc::vec::Vec;
    use float_cmp::{ApproxEq, ApproxEqUlps};
    use rand::SeedableRng;
    use rand_distr::{Distribution, Normal};
    use rayon::prelude::*;

    // Scalar type used by the randomized tests below.
    type T = f64;

    // Smoke test: a tiny known sequence, every statistic checked exactly
    // (min/max/count) or within 2 ULPs (mean/std_dev).
    #[test]
    fn it_works() {
        let mut s: Stats<f32> = Stats::new();
        let vals: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        for v in &vals {
            s.update(*v);
        }
        assert_eq!(s.count, vals.len());
        assert_eq!(s.min, 1.0);
        assert_eq!(s.max, 5.0);
        assert!(s.mean.approx_eq_ulps(&3.0, 2));
        // Expected sample std-dev of 1..=5 is sqrt(2.5) ~= 1.5811388.
        assert!(s.std_dev.approx_eq_ulps(&1.5811388, 2));
    }

    // Naive two-pass reference mean, used to validate the streaming result.
    fn calc_mean(vals: &Vec<T>) -> T {
        let sum = vals.iter().fold(T::zero(), |acc, x| acc + *x);
        sum / T::from_usize(vals.len()).unwrap()
    }

    // Naive two-pass reference sample standard deviation (n - 1 divisor),
    // matching the Bessel correction used by `Stats`.
    fn calc_std_dev(vals: &Vec<T>) -> T {
        let mean = calc_mean(vals);
        let std_dev = (vals
            .iter()
            .fold(T::zero(), |acc, x| acc + (*x - mean).powi(2))
            / T::from_usize(vals.len() - 1).unwrap())
        .sqrt();
        std_dev
    }

    // Reference max via a plain scan, seeded with the smallest finite T.
    fn get_max(vals: &Vec<T>) -> T {
        let mut max = T::min_value();
        for v in vals {
            if *v > max {
                max = *v;
            }
        }
        max
    }

    // Reference min via a plain scan, seeded with the largest finite T.
    fn get_min(vals: &Vec<T>) -> T {
        let mut min = T::max_value();
        for v in vals {
            if *v < min {
                min = *v;
            }
        }
        min
    }

    // Streams 10k normally-distributed samples (fixed RNG seed, so the test
    // is deterministic) and compares against the naive reference functions.
    #[test]
    fn stats_for_large_random_data() {
        const MEAN: T = 2.0;
        const STD_DEV: T = 3.0;
        const SEED: u64 = 42;
        const NUM_SAMPLES: usize = 10_000;
        let mut s: Stats<T> = Stats::new();
        let mut rng = rand::rngs::StdRng::seed_from_u64(SEED);
        let normal = Normal::<T>::new(MEAN, STD_DEV).unwrap();
        let random_data: Vec<T> = (0..NUM_SAMPLES).map(|_x| normal.sample(&mut rng)).collect();
        random_data.iter().for_each(|v| s.update(*v));
        let mean = calc_mean(&random_data);
        assert!(s.mean.approx_eq(mean, (1.0e-13, 2)));
        let std_dev = calc_std_dev(&random_data);
        assert!(s.std_dev.approx_eq(std_dev, (1.0e-13, 2)));
        assert_eq!(s.count, random_data.len());
        let max = get_max(&random_data);
        let min = get_min(&random_data);
        assert_eq!(s.max, max);
        assert_eq!(s.min, min);
    }

    // Splits the same data into sequential chunks, builds per-chunk Stats,
    // then checks the folded merge agrees with both the naive reference and
    // the single-stream accumulator. Also exercises the empty-side and
    // empty-empty merge cases.
    #[test]
    fn stats_merge() {
        const MEAN: T = 2.0;
        const STD_DEV: T = 3.0;
        const SEED: u64 = 42;
        const NUM_SAMPLES: usize = 10_000;
        let mut s: Stats<T> = Stats::new();
        let mut rng = rand::rngs::StdRng::seed_from_u64(SEED);
        let normal = Normal::<T>::new(MEAN, STD_DEV).unwrap();
        let random_data: Vec<T> = (0..NUM_SAMPLES).map(|_x| normal.sample(&mut rng)).collect();
        random_data.iter().for_each(|v| s.update(*v));
        let mean = calc_mean(&random_data);
        let std_dev = calc_std_dev(&random_data);
        let max = get_max(&random_data);
        let min = get_min(&random_data);
        let chunks_size = 1000;
        let stats: Vec<Stats<T>> = random_data
            .chunks(chunks_size)
            .map(|chunk| {
                let mut s: Stats<T> = Stats::new();
                chunk.iter().for_each(|v| s.update(*v));
                s
            })
            .collect();
        // 10_000 divides evenly by 1000, so exactly 10 chunks.
        assert_eq!(stats.len(), NUM_SAMPLES / chunks_size);
        let merged_stats = stats.into_iter().reduce(|acc, s| acc.merge(&s)).unwrap();
        // Merged result must match the naive whole-data reference...
        assert!(merged_stats.mean.approx_eq(mean, (1.0e-13, 2)));
        assert!(merged_stats.std_dev.approx_eq(std_dev, (1.0e-13, 2)));
        assert_eq!(merged_stats.max, max);
        assert_eq!(merged_stats.min, min);
        assert_eq!(merged_stats.count, NUM_SAMPLES);
        // ...and the single-stream accumulator.
        assert!(merged_stats.mean.approx_eq(s.mean, (1.0e-13, 2)));
        assert!(merged_stats.std_dev.approx_eq(s.std_dev, (1.0e-13, 2)));
        assert_eq!(merged_stats.max, s.max);
        assert_eq!(merged_stats.min, s.min);
        assert_eq!(merged_stats.count, s.count);
        // Merging with an empty accumulator is a no-op (either side).
        let empty_stats: Stats<T> = Stats::new();
        let merged_stats = s.merge(&empty_stats);
        assert_eq!(merged_stats.count, s.count);
        let empty_stats: Stats<T> = Stats::new();
        let merged_stats = empty_stats.merge(&s);
        assert_eq!(merged_stats.count, s.count);
        // Merging two empty accumulators yields an empty accumulator.
        let empty_stats_1: Stats<T> = Stats::new();
        let empty_stats_2: Stats<T> = Stats::new();
        let merged_stats = empty_stats_1.merge(&empty_stats_2);
        assert_eq!(merged_stats.count, 0);
    }

    // Same merge check but with chunk Stats built in parallel via rayon;
    // merge-order independence of the formula makes the result stable.
    #[test]
    fn stats_merge_parallel() {
        const MEAN: T = 2.0;
        const STD_DEV: T = 3.0;
        const SEED: u64 = 42;
        const NUM_SAMPLES: usize = 500_000;
        let mut s: Stats<T> = Stats::new();
        let mut rng = rand::rngs::StdRng::seed_from_u64(SEED);
        let normal = Normal::<T>::new(MEAN, STD_DEV).unwrap();
        let random_data: Vec<T> = (0..NUM_SAMPLES).map(|_x| normal.sample(&mut rng)).collect();
        random_data.iter().for_each(|v| s.update(*v));
        let mean = calc_mean(&random_data);
        let std_dev = calc_std_dev(&random_data);
        let max = get_max(&random_data);
        let min = get_min(&random_data);
        let chunks_size = 1000;
        let stats: Vec<Stats<T>> = random_data
            .par_chunks(chunks_size) .map(|chunk| {
                let mut s: Stats<T> = Stats::new();
                chunk.iter().for_each(|v| s.update(*v));
                s
            })
            .collect();
        // NOTE(review): 500_000 / 1000 divides evenly, so par_chunks should
        // yield exactly this many chunks; `==` (as in stats_merge) would be
        // the tighter assertion — confirm the `>=` here is intentional.
        assert!(stats.len() >= NUM_SAMPLES / chunks_size);
        let merged_stats = stats.into_iter().reduce(|acc, s| acc.merge(&s)).unwrap();
        assert!(merged_stats.mean.approx_eq(mean, (1.0e-13, 2)));
        assert!(merged_stats.std_dev.approx_eq(std_dev, (1.0e-13, 2)));
        assert_eq!(merged_stats.max, max);
        assert_eq!(merged_stats.min, min);
        assert_eq!(merged_stats.count, NUM_SAMPLES);
        assert!(merged_stats.mean.approx_eq(s.mean, (1.0e-13, 2)));
        assert!(merged_stats.std_dev.approx_eq(s.std_dev, (1.0e-13, 2)));
        assert_eq!(merged_stats.max, s.max);
        assert_eq!(merged_stats.min, s.min);
        assert_eq!(merged_stats.count, s.count);
    }
}