use super::*;
use crate::utils;
use error::*;
use num_traits::Float;
pub trait StatisticsOps<F: Float>: Default {
fn from_iter<I>(data: &I) -> CIResult<Self>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
let mut stats = Self::default();
stats.extend(data)?;
Ok(stats)
}
fn sample_mean(&self) -> F;
fn sample_sem(&self) -> F;
fn sample_count(&self) -> usize;
fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>>;
fn append(&mut self, x: F) -> CIResult<()>;
fn extend<I>(&mut self, data: &I) -> CIResult<()>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
for x_i in data {
self.append(*x_i)?;
}
Ok(())
}
fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>;
}
macro_rules! impl_statistics_ops_for {
( $x:ty ) => {
impl<F: Float> StatisticsOps<F> for $x {
#[inline]
fn append(&mut self, x: F) -> CIResult<()> {
self.append(x)
}
#[inline]
fn sample_mean(&self) -> F {
self.sample_mean()
}
#[inline]
fn sample_sem(&self) -> F {
self.sample_sem()
}
#[inline]
fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
self.ci_mean(confidence)
}
#[inline]
fn sample_count(&self) -> usize {
self.sample_count()
}
#[inline]
fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
<$x>::ci(confidence, data)
}
}
};
}
impl_statistics_ops_for!(Arithmetic<F>);
impl_statistics_ops_for!(Harmonic<F>);
impl_statistics_ops_for!(Geometric<F>);
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Arithmetic<F: Float> {
sum: utils::KahanSum<F>,
sum_sq: utils::KahanSum<F>,
count: usize,
}
impl<F: Float> Default for Arithmetic<F> {
fn default() -> Self {
Self {
sum: utils::KahanSum::default(),
sum_sq: utils::KahanSum::default(),
count: 0,
}
}
}
impl<F: Float> Arithmetic<F> {
pub fn new() -> Self {
Default::default()
}
pub fn sample_variance(&self) -> F {
let mean = self.sample_mean();
(self.sum_sq.value() - mean * self.sum.value()) / F::from(self.count - 1).unwrap()
}
pub fn sample_std_dev(&self) -> F {
self.sample_variance().sqrt()
}
fn append(&mut self, x: F) -> CIResult<()> {
self.sum += x;
self.sum_sq += x * x;
self.count += 1;
Ok(())
}
pub fn sample_mean(&self) -> F {
self.sum.value() / F::from(self.count).unwrap()
}
pub fn sample_sem(&self) -> F {
self.sample_std_dev() / F::from(self.count - 1).unwrap().sqrt()
}
pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
let n = self.count as f64;
let mean = self.sample_mean().try_f64("stats.mean")?;
let std_dev = self.sample_std_dev().try_f64("stats.std_dev")?;
let std_err_mean = std_dev / n.sqrt();
let degrees_of_freedom = n - 1.;
let (lo, hi) = stats::interval_bounds(confidence, mean, std_err_mean, degrees_of_freedom);
let (lo, hi) = (F::from(lo).convert("lo")?, F::from(hi).convert("hi")?);
match confidence {
Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
}
}
pub fn sample_count(&self) -> usize {
self.count
}
#[allow(clippy::should_implement_trait)]
pub fn add(self, rhs: Self) -> Self {
let mut sum = self.sum;
let mut sum_sq = self.sum_sq;
sum += rhs.sum;
sum_sq += rhs.sum_sq;
let count = self.count + rhs.count;
Self { sum, sum_sq, count }
}
pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
Self::from_iter(data)?.ci_mean(confidence)
}
}
impl<F: Float> std::ops::Add for Arithmetic<F> {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self::Output {
self.add(rhs)
}
}
impl<F: Float> std::ops::AddAssign for Arithmetic<F> {
#[inline]
fn add_assign(&mut self, rhs: Self) {
*self = self.add(rhs);
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Harmonic<F: Float> {
recip_space: Arithmetic<F>,
}
impl<F: Float> Harmonic<F> {
pub fn new() -> Self {
Default::default()
}
pub fn append(&mut self, x: F) -> CIResult<()> {
if x <= F::zero() {
return Err(error::CIError::NonPositiveValue(
x.to_f64().unwrap_or(f64::NAN),
));
}
self.recip_space.append(F::one() / x)?;
Ok(())
}
pub fn sample_mean(&self) -> F {
F::one() / self.recip_space.sample_mean()
}
pub fn sample_sem(&self) -> F {
let harm_mean = self.sample_mean();
let recip_std_dev = self.recip_space.sample_std_dev();
harm_mean * harm_mean * recip_std_dev
/ F::from(self.recip_space.sample_count() - 1).unwrap().sqrt()
}
pub fn sample_count(&self) -> usize {
self.recip_space.sample_count()
}
pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
let arith_ci = self.recip_space.ci_mean(confidence.flipped())?;
let (lo, hi) = (F::one() / arith_ci.high_f(), F::one() / arith_ci.low_f());
match confidence {
Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
}
}
#[allow(clippy::should_implement_trait)]
pub fn add(self, rhs: Self) -> Self {
Self {
recip_space: self.recip_space + rhs.recip_space,
}
}
pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
Self::from_iter(data)?.ci_mean(confidence)
}
}
impl<F: Float> Default for Harmonic<F> {
fn default() -> Self {
Self {
recip_space: Arithmetic::default(),
}
}
}
impl<F: Float> std::ops::Add for Harmonic<F> {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self::Output {
self.add(rhs)
}
}
impl<F: Float> std::ops::AddAssign for Harmonic<F> {
#[inline]
fn add_assign(&mut self, rhs: Self) {
*self = self.add(rhs);
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Geometric<F: Float> {
log_space: Arithmetic<F>,
}
impl<F: Float> Geometric<F> {
pub fn new() -> Self {
Default::default()
}
pub fn append(&mut self, x: F) -> CIResult<()> {
if x <= F::zero() {
return Err(error::CIError::NonPositiveValue(
x.to_f64().unwrap_or(f64::NAN),
));
}
self.log_space.append(x.ln())?;
Ok(())
}
pub fn sample_mean(&self) -> F {
self.log_space.sample_mean().exp()
}
pub fn sample_sem(&self) -> F {
let geom_mean = self.sample_mean();
let log_std_dev = self.log_space.sample_std_dev();
geom_mean * log_std_dev / F::from(self.log_space.sample_count() - 1).unwrap().sqrt()
}
pub fn sample_count(&self) -> usize {
self.log_space.sample_count()
}
pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
let arith_ci = self.log_space.ci_mean(confidence)?;
let (lo, hi) = (arith_ci.low_f().exp(), arith_ci.high_f().exp());
match confidence {
Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
}
}
#[allow(clippy::should_implement_trait)]
pub fn add(self, rhs: Self) -> Self {
Self {
log_space: self.log_space + rhs.log_space,
}
}
pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
Self::from_iter(data)?.ci_mean(confidence)
}
}
impl<F: Float> Default for Geometric<F> {
fn default() -> Self {
Self {
log_space: Arithmetic::default(),
}
}
}
impl<F: Float> std::ops::Add for Geometric<F> {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self::Output {
self.add(rhs)
}
}
impl<F: Float> std::ops::AddAssign for Geometric<F> {
#[inline]
fn add_assign(&mut self, rhs: Self) {
*self = self.add(rhs);
}
}
pub trait MeanCI<T: PartialOrd> {
fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<T>>
where
for<'a> &'a I: IntoIterator<Item = &'a T>;
}
macro_rules! impl_mean_ci_for {
( $x:ty ) => {
impl<F: Float> MeanCI<F> for $x {
fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
where
for<'a> &'a I: IntoIterator<Item = &'a F>,
{
<$x>::ci(confidence, data)
}
}
};
}
impl_mean_ci_for!(Arithmetic<F>);
impl_mean_ci_for!(Harmonic<F>);
impl_mean_ci_for!(Geometric<F>);
#[cfg(test)]
mod tests {
use super::*;
use approx::*;
#[test]
fn test_mean_ci() -> CIResult<()> {
let data = [
82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
];
let confidence = Confidence::new_two_sided(0.95);
let ci = Arithmetic::ci(confidence, &data)?;
assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67);
let one_sided_ci = Arithmetic::ci(Confidence::UpperOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
let one_sided_ci = Arithmetic::ci(Confidence::LowerOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
let mut stats = Arithmetic::default();
stats.extend(&data)?;
let ci = stats.ci_mean(confidence)?;
assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67, epsilon = 1e-8);
let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
Ok(())
}
#[test]
fn test_geometric_ci() -> CIResult<()> {
let data = [
82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
];
let confidence = Confidence::new_two_sided(0.95);
let ci = Geometric::ci(confidence, &data)?;
assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
let one_sided_ci = Geometric::ci(Confidence::UpperOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
let one_sided_ci = Geometric::ci(Confidence::LowerOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
let mut stats = Geometric::default();
stats.extend(&data)?;
let ci = stats.ci_mean(confidence)?;
assert_abs_diff_eq!(stats.sample_mean(), 43.7268032829256, epsilon = 1e-8);
assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
Ok(())
}
#[test]
fn test_harmonic_ci() -> CIResult<()> {
let data = [
82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
];
let confidence = Confidence::new_two_sided(0.95);
let ci = Harmonic::ci(confidence, &data)?;
assert_abs_diff_eq!(ci.low_f(), 23.614092539657168, epsilon = 1e-8);
assert_abs_diff_eq!(ci.high_f(), 41.237860649168255, epsilon = 1e-8);
let one_sided_ci = Harmonic::ci(Confidence::UpperOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
let one_sided_ci = Harmonic::ci(Confidence::LowerOneSided(0.975), &data)?;
assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
let confidence = Confidence::new_two_sided(0.95);
let data = [
1.81600583, 0.07498389, 1.29092744, 0.62023863, 0.09345327, 1.94670997, 2.27687339,
0.9251231, 1.78173864, 0.4391542, 1.36948099, 1.5191194, 0.42286756, 1.48463176,
0.17621009, 2.31810064, 0.15633061, 2.55137878, 1.11043948, 1.35923319, 1.58385561,
0.63431437, 0.49993148, 0.49168534, 0.11533354,
];
let ci = Harmonic::ci(confidence, &data)?;
assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
let mut stats = Harmonic::default();
stats.extend(&data)?;
let ci = stats.ci_mean(confidence)?;
assert_abs_diff_eq!(stats.sample_mean(), 0.38041820166550844, epsilon = 1e-8);
assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
Ok(())
}
#[test]
fn test_arithmetic_add() {
const VALUE: f32 = 0.1;
let size = 1_000_000;
let mut stats_ref = Arithmetic::default();
let mut stats_summed = Arithmetic::default();
let mut stats_summed_in_place = Arithmetic::default();
for _ in 0..size {
stats_ref.append(VALUE).unwrap();
let mut new_stat = Arithmetic::default();
new_stat.append(VALUE).unwrap();
stats_summed = stats_summed + new_stat;
stats_summed_in_place += new_stat;
}
assert_eq!(stats_ref.sample_count(), size);
assert_eq!(stats_summed.sample_count(), size);
assert_eq!(stats_summed_in_place.sample_count(), size);
assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
assert_eq!(stats_ref.sample_variance(), stats_summed.sample_variance());
assert_eq!(stats_ref.sample_std_dev(), stats_summed.sample_std_dev());
assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
assert_eq!(
stats_ref.sample_variance(),
stats_summed_in_place.sample_variance()
);
assert_eq!(
stats_ref.sample_std_dev(),
stats_summed_in_place.sample_std_dev()
);
assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
}
#[test]
fn test_geometric_add() {
const VALUE: f32 = 0.1;
let size = 1_000_000;
let mut stats_ref = Geometric::default();
let mut stats_summed = Geometric::default();
let mut stats_summed_in_place = Geometric::default();
for _ in 0..size {
stats_ref.append(VALUE).unwrap();
let mut new_stat = Geometric::default();
new_stat.append(VALUE).unwrap();
stats_summed = stats_summed + new_stat;
stats_summed_in_place += new_stat;
}
assert_eq!(stats_ref.sample_count(), size);
assert_eq!(stats_summed.sample_count(), size);
assert_eq!(stats_summed_in_place.sample_count(), size);
assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
}
#[test]
fn test_harmonic_add() {
const VALUE: f32 = 0.1;
let size = 1_000_000;
let mut stats_ref = Harmonic::default();
let mut stats_summed = Harmonic::default();
let mut stats_summed_in_place = Harmonic::default();
for _ in 0..size {
stats_ref.append(VALUE).unwrap();
let mut new_stat = Harmonic::default();
new_stat.append(VALUE).unwrap();
stats_summed = stats_summed + new_stat;
stats_summed_in_place += new_stat;
}
assert_eq!(stats_ref.sample_count(), size);
assert_eq!(stats_summed.sample_count(), size);
assert_eq!(stats_summed_in_place.sample_count(), size);
assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
}
#[test]
fn test_misc() -> CIResult<()> {
let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
let stats = mean::Arithmetic::from_iter(&data)?;
assert_eq!(stats.sample_count(), 10);
assert_eq!(stats.sample_mean(), 5.5);
assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
let confidence = Confidence::new_two_sided(0.95);
let ci = stats.ci_mean(confidence)?;
assert_abs_diff_eq!(ci, Interval::new(3.3341, 7.6659)?, epsilon = 1e-4);
Ok(())
}
}