use super::{StatsError, StatsResult};
#[inline(always)]
pub fn sample_mean(n: u64, sum: f64) -> StatsResult<f64> {
if n == 0 {
return Err(StatsError("sample size must be positive"));
}
Ok(sum / n as f64)
}
#[inline(always)]
pub fn sample_sum2_deviations(n: u64, sum: f64, sum2: f64) -> StatsResult<f64> {
if n == 0 {
return Err(StatsError("sample size must be positive"));
}
Ok(sum2 - sum.powi(2) / n as f64)
}
#[inline(always)]
pub fn sample_var(n: u64, sum: f64, sum2: f64) -> StatsResult<f64> {
if n <= 1 {
return Err(StatsError("sample size must be greater than `1`"));
}
Ok(sample_sum2_deviations(n, sum, sum2)? / (n as f64 - 1.))
}
#[inline(always)]
pub fn sample_stdev(n: u64, sum: f64, sum2: f64) -> StatsResult<f64> {
Ok(sample_var(n, sum, sum2)?.sqrt())
}
#[doc = include_str!("../../examples/moments.rs")]
#[derive(Debug, PartialEq)]
pub struct SampleMoments {
n: u64,
sum: f64,
sum2: f64,
min: f64,
max: f64,
}
impl SampleMoments {
pub fn new_empty() -> Self {
Self::new(0, 0., 0.)
}
pub fn new(n: u64, sum: f64, sum2: f64) -> Self {
Self::new_with_min_max(n, sum, sum2, f64::NAN, f64::NAN)
}
pub fn new_with_min_max(n: u64, sum: f64, sum2: f64, min: f64, max: f64) -> Self {
Self {
n,
sum,
sum2,
min,
max,
}
}
pub fn collect_value(&mut self, value: f64) {
self.n += 1;
self.sum += value;
self.sum2 += value * value;
self.min = value.min(self.min);
self.max = value.max(self.max);
}
pub fn from_iterator(dataset: impl Iterator<Item = f64>) -> Self {
let mut moments = SampleMoments::new_empty();
for v in dataset {
moments.collect_value(v);
}
moments
}
pub fn from_paired_iterators(
mut dataset1: impl Iterator<Item = f64>,
mut dataset2: impl Iterator<Item = f64>,
) -> StatsResult<Self> {
let mut moments = SampleMoments::new_empty();
loop {
match (dataset1.next(), dataset2.next()) {
(Some(v1), Some(v2)) => moments.collect_value(v1 - v2),
(None, None) => break,
_ => return Err(StatsError("paired samples must have the same size")),
}
}
Ok(moments)
}
pub fn from_slice(dataset: &[f64]) -> Self {
let iter = dataset.iter().cloned();
Self::from_iterator(iter)
}
pub fn from_paired_slices(dataset1: &[f64], dataset2: &[f64]) -> StatsResult<Self> {
Self::from_paired_iterators(dataset1.iter().cloned(), dataset2.iter().cloned())
}
pub fn n(&self) -> u64 {
self.n
}
pub fn nf(&self) -> f64 {
self.n as f64
}
pub fn sum(&self) -> f64 {
self.sum
}
pub fn mean(&self) -> StatsResult<f64> {
sample_mean(self.n, self.sum)
}
pub fn sum2(&self) -> f64 {
self.sum2
}
pub fn sum2_deviations(&self) -> StatsResult<f64> {
sample_sum2_deviations(self.n, self.sum, self.sum2)
}
pub fn var(&self) -> StatsResult<f64> {
sample_var(self.n, self.sum, self.sum2)
}
pub fn stdev(&self) -> StatsResult<f64> {
sample_stdev(self.n, self.sum, self.sum2)
}
pub fn min(&self) -> f64 {
self.min
}
pub fn max(&self) -> f64 {
self.max
}
}
impl Default for SampleMoments {
fn default() -> Self {
Self::new_empty()
}
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum AltHyp {
Lt,
Gt,
Ne,
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Hyp {
Null,
Alt(AltHyp),
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct HypTestResult {
p: f64,
alpha: f64,
alt_hyp: AltHyp,
accepted: Hyp,
}
impl HypTestResult {
pub fn new(p: f64, alpha: f64, alt_hyp: AltHyp) -> HypTestResult {
Self {
p,
alpha,
alt_hyp,
accepted: if p < alpha {
Hyp::Alt(alt_hyp)
} else {
Hyp::Null
},
}
}
pub fn p(&self) -> f64 {
self.p
}
pub fn alpha(&self) -> f64 {
self.alpha
}
pub fn alt_hyp(&self) -> AltHyp {
self.alt_hyp
}
pub fn accepted(&self) -> Hyp {
self.accepted
}
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum PositionWrtCi {
Below,
In,
Above,
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Ci(
pub f64,
pub f64,
);
impl Ci {
pub fn position_of(&self, value: f64) -> PositionWrtCi {
match value {
_ if value < self.0 => PositionWrtCi::Below,
_ if self.0 <= value && value <= self.1 => PositionWrtCi::In,
_ => PositionWrtCi::Above,
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::dev_utils::ApproxEq;
const EPSILON: f64 = 0.000005;
#[test]
fn test_moments() {
let x = [14., 15., 15., 15., 16., 18., 22., 23., 24., 25., 25.];
let n = 11;
let nf = 11.;
let sum = 212.;
let sum2 = 4290.;
let mean = 212. / 11.;
let var = 20.41818;
let sum2_dev = var * ((n - 1) as f64);
let stdev = 4.518648;
let min = 14.;
let max = 25.;
let moments = SampleMoments::new_with_min_max(n, sum, sum2, min, max);
let moments_s = SampleMoments::from_slice(&x);
assert_eq!(moments, moments_s);
assert_eq!(n, moments.n());
assert_eq!(nf, moments.nf());
assert_eq!(sum, moments.sum());
assert_eq!(sum2, moments.sum2());
assert_eq!(mean, moments.mean().unwrap());
assert!(var.approx_eq(moments.var().unwrap(), EPSILON));
assert!(sum2_dev.approx_eq(moments.sum2_deviations().unwrap(), EPSILON * 10.));
assert!(stdev.approx_eq(moments.stdev().unwrap(), EPSILON));
assert_eq!(min, moments.min());
assert_eq!(max, moments.max());
}
#[test]
fn test_moments_default() {
let moments = SampleMoments::default();
assert_eq!(0, moments.n());
assert_eq!(0., moments.nf());
assert_eq!(0., moments.sum());
assert_eq!(0., moments.sum2());
assert!(moments.mean().is_err());
assert!(moments.var().is_err());
assert!(moments.sum2_deviations().is_err());
assert!(moments.stdev().is_err());
assert!(moments.min().is_nan());
assert!(moments.max().is_nan());
}
#[test]
fn test_moments_paired_slices() {
let x = [14., 15., 15., 15., 16., 18., 22., 23., 24., 25., 25.];
let y = [12., 10., 19., 15., 10., 28., 26., 13., 15., 45., 11.];
let moments_i =
SampleMoments::from_paired_iterators(x.iter().cloned(), y.iter().cloned()).unwrap();
let moments_s = SampleMoments::from_paired_slices(&x, &y).unwrap();
let diffs = x.into_iter().zip(y).map(|(vx, vy)| vx - vy);
let moments_d = SampleMoments::from_iterator(diffs);
assert_eq!(moments_i, moments_s);
assert_eq!(moments_i, moments_d);
}
#[test]
fn test_ci() {
let ci = Ci(0., 1.);
assert_eq!(ci.position_of(-1.), PositionWrtCi::Below);
assert_eq!(ci.position_of(0.), PositionWrtCi::In);
assert_eq!(ci.position_of(1.), PositionWrtCi::In);
assert_eq!(ci.position_of(2.), PositionWrtCi::Above);
}
}