#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
/// Failure modes reported by the statistics helpers in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StatisticsError {
    /// The input slice contained no values.
    EmptyInput,
    /// The input was non-empty but shorter than the operation requires.
    InsufficientData {
        /// Minimum number of values the operation needs.
        needed: usize,
        /// Number of values that were supplied.
        actual: usize,
    },
    /// A length was too large to be converted to `f64` by the exact-length helper.
    TooManyValues {
        /// The length that was rejected.
        actual: usize,
    },
}
impl core::fmt::Display for StatisticsError {
    /// Writes a single-line, human-readable description of the error.
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // All payload fields are `usize`, so the variants can be matched by value.
        match *self {
            Self::TooManyValues { actual } => write!(
                out,
                "statistics input length {actual} exceeds the exact counting range supported by this helper"
            ),
            Self::InsufficientData { needed, actual } => write!(
                out,
                "statistics operation requires at least {needed} values, received {actual}"
            ),
            Self::EmptyInput => out.write_str("statistics input must not be empty"),
        }
    }
}
// Marker impl: lets `StatisticsError` be used wherever `std::error::Error` is
// expected. No trait methods are overridden, so the defaults apply.
impl std::error::Error for StatisticsError {}
/// Computes the arithmetic mean of `values`.
///
/// # Errors
///
/// Returns [`StatisticsError::EmptyInput`] when `values` is empty, and
/// [`StatisticsError::TooManyValues`] when the slice length is rejected by the
/// exact length-to-`f64` conversion.
pub fn mean(values: &[f64]) -> Result<f64, StatisticsError> {
    ensure_non_empty(values)?;
    // The length is validated before any summation happens.
    exact_len_as_f64(values.len()).map(|divisor| values.iter().sum::<f64>() / divisor)
}
/// Computes the median of `values`.
///
/// The input is copied and ordered with [`f64::total_cmp`], so every value
/// (including NaNs and signed zeros) takes part in a total order instead of
/// causing a panic. For an even number of values the result is the midpoint of
/// the two central elements; for an odd number it is the central element.
///
/// # Errors
///
/// Returns [`StatisticsError::EmptyInput`] when `values` is empty.
pub fn median(values: &[f64]) -> Result<f64, StatisticsError> {
    ensure_non_empty(values)?;
    let mut sorted = values.to_vec();
    // `total_cmp` reports `Equal` only for bit-identical floats, so an unstable
    // sort produces exactly the same sequence as a stable one while avoiding
    // the stable sort's temporary allocation.
    sorted.sort_unstable_by(f64::total_cmp);
    let middle = sorted.len() / 2;
    if sorted.len().is_multiple_of(2) {
        // Non-empty and even means `len >= 2`, so `middle - 1` cannot underflow.
        Ok(f64::midpoint(sorted[middle - 1], sorted[middle]))
    } else {
        Ok(sorted[middle])
    }
}
/// Computes the population variance of `values`: the sum of squared deviations
/// from the mean divided by the number of values.
///
/// # Errors
///
/// Returns [`StatisticsError::EmptyInput`] when `values` is empty, and
/// [`StatisticsError::TooManyValues`] when the slice length is rejected by the
/// exact length-to-`f64` conversion.
pub fn population_variance(values: &[f64]) -> Result<f64, StatisticsError> {
    ensure_non_empty(values)?;
    exact_len_as_f64(values.len())
        .and_then(|population_size| variance_with_denominator(values, population_size))
}
/// Computes the sample variance of `values`: the sum of squared deviations
/// from the mean divided by `len - 1`.
///
/// # Errors
///
/// Returns [`StatisticsError::EmptyInput`] when `values` is empty,
/// [`StatisticsError::InsufficientData`] when it holds exactly one value, and
/// [`StatisticsError::TooManyValues`] (carrying the real slice length) when the
/// length is rejected by the exact length-to-`f64` conversion.
pub fn sample_variance(values: &[f64]) -> Result<f64, StatisticsError> {
    ensure_len_at_least(values, 2)?;
    // Validate the full length rather than `len - 1`, so an oversized input is
    // reported with its true length instead of one less.
    let count = exact_len_as_f64(values.len())?;
    // `count` is an exactly represented integer >= 2, so subtracting 1.0 is exact.
    variance_with_denominator(values, count - 1.0)
}
/// Computes the population standard deviation of `values`, i.e. the square
/// root of [`population_variance`].
///
/// # Errors
///
/// Propagates any error returned by [`population_variance`].
pub fn population_std_dev(values: &[f64]) -> Result<f64, StatisticsError> {
    population_variance(values).map(f64::sqrt)
}
/// Computes the sample standard deviation of `values`, i.e. the square root
/// of [`sample_variance`].
///
/// # Errors
///
/// Propagates any error returned by [`sample_variance`].
pub fn sample_std_dev(values: &[f64]) -> Result<f64, StatisticsError> {
    sample_variance(values).map(f64::sqrt)
}
/// Sums the squared deviations of `values` from their mean and divides the
/// total by `denominator`.
///
/// The caller chooses the denominator, which lets the population and sample
/// variants share this routine. Any error from [`mean`] is propagated.
fn variance_with_denominator(values: &[f64], denominator: f64) -> Result<f64, StatisticsError> {
    mean(values).map(|center| {
        let sum_of_squares: f64 = values
            .iter()
            .map(|&sample| {
                let offset = sample - center;
                offset * offset
            })
            .sum();
        sum_of_squares / denominator
    })
}
const fn ensure_non_empty(values: &[f64]) -> Result<(), StatisticsError> {
ensure_len_at_least(values, 1)
}
/// Checks that `values` contains at least `needed` elements.
///
/// When the slice is too short, an empty slice is reported as
/// [`StatisticsError::EmptyInput`]; any other shortfall is reported as
/// [`StatisticsError::InsufficientData`] carrying both the required and the
/// actual length.
const fn ensure_len_at_least(values: &[f64], needed: usize) -> Result<(), StatisticsError> {
    let actual = values.len();
    if actual >= needed {
        return Ok(());
    }
    if actual == 0 {
        return Err(StatisticsError::EmptyInput);
    }
    Err(StatisticsError::InsufficientData { needed, actual })
}
/// Converts a length to `f64` without any rounding.
///
/// The conversion goes through `u32`, whose every value is exactly
/// representable as an `f64`; lengths that do not fit in a `u32` are rejected
/// with [`StatisticsError::TooManyValues`].
fn exact_len_as_f64(length: usize) -> Result<f64, StatisticsError> {
    match u32::try_from(length) {
        Ok(narrowed) => Ok(f64::from(narrowed)),
        Err(_) => Err(StatisticsError::TooManyValues { actual: length }),
    }
}
pub mod prelude;
#[cfg(test)]
mod tests {
    use super::{
        StatisticsError, mean, median, population_std_dev, population_variance, sample_std_dev,
        sample_variance,
    };

    /// Asserts that two floats differ by less than an absolute tolerance of 1e-12.
    fn assert_close(left: f64, right: f64) {
        assert!((left - right).abs() < 1.0e-12, "left={left}, right={right}");
    }

    #[test]
    fn computes_common_summary_statistics() -> Result<(), StatisticsError> {
        let values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        assert_close(mean(&values)?, 5.0);
        assert_close(median(&values)?, 4.5);
        assert_close(population_variance(&values)?, 4.0);
        assert_close(population_std_dev(&values)?, 2.0);
        Ok(())
    }

    #[test]
    fn computes_sample_summaries_and_reports_invalid_inputs() {
        let values = [1.0, 2.0, 3.0, 4.0];
        assert_close(
            sample_variance(&values).expect("sample variance should succeed"),
            1.666_666_666_666_666_7,
        );
        assert_close(
            sample_std_dev(&values).expect("sample std dev should succeed"),
            1.290_994_448_735_805_6,
        );
        assert_eq!(mean(&[]), Err(StatisticsError::EmptyInput));
        assert_eq!(
            sample_variance(&[3.0]),
            Err(StatisticsError::InsufficientData {
                needed: 2,
                actual: 1,
            })
        );
    }

    // Covers the odd-length branch and confirms the input does not need to be pre-sorted.
    #[test]
    fn median_handles_odd_length_and_unsorted_input() -> Result<(), StatisticsError> {
        assert_close(median(&[9.0, 1.0, 5.0])?, 5.0);
        assert_close(median(&[7.5])?, 7.5);
        assert_close(median(&[4.0, 1.0, 3.0, 2.0])?, 2.5);
        Ok(())
    }

    #[test]
    fn every_public_helper_rejects_empty_input() {
        let empty: [f64; 0] = [];
        let results = [
            mean(&empty),
            median(&empty),
            population_variance(&empty),
            population_std_dev(&empty),
            sample_variance(&empty),
            sample_std_dev(&empty),
        ];
        for result in results {
            assert_eq!(result, Err(StatisticsError::EmptyInput));
        }
    }

    #[test]
    fn error_messages_describe_each_variant() {
        assert_eq!(
            StatisticsError::EmptyInput.to_string(),
            "statistics input must not be empty"
        );
        assert_eq!(
            StatisticsError::InsufficientData {
                needed: 2,
                actual: 1,
            }
            .to_string(),
            "statistics operation requires at least 2 values, received 1"
        );
        assert_eq!(
            StatisticsError::TooManyValues { actual: 9 }.to_string(),
            "statistics input length 9 exceeds the exact counting range supported by this helper"
        );
    }
}