Skip to main content

use_statistics/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4//! Statistical utilities for `RustUse`.
5
/// Reasons a statistics helper can refuse to summarise its input.
///
/// Every variant describes a problem with the *length* of the input slice;
/// the element values themselves are never inspected when producing an error.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum StatisticsError {
    /// The input slice contained no values at all.
    EmptyInput,
    /// The input slice was non-empty but still too short for the requested operation.
    InsufficientData {
        /// Minimum number of values the operation requires.
        needed: usize,
        /// Number of values that were actually supplied.
        actual: usize,
    },
    /// The input slice is too large to convert its length into an exact `f64` count.
    TooManyValues {
        /// Length of the offending input slice.
        actual: usize,
    },
}
16
17impl core::fmt::Display for StatisticsError {
18    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
19        match self {
20            Self::EmptyInput => formatter.write_str("statistics input must not be empty"),
21            Self::InsufficientData { needed, actual } => write!(
22                formatter,
23                "statistics operation requires at least {needed} values, received {actual}"
24            ),
25            Self::TooManyValues { actual } => write!(
26                formatter,
27                "statistics input length {actual} exceeds the exact counting range supported by this helper"
28            ),
29        }
30    }
31}
32
33impl std::error::Error for StatisticsError {}
34
35/// Returns the arithmetic mean of `values`.
36///
37/// # Errors
38///
39/// Returns [`StatisticsError::EmptyInput`] when `values` is empty, or
40/// [`StatisticsError::TooManyValues`] if the slice length cannot be represented
41/// exactly in the internal floating-point count conversion.
42pub fn mean(values: &[f64]) -> Result<f64, StatisticsError> {
43    ensure_non_empty(values)?;
44    let count = exact_len_as_f64(values.len())?;
45
46    Ok(values.iter().sum::<f64>() / count)
47}
48
49/// Returns the median of `values`.
50///
51/// # Errors
52///
53/// Returns [`StatisticsError::EmptyInput`] when `values` is empty.
54pub fn median(values: &[f64]) -> Result<f64, StatisticsError> {
55    ensure_non_empty(values)?;
56
57    let mut sorted = values.to_vec();
58    sorted.sort_by(f64::total_cmp);
59
60    let middle = sorted.len() / 2;
61
62    if sorted.len().is_multiple_of(2) {
63        Ok(f64::midpoint(sorted[middle - 1], sorted[middle]))
64    } else {
65        Ok(sorted[middle])
66    }
67}
68
69/// Returns the population variance of `values`.
70///
71/// # Errors
72///
73/// Returns [`StatisticsError::EmptyInput`] when `values` is empty, or
74/// [`StatisticsError::TooManyValues`] if the slice length cannot be represented
75/// exactly in the internal floating-point count conversion.
76pub fn population_variance(values: &[f64]) -> Result<f64, StatisticsError> {
77    ensure_non_empty(values)?;
78    let count = exact_len_as_f64(values.len())?;
79
80    variance_with_denominator(values, count)
81}
82
83/// Returns the sample variance of `values` using Bessel's correction.
84///
85/// # Errors
86///
87/// Returns [`StatisticsError::InsufficientData`] when fewer than two values are
88/// provided, or [`StatisticsError::TooManyValues`] if the slice length cannot
89/// be represented exactly in the internal floating-point count conversion.
90pub fn sample_variance(values: &[f64]) -> Result<f64, StatisticsError> {
91    ensure_len_at_least(values, 2)?;
92    let count = exact_len_as_f64(values.len() - 1)?;
93
94    variance_with_denominator(values, count)
95}
96
97/// Returns the population standard deviation of `values`.
98///
99/// # Errors
100///
101/// Propagates any error returned by [`population_variance`].
102pub fn population_std_dev(values: &[f64]) -> Result<f64, StatisticsError> {
103    Ok(population_variance(values)?.sqrt())
104}
105
106/// Returns the sample standard deviation of `values` using Bessel's correction.
107///
108/// # Errors
109///
110/// Propagates any error returned by [`sample_variance`].
111pub fn sample_std_dev(values: &[f64]) -> Result<f64, StatisticsError> {
112    Ok(sample_variance(values)?.sqrt())
113}
114
115fn variance_with_denominator(values: &[f64], denominator: f64) -> Result<f64, StatisticsError> {
116    let average = mean(values)?;
117    let squared_deviations = values
118        .iter()
119        .map(|value| {
120            let delta = *value - average;
121            delta * delta
122        })
123        .sum::<f64>();
124
125    Ok(squared_deviations / denominator)
126}
127
128const fn ensure_non_empty(values: &[f64]) -> Result<(), StatisticsError> {
129    ensure_len_at_least(values, 1)
130}
131
132const fn ensure_len_at_least(values: &[f64], needed: usize) -> Result<(), StatisticsError> {
133    if values.len() < needed {
134        if values.is_empty() {
135            Err(StatisticsError::EmptyInput)
136        } else {
137            Err(StatisticsError::InsufficientData {
138                needed,
139                actual: values.len(),
140            })
141        }
142    } else {
143        Ok(())
144    }
145}
146
147fn exact_len_as_f64(length: usize) -> Result<f64, StatisticsError> {
148    let count =
149        u32::try_from(length).map_err(|_| StatisticsError::TooManyValues { actual: length })?;
150
151    Ok(f64::from(count))
152}
153
154pub mod prelude;
155
#[cfg(test)]
mod tests {
    use super::{
        StatisticsError, mean, median, population_std_dev, population_variance, sample_std_dev,
        sample_variance,
    };

    /// Asserts that two floats differ by less than an absolute tolerance of `1e-12`.
    fn assert_close(left: f64, right: f64) {
        let gap = (left - right).abs();
        assert!(gap < 1.0e-12, "left={left}, right={right}");
    }

    /// Mean, median and the population spread measures on a small data set
    /// whose population standard deviation is exactly 2.
    #[test]
    fn computes_common_summary_statistics() -> Result<(), StatisticsError> {
        let values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];

        let expectations = [
            (mean(&values)?, 5.0),
            (median(&values)?, 4.5),
            (population_variance(&values)?, 4.0),
            (population_std_dev(&values)?, 2.0),
        ];

        for (actual, expected) in expectations {
            assert_close(actual, expected);
        }

        Ok(())
    }

    /// Bessel-corrected summaries on `1..=4`, plus the error paths for empty
    /// and single-element inputs.
    #[test]
    fn computes_sample_summaries_and_reports_invalid_inputs() {
        let values = [1.0, 2.0, 3.0, 4.0];

        let variance = sample_variance(&values).expect("sample variance should succeed");
        assert_close(variance, 1.666_666_666_666_666_7);

        let std_dev = sample_std_dev(&values).expect("sample std dev should succeed");
        assert_close(std_dev, 1.290_994_448_735_805_6);

        assert_eq!(mean(&[]), Err(StatisticsError::EmptyInput));

        let too_short = StatisticsError::InsufficientData {
            needed: 2,
            actual: 1,
        };
        assert_eq!(sample_variance(&[3.0]), Err(too_short));
    }
}