average/moments/
variance.rs

1/// Estimate the arithmetic mean and the variance of a sequence of numbers
2/// ("population").
3///
4/// This can be used to estimate the standard error of the mean.
5///
6///
7/// ## Example
8///
9/// ```
10/// use average::Variance;
11///
12/// let a: Variance = (1..6).map(f64::from).collect();
13/// println!("The mean is {} ± {}.", a.mean(), a.error());
14/// ```
15#[derive(Debug, Clone)]
16#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
17pub struct Variance {
18    /// Estimator of average.
19    avg: Mean,
20    /// Intermediate sum of squares for calculating the variance.
21    sum_2: f64,
22}
23
24impl Variance {
25    /// Create a new variance estimator.
26    #[inline]
27    pub fn new() -> Variance {
28        Variance { avg: Mean::new(), sum_2: 0. }
29    }
30
31    /// Increment the sample size.
32    ///
33    /// This does not update anything else.
34    #[inline]
35    fn increment(&mut self) {
36        self.avg.increment();
37    }
38
39    /// Add an observation given an already calculated difference from the mean
40    /// divided by the number of samples, assuming the inner count of the sample
41    /// size was already updated.
42    ///
43    /// This is useful for avoiding unnecessary divisions in the inner loop.
44    #[inline]
45    fn add_inner(&mut self, delta_n: f64) {
46        // This algorithm introduced by Welford in 1962 trades numerical
47        // stability for a division inside the loop.
48        //
49        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
50        let n = self.avg.len().to_f64().unwrap();
51        self.avg.add_inner(delta_n);
52        self.sum_2 += delta_n * delta_n * n * (n - 1.);
53    }
54
55    /// Determine whether the sample is empty.
56    #[inline]
57    pub fn is_empty(&self) -> bool {
58        self.avg.is_empty()
59    }
60
61    /// Estimate the mean of the population.
62    ///
63    /// Returns NaN for an empty sample.
64    #[inline]
65    pub fn mean(&self) -> f64 {
66        self.avg.mean()
67    }
68
69    /// Return the sample size.
70    #[inline]
71    pub fn len(&self) -> u64 {
72        self.avg.len()
73    }
74
75    /// Calculate the sample variance.
76    ///
77    /// This is an unbiased estimator of the variance of the population.
78    /// 
79    /// Returns NaN for samples of size 1 or less.
80    #[inline]
81    pub fn sample_variance(&self) -> f64 {
82        if self.avg.len() < 2 {
83            return f64::NAN;
84        }
85        self.sum_2 / (self.avg.len() - 1).to_f64().unwrap()
86    }
87
88    /// Calculate the population variance of the sample.
89    ///
90    /// This is a biased estimator of the variance of the population.
91    /// 
92    /// Returns NaN for an empty sample.
93    #[inline]
94    pub fn population_variance(&self) -> f64 {
95        let n = self.avg.len();
96        if n == 0 {
97            return f64::NAN;
98        }
99        self.sum_2 / n.to_f64().unwrap()
100    }
101
102    /// Estimate the variance of the mean of the population.
103    /// 
104    /// Returns NaN for an empty sample.
105    #[inline]
106    pub fn variance_of_mean(&self) -> f64 {
107        let n = self.avg.len();
108        if n == 0 {
109            return f64::NAN;
110        }
111        if n == 1 {
112            return 0.;
113        }
114        self.sample_variance() / n.to_f64().unwrap()
115    }
116
117    /// Estimate the standard error of the mean of the population.
118    /// 
119    /// Returns NaN for an empty sample.
120    #[cfg(any(feature = "std", feature = "libm"))]
121    #[inline]
122    pub fn error(&self) -> f64 {
123        num_traits::Float::sqrt(self.variance_of_mean())
124    }
125
126}
127
128impl core::default::Default for Variance {
129    fn default() -> Variance {
130        Variance::new()
131    }
132}
133
134impl Estimate for Variance {
135    #[inline]
136    fn add(&mut self, sample: f64) {
137        self.increment();
138        let delta_n = (sample - self.avg.mean())
139            / self.len().to_f64().unwrap();
140        self.add_inner(delta_n);
141    }
142
143    #[inline]
144    fn estimate(&self) -> f64 {
145        self.population_variance()
146    }
147}
148
149impl Merge for Variance {
150    /// Merge another sample into this one.
151    ///
152    ///
153    /// ## Example
154    ///
155    /// ```
156    /// use average::{Variance, Merge};
157    ///
158    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
159    /// let (left, right) = sequence.split_at(3);
160    /// let avg_total: Variance = sequence.iter().collect();
161    /// let mut avg_left: Variance = left.iter().collect();
162    /// let avg_right: Variance = right.iter().collect();
163    /// avg_left.merge(&avg_right);
164    /// assert_eq!(avg_total.mean(), avg_left.mean());
165    /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
166    /// ```
167    #[inline]
168    fn merge(&mut self, other: &Variance) {
169        if other.is_empty() {
170            return;
171        }
172        if self.is_empty() {
173            *self = other.clone();
174            return;
175        }
176        // This algorithm was proposed by Chan et al. in 1979.
177        //
178        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
179        let len_self = self.len().to_f64().unwrap();
180        let len_other = other.len().to_f64().unwrap();
181        let len_total = len_self + len_other;
182        let delta = other.mean() - self.mean();
183        self.avg.merge(&other.avg);
184        self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
185    }
186}
187
188impl_from_iterator!(Variance);
189impl_from_par_iterator!(Variance);
190impl_extend!(Variance);