average/moments/variance.rs
1/// Estimate the arithmetic mean and the variance of a sequence of numbers
2/// ("population").
3///
4/// This can be used to estimate the standard error of the mean.
5///
6///
7/// ## Example
8///
9/// ```
10/// use average::Variance;
11///
12/// let a: Variance = (1..6).map(f64::from).collect();
13/// println!("The mean is {} ± {}.", a.mean(), a.error());
14/// ```
15#[derive(Debug, Clone)]
16#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
17pub struct Variance {
18 /// Estimator of average.
19 avg: Mean,
20 /// Intermediate sum of squares for calculating the variance.
21 sum_2: f64,
22}
23
24impl Variance {
25 /// Create a new variance estimator.
26 #[inline]
27 pub fn new() -> Variance {
28 Variance { avg: Mean::new(), sum_2: 0. }
29 }
30
31 /// Increment the sample size.
32 ///
33 /// This does not update anything else.
34 #[inline]
35 fn increment(&mut self) {
36 self.avg.increment();
37 }
38
39 /// Add an observation given an already calculated difference from the mean
40 /// divided by the number of samples, assuming the inner count of the sample
41 /// size was already updated.
42 ///
43 /// This is useful for avoiding unnecessary divisions in the inner loop.
44 #[inline]
45 fn add_inner(&mut self, delta_n: f64) {
46 // This algorithm introduced by Welford in 1962 trades numerical
47 // stability for a division inside the loop.
48 //
49 // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
50 let n = self.avg.len().to_f64().unwrap();
51 self.avg.add_inner(delta_n);
52 self.sum_2 += delta_n * delta_n * n * (n - 1.);
53 }
54
55 /// Determine whether the sample is empty.
56 #[inline]
57 pub fn is_empty(&self) -> bool {
58 self.avg.is_empty()
59 }
60
61 /// Estimate the mean of the population.
62 ///
63 /// Returns NaN for an empty sample.
64 #[inline]
65 pub fn mean(&self) -> f64 {
66 self.avg.mean()
67 }
68
69 /// Return the sample size.
70 #[inline]
71 pub fn len(&self) -> u64 {
72 self.avg.len()
73 }
74
75 /// Calculate the sample variance.
76 ///
77 /// This is an unbiased estimator of the variance of the population.
78 ///
79 /// Returns NaN for samples of size 1 or less.
80 #[inline]
81 pub fn sample_variance(&self) -> f64 {
82 if self.avg.len() < 2 {
83 return f64::NAN;
84 }
85 self.sum_2 / (self.avg.len() - 1).to_f64().unwrap()
86 }
87
88 /// Calculate the population variance of the sample.
89 ///
90 /// This is a biased estimator of the variance of the population.
91 ///
92 /// Returns NaN for an empty sample.
93 #[inline]
94 pub fn population_variance(&self) -> f64 {
95 let n = self.avg.len();
96 if n == 0 {
97 return f64::NAN;
98 }
99 self.sum_2 / n.to_f64().unwrap()
100 }
101
102 /// Estimate the variance of the mean of the population.
103 ///
104 /// Returns NaN for an empty sample.
105 #[inline]
106 pub fn variance_of_mean(&self) -> f64 {
107 let n = self.avg.len();
108 if n == 0 {
109 return f64::NAN;
110 }
111 if n == 1 {
112 return 0.;
113 }
114 self.sample_variance() / n.to_f64().unwrap()
115 }
116
117 /// Estimate the standard error of the mean of the population.
118 ///
119 /// Returns NaN for an empty sample.
120 #[cfg(any(feature = "std", feature = "libm"))]
121 #[inline]
122 pub fn error(&self) -> f64 {
123 num_traits::Float::sqrt(self.variance_of_mean())
124 }
125
126}
127
128impl core::default::Default for Variance {
129 fn default() -> Variance {
130 Variance::new()
131 }
132}
133
134impl Estimate for Variance {
135 #[inline]
136 fn add(&mut self, sample: f64) {
137 self.increment();
138 let delta_n = (sample - self.avg.mean())
139 / self.len().to_f64().unwrap();
140 self.add_inner(delta_n);
141 }
142
143 #[inline]
144 fn estimate(&self) -> f64 {
145 self.population_variance()
146 }
147}
148
149impl Merge for Variance {
150 /// Merge another sample into this one.
151 ///
152 ///
153 /// ## Example
154 ///
155 /// ```
156 /// use average::{Variance, Merge};
157 ///
158 /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
159 /// let (left, right) = sequence.split_at(3);
160 /// let avg_total: Variance = sequence.iter().collect();
161 /// let mut avg_left: Variance = left.iter().collect();
162 /// let avg_right: Variance = right.iter().collect();
163 /// avg_left.merge(&avg_right);
164 /// assert_eq!(avg_total.mean(), avg_left.mean());
165 /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
166 /// ```
167 #[inline]
168 fn merge(&mut self, other: &Variance) {
169 if other.is_empty() {
170 return;
171 }
172 if self.is_empty() {
173 *self = other.clone();
174 return;
175 }
176 // This algorithm was proposed by Chan et al. in 1979.
177 //
178 // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
179 let len_self = self.len().to_f64().unwrap();
180 let len_other = other.len().to_f64().unwrap();
181 let len_total = len_self + len_other;
182 let delta = other.mean() - self.mean();
183 self.avg.merge(&other.avg);
184 self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
185 }
186}
187
188impl_from_iterator!(Variance);
189impl_from_par_iterator!(Variance);
190impl_extend!(Variance);