average/weighted_mean.rs
1use super::{Estimate, MeanWithError, Merge};
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
5/// Estimate the weighted and unweighted arithmetic mean of a sequence of
6/// numbers ("population").
7///
8///
9/// ## Example
10///
11/// ```
12/// use average::WeightedMean;
13///
14/// let a: WeightedMean = (1..6).zip(1..6)
15/// .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
16/// println!("The weighted mean is {}.", a.mean());
17/// ```
18#[derive(Debug, Clone)]
19#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
20pub struct WeightedMean {
21 /// Sum of the weights.
22 weight_sum: f64,
23 /// Weighted mean value.
24 weighted_avg: f64,
25}
26
27impl WeightedMean {
28 /// Create a new weighted and unweighted mean estimator.
29 pub fn new() -> WeightedMean {
30 WeightedMean {
31 weight_sum: 0.,
32 weighted_avg: 0.,
33 }
34 }
35
36 /// Add an observation sampled from the population.
37 #[inline]
38 pub fn add(&mut self, sample: f64, weight: f64) {
39 // The algorithm for the unweighted mean was suggested by Welford in 1962.
40 //
41 // See
42 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
43 // and
44 // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
45 self.weight_sum += weight;
46
47 let prev_avg = self.weighted_avg;
48 self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
49 }
50
51 /// Determine whether the sample is empty.
52 ///
53 /// Might be a false positive if the sum of weights is zero.
54 #[inline]
55 pub fn is_empty(&self) -> bool {
56 self.weight_sum == 0.
57 }
58
59 /// Return the sum of the weights.
60 ///
61 /// Returns 0 for an empty sample.
62 #[inline]
63 pub fn sum_weights(&self) -> f64 {
64 self.weight_sum
65 }
66
67 /// Estimate the weighted mean of the population.
68 ///
69 /// Returns NaN for an empty sample, or if the sum of weights is zero.
70 #[inline]
71 pub fn mean(&self) -> f64 {
72 if !self.is_empty() { self.weighted_avg } else { f64::NAN }
73 }
74}
75
76impl core::default::Default for WeightedMean {
77 fn default() -> WeightedMean {
78 WeightedMean::new()
79 }
80}
81
82impl core::iter::FromIterator<(f64, f64)> for WeightedMean {
83 fn from_iter<T>(iter: T) -> WeightedMean
84 where
85 T: IntoIterator<Item = (f64, f64)>,
86 {
87 let mut a = WeightedMean::new();
88 for (i, w) in iter {
89 a.add(i, w);
90 }
91 a
92 }
93}
94
95impl core::iter::Extend<(f64, f64)> for WeightedMean {
96 fn extend<T: IntoIterator<Item = (f64, f64)>>(&mut self, iter: T) {
97 for (i, w) in iter {
98 self.add(i, w);
99 }
100 }
101}
102
103impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMean {
104 fn from_iter<T>(iter: T) -> WeightedMean
105 where
106 T: IntoIterator<Item = &'a (f64, f64)>,
107 {
108 let mut a = WeightedMean::new();
109 for &(i, w) in iter {
110 a.add(i, w);
111 }
112 a
113 }
114}
115
116impl<'a> core::iter::Extend<&'a (f64, f64)> for WeightedMean {
117 fn extend<T: IntoIterator<Item = &'a (f64, f64)>>(&mut self, iter: T) {
118 for &(i, w) in iter {
119 self.add(i, w);
120 }
121 }
122}
123
124impl Merge for WeightedMean {
125 /// Merge another sample into this one.
126 ///
127 ///
128 /// ## Example
129 ///
130 /// ```
131 /// use average::{WeightedMean, Merge};
132 ///
133 /// let weighted_sequence: &[(f64, f64)] = &[
134 /// (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
135 /// (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
136 /// let (left, right) = weighted_sequence.split_at(3);
137 /// let avg_total: WeightedMean = weighted_sequence.iter().collect();
138 /// let mut avg_left: WeightedMean = left.iter().collect();
139 /// let avg_right: WeightedMean = right.iter().collect();
140 /// avg_left.merge(&avg_right);
141 /// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15);
142 /// ```
143 #[inline]
144 fn merge(&mut self, other: &WeightedMean) {
145 if other.is_empty() {
146 return;
147 }
148 if self.is_empty() {
149 *self = other.clone();
150 return;
151 }
152 let total_weight_sum = self.weight_sum + other.weight_sum;
153 self.weighted_avg = (self.weight_sum * self.weighted_avg
154 + other.weight_sum * other.weighted_avg)
155 / total_weight_sum;
156 self.weight_sum = total_weight_sum;
157 }
158}
159
160/// Estimate the weighted and unweighted arithmetic mean and the unweighted
161/// variance of a sequence of numbers ("population").
162///
163/// This can be used to estimate the standard error of the weighted mean.
164///
165///
166/// ## Example
167///
168/// ```
169/// use average::WeightedMeanWithError;
170///
171/// let a: WeightedMeanWithError = (1..6).zip(1..6)
172/// .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
173/// println!("The weighted mean is {} ± {}.", a.weighted_mean(), a.error());
174/// ```
175#[derive(Debug, Clone)]
176#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
177pub struct WeightedMeanWithError {
178 /// Sum of the squares of the weights.
179 weight_sum_sq: f64,
180 /// Estimator of the weighted mean.
181 weighted_avg: WeightedMean,
182 /// Estimator of unweighted mean and its variance.
183 unweighted_avg: MeanWithError,
184}
185
186impl WeightedMeanWithError {
187 /// Create a new weighted and unweighted mean estimator.
188 #[inline]
189 pub fn new() -> WeightedMeanWithError {
190 WeightedMeanWithError {
191 weight_sum_sq: 0.,
192 weighted_avg: WeightedMean::new(),
193 unweighted_avg: MeanWithError::new(),
194 }
195 }
196
197 /// Add an observation sampled from the population.
198 #[inline]
199 pub fn add(&mut self, sample: f64, weight: f64) {
200 // The algorithm for the unweighted mean was suggested by Welford in 1962.
201 // The algorithm for the weighted mean was suggested by West in 1979.
202 //
203 // See
204 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
205 // and
206 // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
207 self.weight_sum_sq += weight * weight;
208 self.weighted_avg.add(sample, weight);
209 self.unweighted_avg.add(sample);
210 }
211
212 /// Determine whether the sample is empty.
213 #[inline]
214 pub fn is_empty(&self) -> bool {
215 self.unweighted_avg.is_empty()
216 }
217
218 /// Return the sum of the weights.
219 ///
220 /// Returns 0 for an empty sample.
221 #[inline]
222 pub fn sum_weights(&self) -> f64 {
223 self.weighted_avg.sum_weights()
224 }
225
226 /// Return the sum of the squared weights.
227 ///
228 /// Returns 0 for an empty sample.
229 #[inline]
230 pub fn sum_weights_sq(&self) -> f64 {
231 self.weight_sum_sq
232 }
233
234 /// Estimate the weighted mean of the population.
235 ///
236 /// Returns NaN for an empty sample, or if the sum of weights is zero.
237 #[inline]
238 pub fn weighted_mean(&self) -> f64 {
239 self.weighted_avg.mean()
240 }
241
242 /// Estimate the unweighted mean of the population.
243 ///
244 /// Returns NaN for an empty sample.
245 #[inline]
246 pub fn unweighted_mean(&self) -> f64 {
247 self.unweighted_avg.mean()
248 }
249
250 /// Return the sample size.
251 #[inline]
252 pub fn len(&self) -> u64 {
253 self.unweighted_avg.len()
254 }
255
256 /// Calculate the effective sample size.
257 #[inline]
258 pub fn effective_len(&self) -> f64 {
259 if self.is_empty() {
260 return 0.;
261 }
262 let weight_sum = self.weighted_avg.sum_weights();
263 weight_sum * weight_sum / self.weight_sum_sq
264 }
265
266 /// Calculate the *unweighted* population variance of the sample.
267 ///
268 /// This is a biased estimator of the variance of the population.
269 ///
270 /// Returns NaN for an empty sample.
271 #[inline]
272 pub fn population_variance(&self) -> f64 {
273 self.unweighted_avg.population_variance()
274 }
275
276 /// Calculate the *unweighted* sample variance.
277 ///
278 /// This is an unbiased estimator of the variance of the population.
279 ///
280 /// Returns NaN for samples of size 1 or less.
281 #[inline]
282 pub fn sample_variance(&self) -> f64 {
283 self.unweighted_avg.sample_variance()
284 }
285
286 /// Estimate the standard error of the *weighted* mean of the population.
287 ///
288 /// Returns NaN if the sample is empty, or if the sum of weights is zero.
289 ///
290 /// This unbiased estimator assumes that the samples were independently
291 /// drawn from the same population with constant variance.
292 #[inline]
293 pub fn variance_of_weighted_mean(&self) -> f64 {
294 // This uses the same estimate as WinCross, which should provide better
295 // results than the ones used by SPSS or Mentor.
296 //
297 // See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
298
299 let weight_sum = self.weighted_avg.sum_weights();
300 if weight_sum == 0. {
301 return f64::NAN;
302 }
303 let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum);
304 self.sample_variance() * inv_effective_len
305 }
306
307 /// Estimate the standard error of the *weighted* mean of the population.
308 ///
309 /// Returns NaN if the sample is empty, or if the sum of weights is zero.
310 ///
311 /// This unbiased estimator assumes that the samples were independently
312 /// drawn from the same population with constant variance.
313 #[cfg(any(feature = "std", feature = "libm"))]
314 #[inline]
315 pub fn error(&self) -> f64 {
316 num_traits::Float::sqrt(self.variance_of_weighted_mean())
317 }
318}
319
320impl Merge for WeightedMeanWithError {
321 /// Merge another sample into this one.
322 ///
323 ///
324 /// ## Example
325 ///
326 /// ```
327 /// use average::{WeightedMeanWithError, Merge};
328 ///
329 /// let weighted_sequence: &[(f64, f64)] = &[
330 /// (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
331 /// (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
332 /// let (left, right) = weighted_sequence.split_at(3);
333 /// let avg_total: WeightedMeanWithError = weighted_sequence.iter().collect();
334 /// let mut avg_left: WeightedMeanWithError = left.iter().collect();
335 /// let avg_right: WeightedMeanWithError = right.iter().collect();
336 /// avg_left.merge(&avg_right);
337 /// assert!((avg_total.weighted_mean() - avg_left.weighted_mean()).abs() < 1e-15);
338 /// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
339 /// ```
340 #[inline]
341 fn merge(&mut self, other: &WeightedMeanWithError) {
342 self.weight_sum_sq += other.weight_sum_sq;
343 self.weighted_avg.merge(&other.weighted_avg);
344 self.unweighted_avg.merge(&other.unweighted_avg);
345 }
346}
347
348impl core::default::Default for WeightedMeanWithError {
349 fn default() -> WeightedMeanWithError {
350 WeightedMeanWithError::new()
351 }
352}
353
354impl core::iter::FromIterator<(f64, f64)> for WeightedMeanWithError {
355 fn from_iter<T>(iter: T) -> WeightedMeanWithError
356 where
357 T: IntoIterator<Item = (f64, f64)>,
358 {
359 let mut a = WeightedMeanWithError::new();
360 for (i, w) in iter {
361 a.add(i, w);
362 }
363 a
364 }
365}
366
367impl core::iter::Extend<(f64, f64)> for WeightedMeanWithError {
368 fn extend<T: IntoIterator<Item = (f64, f64)>>(&mut self, iter: T) {
369 for (i, w) in iter {
370 self.add(i, w);
371 }
372 }
373}
374
375impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMeanWithError {
376 fn from_iter<T>(iter: T) -> WeightedMeanWithError
377 where
378 T: IntoIterator<Item = &'a (f64, f64)>,
379 {
380 let mut a = WeightedMeanWithError::new();
381 for &(i, w) in iter {
382 a.add(i, w);
383 }
384 a
385 }
386}
387
388impl<'a> core::iter::Extend<&'a (f64, f64)> for WeightedMeanWithError {
389 fn extend<T: IntoIterator<Item = &'a (f64, f64)>>(&mut self, iter: T) {
390 for &(i, w) in iter {
391 self.add(i, w);
392 }
393 }
394}