average/
weighted_mean.rs

1use super::{Estimate, MeanWithError, Merge};
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
5/// Estimate the weighted and unweighted arithmetic mean of a sequence of
6/// numbers ("population").
7///
8///
9/// ## Example
10///
11/// ```
12/// use average::WeightedMean;
13///
14/// let a: WeightedMean = (1..6).zip(1..6)
15///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
16/// println!("The weighted mean is {}.", a.mean());
17/// ```
18#[derive(Debug, Clone)]
19#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
20pub struct WeightedMean {
21    /// Sum of the weights.
22    weight_sum: f64,
23    /// Weighted mean value.
24    weighted_avg: f64,
25}
26
27impl WeightedMean {
28    /// Create a new weighted and unweighted mean estimator.
29    pub fn new() -> WeightedMean {
30        WeightedMean {
31            weight_sum: 0.,
32            weighted_avg: 0.,
33        }
34    }
35
36    /// Add an observation sampled from the population.
37    #[inline]
38    pub fn add(&mut self, sample: f64, weight: f64) {
39        // The algorithm for the unweighted mean was suggested by Welford in 1962.
40        //
41        // See
42        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
43        // and
44        // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
45        self.weight_sum += weight;
46
47        let prev_avg = self.weighted_avg;
48        self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
49    }
50
51    /// Determine whether the sample is empty.
52    ///
53    /// Might be a false positive if the sum of weights is zero.
54    #[inline]
55    pub fn is_empty(&self) -> bool {
56        self.weight_sum == 0.
57    }
58
59    /// Return the sum of the weights.
60    ///
61    /// Returns 0 for an empty sample.
62    #[inline]
63    pub fn sum_weights(&self) -> f64 {
64        self.weight_sum
65    }
66
67    /// Estimate the weighted mean of the population.
68    ///
69    /// Returns NaN for an empty sample, or if the sum of weights is zero.
70    #[inline]
71    pub fn mean(&self) -> f64 {
72        if !self.is_empty() { self.weighted_avg } else { f64::NAN }
73    }
74}
75
76impl core::default::Default for WeightedMean {
77    fn default() -> WeightedMean {
78        WeightedMean::new()
79    }
80}
81
82impl core::iter::FromIterator<(f64, f64)> for WeightedMean {
83    fn from_iter<T>(iter: T) -> WeightedMean
84    where
85        T: IntoIterator<Item = (f64, f64)>,
86    {
87        let mut a = WeightedMean::new();
88        for (i, w) in iter {
89            a.add(i, w);
90        }
91        a
92    }
93}
94
95impl core::iter::Extend<(f64, f64)> for WeightedMean {
96    fn extend<T: IntoIterator<Item = (f64, f64)>>(&mut self, iter: T) {
97        for (i, w) in iter {
98            self.add(i, w);
99        }
100    }
101}
102
103impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMean {
104    fn from_iter<T>(iter: T) -> WeightedMean
105    where
106        T: IntoIterator<Item = &'a (f64, f64)>,
107    {
108        let mut a = WeightedMean::new();
109        for &(i, w) in iter {
110            a.add(i, w);
111        }
112        a
113    }
114}
115
116impl<'a> core::iter::Extend<&'a (f64, f64)> for WeightedMean {
117    fn extend<T: IntoIterator<Item = &'a (f64, f64)>>(&mut self, iter: T) {
118        for &(i, w) in iter {
119            self.add(i, w);
120        }
121    }
122}
123
124impl Merge for WeightedMean {
125    /// Merge another sample into this one.
126    ///
127    ///
128    /// ## Example
129    ///
130    /// ```
131    /// use average::{WeightedMean, Merge};
132    ///
133    /// let weighted_sequence: &[(f64, f64)] = &[
134    ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
135    ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
136    /// let (left, right) = weighted_sequence.split_at(3);
137    /// let avg_total: WeightedMean = weighted_sequence.iter().collect();
138    /// let mut avg_left: WeightedMean = left.iter().collect();
139    /// let avg_right: WeightedMean = right.iter().collect();
140    /// avg_left.merge(&avg_right);
141    /// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15);
142    /// ```
143    #[inline]
144    fn merge(&mut self, other: &WeightedMean) {
145        if other.is_empty() {
146            return;
147        }
148        if self.is_empty() {
149            *self = other.clone();
150            return;
151        }
152        let total_weight_sum = self.weight_sum + other.weight_sum;
153        self.weighted_avg = (self.weight_sum * self.weighted_avg
154            + other.weight_sum * other.weighted_avg)
155            / total_weight_sum;
156        self.weight_sum = total_weight_sum;
157    }
158}
159
160/// Estimate the weighted and unweighted arithmetic mean and the unweighted
161/// variance of a sequence of numbers ("population").
162///
163/// This can be used to estimate the standard error of the weighted mean.
164///
165///
166/// ## Example
167///
168/// ```
169/// use average::WeightedMeanWithError;
170///
171/// let a: WeightedMeanWithError = (1..6).zip(1..6)
172///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
173/// println!("The weighted mean is {} ± {}.", a.weighted_mean(), a.error());
174/// ```
175#[derive(Debug, Clone)]
176#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
177pub struct WeightedMeanWithError {
178    /// Sum of the squares of the weights.
179    weight_sum_sq: f64,
180    /// Estimator of the weighted mean.
181    weighted_avg: WeightedMean,
182    /// Estimator of unweighted mean and its variance.
183    unweighted_avg: MeanWithError,
184}
185
186impl WeightedMeanWithError {
187    /// Create a new weighted and unweighted mean estimator.
188    #[inline]
189    pub fn new() -> WeightedMeanWithError {
190        WeightedMeanWithError {
191            weight_sum_sq: 0.,
192            weighted_avg: WeightedMean::new(),
193            unweighted_avg: MeanWithError::new(),
194        }
195    }
196
197    /// Add an observation sampled from the population.
198    #[inline]
199    pub fn add(&mut self, sample: f64, weight: f64) {
200        // The algorithm for the unweighted mean was suggested by Welford in 1962.
201        // The algorithm for the weighted mean was suggested by West in 1979.
202        //
203        // See
204        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
205        // and
206        // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
207        self.weight_sum_sq += weight * weight;
208        self.weighted_avg.add(sample, weight);
209        self.unweighted_avg.add(sample);
210    }
211
212    /// Determine whether the sample is empty.
213    #[inline]
214    pub fn is_empty(&self) -> bool {
215        self.unweighted_avg.is_empty()
216    }
217
218    /// Return the sum of the weights.
219    ///
220    /// Returns 0 for an empty sample.
221    #[inline]
222    pub fn sum_weights(&self) -> f64 {
223        self.weighted_avg.sum_weights()
224    }
225
226    /// Return the sum of the squared weights.
227    ///
228    /// Returns 0 for an empty sample.
229    #[inline]
230    pub fn sum_weights_sq(&self) -> f64 {
231        self.weight_sum_sq
232    }
233
234    /// Estimate the weighted mean of the population.
235    ///
236    /// Returns NaN for an empty sample, or if the sum of weights is zero.
237    #[inline]
238    pub fn weighted_mean(&self) -> f64 {
239        self.weighted_avg.mean()
240    }
241
242    /// Estimate the unweighted mean of the population.
243    ///
244    /// Returns NaN for an empty sample.
245    #[inline]
246    pub fn unweighted_mean(&self) -> f64 {
247        self.unweighted_avg.mean()
248    }
249
250    /// Return the sample size.
251    #[inline]
252    pub fn len(&self) -> u64 {
253        self.unweighted_avg.len()
254    }
255
256    /// Calculate the effective sample size.
257    #[inline]
258    pub fn effective_len(&self) -> f64 {
259        if self.is_empty() {
260            return 0.;
261        }
262        let weight_sum = self.weighted_avg.sum_weights();
263        weight_sum * weight_sum / self.weight_sum_sq
264    }
265
266    /// Calculate the *unweighted* population variance of the sample.
267    ///
268    /// This is a biased estimator of the variance of the population.
269    /// 
270    /// Returns NaN for an empty sample.
271    #[inline]
272    pub fn population_variance(&self) -> f64 {
273        self.unweighted_avg.population_variance()
274    }
275
276    /// Calculate the *unweighted* sample variance.
277    ///
278    /// This is an unbiased estimator of the variance of the population.
279    /// 
280    /// Returns NaN for samples of size 1 or less.
281    #[inline]
282    pub fn sample_variance(&self) -> f64 {
283        self.unweighted_avg.sample_variance()
284    }
285
286    /// Estimate the standard error of the *weighted* mean of the population.
287    ///
288    /// Returns NaN if the sample is empty, or if the sum of weights is zero.
289    ///
290    /// This unbiased estimator assumes that the samples were independently
291    /// drawn from the same population with constant variance.
292    #[inline]
293    pub fn variance_of_weighted_mean(&self) -> f64 {
294        // This uses the same estimate as WinCross, which should provide better
295        // results than the ones used by SPSS or Mentor.
296        //
297        // See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
298
299        let weight_sum = self.weighted_avg.sum_weights();
300        if weight_sum == 0. {
301            return f64::NAN;
302        }
303        let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum);
304        self.sample_variance() * inv_effective_len
305    }
306
307    /// Estimate the standard error of the *weighted* mean of the population.
308    ///
309    /// Returns NaN if the sample is empty, or if the sum of weights is zero.
310    ///
311    /// This unbiased estimator assumes that the samples were independently
312    /// drawn from the same population with constant variance.
313    #[cfg(any(feature = "std", feature = "libm"))]
314    #[inline]
315    pub fn error(&self) -> f64 {
316        num_traits::Float::sqrt(self.variance_of_weighted_mean())
317    }
318}
319
320impl Merge for WeightedMeanWithError {
321    /// Merge another sample into this one.
322    ///
323    ///
324    /// ## Example
325    ///
326    /// ```
327    /// use average::{WeightedMeanWithError, Merge};
328    ///
329    /// let weighted_sequence: &[(f64, f64)] = &[
330    ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
331    ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
332    /// let (left, right) = weighted_sequence.split_at(3);
333    /// let avg_total: WeightedMeanWithError = weighted_sequence.iter().collect();
334    /// let mut avg_left: WeightedMeanWithError = left.iter().collect();
335    /// let avg_right: WeightedMeanWithError = right.iter().collect();
336    /// avg_left.merge(&avg_right);
337    /// assert!((avg_total.weighted_mean() - avg_left.weighted_mean()).abs() < 1e-15);
338    /// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
339    /// ```
340    #[inline]
341    fn merge(&mut self, other: &WeightedMeanWithError) {
342        self.weight_sum_sq += other.weight_sum_sq;
343        self.weighted_avg.merge(&other.weighted_avg);
344        self.unweighted_avg.merge(&other.unweighted_avg);
345    }
346}
347
348impl core::default::Default for WeightedMeanWithError {
349    fn default() -> WeightedMeanWithError {
350        WeightedMeanWithError::new()
351    }
352}
353
354impl core::iter::FromIterator<(f64, f64)> for WeightedMeanWithError {
355    fn from_iter<T>(iter: T) -> WeightedMeanWithError
356    where
357        T: IntoIterator<Item = (f64, f64)>,
358    {
359        let mut a = WeightedMeanWithError::new();
360        for (i, w) in iter {
361            a.add(i, w);
362        }
363        a
364    }
365}
366
367impl core::iter::Extend<(f64, f64)> for WeightedMeanWithError {
368    fn extend<T: IntoIterator<Item = (f64, f64)>>(&mut self, iter: T) {
369        for (i, w) in iter {
370            self.add(i, w);
371        }
372    }
373}
374
375impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMeanWithError {
376    fn from_iter<T>(iter: T) -> WeightedMeanWithError
377    where
378        T: IntoIterator<Item = &'a (f64, f64)>,
379    {
380        let mut a = WeightedMeanWithError::new();
381        for &(i, w) in iter {
382            a.add(i, w);
383        }
384        a
385    }
386}
387
388impl<'a> core::iter::Extend<&'a (f64, f64)> for WeightedMeanWithError {
389    fn extend<T: IntoIterator<Item = &'a (f64, f64)>>(&mut self, iter: T) {
390        for &(i, w) in iter {
391            self.add(i, w);
392        }
393    }
394}