average/
covariance.rs

1use num_traits::ToPrimitive;
2#[cfg(feature = "serde")]
3use serde_derive::{Deserialize, Serialize};
4
5use crate::Merge;
6
7/// Estimate the arithmetic means and the covariance of a sequence of number pairs
8/// ("population").
9///
10/// Because the variances are calculated as well, this can be used to calculate the Pearson
11/// correlation coefficient.
12///
13///
14/// ## Example
15///
16/// ```
17/// use average::Covariance;
18///
19/// let a: Covariance = [(1., 5.), (2., 4.), (3., 3.), (4., 2.), (5., 1.)].iter().cloned().collect();
20/// assert_eq!(a.mean_x(), 3.);
21/// assert_eq!(a.mean_y(), 3.);
22/// assert_eq!(a.population_covariance(), -2.0);
23/// assert_eq!(a.sample_covariance(), -2.5);
24/// ```
25#[derive(Debug, Clone)]
26#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
27pub struct Covariance {
28    avg_x: f64,
29    sum_x_2: f64,
30    avg_y: f64,
31    sum_y_2: f64,
32    sum_prod: f64,
33    n: u64,
34}
35
36impl Covariance {
37    /// Create a new covariance estimator.
38    #[inline]
39    pub fn new() -> Covariance {
40        Covariance {
41            avg_x: 0.,
42            sum_x_2: 0.,
43            avg_y: 0.,
44            sum_y_2: 0.,
45            sum_prod: 0.,
46            n: 0,
47        }
48    }
49
50    /// Add an observation sampled from the population.
51    #[inline]
52    pub fn add(&mut self, x: f64, y: f64) {
53        self.n += 1;
54        let n = self.n.to_f64().unwrap();
55
56        let delta_x = x - self.avg_x;
57        let delta_x_n = delta_x / n;
58        let delta_y_n = (y - self.avg_y) / n;
59
60        self.avg_x += delta_x_n;
61        self.sum_x_2 += delta_x_n * delta_x_n * n * (n - 1.);
62
63        self.avg_y += delta_y_n;
64        self.sum_y_2 += delta_y_n * delta_y_n * n * (n - 1.);
65
66        self.sum_prod += delta_x * (y - self.avg_y);
67    }
68
69    /// Calculate the population covariance of the sample.
70    ///
71    /// This is a biased estimator of the covariance of the population.
72    ///
73    /// Returns NaN for an empty sample.
74    #[inline]
75    pub fn population_covariance(&self) -> f64 {
76        if self.n < 1 {
77            return f64::NAN;
78        }
79        self.sum_prod / self.n.to_f64().unwrap()
80    }
81
82    /// Calculate the sample covariance.
83    ///
84    /// This is an unbiased estimator of the covariance of the population.
85    ///
86    /// Returns NaN for samples of size 1 or less.
87    #[inline]
88    pub fn sample_covariance(&self) -> f64 {
89        if self.n < 2 {
90            return f64::NAN;
91        }
92        self.sum_prod / (self.n - 1).to_f64().unwrap()
93    }
94
95    /// Calculate the population Pearson correlation coefficient.
96    ///
97    /// Returns NaN for an empty sample.
98    #[cfg(any(feature = "std", feature = "libm"))]
99    #[inline]
100    pub fn pearson(&self) -> f64 {
101        if self.n < 2 {
102            return f64::NAN;
103        }
104        self.sum_prod / num_traits::Float::sqrt(self.sum_x_2 * self.sum_y_2)
105    }
106
107    /// Return the sample size.
108    #[inline]
109    pub fn len(&self) -> u64 {
110        self.n
111    }
112
113    /// Determine whether the sample is empty.
114    #[inline]
115    pub fn is_empty(&self) -> bool {
116        self.n == 0
117    }
118
119    /// Estimate the mean of the `x` population.
120    ///
121    /// Returns NaN for an empty sample.
122    #[inline]
123    pub fn mean_x(&self) -> f64 {
124        if self.n > 0 { self.avg_x } else { f64::NAN }
125    }
126
127    /// Estimate the mean of the `y` population.
128    ///
129    /// Returns NaN for an empty sample.
130    #[inline]
131    pub fn mean_y(&self) -> f64 {
132        if self.n > 0 { self.avg_y } else { f64::NAN }
133    }
134
135    /// Calculate the sample variance of `x`.
136    ///
137    /// This is an unbiased estimator of the variance of the population.
138    ///
139    /// Returns NaN for samples of size 1 or less.
140    #[inline]
141    pub fn sample_variance_x(&self) -> f64 {
142        if self.n < 2 {
143            return f64::NAN;
144        }
145        self.sum_x_2 / (self.n - 1).to_f64().unwrap()
146    }
147
148    /// Calculate the population variance of the sample for `x`.
149    ///
150    /// This is a biased estimator of the variance of the population.
151    ///
152    /// Returns NaN for an empty sample.
153    #[inline]
154    pub fn population_variance_x(&self) -> f64 {
155        if self.n == 0 {
156            return f64::NAN;
157        }
158        self.sum_x_2 / self.n.to_f64().unwrap()
159    }
160
161    /// Calculate the sample variance of `y`.
162    ///
163    /// This is an unbiased estimator of the variance of the population.
164    ///
165    /// Returns NaN for samples of size 1 or less.
166    #[inline]
167    pub fn sample_variance_y(&self) -> f64 {
168        if self.n < 2 {
169            return f64::NAN;
170        }
171        self.sum_y_2 / (self.n - 1).to_f64().unwrap()
172    }
173
174    /// Calculate the population variance of the sample for `y`.
175    ///
176    /// This is a biased estimator of the variance of the population.
177    ///
178    /// Returns NaN for an empty sample.
179    #[inline]
180    pub fn population_variance_y(&self) -> f64 {
181        if self.n == 0 {
182            return f64::NAN;
183        }
184        self.sum_y_2 / self.n.to_f64().unwrap()
185    }
186
187    // TODO: Standard deviation and standard error
188}
189
190impl core::default::Default for Covariance {
191    fn default() -> Covariance {
192        Covariance::new()
193    }
194}
195
196impl Merge for Covariance {
197    /// Merge another sample into this one.
198    ///
199    /// ## Example
200    ///
201    /// ```
202    /// use average::{Covariance, Merge};
203    ///
204    /// let sequence: &[(f64, f64)] = &[(1., 2.), (3., 4.), (5., 6.), (7., 8.), (9., 10.)];
205    /// let (left, right) = sequence.split_at(3);
206    /// let cov_total: Covariance = sequence.iter().collect();
207    /// let mut cov_left: Covariance = left.iter().collect();
208    /// let cov_right: Covariance = right.iter().collect();
209    /// cov_left.merge(&cov_right);
210    /// assert_eq!(cov_total.population_covariance(), cov_left.population_covariance());
211    /// ```
212    #[inline]
213    fn merge(&mut self, other: &Covariance) {
214        if other.n == 0 {
215            return;
216        }
217        if self.n == 0 {
218            *self = other.clone();
219            return;
220        }
221
222        let delta_x = other.avg_x - self.avg_x;
223        let delta_y = other.avg_y - self.avg_y;
224        let len_self = self.n.to_f64().unwrap();
225        let len_other = other.n.to_f64().unwrap();
226        let len_total = len_self + len_other;
227
228        self.avg_x = (len_self * self.avg_x + len_other * other.avg_x) / len_total;
229        self.sum_x_2 += other.sum_x_2 + delta_x*delta_x * len_self * len_other / len_total;
230
231        self.avg_y = (len_self * self.avg_y + len_other * other.avg_y) / len_total;
232        self.sum_y_2 += other.sum_y_2 + delta_y*delta_y * len_self * len_other / len_total;
233
234        self.sum_prod += other.sum_prod + delta_x*delta_y * len_self * len_other / len_total;
235
236        self.n += other.n;
237    }
238}
239
240impl core::iter::FromIterator<(f64, f64)> for Covariance {
241    fn from_iter<T>(iter: T) -> Covariance
242        where
243            T: IntoIterator<Item = (f64, f64)>,
244    {
245        let mut cov = Covariance::new();
246        for (x, y) in iter {
247            cov.add(x, y);
248        }
249        cov
250    }
251}
252
253impl core::iter::Extend<(f64, f64)> for Covariance {
254    fn extend<T: IntoIterator<Item = (f64, f64)>>(&mut self, iter: T) {
255        for (x, y) in iter {
256            self.add(x, y);
257        }
258    }
259}
260
261impl<'a> core::iter::FromIterator<&'a (f64, f64)> for Covariance {
262    fn from_iter<T>(iter: T) -> Covariance
263        where
264            T: IntoIterator<Item = &'a (f64, f64)>,
265    {
266        let mut cov = Covariance::new();
267        for &(x, y) in iter {
268            cov.add(x, y);
269        }
270        cov
271    }
272}
273
274impl<'a> core::iter::Extend<&'a (f64, f64)> for Covariance {
275    fn extend<T: IntoIterator<Item = &'a (f64, f64)>>(&mut self, iter: T) {
276        for &(x, y) in iter {
277            self.add(x, y);
278        }
279    }
280}