vecfx/
stats.rs

1// stats
2// abstracted from: https://github.com/rust-lang/libtest/blob/master/libtest/stats.rs
3
4// [[file:~/Workspace/Programming/gchemol-rs/vecfx/vecfx.note::*stats][stats:1]]
5/// Trait that provides simple descriptive statistics on a univariate set of numeric samples.
6pub trait StatsExt {
7    /// Sum of the samples.
8    ///
9    /// Note: this method sacrifices performance at the altar of accuracy
10    /// Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
11    /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric
12    /// Predicates"][paper]
13    ///
14    /// [paper]: http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps
15    fn sum(&self) -> f64;
16
17    /// Minimum value of the samples.
18    fn min(&self) -> f64;
19
20    /// Maximum value of the samples.
21    fn max(&self) -> f64;
22
23    /// Arithmetic mean (average) of the samples: sum divided by sample-count.
24    ///
25    /// See: <https://en.wikipedia.org/wiki/Arithmetic_mean>
26    fn mean(&self) -> f64;
27
28    /// Variance of the samples: bias-corrected mean of the squares of the differences of each
29    /// sample from the sample mean. Note that this calculates the _sample variance_ rather than the
30    /// population variance, which is assumed to be unknown. It therefore corrects the `(n-1)/n`
31    /// bias that would appear if we calculated a population variance, by dividing by `(n-1)` rather
32    /// than `n`.
33    ///
34    /// See: <https://en.wikipedia.org/wiki/Variance>
35    fn var(&self) -> f64;
36
37    /// Standard deviation: the square root of the sample variance.
38    ///
39    /// Note: this is not a robust statistic for non-normal distributions. Prefer the
40    /// `median_abs_dev` for unknown distributions.
41    ///
42    /// See: <https://en.wikipedia.org/wiki/Standard_deviation>
43    fn std_dev(&self) -> f64;
44
45    /// Index to the minimum value of the samples.
46    fn imin(&self) -> usize;
47
48    /// Index to the maximum value of the samples.
49    fn imax(&self) -> usize;
50}
51
52impl StatsExt for [f64] {
53    // FIXME #11059 handle NaN, inf and overflow
54    fn sum(&self) -> f64 {
55        let mut partials = vec![];
56
57        for &x in self {
58            let mut x = x;
59            let mut j = 0;
60            // This inner loop applies `hi`/`lo` summation to each
61            // partial so that the list of partial sums remains exact.
62            for i in 0..partials.len() {
63                let mut y: f64 = partials[i];
64                if x.abs() < y.abs() {
65                    std::mem::swap(&mut x, &mut y);
66                }
67                // Rounded `x+y` is stored in `hi` with round-off stored in
68                // `lo`. Together `hi+lo` are exactly equal to `x+y`.
69                let hi = x + y;
70                let lo = y - (hi - x);
71                if lo != 0.0 {
72                    partials[j] = lo;
73                    j += 1;
74                }
75                x = hi;
76            }
77            if j >= partials.len() {
78                partials.push(x);
79            } else {
80                partials[j] = x;
81                partials.truncate(j + 1);
82            }
83        }
84        let zero: f64 = 0.0;
85        partials.iter().fold(zero, |p, q| p + *q)
86    }
87
88    fn min(&self) -> f64 {
89        assert!(!self.is_empty());
90        self.iter().fold(self[0], |p, q| p.min(*q))
91    }
92
93    fn max(&self) -> f64 {
94        assert!(!self.is_empty());
95        self.iter().fold(self[0], |p, q| p.max(*q))
96    }
97
98    fn mean(&self) -> f64 {
99        assert!(!self.is_empty());
100        self.sum() / (self.len() as f64)
101    }
102
103    fn var(&self) -> f64 {
104        if self.len() < 2 {
105            0.0
106        } else {
107            let mean = self.mean();
108            let mut v: f64 = 0.0;
109            for s in self {
110                let x = *s - mean;
111                v += x * x;
112            }
113            // N.B., this is _supposed to be_ len-1, not len. If you
114            // change it back to len, you will be calculating a
115            // population variance, not a sample variance.
116            let denom = (self.len() - 1) as f64;
117            v / denom
118        }
119    }
120
121    fn std_dev(&self) -> f64 {
122        self.var().sqrt()
123    }
124
125    fn imin(&self) -> usize {
126        assert!(!self.is_empty());
127        self.iter().enumerate().fold(0, |i, (j, q)| if self[i] > *q { j } else { i })
128    }
129
130    fn imax(&self) -> usize {
131        assert!(!self.is_empty());
132        self.iter().enumerate().fold(0, |i, (j, q)| if self[i] > *q { i } else { j })
133    }
134}
135// stats:1 ends here
136
137// test
138
139// [[file:~/Workspace/Programming/gchemol-rs/vecfx/vecfx.note::*test][test:1]]
140#[cfg(test)]
141mod tests {
142    use super::*;
143    use approx::*;
144
145    #[test]
146    fn test_stats_nan() {
147        let xs = &[1.0, 2.0, std::f64::NAN, 3.0, 4.0];
148        assert_eq!(xs.min(), 1.0);
149        assert_eq!(xs.max(), 4.0);
150        assert_eq!(xs.imin(), 0);
151        assert_eq!(xs.imax(), 4);
152        assert!(xs.sum().is_nan());
153        assert!(xs.mean().is_nan());
154    }
155
156    #[test]
157    fn test_stats() {
158        let val = &[958.0000000000, 924.0000000000];
159        assert_relative_eq!(val.sum(), 1882.0);
160        assert_relative_eq!(val.min(), 924.0);
161        assert_relative_eq!(val.max(), 958.0);
162        assert_relative_eq!(val.mean(), 941.0);
163        assert_relative_eq!(val.var(), 578.0);
164        assert_relative_eq!(val.std_dev(), 24.0416305603, epsilon = 1e-6);
165        assert_eq!(val.imin(), 1);
166        assert_eq!(val.imax(), 0);
167    }
168}
169// test:1 ends here