vecfx/stats.rs
1// stats
2// abstracted from: https://github.com/rust-lang/libtest/blob/master/libtest/stats.rs
3
4// [[file:~/Workspace/Programming/gchemol-rs/vecfx/vecfx.note::*stats][stats:1]]
5/// Trait that provides simple descriptive statistics on a univariate set of numeric samples.
6pub trait StatsExt {
7 /// Sum of the samples.
8 ///
9 /// Note: this method sacrifices performance at the altar of accuracy
10 /// Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
11 /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric
12 /// Predicates"][paper]
13 ///
14 /// [paper]: http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps
15 fn sum(&self) -> f64;
16
17 /// Minimum value of the samples.
18 fn min(&self) -> f64;
19
20 /// Maximum value of the samples.
21 fn max(&self) -> f64;
22
23 /// Arithmetic mean (average) of the samples: sum divided by sample-count.
24 ///
25 /// See: <https://en.wikipedia.org/wiki/Arithmetic_mean>
26 fn mean(&self) -> f64;
27
28 /// Variance of the samples: bias-corrected mean of the squares of the differences of each
29 /// sample from the sample mean. Note that this calculates the _sample variance_ rather than the
30 /// population variance, which is assumed to be unknown. It therefore corrects the `(n-1)/n`
31 /// bias that would appear if we calculated a population variance, by dividing by `(n-1)` rather
32 /// than `n`.
33 ///
34 /// See: <https://en.wikipedia.org/wiki/Variance>
35 fn var(&self) -> f64;
36
37 /// Standard deviation: the square root of the sample variance.
38 ///
39 /// Note: this is not a robust statistic for non-normal distributions. Prefer the
40 /// `median_abs_dev` for unknown distributions.
41 ///
42 /// See: <https://en.wikipedia.org/wiki/Standard_deviation>
43 fn std_dev(&self) -> f64;
44
45 /// Index to the minimum value of the samples.
46 fn imin(&self) -> usize;
47
48 /// Index to the maximum value of the samples.
49 fn imax(&self) -> usize;
50}
51
52impl StatsExt for [f64] {
53 // FIXME #11059 handle NaN, inf and overflow
54 fn sum(&self) -> f64 {
55 let mut partials = vec![];
56
57 for &x in self {
58 let mut x = x;
59 let mut j = 0;
60 // This inner loop applies `hi`/`lo` summation to each
61 // partial so that the list of partial sums remains exact.
62 for i in 0..partials.len() {
63 let mut y: f64 = partials[i];
64 if x.abs() < y.abs() {
65 std::mem::swap(&mut x, &mut y);
66 }
67 // Rounded `x+y` is stored in `hi` with round-off stored in
68 // `lo`. Together `hi+lo` are exactly equal to `x+y`.
69 let hi = x + y;
70 let lo = y - (hi - x);
71 if lo != 0.0 {
72 partials[j] = lo;
73 j += 1;
74 }
75 x = hi;
76 }
77 if j >= partials.len() {
78 partials.push(x);
79 } else {
80 partials[j] = x;
81 partials.truncate(j + 1);
82 }
83 }
84 let zero: f64 = 0.0;
85 partials.iter().fold(zero, |p, q| p + *q)
86 }
87
88 fn min(&self) -> f64 {
89 assert!(!self.is_empty());
90 self.iter().fold(self[0], |p, q| p.min(*q))
91 }
92
93 fn max(&self) -> f64 {
94 assert!(!self.is_empty());
95 self.iter().fold(self[0], |p, q| p.max(*q))
96 }
97
98 fn mean(&self) -> f64 {
99 assert!(!self.is_empty());
100 self.sum() / (self.len() as f64)
101 }
102
103 fn var(&self) -> f64 {
104 if self.len() < 2 {
105 0.0
106 } else {
107 let mean = self.mean();
108 let mut v: f64 = 0.0;
109 for s in self {
110 let x = *s - mean;
111 v += x * x;
112 }
113 // N.B., this is _supposed to be_ len-1, not len. If you
114 // change it back to len, you will be calculating a
115 // population variance, not a sample variance.
116 let denom = (self.len() - 1) as f64;
117 v / denom
118 }
119 }
120
121 fn std_dev(&self) -> f64 {
122 self.var().sqrt()
123 }
124
125 fn imin(&self) -> usize {
126 assert!(!self.is_empty());
127 self.iter().enumerate().fold(0, |i, (j, q)| if self[i] > *q { j } else { i })
128 }
129
130 fn imax(&self) -> usize {
131 assert!(!self.is_empty());
132 self.iter().enumerate().fold(0, |i, (j, q)| if self[i] > *q { i } else { j })
133 }
134}
135// stats:1 ends here
136
137// test
138
139// [[file:~/Workspace/Programming/gchemol-rs/vecfx/vecfx.note::*test][test:1]]
140#[cfg(test)]
141mod tests {
142 use super::*;
143 use approx::*;
144
145 #[test]
146 fn test_stats_nan() {
147 let xs = &[1.0, 2.0, std::f64::NAN, 3.0, 4.0];
148 assert_eq!(xs.min(), 1.0);
149 assert_eq!(xs.max(), 4.0);
150 assert_eq!(xs.imin(), 0);
151 assert_eq!(xs.imax(), 4);
152 assert!(xs.sum().is_nan());
153 assert!(xs.mean().is_nan());
154 }
155
156 #[test]
157 fn test_stats() {
158 let val = &[958.0000000000, 924.0000000000];
159 assert_relative_eq!(val.sum(), 1882.0);
160 assert_relative_eq!(val.min(), 924.0);
161 assert_relative_eq!(val.max(), 958.0);
162 assert_relative_eq!(val.mean(), 941.0);
163 assert_relative_eq!(val.var(), 578.0);
164 assert_relative_eq!(val.std_dev(), 24.0416305603, epsilon = 1e-6);
165 assert_eq!(val.imin(), 1);
166 assert_eq!(val.imax(), 0);
167 }
168}
169// test:1 ends here