git_perf/
stats.rs

1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use crate::data::ReductionFunc;
7
8use readable::num::*;
9
/// Aggregations over an owned collection of measurements.
pub trait VecAggregation {
    /// Returns the median of the contained values, or `None` if there is
    /// nothing to aggregate.
    ///
    /// The receiver is `&mut self`, so an implementation may reorder the
    /// elements in place while computing the result.
    fn median(&mut self) -> Option<f64>;
}
13
14concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
15
16pub fn aggregate_measurements(measurements: impl Iterator<Item = f64>) -> Stats {
17    let s: AggStats = measurements.collect();
18    Stats {
19        mean: s.mean(),
20        stddev: s.sample_variance().sqrt(),
21        len: s.mean.len() as usize,
22    }
23}
24
/// Summary statistics describing a series of measurements.
///
/// All fields are plain numbers, so the type is cheap to copy and can be
/// compared field by field.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Standard deviation of the measurements.
    pub stddev: f64,
    /// Number of measurements that were aggregated.
    pub len: usize,
}
31
32impl Display for Stats {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        write!(
35            f,
36            "μ: {} σ: {} n: {}",
37            Float::from(self.mean),
38            Float::from(self.stddev),
39            Unsigned::from(self.len),
40        )
41    }
42}
43
44impl Stats {
45    pub fn significantly_different_from(&self, other: &Stats, sigma: f64) -> bool {
46        assert!(self.len == 1);
47        assert!(other.len >= 1);
48        (self.mean - other.mean).abs() / other.stddev > sigma
49    }
50}
51
52impl VecAggregation for Vec<f64> {
53    fn median(&mut self) -> Option<f64> {
54        self.sort_by(f64::total_cmp);
55        match self.len() {
56            0 => None,
57            even if even % 2 == 0 => {
58                let left = self[even / 2 - 1];
59                let right = self[even / 2];
60                Some((left + right) / 2.0)
61            }
62            odd => Some(self[odd / 2]),
63        }
64    }
65}
66
67pub trait NumericReductionFunc: Iterator<Item = f64> {
68    fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
69        match fun {
70            ReductionFunc::Min => self.reduce(f64::min),
71            ReductionFunc::Max => self.reduce(f64::max),
72            ReductionFunc::Median => self.collect_vec().median(),
73            ReductionFunc::Mean => {
74                let stats: AggStats = self.collect();
75                if stats.mean.is_empty() {
76                    None
77                } else {
78                    Some(stats.mean())
79                }
80            }
81        }
82    }
83}
84
85impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
86
#[cfg(test)]
mod test {
    use super::*;

    /// Reduces `values` with `fun` through `NumericReductionFunc::aggregate_by`.
    fn reduce(values: &[f64], fun: ReductionFunc) -> Option<f64> {
        values.iter().copied().aggregate_by(fun)
    }

    /// The aggregated mean of identical samples must be exact, even though a
    /// naive sum-then-divide accumulates floating-point error.
    #[test]
    fn no_floating_error() {
        let measurements = (0..100).map(|_| 0.1).collect_vec();
        let stats = aggregate_measurements(measurements.into_iter());
        // TODO(kaihowl)
        assert_eq!(stats.mean, 0.1);
        assert_eq!(stats.len, 100);
        let naive_mean = (0..100).map(|_| 0.1).sum::<f64>() / 100.0;
        assert_ne!(naive_mean, 0.1);
    }

    /// A single sample has itself as mean and no spread.
    #[test]
    fn single_measurement() {
        let measurements = vec![1.0];
        let stats = aggregate_measurements(measurements.into_iter());
        assert_eq!(stats.len, 1);
        assert_eq!(stats.mean, 1.0);
        assert_eq!(stats.stddev, 0.0);
    }

    /// Aggregating nothing yields all-zero statistics.
    #[test]
    fn no_measurement() {
        let measurements = vec![];
        let stats = aggregate_measurements(measurements.into_iter());
        assert_eq!(stats.len, 0);
        assert_eq!(stats.mean, 0.0);
        assert_eq!(stats.stddev, 0.0);
    }

    /// A zero standard deviation turns any deviation from the mean into an
    /// infinite z-score, both in the raw formula and in
    /// `Stats::significantly_different_from`.
    #[test]
    fn z_score_with_zero_stddev() {
        // Explicit `f64` suffixes pin the literal types so `.abs()` resolves.
        let stddev = 0.0_f64;
        let mean = 30.0_f64;
        let higher_val = 50.0_f64;
        let lower_val = 10.0_f64;
        let z_high = ((higher_val - mean) / stddev).abs();
        let z_low = ((lower_val - mean) / stddev).abs();
        assert_eq!(z_high, f64::INFINITY);
        assert_eq!(z_low, f64::INFINITY);

        // The production code must agree with the formula above.
        let reference = Stats {
            mean,
            stddev,
            len: 1,
        };
        let higher = Stats {
            mean: higher_val,
            stddev: 0.0,
            len: 1,
        };
        let lower = Stats {
            mean: lower_val,
            stddev: 0.0,
            len: 1,
        };
        let same = Stats {
            mean,
            stddev: 0.0,
            len: 1,
        };
        assert!(higher.significantly_different_from(&reference, f64::MAX));
        assert!(lower.significantly_different_from(&reference, f64::MAX));
        // 0.0 / 0.0 is NaN, which never exceeds the threshold.
        assert!(!same.significantly_different_from(&reference, 0.0));
    }

    /// Every reduction function on inputs of zero to three elements.
    #[test]
    fn verify_stats() {
        let empty: [f64; 0] = [];
        assert_eq!(None, reduce(&empty, ReductionFunc::Min));
        assert_eq!(None, reduce(&empty, ReductionFunc::Max));
        assert_eq!(None, reduce(&empty, ReductionFunc::Median));
        assert_eq!(None, reduce(&empty, ReductionFunc::Mean));

        let single = [3.0];
        assert_eq!(Some(3.0), reduce(&single, ReductionFunc::Min));
        assert_eq!(Some(3.0), reduce(&single, ReductionFunc::Max));
        assert_eq!(Some(3.0), reduce(&single, ReductionFunc::Median));
        assert_eq!(Some(3.0), reduce(&single, ReductionFunc::Mean));

        let pair = [3.0, 1.0];
        assert_eq!(Some(1.0), reduce(&pair, ReductionFunc::Min));
        assert_eq!(Some(3.0), reduce(&pair, ReductionFunc::Max));
        assert_eq!(Some(2.0), reduce(&pair, ReductionFunc::Median));
        assert_eq!(Some(2.0), reduce(&pair, ReductionFunc::Mean));

        let triple = [2.0, 6.0, 1.0];
        assert_eq!(Some(1.0), reduce(&triple, ReductionFunc::Min));
        assert_eq!(Some(6.0), reduce(&triple, ReductionFunc::Max));
        assert_eq!(Some(2.0), reduce(&triple, ReductionFunc::Median));
        assert_eq!(Some(3.0), reduce(&triple, ReductionFunc::Mean));
    }
}