git_perf/
stats.rs

1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Strategy for collapsing a series of `f64` measurements into one value.
///
/// Consumed by `NumericReductionFunc::aggregate_by` in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionFunc {
    /// Keep the smallest value (reduced with `f64::min`).
    Min,
    /// Keep the largest value (reduced with `f64::max`).
    Max,
    /// Take the median; for an even count, the mean of the two middle values.
    Median,
    /// Take the arithmetic mean.
    Mean,
}
15
/// Measure of spread used as the denominator when computing a z-score.
///
/// Derives `Eq` in addition to `PartialEq`: the enum is field-less, so
/// equality is total, and this keeps it consistent with `ReductionFunc`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionMethod {
    /// Use the sample standard deviation (`Stats::stddev`).
    StandardDeviation,
    /// Use the median absolute deviation (`Stats::mad`).
    MedianAbsoluteDeviation,
}
21
22// Conversion from CLI types to stats types
23impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
24    fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
25        match func {
26            git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
27            git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
28            git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
29            git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
30        }
31    }
32}
33
34impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
35    fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
36        match method {
37            git_perf_cli_types::DispersionMethod::StandardDeviation => {
38                DispersionMethod::StandardDeviation
39            }
40            git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
41                DispersionMethod::MedianAbsoluteDeviation
42            }
43        }
44    }
45}
46
/// Aggregations over an owned collection of measurements.
pub trait VecAggregation {
    /// Returns the median of the collection, or `None` when there is nothing
    /// to aggregate.
    ///
    /// The receiver is `&mut self`, so an implementation is allowed to reorder
    /// the underlying data; the `Vec<f64>` implementation in this file sorts
    /// the vector in place.
    fn median(&mut self) -> Option<f64>;
}
50
51concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
52
53pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
54    let measurements_vec: Vec<f64> = measurements.cloned().collect();
55    let s: AggStats = measurements_vec.iter().collect();
56    Stats {
57        mean: s.mean(),
58        stddev: s.sample_variance().sqrt(),
59        mad: calculate_mad(&measurements_vec),
60        len: s.mean.len() as usize,
61    }
62}
63
64pub fn calculate_mad(measurements: &[f64]) -> f64 {
65    if measurements.is_empty() {
66        return 0.0;
67    }
68
69    // Calculate median without modifying original data
70    let mut measurements_copy = measurements.to_vec();
71    let median = measurements_copy.median().unwrap();
72
73    // Calculate absolute deviations
74    let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
75
76    // Calculate median of absolute deviations
77    abs_deviations.median().unwrap()
78}
79
/// Summary statistics for a series of measurements.
///
/// Built by `aggregate_measurements`, or by hand via a struct literal (all
/// fields are public). `Clone` and `PartialEq` are derived so that a summary
/// can be duplicated and compared directly, e.g. in assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Sample standard deviation of the measurements.
    pub stddev: f64,
    /// Median absolute deviation of the measurements.
    pub mad: f64,
    /// Number of measurements summarised.
    pub len: usize,
}
87
88impl Display for Stats {
89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90        write!(
91            f,
92            "μ: {} σ: {} MAD: {} n: {}",
93            Float::from(self.mean),
94            Float::from(self.stddev),
95            Float::from(self.mad),
96            Unsigned::from(self.len),
97        )
98    }
99}
100
101impl Stats {
102    pub fn z_score(&self, other: &Stats) -> f64 {
103        self.z_score_with_method(other, DispersionMethod::StandardDeviation)
104    }
105
106    pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
107        assert!(self.len == 1);
108        assert!(other.len >= 1);
109
110        let dispersion = match method {
111            DispersionMethod::StandardDeviation => other.stddev,
112            DispersionMethod::MedianAbsoluteDeviation => other.mad,
113        };
114
115        // Division by zero is an expected case here: For measurements with no variance
116        (self.mean - other.mean).abs() / dispersion
117    }
118
119    pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
120        let z_score = self.z_score_with_method(other, method);
121        z_score > sigma
122    }
123}
124
125impl VecAggregation for Vec<f64> {
126    fn median(&mut self) -> Option<f64> {
127        self.sort_by(f64::total_cmp);
128        match self.len() {
129            0 => None,
130            even if even % 2 == 0 => {
131                let left = self[even / 2 - 1];
132                let right = self[even / 2];
133                Some((left + right) / 2.0)
134            }
135            odd => Some(self[odd / 2]),
136        }
137    }
138}
139
140pub trait NumericReductionFunc: Iterator<Item = f64> {
141    fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
142        match fun {
143            ReductionFunc::Min => self.reduce(f64::min),
144            ReductionFunc::Max => self.reduce(f64::max),
145            ReductionFunc::Median => self.collect_vec().median(),
146            ReductionFunc::Mean => {
147                let stats: AggStats = self.collect();
148                if stats.mean.is_empty() {
149                    None
150                } else {
151                    Some(stats.mean())
152                }
153            }
154        }
155    }
156}
157
158impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
159
#[cfg(test)]
mod test {
    use average::assert_almost_eq;

    use super::*;

    /// Shorthand for the `Stats` literals used throughout these tests.
    fn make_stats(mean: f64, stddev: f64, mad: f64, len: usize) -> Stats {
        Stats {
            mean,
            stddev,
            mad,
            len,
        }
    }

    #[test]
    fn no_floating_error() {
        let measurements = vec![0.1_f64; 100];
        let stats = aggregate_measurements(measurements.iter());
        assert_eq!(stats.mean, 0.1);
        assert_eq!(stats.len, 100);

        // A plain sum-then-divide accumulates rounding error; the streaming
        // estimator used by `aggregate_measurements` must not.
        let naive_mean = measurements.iter().sum::<f64>() / 100.0;
        assert_ne!(naive_mean, 0.1);
    }

    #[test]
    fn single_measurement() {
        let stats = aggregate_measurements([1.0].iter());
        assert_eq!(stats.len, 1);
        assert_eq!(stats.mean, 1.0);
        assert_eq!(stats.stddev, 0.0);
    }

    #[test]
    fn no_measurement() {
        let measurements: [f64; 0] = [];
        let stats = aggregate_measurements(measurements.iter());
        assert_eq!(stats.len, 0);
        assert_eq!(stats.mean, 0.0);
        assert_eq!(stats.stddev, 0.0);
    }

    #[test]
    fn z_score_with_zero_stddev() {
        let tail = make_stats(30.0, 0.0, 0.0, 40);
        let head_normal = make_stats(30.0, 0.0, 0.0, 1);
        let head_low = make_stats(20.0, 0.0, 0.0, 1);

        // 0 / 0: identical mean and no spread.
        assert!(head_normal.z_score(&tail).is_nan());

        // x / 0: different mean and no spread.
        assert!(head_low.z_score(&tail).is_infinite());
    }

    #[test]
    fn verify_stats() {
        let funcs = [
            ReductionFunc::Min,
            ReductionFunc::Max,
            ReductionFunc::Median,
            ReductionFunc::Mean,
        ];

        // Each row: the input values, then the expected result for every
        // entry of `funcs`, in the same order.
        let cases: [(&[f64], [Option<f64>; 4]); 4] = [
            (&[], [None; 4]),
            (&[3.0], [Some(3.0); 4]),
            (&[3.0, 1.0], [Some(1.0), Some(3.0), Some(2.0), Some(2.0)]),
            (
                &[2.0, 6.0, 1.0],
                [Some(1.0), Some(6.0), Some(2.0), Some(3.0)],
            ),
        ];

        for (values, expected) in cases {
            for (fun, want) in funcs.into_iter().zip(expected) {
                assert_eq!(
                    want,
                    values.iter().copied().aggregate_by(fun),
                    "{:?} of {:?}",
                    fun,
                    values
                );
            }
        }
    }

    #[test]
    fn test_calculate_mad() {
        // No values at all.
        assert_eq!(calculate_mad(&[]), 0.0);

        // One value: nothing to deviate from.
        assert_eq!(calculate_mad(&[5.0]), 0.0);

        // Two and three values.
        assert_eq!(calculate_mad(&[1.0, 3.0]), 1.0);
        assert_eq!(calculate_mad(&[1.0, 2.0, 3.0]), 1.0);

        // A single large outlier must not inflate the result.
        let with_outlier = [1.0, 2.0, 3.0, 100.0];
        let mad = calculate_mad(&with_outlier);
        assert_almost_eq!(mad, 1.0, 0.001);

        // Even-sized input with a MAD worked out by hand.
        let paired = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
        let mad = calculate_mad(&paired);
        assert_almost_eq!(mad, 1.0, 0.001);
    }

    #[test]
    fn test_mad_in_aggregate_measurements() {
        let measurements = [1.0, 2.0, 3.0, 4.0, 5.0];
        let stats = aggregate_measurements(measurements.iter());

        assert_eq!(stats.len, 5);
        assert_eq!(stats.mean, 3.0);
        assert!(stats.mad > 0.0);
        assert!(stats.stddev > 0.0);

        // For this evenly spread sample the MAD is smaller than the stddev.
        assert!(stats.mad < stats.stddev);
    }

    #[test]
    fn test_z_score_with_mad() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        let by_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
        let by_mad = head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);

        assert_eq!(by_stddev, 1.0); // (35 - 30) / 5
        assert_eq!(by_mad, 5.0 / 3.0); // (35 - 30) / 3

        // The two dispersion methods must give different scores here.
        assert_ne!(by_stddev, by_mad);
    }

    #[test]
    fn test_backward_compatibility() {
        // `z_score` must remain equivalent to the explicit stddev variant.
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        assert_eq!(
            head.z_score(&tail),
            head.z_score_with_method(&tail, DispersionMethod::StandardDeviation)
        );
    }

    #[test]
    fn test_display_with_mad() {
        let display = make_stats(10.0, 2.0, 1.5, 5).to_string();

        for fragment in ["μ: 10", "σ: 2", "MAD: 1.5", "n: 5"] {
            assert!(
                display.contains(fragment),
                "{:?} not found in {:?}",
                fragment,
                display
            );
        }
    }

    #[test]
    fn test_is_significant_boundary() {
        // COVERS MUTATION: `z_score > sigma` vs `>=`.
        let tail = make_stats(10.0, 2.0, 1.5, 5);

        // Both heads have a z-score of exactly 1.0 under their method, so only
        // a sigma strictly below 1.0 may report significance.
        let expectations = [(1.0, false), (0.9, true), (1.1, false)];

        // Standard deviation: (12 - 10) / 2 = 1.0
        let head = make_stats(12.0, 0.0, 0.0, 1);
        for (sigma, expected) in expectations {
            assert_eq!(
                head.is_significant(&tail, sigma, DispersionMethod::StandardDeviation),
                expected,
                "stddev, sigma = {}",
                sigma
            );
        }

        // MAD: (11.5 - 10) / 1.5 = 1.0
        let head_mad = make_stats(11.5, 0.0, 0.0, 1);
        for (sigma, expected) in expectations {
            assert_eq!(
                head_mad.is_significant(&tail, sigma, DispersionMethod::MedianAbsoluteDeviation),
                expected,
                "MAD, sigma = {}",
                sigma
            );
        }
    }
}
444}