git_perf/
stats.rs

1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Strategy for collapsing a series of measurements into one representative value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionFunc {
    /// Keep the smallest value.
    Min,
    /// Keep the largest value.
    Max,
    /// Take the median (the middle value, or the average of the two middle values).
    Median,
    /// Take the arithmetic mean.
    Mean,
}
15
/// Measure of spread used as the denominator when computing a z-score.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionMethod {
    /// Use the standard deviation.
    StandardDeviation,
    /// Use the median absolute deviation (MAD).
    MedianAbsoluteDeviation,
}
21
22// Conversion from CLI types to stats types
23impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
24    fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
25        match func {
26            git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
27            git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
28            git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
29            git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
30        }
31    }
32}
33
34impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
35    fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
36        match method {
37            git_perf_cli_types::DispersionMethod::StandardDeviation => {
38                DispersionMethod::StandardDeviation
39            }
40            git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
41                DispersionMethod::MedianAbsoluteDeviation
42            }
43        }
44    }
45}
46
/// Aggregations that can be computed over an owned collection of values.
pub trait VecAggregation {
    /// Returns the median of the collection, or `None` when there is no value
    /// to report.
    ///
    /// Takes `&mut self` so that implementations are free to reorder the
    /// elements (for example by sorting in place) while computing the result.
    fn median(&mut self) -> Option<f64>;
}
50
51concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
52
53pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
54    let measurements_vec: Vec<f64> = measurements.cloned().collect();
55    let s: AggStats = measurements_vec.iter().collect();
56    Stats {
57        mean: s.mean(),
58        stddev: s.sample_variance().sqrt(),
59        mad: calculate_mad(&measurements_vec),
60        len: s.mean.len() as usize,
61    }
62}
63
64pub fn calculate_mad(measurements: &[f64]) -> f64 {
65    if measurements.is_empty() {
66        return 0.0;
67    }
68
69    // Calculate median without modifying original data
70    let mut measurements_copy = measurements.to_vec();
71    let median = measurements_copy.median().unwrap();
72
73    // Calculate absolute deviations
74    let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
75
76    // Calculate median of absolute deviations
77    abs_deviations.median().unwrap()
78}
79
/// Summary statistics for a set of measurements.
#[derive(Debug, Clone, PartialEq)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Standard deviation (square root of the sample variance).
    pub stddev: f64,
    /// Median absolute deviation.
    pub mad: f64,
    /// Number of measurements summarized.
    pub len: usize,
}
87
88impl Display for Stats {
89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90        write!(
91            f,
92            "μ: {} σ: {} MAD: {} n: {}",
93            Float::from(self.mean),
94            Float::from(self.stddev),
95            Float::from(self.mad),
96            Unsigned::from(self.len),
97        )
98    }
99}
100
101impl Stats {
102    pub fn z_score(&self, other: &Stats) -> f64 {
103        self.z_score_with_method(other, DispersionMethod::StandardDeviation)
104    }
105
106    pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
107        assert!(self.len == 1);
108        assert!(other.len >= 1);
109
110        let dispersion = match method {
111            DispersionMethod::StandardDeviation => other.stddev,
112            DispersionMethod::MedianAbsoluteDeviation => other.mad,
113        };
114
115        // Division by zero is an expected case here: For measurements with no variance
116        (self.mean - other.mean).abs() / dispersion
117    }
118
119    pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
120        let z_score = self.z_score_with_method(other, method);
121        z_score > sigma
122    }
123}
124
125/// A wrapper around Stats that includes an optional unit for the mean value.
126/// When displayed, only the mean (μ) will have the unit suffix.
127/// Sigma (σ) and MAD remain unitless as they are dispersion measures.
128pub struct StatsWithUnit<'a> {
129    pub stats: &'a Stats,
130    pub unit: Option<&'a str>,
131}
132
133impl<'a> Display for StatsWithUnit<'a> {
134    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135        use crate::units::{format_measurement, parse_value_with_unit, Measurement};
136
137        match self.unit {
138            Some(u) => {
139                // Try to parse and format the mean value with auto-scaling
140                let mean_measurement = parse_value_with_unit(self.stats.mean, u);
141                let mean_display = match &mean_measurement {
142                    Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
143                        format_measurement(measurement.clone())
144                    }
145                    _ => format!("{} {}", Float::from(self.stats.mean), u),
146                };
147
148                // Try to parse and format stddev with auto-scaling
149                let stddev_measurement = parse_value_with_unit(self.stats.stddev, u);
150                let stddev_display = match &stddev_measurement {
151                    Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
152                        format_measurement(measurement.clone())
153                    }
154                    _ => format!("{}", Float::from(self.stats.stddev)),
155                };
156
157                // Try to parse and format MAD with auto-scaling
158                let mad_measurement = parse_value_with_unit(self.stats.mad, u);
159                let mad_display = match &mad_measurement {
160                    Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
161                        format_measurement(measurement.clone())
162                    }
163                    _ => format!("{}", Float::from(self.stats.mad)),
164                };
165
166                write!(
167                    f,
168                    "μ: {} σ: {} MAD: {} n: {}",
169                    mean_display,
170                    stddev_display,
171                    mad_display,
172                    Unsigned::from(self.stats.len)
173                )
174            }
175            None => write!(f, "{}", self.stats),
176        }
177    }
178}
179
180impl VecAggregation for Vec<f64> {
181    fn median(&mut self) -> Option<f64> {
182        self.sort_by(f64::total_cmp);
183        match self.len() {
184            0 => None,
185            even if even % 2 == 0 => {
186                let left = self[even / 2 - 1];
187                let right = self[even / 2];
188                Some((left + right) / 2.0)
189            }
190            odd => Some(self[odd / 2]),
191        }
192    }
193}
194
195pub trait NumericReductionFunc: Iterator<Item = f64> {
196    fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
197        match fun {
198            ReductionFunc::Min => self.reduce(f64::min),
199            ReductionFunc::Max => self.reduce(f64::max),
200            ReductionFunc::Median => self.collect_vec().median(),
201            ReductionFunc::Mean => {
202                let stats: AggStats = self.collect();
203                if stats.mean.is_empty() {
204                    None
205                } else {
206                    Some(stats.mean())
207                }
208            }
209        }
210    }
211}
212
213impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
214
215#[cfg(test)]
216mod test {
217    use average::assert_almost_eq;
218
219    use super::*;
220
221    #[test]
222    fn no_floating_error() {
223        let measurements = (0..100).map(|_| 0.1).collect_vec();
224        let stats = aggregate_measurements(measurements.iter());
225        assert_eq!(stats.mean, 0.1);
226        assert_eq!(stats.len, 100);
227        let naive_mean = (0..100).map(|_| 0.1).sum::<f64>() / 100.0;
228        assert_ne!(naive_mean, 0.1);
229    }
230
231    #[test]
232    fn single_measurement() {
233        let measurements = [1.0];
234        let stats = aggregate_measurements(measurements.iter());
235        assert_eq!(stats.len, 1);
236        assert_eq!(stats.mean, 1.0);
237        assert_eq!(stats.stddev, 0.0);
238    }
239
240    #[test]
241    fn no_measurement() {
242        let measurements = [];
243        let stats = aggregate_measurements(measurements.iter());
244        assert_eq!(stats.len, 0);
245        assert_eq!(stats.mean, 0.0);
246        assert_eq!(stats.stddev, 0.0);
247    }
248
249    #[test]
250    fn z_score_with_zero_stddev() {
251        let tail = Stats {
252            mean: 30.0,
253            stddev: 0.0,
254            mad: 0.0,
255            len: 40,
256        };
257
258        let head_normal = Stats {
259            mean: 30.0,
260            stddev: 0.0,
261            mad: 0.0,
262            len: 1,
263        };
264
265        let head_low = Stats {
266            mean: 20.0,
267            stddev: 0.0,
268            mad: 0.0,
269            len: 1,
270        };
271
272        let z_normal = head_normal.z_score(&tail);
273        assert!(z_normal.is_nan());
274
275        let z_low = head_low.z_score(&tail);
276        assert!(z_low.is_infinite());
277    }
278
279    #[test]
280    fn verify_stats() {
281        let empty_vec = [];
282        assert_eq!(None, empty_vec.into_iter().aggregate_by(ReductionFunc::Min));
283        assert_eq!(None, empty_vec.into_iter().aggregate_by(ReductionFunc::Max));
284        assert_eq!(
285            None,
286            empty_vec.into_iter().aggregate_by(ReductionFunc::Median)
287        );
288        assert_eq!(
289            None,
290            empty_vec.into_iter().aggregate_by(ReductionFunc::Mean)
291        );
292
293        let single_el_vec = [3.0];
294        assert_eq!(
295            Some(3.0),
296            single_el_vec.into_iter().aggregate_by(ReductionFunc::Min)
297        );
298        assert_eq!(
299            Some(3.0),
300            single_el_vec.into_iter().aggregate_by(ReductionFunc::Max)
301        );
302        assert_eq!(
303            Some(3.0),
304            single_el_vec
305                .into_iter()
306                .aggregate_by(ReductionFunc::Median)
307        );
308        assert_eq!(
309            Some(3.0),
310            single_el_vec.into_iter().aggregate_by(ReductionFunc::Mean)
311        );
312
313        let two_el_vec = [3.0, 1.0];
314        assert_eq!(
315            Some(1.0),
316            two_el_vec.into_iter().aggregate_by(ReductionFunc::Min)
317        );
318        assert_eq!(
319            Some(3.0),
320            two_el_vec.into_iter().aggregate_by(ReductionFunc::Max)
321        );
322        assert_eq!(
323            Some(2.0),
324            two_el_vec.into_iter().aggregate_by(ReductionFunc::Median)
325        );
326        assert_eq!(
327            Some(2.0),
328            two_el_vec.into_iter().aggregate_by(ReductionFunc::Mean)
329        );
330
331        let three_el_vec = [2.0, 6.0, 1.0];
332        assert_eq!(
333            Some(1.0),
334            three_el_vec.into_iter().aggregate_by(ReductionFunc::Min)
335        );
336        assert_eq!(
337            Some(6.0),
338            three_el_vec.into_iter().aggregate_by(ReductionFunc::Max)
339        );
340        assert_eq!(
341            Some(2.0),
342            three_el_vec.into_iter().aggregate_by(ReductionFunc::Median)
343        );
344        assert_eq!(
345            Some(3.0),
346            three_el_vec.into_iter().aggregate_by(ReductionFunc::Mean)
347        );
348    }
349
350    #[test]
351    fn test_calculate_mad() {
352        // Test empty array
353        assert_eq!(calculate_mad(&[]), 0.0);
354
355        // Test single value
356        assert_eq!(calculate_mad(&[5.0]), 0.0);
357
358        // Test two values
359        assert_eq!(calculate_mad(&[1.0, 3.0]), 1.0);
360
361        // Test three values
362        assert_eq!(calculate_mad(&[1.0, 2.0, 3.0]), 1.0);
363
364        // Test with outliers
365        let data = [1.0, 2.0, 3.0, 100.0];
366        let mad = calculate_mad(&data);
367        assert_almost_eq!(mad, 1.0, 0.001);
368        // assert!(mad > 0.0);
369        // assert!(mad < 50.0); // Should be robust to outliers
370
371        // Test with known MAD value
372        let data = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
373        let mad = calculate_mad(&data);
374        assert_almost_eq!(mad, 1.0, 0.001);
375    }
376
377    #[test]
378    fn test_mad_in_aggregate_measurements() {
379        let measurements = [1.0, 2.0, 3.0, 4.0, 5.0];
380        let stats = aggregate_measurements(measurements.iter());
381
382        assert_eq!(stats.len, 5);
383        assert_eq!(stats.mean, 3.0);
384        assert!(stats.mad > 0.0);
385        assert!(stats.stddev > 0.0);
386
387        // MAD should be less than stddev for normal distributions
388        assert!(stats.mad < stats.stddev);
389    }
390
391    #[test]
392    fn test_z_score_with_mad() {
393        let tail = Stats {
394            mean: 30.0,
395            stddev: 5.0,
396            mad: 3.0,
397            len: 40,
398        };
399
400        let head = Stats {
401            mean: 35.0,
402            stddev: 0.0,
403            mad: 0.0,
404            len: 1,
405        };
406
407        let z_score_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
408        let z_score_mad =
409            head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);
410
411        assert_eq!(z_score_stddev, 1.0); // (35-30)/5 = 1.0
412        assert_eq!(z_score_mad, 5.0 / 3.0); // (35-30)/3 ≈ 1.67
413
414        // MAD z-score should be different from stddev z-score
415        assert_ne!(z_score_stddev, z_score_mad);
416    }
417
418    #[test]
419    fn test_backward_compatibility() {
420        // Test that existing z_score method still works
421        let tail = Stats {
422            mean: 30.0,
423            stddev: 5.0,
424            mad: 3.0,
425            len: 40,
426        };
427
428        let head = Stats {
429            mean: 35.0,
430            stddev: 0.0,
431            mad: 0.0,
432            len: 1,
433        };
434
435        let z_score_old = head.z_score(&tail);
436        let z_score_new = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
437
438        assert_eq!(z_score_old, z_score_new);
439    }
440
441    #[test]
442    fn test_display_with_mad() {
443        let stats = Stats {
444            mean: 10.0,
445            stddev: 2.0,
446            mad: 1.5,
447            len: 5,
448        };
449
450        let display = format!("{}", stats);
451        assert!(display.contains("μ: 10"));
452        assert!(display.contains("σ: 2"));
453        assert!(display.contains("MAD: 1.5"));
454        assert!(display.contains("n: 5"));
455    }
456
457    #[test]
458    fn test_stats_with_unit() {
459        let stats = Stats {
460            mean: 1_234.5,
461            stddev: 123.4,
462            mad: 98.7,
463            len: 10,
464        };
465
466        // Test with unit - values should be auto-scaled (1234.5ms → 1.23s)
467        let with_unit = StatsWithUnit {
468            stats: &stats,
469            unit: Some("ms"),
470        };
471        let formatted = format!("{}", with_unit);
472
473        // Mean should be auto-scaled from 1234.5ms to ~1.23s
474        assert!(
475            formatted.contains("μ: 1.23s") || formatted.contains("μ: 1.2s"),
476            "Mean should be auto-scaled to seconds: {}",
477            formatted
478        );
479        // Stddev should be auto-scaled from 123.4ms to ~123ms or 123.4ms
480        assert!(
481            formatted.contains("σ: 123") && formatted.contains("ms"),
482            "Stddev should be auto-scaled: {}",
483            formatted
484        );
485        // MAD should be auto-scaled from 98.7ms to ~98ms or 98.7ms
486        assert!(
487            formatted.contains("MAD: 98") && formatted.contains("ms"),
488            "MAD should be auto-scaled: {}",
489            formatted
490        );
491        assert!(
492            formatted.contains("n: 10"),
493            "Count should be present: {}",
494            formatted
495        );
496
497        // Test without unit (should match Display trait)
498        let without_unit = StatsWithUnit {
499            stats: &stats,
500            unit: None,
501        };
502        let formatted_without = format!("{}", without_unit);
503        let display_format = format!("{}", stats);
504        assert_eq!(
505            formatted_without, display_format,
506            "StatsWithUnit with None should match Stats Display"
507        );
508
509        // Test with large values - should be auto-scaled (1234567.89ns → 1.23ms)
510        let large_stats = Stats {
511            mean: 1_234_567.89, // nanoseconds
512            stddev: 123_456.78,
513            mad: 12_345.67,
514            len: 1000,
515        };
516
517        let large_with_unit = StatsWithUnit {
518            stats: &large_stats,
519            unit: Some("ns"),
520        };
521        let large_formatted = format!("{}", large_with_unit);
522
523        // Mean should be auto-scaled from nanoseconds to milliseconds
524        assert!(
525            large_formatted.contains("μ: 1.23ms") || large_formatted.contains("μ: 1.2ms"),
526            "Large mean should be auto-scaled to ms: {}",
527            large_formatted
528        );
529        // Stddev should be auto-scaled appropriately
530        assert!(
531            large_formatted.contains("σ:")
532                && (large_formatted.contains("ms") || large_formatted.contains("μs")),
533            "Large stddev should be auto-scaled: {}",
534            large_formatted
535        );
536        // MAD should be auto-scaled appropriately
537        assert!(
538            large_formatted.contains("MAD:")
539                && (large_formatted.contains("ms") || large_formatted.contains("μs")),
540            "Large MAD should be auto-scaled: {}",
541            large_formatted
542        );
543        assert!(
544            large_formatted.contains("n: 1,000") || large_formatted.contains("n: 1000"),
545            "Large count should be present: {}",
546            large_formatted
547        );
548    }
549
550    #[test]
551    fn test_stats_with_unit_various_values() {
552        // Test various edge cases and value types
553
554        // Small decimal values - should remain in ms (no auto-scaling needed)
555        let small_stats = Stats {
556            mean: 42.5,
557            stddev: 2.0,
558            mad: 1.5,
559            len: 5,
560        };
561        let formatted = format!(
562            "{}",
563            StatsWithUnit {
564                stats: &small_stats,
565                unit: Some("ms")
566            }
567        );
568        assert!(
569            formatted.contains("42.5ms") || formatted.contains("42ms"),
570            "Small decimal with unit: {}",
571            formatted
572        );
573
574        // Zero value - should be formatted as 0ns or 0ms
575        let zero_stats = Stats {
576            mean: 0.0,
577            stddev: 0.0,
578            mad: 0.0,
579            len: 1,
580        };
581        let formatted = format!(
582            "{}",
583            StatsWithUnit {
584                stats: &zero_stats,
585                unit: Some("ms")
586            }
587        );
588        assert!(
589            formatted.contains("0") && formatted.contains("ns"),
590            "Zero value with unit: {}",
591            formatted
592        );
593
594        // Value with more precision - "seconds" is unknown unit, falls back to Count
595        let precise_stats = Stats {
596            mean: 3.21, // Arbitrary value to avoid clippy::approx_constant warning
597            stddev: 0.5,
598            mad: 0.3,
599            len: 10,
600        };
601        let formatted = format!(
602            "{}",
603            StatsWithUnit {
604                stats: &precise_stats,
605                unit: Some("seconds")
606            }
607        );
608        assert!(
609            formatted.contains("3.21") && formatted.contains("seconds"),
610            "Precise value with unknown unit (fallback): {}",
611            formatted
612        );
613
614        // Large round number - should be auto-scaled if using "B" unit
615        let million_stats = Stats {
616            mean: 1_000_000.0,
617            stddev: 50_000.0,
618            mad: 30_000.0,
619            len: 100,
620        };
621        let formatted = format!(
622            "{}",
623            StatsWithUnit {
624                stats: &million_stats,
625                unit: Some("B")
626            }
627        );
628        // 1,000,000 B = 1 MB
629        assert!(
630            formatted.contains("1MB") || formatted.contains("1.0MB"),
631            "Million bytes should be auto-scaled to MB: {}",
632            formatted
633        );
634
635        // Different unit types - unknown unit falls back to Count format
636        let temp_stats = Stats {
637            mean: 98.6,
638            stddev: 1.2,
639            mad: 0.8,
640            len: 20,
641        };
642        let formatted = format!(
643            "{}",
644            StatsWithUnit {
645                stats: &temp_stats,
646                unit: Some("°F")
647            }
648        );
649        assert!(
650            formatted.contains("98.6") && formatted.contains("°F"),
651            "Temperature unit (unknown, fallback): {}",
652            formatted
653        );
654
655        // Without unit - no unit should appear anywhere
656        let no_unit = format!(
657            "{}",
658            StatsWithUnit {
659                stats: &small_stats,
660                unit: None
661            }
662        );
663        assert!(
664            !no_unit.contains(" ms"),
665            "Should have no units: {}",
666            no_unit
667        );
668        assert!(
669            !no_unit.contains(" bytes"),
670            "Should have no units: {}",
671            no_unit
672        );
673    }
674
675    #[test]
676    fn test_thousands_separator_with_unknown_unit() {
677        // Test that thousands separators are maintained for unknown units
678        // This uses the readable crate's Float formatter which adds separators
679        let large_stats = Stats {
680            mean: 12_345.67,
681            stddev: 1_234.56,
682            mad: 567.89,
683            len: 100,
684        };
685
686        let formatted = format!(
687            "{}",
688            StatsWithUnit {
689                stats: &large_stats,
690                unit: Some("widgets") // Unknown unit
691            }
692        );
693
694        // The Float formatter from readable crate should add thousands separators
695        assert!(
696            formatted.contains("12,345") || formatted.contains("12_345"),
697            "Mean should have thousands separators for unknown unit, got: {}",
698            formatted
699        );
700
701        assert!(
702            formatted.contains("widgets"),
703            "Unknown unit should be preserved, got: {}",
704            formatted
705        );
706
707        // Verify stddev also has separators
708        assert!(
709            formatted.contains("1,234") || formatted.contains("1_234"),
710            "Stddev should have thousands separators, got: {}",
711            formatted
712        );
713    }
714
715    #[test]
716    fn test_is_significant_boundary() {
717        // COVERS MUTATION: z_score > sigma vs >=
718        let tail = Stats {
719            mean: 10.0,
720            stddev: 2.0,
721            mad: 1.5,
722            len: 5,
723        };
724
725        let head = Stats {
726            mean: 12.0, // z_score = (12-10)/2 = 1.0
727            stddev: 0.0,
728            mad: 0.0,
729            len: 1,
730        };
731
732        // Test boundary: z_score = 1.0, sigma = 1.0
733        // Should NOT be significant (z_score is not > sigma)
734        assert!(!head.is_significant(&tail, 1.0, DispersionMethod::StandardDeviation));
735
736        // Test just above boundary: z_score = 1.0, sigma = 0.9
737        // Should be significant (z_score > sigma)
738        assert!(head.is_significant(&tail, 0.9, DispersionMethod::StandardDeviation));
739
740        // Test just below boundary: z_score = 1.0, sigma = 1.1
741        // Should NOT be significant (z_score is not > sigma)
742        assert!(!head.is_significant(&tail, 1.1, DispersionMethod::StandardDeviation));
743
744        // Test with MAD
745        let head_mad = Stats {
746            mean: 11.5, // z_score = (11.5-10)/1.5 = 1.0
747            stddev: 0.0,
748            mad: 0.0,
749            len: 1,
750        };
751
752        // Test boundary with MAD: z_score = 1.0, sigma = 1.0
753        assert!(!head_mad.is_significant(&tail, 1.0, DispersionMethod::MedianAbsoluteDeviation));
754        assert!(head_mad.is_significant(&tail, 0.9, DispersionMethod::MedianAbsoluteDeviation));
755        assert!(!head_mad.is_significant(&tail, 1.1, DispersionMethod::MedianAbsoluteDeviation));
756    }
757}