1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Selects how a series of `f64` values is collapsed into a single value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionFunc {
    /// Keep the smallest value of the series.
    Min,
    /// Keep the largest value of the series.
    Max,
    /// Take the median of the series.
    Median,
    /// Take the arithmetic mean of the series.
    Mean,
}
15
/// Selects which dispersion measure of the baseline is used as the divisor
/// when a z-score is computed.
///
/// The enum carries no data, so equality is total and `Eq` is derived
/// alongside `PartialEq` (matching `ReductionFunc`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionMethod {
    /// Divide by the standard deviation.
    StandardDeviation,
    /// Divide by the median absolute deviation (MAD).
    MedianAbsoluteDeviation,
}
21
22impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
24 fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
25 match func {
26 git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
27 git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
28 git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
29 git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
30 }
31 }
32}
33
34impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
35 fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
36 match method {
37 git_perf_cli_types::DispersionMethod::StandardDeviation => {
38 DispersionMethod::StandardDeviation
39 }
40 git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
41 DispersionMethod::MedianAbsoluteDeviation
42 }
43 }
44 }
45}
46
/// Aggregations that can be computed over an owned collection of values.
pub trait VecAggregation {
    /// Returns the median of the collection, or `None` when there is nothing
    /// to take a median of.
    ///
    /// The receiver is mutable, so an implementation is free to reorder the
    /// collection while computing the result.
    fn median(&mut self) -> Option<f64>;
}
50
51concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
52
53pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
54 let measurements_vec: Vec<f64> = measurements.cloned().collect();
55 let s: AggStats = measurements_vec.iter().collect();
56 Stats {
57 mean: s.mean(),
58 stddev: s.sample_variance().sqrt(),
59 mad: calculate_mad(&measurements_vec),
60 len: s.mean.len() as usize,
61 }
62}
63
64#[must_use]
65pub fn calculate_mad(measurements: &[f64]) -> f64 {
66 if measurements.is_empty() {
67 return 0.0;
68 }
69
70 let mut measurements_copy = measurements.to_vec();
72 let median = measurements_copy.median().unwrap();
73
74 let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
76
77 abs_deviations.median().unwrap()
79}
80
/// Summary statistics of a series of measurements.
///
/// The type is plain data, so it can be cloned and compared in addition to
/// being debug-printed. Note that a `NaN` field never compares equal, not
/// even to itself.
#[derive(Debug, Clone, PartialEq)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Standard deviation of the measurements; `NaN` when it could not be
    /// computed.
    pub stddev: f64,
    /// Median absolute deviation of the measurements.
    pub mad: f64,
    /// Number of measurements that were summarised.
    pub len: usize,
}
88
89impl Display for Stats {
90 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91 let stddev_str = if self.stddev.is_nan() {
92 "N/A".to_string()
93 } else {
94 format!("{}", Float::from(self.stddev))
95 };
96 let mad_str = if self.mad.is_nan() {
97 "N/A".to_string()
98 } else {
99 format!("{}", Float::from(self.mad))
100 };
101 write!(
102 f,
103 "μ: {} σ: {} MAD: {} n: {}",
104 Float::from(self.mean),
105 stddev_str,
106 mad_str,
107 Unsigned::from(self.len),
108 )
109 }
110}
111
112impl Stats {
113 #[must_use]
114 pub fn z_score(&self, other: &Stats) -> f64 {
115 self.z_score_with_method(other, DispersionMethod::StandardDeviation)
116 }
117
118 #[must_use]
119 pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
120 assert!(self.len == 1);
121 assert!(other.len >= 1);
122
123 let dispersion = match method {
124 DispersionMethod::StandardDeviation => other.stddev,
125 DispersionMethod::MedianAbsoluteDeviation => other.mad,
126 };
127
128 (self.mean - other.mean).abs() / dispersion
130 }
131
132 #[must_use]
133 pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
134 let z_score = self.z_score_with_method(other, method);
135 z_score > sigma
136 }
137}
138
139pub struct StatsWithUnit<'a> {
143 pub stats: &'a Stats,
144 pub unit: Option<&'a str>,
145}
146
147impl<'a> Display for StatsWithUnit<'a> {
148 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149 use crate::units::{format_measurement, parse_value_with_unit, Measurement};
150
151 match self.unit {
152 Some(u) => {
153 let mean_measurement = parse_value_with_unit(self.stats.mean, u);
155 let mean_display = match &mean_measurement {
156 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
157 format_measurement(measurement.clone())
158 }
159 _ => format!("{} {}", Float::from(self.stats.mean), u),
160 };
161
162 let stddev_measurement = parse_value_with_unit(self.stats.stddev, u);
164 let stddev_display = match &stddev_measurement {
165 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
166 format_measurement(measurement.clone())
167 }
168 _ if self.stats.stddev.is_nan() => "N/A".to_string(),
169 _ => format!("{}", Float::from(self.stats.stddev)),
170 };
171
172 let mad_measurement = parse_value_with_unit(self.stats.mad, u);
174 let mad_display = match &mad_measurement {
175 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
176 format_measurement(measurement.clone())
177 }
178 _ if self.stats.mad.is_nan() => "N/A".to_string(),
179 _ => format!("{}", Float::from(self.stats.mad)),
180 };
181
182 write!(
183 f,
184 "μ: {} σ: {} MAD: {} n: {}",
185 mean_display,
186 stddev_display,
187 mad_display,
188 Unsigned::from(self.stats.len)
189 )
190 }
191 None => write!(f, "{}", self.stats),
192 }
193 }
194}
195
196impl VecAggregation for Vec<f64> {
197 fn median(&mut self) -> Option<f64> {
198 self.sort_by(f64::total_cmp);
199 match self.len() {
200 0 => None,
201 even if even % 2 == 0 => {
202 let left = self[even / 2 - 1];
203 let right = self[even / 2];
204 Some((left + right) / 2.0)
205 }
206 odd => Some(self[odd / 2]),
207 }
208 }
209}
210
211pub trait NumericReductionFunc: Iterator<Item = f64> {
212 fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
213 match fun {
214 ReductionFunc::Min => self.reduce(f64::min),
215 ReductionFunc::Max => self.reduce(f64::max),
216 ReductionFunc::Median => self.collect_vec().median(),
217 ReductionFunc::Mean => {
218 let stats: AggStats = self.collect();
219 if stats.mean.is_empty() {
220 None
221 } else {
222 Some(stats.mean())
223 }
224 }
225 }
226 }
227}
228
229impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
230
#[cfg(test)]
mod test {
    use average::assert_almost_eq;

    use super::*;

    /// Builds a `Stats` value from positional arguments to keep fixtures short.
    fn make_stats(mean: f64, stddev: f64, mad: f64, len: usize) -> Stats {
        Stats {
            mean,
            stddev,
            mad,
            len,
        }
    }

    /// Formats `stats` through `StatsWithUnit` with the given optional unit.
    fn render(stats: &Stats, unit: Option<&str>) -> String {
        format!("{}", StatsWithUnit { stats, unit })
    }

    /// Returns `true` when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    #[test]
    fn no_floating_error() {
        let samples: Vec<f64> = vec![0.1; 100];
        let stats = aggregate_measurements(samples.iter());
        assert_eq!(stats.mean, 0.1);
        assert_eq!(stats.len, 100);

        // A plain running sum of the same values does not land exactly on 0.1.
        let naive_mean = samples.iter().sum::<f64>() / 100.0;
        assert_ne!(naive_mean, 0.1);
    }

    #[test]
    fn single_measurement() {
        let stats = aggregate_measurements([1.0].iter());
        assert_eq!(stats.len, 1);
        assert_eq!(stats.mean, 1.0);
        assert!(
            stats.stddev.is_nan(),
            "stddev should be NaN for single measurement"
        );
    }

    #[test]
    fn no_measurement() {
        let nothing: [f64; 0] = [];
        let stats = aggregate_measurements(nothing.iter());
        assert_eq!(stats.len, 0);
        assert!(
            stats.mean.is_nan(),
            "mean should be NaN for empty measurements"
        );
        assert!(
            stats.stddev.is_nan(),
            "stddev should be NaN for empty measurements"
        );
    }

    #[test]
    fn z_score_with_zero_stddev() {
        let tail = make_stats(30.0, 0.0, 0.0, 40);

        // Same mean as the tail: 0 / 0.
        let head_normal = make_stats(30.0, 0.0, 0.0, 1);
        assert!(head_normal.z_score(&tail).is_nan());

        // Different mean: non-zero / 0.
        let head_low = make_stats(20.0, 0.0, 0.0, 1);
        assert!(head_low.z_score(&tail).is_infinite());
    }

    #[test]
    fn verify_stats() {
        let reductions = [
            ReductionFunc::Min,
            ReductionFunc::Max,
            ReductionFunc::Median,
            ReductionFunc::Mean,
        ];

        // Each row: the input values, then the expected results in the order
        // of `reductions` above.
        let cases: Vec<(Vec<f64>, [Option<f64>; 4])> = vec![
            (vec![], [None; 4]),
            (vec![3.0], [Some(3.0); 4]),
            (
                vec![3.0, 1.0],
                [Some(1.0), Some(3.0), Some(2.0), Some(2.0)],
            ),
            (
                vec![2.0, 6.0, 1.0],
                [Some(1.0), Some(6.0), Some(2.0), Some(3.0)],
            ),
        ];

        for (input, expected) in &cases {
            for (reduction, want) in reductions.iter().zip(expected.iter()) {
                let got = input.iter().copied().aggregate_by(*reduction);
                assert_eq!(*want, got, "{:?} over {:?}", reduction, input);
            }
        }
    }

    #[test]
    fn test_calculate_mad() {
        // Inputs whose MAD can be compared exactly.
        let exact_cases: Vec<(Vec<f64>, f64)> = vec![
            (vec![], 0.0),
            (vec![5.0], 0.0),
            (vec![1.0, 3.0], 1.0),
            (vec![1.0, 2.0, 3.0], 1.0),
        ];
        for (input, expected) in &exact_cases {
            assert_eq!(calculate_mad(input), *expected, "MAD of {:?}", input);
        }

        // A single outlier must not move the MAD.
        let with_outlier = [1.0, 2.0, 3.0, 100.0];
        let mad = calculate_mad(&with_outlier);
        assert_almost_eq!(mad, 1.0, 0.001);

        let with_duplicates = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
        let mad = calculate_mad(&with_duplicates);
        assert_almost_eq!(mad, 1.0, 0.001);
    }

    #[test]
    fn test_mad_in_aggregate_measurements() {
        let stats = aggregate_measurements([1.0, 2.0, 3.0, 4.0, 5.0].iter());

        assert_eq!(stats.len, 5);
        assert_eq!(stats.mean, 3.0);
        assert!(stats.mad > 0.0);
        assert!(stats.stddev > 0.0);

        assert!(stats.mad < stats.stddev);
    }

    #[test]
    fn test_z_score_with_mad() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        let by_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
        let by_mad = head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);

        assert_eq!(by_stddev, 1.0);
        assert_eq!(by_mad, 5.0 / 3.0);
        assert_ne!(by_stddev, by_mad);
    }

    #[test]
    fn test_backward_compatibility() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        // The short form must agree with the explicit standard-deviation form.
        assert_eq!(
            head.z_score(&tail),
            head.z_score_with_method(&tail, DispersionMethod::StandardDeviation)
        );
    }

    #[test]
    fn test_display_with_mad() {
        let display = format!("{}", make_stats(10.0, 2.0, 1.5, 5));

        for fragment in ["μ: 10", "σ: 2", "MAD: 1.5", "n: 5"].iter() {
            assert!(
                display.contains(*fragment),
                "{:?} is missing from {:?}",
                fragment,
                display
            );
        }
    }

    #[test]
    fn test_stats_with_unit() {
        let stats = make_stats(1_234.5, 123.4, 98.7, 10);
        let formatted = render(&stats, Some("ms"));

        assert!(
            contains_any(&formatted, &["μ: 1.23s", "μ: 1.2s"]),
            "Mean should be auto-scaled to seconds: {}",
            formatted
        );
        assert!(
            formatted.contains("σ: 123") && formatted.contains("ms"),
            "Stddev should be auto-scaled: {}",
            formatted
        );
        assert!(
            formatted.contains("MAD: 98") && formatted.contains("ms"),
            "MAD should be auto-scaled: {}",
            formatted
        );
        assert!(
            formatted.contains("n: 10"),
            "Count should be present: {}",
            formatted
        );

        // Without a unit the wrapper must not change the plain output.
        let formatted_without = render(&stats, None);
        let display_format = format!("{}", stats);
        assert_eq!(
            formatted_without, display_format,
            "StatsWithUnit with None should match Stats Display"
        );

        let large_stats = make_stats(1_234_567.89, 123_456.78, 12_345.67, 1000);
        let large_formatted = render(&large_stats, Some("ns"));

        assert!(
            contains_any(&large_formatted, &["μ: 1.23ms", "μ: 1.2ms"]),
            "Large mean should be auto-scaled to ms: {}",
            large_formatted
        );
        assert!(
            large_formatted.contains("σ:") && contains_any(&large_formatted, &["ms", "μs"]),
            "Large stddev should be auto-scaled: {}",
            large_formatted
        );
        assert!(
            large_formatted.contains("MAD:") && contains_any(&large_formatted, &["ms", "μs"]),
            "Large MAD should be auto-scaled: {}",
            large_formatted
        );
        assert!(
            contains_any(&large_formatted, &["n: 1,000", "n: 1000"]),
            "Large count should be present: {}",
            large_formatted
        );
    }

    #[test]
    fn test_stats_with_unit_various_values() {
        let small_stats = make_stats(42.5, 2.0, 1.5, 5);
        let formatted = render(&small_stats, Some("ms"));
        assert!(
            contains_any(&formatted, &["42.5ms", "42ms"]),
            "Small decimal with unit: {}",
            formatted
        );

        let zero_stats = make_stats(0.0, 0.0, 0.0, 1);
        let formatted = render(&zero_stats, Some("ms"));
        assert!(
            formatted.contains("0") && formatted.contains("ns"),
            "Zero value with unit: {}",
            formatted
        );

        let precise_stats = make_stats(3.21, 0.5, 0.3, 10);
        let formatted = render(&precise_stats, Some("seconds"));
        assert!(
            formatted.contains("3.21") && formatted.contains("seconds"),
            "Precise value with unknown unit (fallback): {}",
            formatted
        );

        let million_stats = make_stats(1_000_000.0, 50_000.0, 30_000.0, 100);
        let formatted = render(&million_stats, Some("B"));
        assert!(
            contains_any(&formatted, &["1MB", "1.0MB"]),
            "Million bytes should be auto-scaled to MB: {}",
            formatted
        );

        let temp_stats = make_stats(98.6, 1.2, 0.8, 20);
        let formatted = render(&temp_stats, Some("°F"));
        assert!(
            formatted.contains("98.6") && formatted.contains("°F"),
            "Temperature unit (unknown, fallback): {}",
            formatted
        );

        let no_unit = render(&small_stats, None);
        assert!(
            !no_unit.contains(" ms"),
            "Should have no units: {}",
            no_unit
        );
        assert!(
            !no_unit.contains(" bytes"),
            "Should have no units: {}",
            no_unit
        );
    }

    #[test]
    fn test_thousands_separator_with_unknown_unit() {
        let large_stats = make_stats(12_345.67, 1_234.56, 567.89, 100);
        let formatted = render(&large_stats, Some("widgets"));

        assert!(
            contains_any(&formatted, &["12,345", "12_345"]),
            "Mean should have thousands separators for unknown unit, got: {}",
            formatted
        );

        assert!(
            formatted.contains("widgets"),
            "Unknown unit should be preserved, got: {}",
            formatted
        );

        assert!(
            contains_any(&formatted, &["1,234", "1_234"]),
            "Stddev should have thousands separators, got: {}",
            formatted
        );
    }

    #[test]
    fn test_is_significant_boundary() {
        let tail = make_stats(10.0, 2.0, 1.5, 5);

        // Each head sits exactly one dispersion away from the tail mean for
        // the method it is paired with.
        let scenarios = [
            (
                make_stats(12.0, 0.0, 0.0, 1),
                DispersionMethod::StandardDeviation,
            ),
            (
                make_stats(11.5, 0.0, 0.0, 1),
                DispersionMethod::MedianAbsoluteDeviation,
            ),
        ];
        // Threshold and whether a z-score of exactly 1.0 exceeds it.
        let thresholds = [(1.0, false), (0.9, true), (1.1, false)];

        for (head, method) in scenarios.iter() {
            for (sigma, expected) in thresholds.iter() {
                assert_eq!(
                    head.is_significant(&tail, *sigma, *method),
                    *expected,
                    "sigma {} with {:?}",
                    sigma,
                    method
                );
            }
        }
    }
}