1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Reduction used to collapse a group of measurements into a single value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionFunc {
    /// Smallest value of the group.
    Min,
    /// Largest value of the group.
    Max,
    /// Median of the group.
    Median,
    /// Arithmetic mean of the group.
    Mean,
}

/// Renders the lowercase name of the reduction: `min`, `max`, `median` or `mean`.
impl Display for ReductionFunc {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Min => "min",
            Self::Max => "max",
            Self::Median => "median",
            Self::Mean => "mean",
        };
        f.write_str(label)
    }
}
27
/// Statistic used as the measure of spread when computing z-scores.
///
/// Derives `Eq` in addition to `PartialEq`: the enum has no fields, so equality is total,
/// and this matches the derives on `ReductionFunc`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionMethod {
    /// Use the standard deviation (`Stats::stddev`).
    StandardDeviation,
    /// Use the median absolute deviation (`Stats::mad`).
    MedianAbsoluteDeviation,
}
33
34impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
36 fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
37 match func {
38 git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
39 git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
40 git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
41 git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
42 }
43 }
44}
45
46impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
47 fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
48 match method {
49 git_perf_cli_types::DispersionMethod::StandardDeviation => {
50 DispersionMethod::StandardDeviation
51 }
52 git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
53 DispersionMethod::MedianAbsoluteDeviation
54 }
55 }
56 }
57}
58
/// Aggregations computed over a whole collection of values.
pub trait VecAggregation {
    /// Returns the median of the collection, or `None` when there is nothing to aggregate.
    ///
    /// Takes `&mut self` so that an implementation may reorder the elements in place;
    /// the `Vec<f64>` implementation in this file sorts the vector.
    fn median(&mut self) -> Option<f64>;
}
62
// Generates the `AggStats` estimator via `average::concatenate!`. It is built by collecting
// an iterator of samples and, as used in this file, exposes a `mean` field (queried with
// `len()` / `is_empty()`) plus the `mean()` and `sample_variance()` methods.
concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
64
65pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
66 let measurements_vec: Vec<f64> = measurements.cloned().collect();
67 let s: AggStats = measurements_vec.iter().collect();
68 Stats {
69 mean: s.mean(),
70 stddev: s.sample_variance().sqrt(),
71 mad: calculate_mad(&measurements_vec),
72 len: s.mean.len() as usize,
73 }
74}
75
76#[must_use]
77pub fn calculate_mad(measurements: &[f64]) -> f64 {
78 if measurements.is_empty() {
79 return 0.0;
80 }
81
82 let mut measurements_copy = measurements.to_vec();
84 let median = measurements_copy.median().unwrap();
85
86 let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
88
89 abs_deviations.median().unwrap()
91}
92
/// Summary statistics for one group of measurements, as produced by
/// `aggregate_measurements`.
///
/// The type is plain data made of `Copy` fields, so it derives `Clone`, `Copy` and
/// `PartialEq` in addition to `Debug`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Sample standard deviation; rendered as `N/A` by `Display` when it is NaN.
    pub stddev: f64,
    /// Median absolute deviation; rendered as `N/A` by `Display` when it is NaN.
    pub mad: f64,
    /// Number of measurements summarized.
    pub len: usize,
}
100
101impl Display for Stats {
102 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103 let stddev_str = if self.stddev.is_nan() {
104 "N/A".to_string()
105 } else {
106 format!("{}", Float::from(self.stddev))
107 };
108 let mad_str = if self.mad.is_nan() {
109 "N/A".to_string()
110 } else {
111 format!("{}", Float::from(self.mad))
112 };
113 write!(
114 f,
115 "μ: {} σ: {} MAD: {} n: {}",
116 Float::from(self.mean),
117 stddev_str,
118 mad_str,
119 Unsigned::from(self.len),
120 )
121 }
122}
123
124impl Stats {
125 #[must_use]
126 pub fn z_score(&self, other: &Stats) -> f64 {
127 self.z_score_with_method(other, DispersionMethod::StandardDeviation)
128 }
129
130 #[must_use]
131 pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
132 assert!(self.len == 1);
133 assert!(other.len >= 1);
134
135 let dispersion = match method {
136 DispersionMethod::StandardDeviation => other.stddev,
137 DispersionMethod::MedianAbsoluteDeviation => other.mad,
138 };
139
140 (self.mean - other.mean).abs() / dispersion
142 }
143
144 #[must_use]
145 pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
146 let z_score = self.z_score_with_method(other, method);
147 z_score > sigma
148 }
149}
150
151pub struct StatsWithUnit<'a> {
155 pub stats: &'a Stats,
156 pub unit: Option<&'a str>,
157}
158
159impl<'a> Display for StatsWithUnit<'a> {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 use crate::units::{format_measurement, parse_value_with_unit, Measurement};
162
163 match self.unit {
164 Some(u) => {
165 let mean_measurement = parse_value_with_unit(self.stats.mean, u);
167 let mean_display = match &mean_measurement {
168 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
169 format_measurement(measurement.clone())
170 }
171 _ => format!("{} {}", Float::from(self.stats.mean), u),
172 };
173
174 let stddev_measurement = parse_value_with_unit(self.stats.stddev, u);
176 let stddev_display = match &stddev_measurement {
177 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
178 format_measurement(measurement.clone())
179 }
180 _ if self.stats.stddev.is_nan() => "N/A".to_string(),
181 _ => format!("{}", Float::from(self.stats.stddev)),
182 };
183
184 let mad_measurement = parse_value_with_unit(self.stats.mad, u);
186 let mad_display = match &mad_measurement {
187 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
188 format_measurement(measurement.clone())
189 }
190 _ if self.stats.mad.is_nan() => "N/A".to_string(),
191 _ => format!("{}", Float::from(self.stats.mad)),
192 };
193
194 write!(
195 f,
196 "μ: {} σ: {} MAD: {} n: {}",
197 mean_display,
198 stddev_display,
199 mad_display,
200 Unsigned::from(self.stats.len)
201 )
202 }
203 None => write!(f, "{}", self.stats),
204 }
205 }
206}
207
208impl VecAggregation for Vec<f64> {
209 fn median(&mut self) -> Option<f64> {
210 self.sort_by(f64::total_cmp);
211 match self.len() {
212 0 => None,
213 even if even % 2 == 0 => {
214 let left = self[even / 2 - 1];
215 let right = self[even / 2];
216 Some((left + right) / 2.0)
217 }
218 odd => Some(self[odd / 2]),
219 }
220 }
221}
222
223pub trait NumericReductionFunc: Iterator<Item = f64> {
224 fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
225 match fun {
226 ReductionFunc::Min => self.reduce(f64::min),
227 ReductionFunc::Max => self.reduce(f64::max),
228 ReductionFunc::Median => self.collect_vec().median(),
229 ReductionFunc::Mean => {
230 let stats: AggStats = self.collect();
231 if stats.mean.is_empty() {
232 None
233 } else {
234 Some(stats.mean())
235 }
236 }
237 }
238 }
239}
240
241impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
242
243#[cfg(test)]
244mod test {
245 use average::assert_almost_eq;
246
247 use super::*;
248
/// The aggregated mean of one hundred `0.1` values is exactly `0.1`, whereas the naive
/// sum-then-divide mean is not.
#[test]
fn no_floating_error() {
    let samples = vec![0.1_f64; 100];

    let stats = aggregate_measurements(samples.iter());
    assert_eq!(stats.mean, 0.1);
    assert_eq!(stats.len, 100);

    let naive_mean = samples.iter().sum::<f64>() / 100.0;
    assert_ne!(naive_mean, 0.1);
}
258
/// A single measurement has a well-defined mean but a NaN standard deviation.
#[test]
fn single_measurement() {
    let lone_sample = [1.0];
    let stats = aggregate_measurements(lone_sample.iter());

    assert_eq!(stats.len, 1);
    assert_eq!(stats.mean, 1.0);
    assert!(
        stats.stddev.is_nan(),
        "stddev should be NaN for single measurement"
    );
}
272
/// An empty input yields a length of zero and NaN for both mean and standard deviation.
#[test]
fn no_measurement() {
    let nothing: [f64; 0] = [];
    let stats = aggregate_measurements(nothing.iter());

    assert_eq!(stats.len, 0);
    assert!(
        stats.mean.is_nan(),
        "mean should be NaN for empty measurements"
    );
    assert!(
        stats.stddev.is_nan(),
        "stddev should be NaN for empty measurements"
    );
}
288
/// With a zero standard deviation the z-score is NaN for an equal mean and infinite for a
/// different mean.
#[test]
fn z_score_with_zero_stddev() {
    let tail = Stats {
        mean: 30.0,
        stddev: 0.0,
        mad: 0.0,
        len: 40,
    };
    // Single-measurement head with the given mean and no dispersion.
    let head_at = |mean: f64| Stats {
        mean,
        stddev: 0.0,
        mad: 0.0,
        len: 1,
    };

    let z_normal = head_at(30.0).z_score(&tail);
    assert!(z_normal.is_nan());

    let z_low = head_at(20.0).z_score(&tail);
    assert!(z_low.is_infinite());
}
318
/// Checks every reduction against inputs of zero, one, two and three elements.
#[test]
fn verify_stats() {
    let reductions = [
        ReductionFunc::Min,
        ReductionFunc::Max,
        ReductionFunc::Median,
        ReductionFunc::Mean,
    ];

    // Each row: input values, then the expected result per reduction in the order above.
    let cases: [(Vec<f64>, [Option<f64>; 4]); 4] = [
        (vec![], [None; 4]),
        (vec![3.0], [Some(3.0); 4]),
        (
            vec![3.0, 1.0],
            [Some(1.0), Some(3.0), Some(2.0), Some(2.0)],
        ),
        (
            vec![2.0, 6.0, 1.0],
            [Some(1.0), Some(6.0), Some(2.0), Some(3.0)],
        ),
    ];

    for (input, expected) in cases {
        for (fun, want) in reductions.into_iter().zip(expected) {
            assert_eq!(
                want,
                input.iter().copied().aggregate_by(fun),
                "{} of {:?}",
                fun,
                input
            );
        }
    }
}
389
/// Checks `calculate_mad` on empty, tiny, outlier-bearing and duplicated inputs.
#[test]
fn test_calculate_mad() {
    let exact_cases = [
        (vec![], 0.0),
        (vec![5.0], 0.0),
        (vec![1.0, 3.0], 1.0),
        (vec![1.0, 2.0, 3.0], 1.0),
    ];
    for (input, expected) in exact_cases {
        assert_eq!(calculate_mad(&input), expected);
    }

    let with_outlier = [1.0, 2.0, 3.0, 100.0];
    let mad = calculate_mad(&with_outlier);
    assert_almost_eq!(mad, 1.0, 0.001);

    let with_duplicates = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
    let mad = calculate_mad(&with_duplicates);
    assert_almost_eq!(mad, 1.0, 0.001);
}
416
/// `aggregate_measurements` fills in a positive MAD that is smaller than the standard
/// deviation for the values 1 through 5.
#[test]
fn test_mad_in_aggregate_measurements() {
    let samples = [1.0, 2.0, 3.0, 4.0, 5.0];
    let stats = aggregate_measurements(samples.iter());

    assert_eq!(stats.len, 5);
    assert_eq!(stats.mean, 3.0);

    assert!(stats.mad > 0.0);
    assert!(stats.stddev > 0.0);
    assert!(stats.mad < stats.stddev);
}
430
/// The z-score uses whichever dispersion the method selects: stddev gives 5/5, MAD gives 5/3.
#[test]
fn test_z_score_with_mad() {
    let tail = Stats {
        mean: 30.0,
        stddev: 5.0,
        mad: 3.0,
        len: 40,
    };
    let head = Stats {
        mean: 35.0,
        stddev: 0.0,
        mad: 0.0,
        len: 1,
    };

    let z_score_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
    let z_score_mad =
        head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);

    assert_eq!(z_score_stddev, 1.0);
    assert_eq!(z_score_mad, 5.0 / 3.0);
    assert_ne!(z_score_stddev, z_score_mad);
}
457
/// `z_score` gives the same result as `z_score_with_method` with the standard deviation.
#[test]
fn test_backward_compatibility() {
    let tail = Stats {
        mean: 30.0,
        stddev: 5.0,
        mad: 3.0,
        len: 40,
    };
    let head = Stats {
        mean: 35.0,
        stddev: 0.0,
        mad: 0.0,
        len: 1,
    };

    let z_score_old = head.z_score(&tail);
    let z_score_new = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);

    assert_eq!(z_score_old, z_score_new);
}
480
/// The `Display` output of `Stats` contains the mean, stddev, MAD and count fragments.
#[test]
fn test_display_with_mad() {
    let stats = Stats {
        mean: 10.0,
        stddev: 2.0,
        mad: 1.5,
        len: 5,
    };

    let display = format!("{}", stats);
    for fragment in ["μ: 10", "σ: 2", "MAD: 1.5", "n: 5"] {
        assert!(display.contains(fragment));
    }
}
496
/// Checks unit-aware rendering for millisecond and nanosecond inputs, and that a missing
/// unit reproduces the plain `Stats` output.
#[test]
fn test_stats_with_unit() {
    // Renders `stats` through `StatsWithUnit` with the given unit.
    fn render(stats: &Stats, unit: Option<&str>) -> String {
        format!("{}", StatsWithUnit { stats, unit })
    }

    let stats = Stats {
        mean: 1_234.5,
        stddev: 123.4,
        mad: 98.7,
        len: 10,
    };

    let formatted = render(&stats, Some("ms"));
    assert!(
        formatted.contains("μ: 1.23s") || formatted.contains("μ: 1.2s"),
        "Mean should be auto-scaled to seconds: {}",
        formatted
    );
    assert!(
        formatted.contains("σ: 123") && formatted.contains("ms"),
        "Stddev should be auto-scaled: {}",
        formatted
    );
    assert!(
        formatted.contains("MAD: 98") && formatted.contains("ms"),
        "MAD should be auto-scaled: {}",
        formatted
    );
    assert!(
        formatted.contains("n: 10"),
        "Count should be present: {}",
        formatted
    );

    let formatted_without = render(&stats, None);
    let display_format = format!("{}", stats);
    assert_eq!(
        formatted_without, display_format,
        "StatsWithUnit with None should match Stats Display"
    );

    let large_stats = Stats {
        mean: 1_234_567.89,
        stddev: 123_456.78,
        mad: 12_345.67,
        len: 1000,
    };

    let large_formatted = render(&large_stats, Some("ns"));
    let mentions_scaled_unit = large_formatted.contains("ms") || large_formatted.contains("μs");
    assert!(
        large_formatted.contains("μ: 1.23ms") || large_formatted.contains("μ: 1.2ms"),
        "Large mean should be auto-scaled to ms: {}",
        large_formatted
    );
    assert!(
        large_formatted.contains("σ:") && mentions_scaled_unit,
        "Large stddev should be auto-scaled: {}",
        large_formatted
    );
    assert!(
        large_formatted.contains("MAD:") && mentions_scaled_unit,
        "Large MAD should be auto-scaled: {}",
        large_formatted
    );
    assert!(
        large_formatted.contains("n: 1,000") || large_formatted.contains("n: 1000"),
        "Large count should be present: {}",
        large_formatted
    );
}
589
/// Exercises `StatsWithUnit` with small, zero, unknown-unit, byte-sized and unit-less inputs.
#[test]
fn test_stats_with_unit_various_values() {
    // Renders `stats` through `StatsWithUnit` with the given unit.
    fn render(stats: &Stats, unit: Option<&str>) -> String {
        format!("{}", StatsWithUnit { stats, unit })
    }

    let small_stats = Stats {
        mean: 42.5,
        stddev: 2.0,
        mad: 1.5,
        len: 5,
    };
    let formatted = render(&small_stats, Some("ms"));
    assert!(
        formatted.contains("42.5ms") || formatted.contains("42ms"),
        "Small decimal with unit: {}",
        formatted
    );

    let zero_stats = Stats {
        mean: 0.0,
        stddev: 0.0,
        mad: 0.0,
        len: 1,
    };
    let formatted = render(&zero_stats, Some("ms"));
    assert!(
        formatted.contains("0") && formatted.contains("ns"),
        "Zero value with unit: {}",
        formatted
    );

    let precise_stats = Stats {
        mean: 3.21,
        stddev: 0.5,
        mad: 0.3,
        len: 10,
    };
    let formatted = render(&precise_stats, Some("seconds"));
    assert!(
        formatted.contains("3.21") && formatted.contains("seconds"),
        "Precise value with unknown unit (fallback): {}",
        formatted
    );

    let million_stats = Stats {
        mean: 1_000_000.0,
        stddev: 50_000.0,
        mad: 30_000.0,
        len: 100,
    };
    let formatted = render(&million_stats, Some("B"));
    assert!(
        formatted.contains("1MB") || formatted.contains("1.0MB"),
        "Million bytes should be auto-scaled to MB: {}",
        formatted
    );

    let temp_stats = Stats {
        mean: 98.6,
        stddev: 1.2,
        mad: 0.8,
        len: 20,
    };
    let formatted = render(&temp_stats, Some("°F"));
    assert!(
        formatted.contains("98.6") && formatted.contains("°F"),
        "Temperature unit (unknown, fallback): {}",
        formatted
    );

    let no_unit = render(&small_stats, None);
    for unit_text in [" ms", " bytes"] {
        assert!(
            !no_unit.contains(unit_text),
            "Should have no units: {}",
            no_unit
        );
    }
}
714
/// With an unrecognized unit, large values keep thousands separators and the unit text is
/// preserved in the output.
#[test]
fn test_thousands_separator_with_unknown_unit() {
    let large_stats = Stats {
        mean: 12_345.67,
        stddev: 1_234.56,
        mad: 567.89,
        len: 100,
    };
    let view = StatsWithUnit {
        stats: &large_stats,
        unit: Some("widgets"),
    };

    let formatted = format!("{}", view);

    assert!(
        formatted.contains("12,345") || formatted.contains("12_345"),
        "Mean should have thousands separators for unknown unit, got: {}",
        formatted
    );
    assert!(
        formatted.contains("widgets"),
        "Unknown unit should be preserved, got: {}",
        formatted
    );
    assert!(
        formatted.contains("1,234") || formatted.contains("1_234"),
        "Stddev should have thousands separators, got: {}",
        formatted
    );
}
754
/// `is_significant` uses a strict comparison: a z-score of exactly 1.0 is significant for a
/// threshold of 0.9 but not for 1.0 or 1.1, for both dispersion methods.
#[test]
fn test_is_significant_boundary() {
    let tail = Stats {
        mean: 10.0,
        stddev: 2.0,
        mad: 1.5,
        len: 5,
    };
    // Single-measurement head with the given mean.
    let head_at = |mean: f64| Stats {
        mean,
        stddev: 0.0,
        mad: 0.0,
        len: 1,
    };
    // Thresholds paired with the expected verdict for a z-score of exactly 1.0.
    let thresholds = [(1.0, false), (0.9, true), (1.1, false)];

    // |12.0 - 10.0| / stddev 2.0 == 1.0
    let head = head_at(12.0);
    for (sigma, expected) in thresholds {
        assert_eq!(
            head.is_significant(&tail, sigma, DispersionMethod::StandardDeviation),
            expected
        );
    }

    // |11.5 - 10.0| / mad 1.5 == 1.0
    let head_mad = head_at(11.5);
    for (sigma, expected) in thresholds {
        assert_eq!(
            head_mad.is_significant(&tail, sigma, DispersionMethod::MedianAbsoluteDeviation),
            expected
        );
    }
}
797}