1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Strategy used to collapse a series of numeric measurements into one value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ReductionFunc {
    /// Keep the smallest value.
    Min,
    /// Keep the largest value.
    Max,
    /// Keep the median of the values.
    Median,
    /// Keep the arithmetic mean of the values.
    Mean,
}
15
/// Measure of spread used as the denominator when computing a z-score.
///
/// Derives `Eq` in addition to `PartialEq`: the enum has no payload, so equality is
/// total, and this keeps it consistent with [`ReductionFunc`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionMethod {
    /// Use the standard deviation of the reference sample.
    StandardDeviation,
    /// Use the median absolute deviation (MAD) of the reference sample.
    MedianAbsoluteDeviation,
}
21
22impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
24 fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
25 match func {
26 git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
27 git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
28 git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
29 git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
30 }
31 }
32}
33
34impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
35 fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
36 match method {
37 git_perf_cli_types::DispersionMethod::StandardDeviation => {
38 DispersionMethod::StandardDeviation
39 }
40 git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
41 DispersionMethod::MedianAbsoluteDeviation
42 }
43 }
44 }
45}
46
/// Aggregations computed over a mutable collection of samples.
pub trait VecAggregation {
    /// Returns the median of the collection.
    ///
    /// Takes `&mut self` so an implementation is free to reorder the elements while
    /// computing the result; the `Vec<f64>` implementation in this file sorts in place
    /// and yields `None` for an empty vector.
    fn median(&mut self) -> Option<f64>;
}
50
51concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
52
53pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
54 let measurements_vec: Vec<f64> = measurements.cloned().collect();
55 let s: AggStats = measurements_vec.iter().collect();
56 Stats {
57 mean: s.mean(),
58 stddev: s.sample_variance().sqrt(),
59 mad: calculate_mad(&measurements_vec),
60 len: s.mean.len() as usize,
61 }
62}
63
64pub fn calculate_mad(measurements: &[f64]) -> f64 {
65 if measurements.is_empty() {
66 return 0.0;
67 }
68
69 let mut measurements_copy = measurements.to_vec();
71 let median = measurements_copy.median().unwrap();
72
73 let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
75
76 abs_deviations.median().unwrap()
78}
79
/// Summary statistics describing a set of measurements.
#[derive(Debug)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Standard deviation of the measurements.
    pub stddev: f64,
    /// Median absolute deviation of the measurements.
    pub mad: f64,
    /// Number of measurements summarised.
    pub len: usize,
}
87
88impl Display for Stats {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(
91 f,
92 "μ: {} σ: {} MAD: {} n: {}",
93 Float::from(self.mean),
94 Float::from(self.stddev),
95 Float::from(self.mad),
96 Unsigned::from(self.len),
97 )
98 }
99}
100
101impl Stats {
102 pub fn z_score(&self, other: &Stats) -> f64 {
103 self.z_score_with_method(other, DispersionMethod::StandardDeviation)
104 }
105
106 pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
107 assert!(self.len == 1);
108 assert!(other.len >= 1);
109
110 let dispersion = match method {
111 DispersionMethod::StandardDeviation => other.stddev,
112 DispersionMethod::MedianAbsoluteDeviation => other.mad,
113 };
114
115 (self.mean - other.mean).abs() / dispersion
117 }
118
119 pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
120 let z_score = self.z_score_with_method(other, method);
121 z_score > sigma
122 }
123}
124
125pub struct StatsWithUnit<'a> {
129 pub stats: &'a Stats,
130 pub unit: Option<&'a str>,
131}
132
133impl<'a> Display for StatsWithUnit<'a> {
134 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135 use crate::units::{format_measurement, parse_value_with_unit, Measurement};
136
137 match self.unit {
138 Some(u) => {
139 let mean_measurement = parse_value_with_unit(self.stats.mean, u);
141 let mean_display = match &mean_measurement {
142 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
143 format_measurement(measurement.clone())
144 }
145 _ => format!("{} {}", Float::from(self.stats.mean), u),
146 };
147
148 let stddev_measurement = parse_value_with_unit(self.stats.stddev, u);
150 let stddev_display = match &stddev_measurement {
151 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
152 format_measurement(measurement.clone())
153 }
154 _ => format!("{}", Float::from(self.stats.stddev)),
155 };
156
157 let mad_measurement = parse_value_with_unit(self.stats.mad, u);
159 let mad_display = match &mad_measurement {
160 Ok(measurement) if !matches!(measurement, Measurement::Count(_)) => {
161 format_measurement(measurement.clone())
162 }
163 _ => format!("{}", Float::from(self.stats.mad)),
164 };
165
166 write!(
167 f,
168 "μ: {} σ: {} MAD: {} n: {}",
169 mean_display,
170 stddev_display,
171 mad_display,
172 Unsigned::from(self.stats.len)
173 )
174 }
175 None => write!(f, "{}", self.stats),
176 }
177 }
178}
179
180impl VecAggregation for Vec<f64> {
181 fn median(&mut self) -> Option<f64> {
182 self.sort_by(f64::total_cmp);
183 match self.len() {
184 0 => None,
185 even if even % 2 == 0 => {
186 let left = self[even / 2 - 1];
187 let right = self[even / 2];
188 Some((left + right) / 2.0)
189 }
190 odd => Some(self[odd / 2]),
191 }
192 }
193}
194
195pub trait NumericReductionFunc: Iterator<Item = f64> {
196 fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
197 match fun {
198 ReductionFunc::Min => self.reduce(f64::min),
199 ReductionFunc::Max => self.reduce(f64::max),
200 ReductionFunc::Median => self.collect_vec().median(),
201 ReductionFunc::Mean => {
202 let stats: AggStats = self.collect();
203 if stats.mean.is_empty() {
204 None
205 } else {
206 Some(stats.mean())
207 }
208 }
209 }
210 }
211}
212
213impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
214
#[cfg(test)]
mod test {
    use average::assert_almost_eq;

    use super::*;

    /// Builds a [`Stats`] from its four fields, in declaration order.
    fn make_stats(mean: f64, stddev: f64, mad: f64, len: usize) -> Stats {
        Stats {
            mean,
            stddev,
            mad,
            len,
        }
    }

    /// Formats `stats` through [`StatsWithUnit`] with the given optional unit.
    fn render(stats: &Stats, unit: Option<&str>) -> String {
        format!("{}", StatsWithUnit { stats, unit })
    }

    #[test]
    fn no_floating_error() {
        let samples = vec![0.1; 100];
        let summary = aggregate_measurements(samples.iter());
        assert_eq!(summary.mean, 0.1);
        assert_eq!(summary.len, 100);

        // Plain summation followed by a division is expected to drift away from 0.1.
        let naive_mean = samples.iter().sum::<f64>() / 100.0;
        assert_ne!(naive_mean, 0.1);
    }

    #[test]
    fn single_measurement() {
        let samples = [1.0];
        let summary = aggregate_measurements(samples.iter());
        assert_eq!(summary.len, 1);
        assert_eq!(summary.mean, 1.0);
        assert_eq!(summary.stddev, 0.0);
    }

    #[test]
    fn no_measurement() {
        let samples: [f64; 0] = [];
        let summary = aggregate_measurements(samples.iter());
        assert_eq!(summary.len, 0);
        assert_eq!(summary.mean, 0.0);
        assert_eq!(summary.stddev, 0.0);
    }

    #[test]
    fn z_score_with_zero_stddev() {
        let tail = make_stats(30.0, 0.0, 0.0, 40);
        let head_normal = make_stats(30.0, 0.0, 0.0, 1);
        let head_low = make_stats(20.0, 0.0, 0.0, 1);

        // 0 / 0 is NaN, a non-zero distance over 0 is infinite.
        assert!(head_normal.z_score(&tail).is_nan());
        assert!(head_low.z_score(&tail).is_infinite());
    }

    #[test]
    fn verify_stats() {
        /// Checks all four reductions of `input` against the expected results.
        fn check(
            input: &[f64],
            min: Option<f64>,
            max: Option<f64>,
            median: Option<f64>,
            mean: Option<f64>,
        ) {
            let reduce = |fun: ReductionFunc| input.iter().copied().aggregate_by(fun);
            assert_eq!(min, reduce(ReductionFunc::Min));
            assert_eq!(max, reduce(ReductionFunc::Max));
            assert_eq!(median, reduce(ReductionFunc::Median));
            assert_eq!(mean, reduce(ReductionFunc::Mean));
        }

        check(&[], None, None, None, None);
        check(&[3.0], Some(3.0), Some(3.0), Some(3.0), Some(3.0));
        check(&[3.0, 1.0], Some(1.0), Some(3.0), Some(2.0), Some(2.0));
        check(&[2.0, 6.0, 1.0], Some(1.0), Some(6.0), Some(2.0), Some(3.0));
    }

    #[test]
    fn test_calculate_mad() {
        // Empty input and a single value have no spread.
        assert_eq!(calculate_mad(&[]), 0.0);
        assert_eq!(calculate_mad(&[5.0]), 0.0);

        // Two and three values.
        assert_eq!(calculate_mad(&[1.0, 3.0]), 1.0);
        assert_eq!(calculate_mad(&[1.0, 2.0, 3.0]), 1.0);

        // A single outlier.
        let with_outlier = [1.0, 2.0, 3.0, 100.0];
        let mad_with_outlier = calculate_mad(&with_outlier);
        assert_almost_eq!(mad_with_outlier, 1.0, 0.001);

        // Repeated values, even length.
        let with_repeats = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
        let mad_with_repeats = calculate_mad(&with_repeats);
        assert_almost_eq!(mad_with_repeats, 1.0, 0.001);
    }

    #[test]
    fn test_mad_in_aggregate_measurements() {
        let samples = [1.0, 2.0, 3.0, 4.0, 5.0];
        let summary = aggregate_measurements(samples.iter());

        assert_eq!(summary.len, 5);
        assert_eq!(summary.mean, 3.0);
        assert!(summary.mad > 0.0);
        assert!(summary.stddev > 0.0);

        assert!(summary.mad < summary.stddev);
    }

    #[test]
    fn test_z_score_with_mad() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        let by_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
        let by_mad = head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);

        assert_eq!(by_stddev, 1.0);
        assert_eq!(by_mad, 5.0 / 3.0);
        assert_ne!(by_stddev, by_mad);
    }

    #[test]
    fn test_backward_compatibility() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        let via_shorthand = head.z_score(&tail);
        let via_explicit = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);

        assert_eq!(via_shorthand, via_explicit);
    }

    #[test]
    fn test_display_with_mad() {
        let display = format!("{}", make_stats(10.0, 2.0, 1.5, 5));

        for fragment in ["μ: 10", "σ: 2", "MAD: 1.5", "n: 5"] {
            assert!(display.contains(fragment));
        }
    }

    #[test]
    fn test_stats_with_unit() {
        let stats = make_stats(1_234.5, 123.4, 98.7, 10);

        // Millisecond values.
        let formatted = render(&stats, Some("ms"));
        assert!(
            formatted.contains("μ: 1.23s") || formatted.contains("μ: 1.2s"),
            "Mean should be auto-scaled to seconds: {}",
            formatted
        );
        assert!(
            formatted.contains("σ: 123") && formatted.contains("ms"),
            "Stddev should be auto-scaled: {}",
            formatted
        );
        assert!(
            formatted.contains("MAD: 98") && formatted.contains("ms"),
            "MAD should be auto-scaled: {}",
            formatted
        );
        assert!(
            formatted.contains("n: 10"),
            "Count should be present: {}",
            formatted
        );

        // Without a unit the adapter defers to the plain `Stats` output.
        let formatted_without = render(&stats, None);
        let display_format = format!("{}", stats);
        assert_eq!(
            formatted_without, display_format,
            "StatsWithUnit with None should match Stats Display"
        );

        // Large nanosecond values.
        let large_stats = make_stats(1_234_567.89, 123_456.78, 12_345.67, 1000);
        let large_formatted = render(&large_stats, Some("ns"));
        let mentions_ms_or_us = large_formatted.contains("ms") || large_formatted.contains("μs");

        assert!(
            large_formatted.contains("μ: 1.23ms") || large_formatted.contains("μ: 1.2ms"),
            "Large mean should be auto-scaled to ms: {}",
            large_formatted
        );
        assert!(
            large_formatted.contains("σ:") && mentions_ms_or_us,
            "Large stddev should be auto-scaled: {}",
            large_formatted
        );
        assert!(
            large_formatted.contains("MAD:") && mentions_ms_or_us,
            "Large MAD should be auto-scaled: {}",
            large_formatted
        );
        assert!(
            large_formatted.contains("n: 1,000") || large_formatted.contains("n: 1000"),
            "Large count should be present: {}",
            large_formatted
        );
    }

    #[test]
    fn test_stats_with_unit_various_values() {
        // Small decimal value with a known unit.
        let small_stats = make_stats(42.5, 2.0, 1.5, 5);
        let formatted = render(&small_stats, Some("ms"));
        assert!(
            formatted.contains("42.5ms") || formatted.contains("42ms"),
            "Small decimal with unit: {}",
            formatted
        );

        // All-zero statistics.
        let zero_stats = make_stats(0.0, 0.0, 0.0, 1);
        let formatted = render(&zero_stats, Some("ms"));
        assert!(
            formatted.contains("0") && formatted.contains("ns"),
            "Zero value with unit: {}",
            formatted
        );

        // A unit string expected to fall back to plain number + unit text.
        let precise_stats = make_stats(3.21, 0.5, 0.3, 10);
        let formatted = render(&precise_stats, Some("seconds"));
        assert!(
            formatted.contains("3.21") && formatted.contains("seconds"),
            "Precise value with unknown unit (fallback): {}",
            formatted
        );

        // Byte counts.
        let million_stats = make_stats(1_000_000.0, 50_000.0, 30_000.0, 100);
        let formatted = render(&million_stats, Some("B"));
        assert!(
            formatted.contains("1MB") || formatted.contains("1.0MB"),
            "Million bytes should be auto-scaled to MB: {}",
            formatted
        );

        // Another unit expected to take the fallback path.
        let temp_stats = make_stats(98.6, 1.2, 0.8, 20);
        let formatted = render(&temp_stats, Some("°F"));
        assert!(
            formatted.contains("98.6") && formatted.contains("°F"),
            "Temperature unit (unknown, fallback): {}",
            formatted
        );

        // No unit at all.
        let no_unit = render(&small_stats, None);
        assert!(
            !no_unit.contains(" ms"),
            "Should have no units: {}",
            no_unit
        );
        assert!(
            !no_unit.contains(" bytes"),
            "Should have no units: {}",
            no_unit
        );
    }

    #[test]
    fn test_thousands_separator_with_unknown_unit() {
        let large_stats = make_stats(12_345.67, 1_234.56, 567.89, 100);
        let formatted = render(&large_stats, Some("widgets"));

        assert!(
            formatted.contains("12,345") || formatted.contains("12_345"),
            "Mean should have thousands separators for unknown unit, got: {}",
            formatted
        );

        assert!(
            formatted.contains("widgets"),
            "Unknown unit should be preserved, got: {}",
            formatted
        );

        assert!(
            formatted.contains("1,234") || formatted.contains("1_234"),
            "Stddev should have thousands separators, got: {}",
            formatted
        );
    }

    #[test]
    fn test_is_significant_boundary() {
        let tail = make_stats(10.0, 2.0, 1.5, 5);

        // (sigma threshold, expected significance) for a z-score of exactly 1.0.
        let thresholds = [(1.0, false), (0.9, true), (1.1, false)];

        // |12.0 - 10.0| / 2.0 == 1.0 with the standard deviation.
        let head = make_stats(12.0, 0.0, 0.0, 1);
        for (sigma, expected) in thresholds {
            assert_eq!(
                head.is_significant(&tail, sigma, DispersionMethod::StandardDeviation),
                expected
            );
        }

        // |11.5 - 10.0| / 1.5 == 1.0 with the MAD.
        let head_mad = make_stats(11.5, 0.0, 0.0, 1);
        for (sigma, expected) in thresholds {
            assert_eq!(
                head_mad.is_significant(&tail, sigma, DispersionMethod::MedianAbsoluteDeviation),
                expected
            );
        }
    }
}