1use std::fmt::Display;
2
3use average::{self, concatenate, Estimate, Mean, Variance};
4use itertools::Itertools;
5
6use readable::num::*;
7
/// Strategy used to collapse a series of `f64` values into a single number.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionFunc {
    /// Keep the smallest value.
    Min,
    /// Keep the largest value.
    Max,
    /// Keep the middle value (average of the two middle values for even counts).
    Median,
    /// Keep the arithmetic mean.
    Mean,
}
15
/// Measure of spread used as the denominator when computing a z-score.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DispersionMethod {
    /// Use the standard deviation.
    StandardDeviation,
    /// Use the median absolute deviation (MAD).
    MedianAbsoluteDeviation,
}
21
22impl From<git_perf_cli_types::ReductionFunc> for ReductionFunc {
24 fn from(func: git_perf_cli_types::ReductionFunc) -> Self {
25 match func {
26 git_perf_cli_types::ReductionFunc::Min => ReductionFunc::Min,
27 git_perf_cli_types::ReductionFunc::Max => ReductionFunc::Max,
28 git_perf_cli_types::ReductionFunc::Median => ReductionFunc::Median,
29 git_perf_cli_types::ReductionFunc::Mean => ReductionFunc::Mean,
30 }
31 }
32}
33
34impl From<git_perf_cli_types::DispersionMethod> for DispersionMethod {
35 fn from(method: git_perf_cli_types::DispersionMethod) -> Self {
36 match method {
37 git_perf_cli_types::DispersionMethod::StandardDeviation => {
38 DispersionMethod::StandardDeviation
39 }
40 git_perf_cli_types::DispersionMethod::MedianAbsoluteDeviation => {
41 DispersionMethod::MedianAbsoluteDeviation
42 }
43 }
44 }
45}
46
/// Aggregations over a collection of numbers.
pub trait VecAggregation {
    /// Returns the median of the collection, or `None` when there is nothing
    /// to aggregate.
    ///
    /// Takes `&mut self` so that implementations are free to reorder the
    /// underlying storage while computing the result.
    fn median(&mut self) -> Option<f64>;
}
50
51concatenate!(AggStats, [Mean, mean], [Variance, sample_variance]);
52
53pub fn aggregate_measurements<'a>(measurements: impl Iterator<Item = &'a f64>) -> Stats {
54 let measurements_vec: Vec<f64> = measurements.cloned().collect();
55 let s: AggStats = measurements_vec.iter().collect();
56 Stats {
57 mean: s.mean(),
58 stddev: s.sample_variance().sqrt(),
59 mad: calculate_mad(&measurements_vec),
60 len: s.mean.len() as usize,
61 }
62}
63
64pub fn calculate_mad(measurements: &[f64]) -> f64 {
65 if measurements.is_empty() {
66 return 0.0;
67 }
68
69 let mut measurements_copy = measurements.to_vec();
71 let median = measurements_copy.median().unwrap();
72
73 let mut abs_deviations: Vec<f64> = measurements.iter().map(|&x| (x - median).abs()).collect();
75
76 abs_deviations.median().unwrap()
78}
79
/// Summary statistics describing a set of measurements.
#[derive(Debug)]
pub struct Stats {
    /// Arithmetic mean of the measurements.
    pub mean: f64,
    /// Standard deviation of the measurements.
    pub stddev: f64,
    /// Median absolute deviation of the measurements.
    pub mad: f64,
    /// Number of measurements summarised.
    pub len: usize,
}
87
88impl Display for Stats {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(
91 f,
92 "μ: {} σ: {} MAD: {} n: {}",
93 Float::from(self.mean),
94 Float::from(self.stddev),
95 Float::from(self.mad),
96 Unsigned::from(self.len),
97 )
98 }
99}
100
101impl Stats {
102 pub fn z_score(&self, other: &Stats) -> f64 {
103 self.z_score_with_method(other, DispersionMethod::StandardDeviation)
104 }
105
106 pub fn z_score_with_method(&self, other: &Stats, method: DispersionMethod) -> f64 {
107 assert!(self.len == 1);
108 assert!(other.len >= 1);
109
110 let dispersion = match method {
111 DispersionMethod::StandardDeviation => other.stddev,
112 DispersionMethod::MedianAbsoluteDeviation => other.mad,
113 };
114
115 (self.mean - other.mean).abs() / dispersion
117 }
118
119 pub fn is_significant(&self, other: &Stats, sigma: f64, method: DispersionMethod) -> bool {
120 let z_score = self.z_score_with_method(other, method);
121 z_score > sigma
122 }
123}
124
125impl VecAggregation for Vec<f64> {
126 fn median(&mut self) -> Option<f64> {
127 self.sort_by(f64::total_cmp);
128 match self.len() {
129 0 => None,
130 even if even % 2 == 0 => {
131 let left = self[even / 2 - 1];
132 let right = self[even / 2];
133 Some((left + right) / 2.0)
134 }
135 odd => Some(self[odd / 2]),
136 }
137 }
138}
139
140pub trait NumericReductionFunc: Iterator<Item = f64> {
141 fn aggregate_by(&mut self, fun: ReductionFunc) -> Option<Self::Item> {
142 match fun {
143 ReductionFunc::Min => self.reduce(f64::min),
144 ReductionFunc::Max => self.reduce(f64::max),
145 ReductionFunc::Median => self.collect_vec().median(),
146 ReductionFunc::Mean => {
147 let stats: AggStats = self.collect();
148 if stats.mean.is_empty() {
149 None
150 } else {
151 Some(stats.mean())
152 }
153 }
154 }
155 }
156}
157
158impl<T> NumericReductionFunc for T where T: Iterator<Item = f64> {}
159
#[cfg(test)]
mod test {
    use average::assert_almost_eq;

    use super::*;

    /// Builds a [`Stats`] fixture; arguments follow the struct's field order.
    fn make_stats(mean: f64, stddev: f64, mad: f64, len: usize) -> Stats {
        Stats {
            mean,
            stddev,
            mad,
            len,
        }
    }

    #[test]
    fn no_floating_error() {
        let measurements = vec![0.1; 100];
        let stats = aggregate_measurements(measurements.iter());
        assert_eq!(stats.mean, 0.1);
        assert_eq!(stats.len, 100);

        // A plain sum-then-divide mean does not reproduce 0.1 exactly.
        let naive_mean = measurements.iter().sum::<f64>() / 100.0;
        assert_ne!(naive_mean, 0.1);
    }

    #[test]
    fn single_measurement() {
        let stats = aggregate_measurements([1.0].iter());
        assert_eq!(stats.len, 1);
        assert_eq!(stats.mean, 1.0);
        assert_eq!(stats.stddev, 0.0);
    }

    #[test]
    fn no_measurement() {
        let measurements: [f64; 0] = [];
        let stats = aggregate_measurements(measurements.iter());
        assert_eq!(stats.len, 0);
        assert_eq!(stats.mean, 0.0);
        assert_eq!(stats.stddev, 0.0);
    }

    #[test]
    fn z_score_with_zero_stddev() {
        let tail = make_stats(30.0, 0.0, 0.0, 40);
        let head_normal = make_stats(30.0, 0.0, 0.0, 1);
        let head_low = make_stats(20.0, 0.0, 0.0, 1);

        // 0 / 0: identical means with no spread.
        assert!(head_normal.z_score(&tail).is_nan());

        // x / 0: differing means with no spread.
        assert!(head_low.z_score(&tail).is_infinite());
    }

    #[test]
    fn verify_stats() {
        // (input, expected min, expected max, expected median, expected mean)
        type Case = (
            Vec<f64>,
            Option<f64>,
            Option<f64>,
            Option<f64>,
            Option<f64>,
        );
        let cases: Vec<Case> = vec![
            (vec![], None, None, None, None),
            (vec![3.0], Some(3.0), Some(3.0), Some(3.0), Some(3.0)),
            (vec![3.0, 1.0], Some(1.0), Some(3.0), Some(2.0), Some(2.0)),
            (
                vec![2.0, 6.0, 1.0],
                Some(1.0),
                Some(6.0),
                Some(2.0),
                Some(3.0),
            ),
        ];

        for (input, min, max, median, mean) in cases {
            let reduce = |fun: ReductionFunc| input.iter().copied().aggregate_by(fun);

            assert_eq!(min, reduce(ReductionFunc::Min));
            assert_eq!(max, reduce(ReductionFunc::Max));
            assert_eq!(median, reduce(ReductionFunc::Median));
            assert_eq!(mean, reduce(ReductionFunc::Mean));
        }
    }

    #[test]
    fn test_calculate_mad() {
        // Inputs whose MAD is exactly representable.
        let exact_cases: [(&[f64], f64); 4] = [
            (&[], 0.0),
            (&[5.0], 0.0),
            (&[1.0, 3.0], 1.0),
            (&[1.0, 2.0, 3.0], 1.0),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(calculate_mad(input), expected);
        }

        // A single large outlier leaves the MAD at 1.0.
        let with_outlier = [1.0, 2.0, 3.0, 100.0];
        assert_almost_eq!(calculate_mad(&with_outlier), 1.0, 0.001);

        // Even-length input with repeated values.
        let with_repeats = [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0];
        assert_almost_eq!(calculate_mad(&with_repeats), 1.0, 0.001);
    }

    #[test]
    fn test_mad_in_aggregate_measurements() {
        let measurements = [1.0, 2.0, 3.0, 4.0, 5.0];
        let stats = aggregate_measurements(measurements.iter());

        assert_eq!(stats.len, 5);
        assert_eq!(stats.mean, 3.0);
        assert!(stats.mad > 0.0);
        assert!(stats.stddev > 0.0);

        // For this input the MAD is smaller than the standard deviation.
        assert!(stats.mad < stats.stddev);
    }

    #[test]
    fn test_z_score_with_mad() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        let by_stddev = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);
        let by_mad = head.z_score_with_method(&tail, DispersionMethod::MedianAbsoluteDeviation);

        assert_eq!(by_stddev, 1.0);
        assert_eq!(by_mad, 5.0 / 3.0);
        assert_ne!(by_stddev, by_mad);
    }

    #[test]
    fn test_backward_compatibility() {
        let tail = make_stats(30.0, 5.0, 3.0, 40);
        let head = make_stats(35.0, 0.0, 0.0, 1);

        // `z_score` must keep matching the explicit standard-deviation call.
        let legacy = head.z_score(&tail);
        let explicit = head.z_score_with_method(&tail, DispersionMethod::StandardDeviation);

        assert_eq!(legacy, explicit);
    }

    #[test]
    fn test_display_with_mad() {
        let rendered = make_stats(10.0, 2.0, 1.5, 5).to_string();

        for fragment in ["μ: 10", "σ: 2", "MAD: 1.5", "n: 5"] {
            assert!(rendered.contains(fragment));
        }
    }

    #[test]
    fn test_is_significant_boundary() {
        let tail = make_stats(10.0, 2.0, 1.5, 5);

        // |12 - 10| / 2 gives a z-score of exactly 1.0; the comparison is
        // strict, so sigma == 1.0 is not significant.
        let head = make_stats(12.0, 0.0, 0.0, 1);
        let stddev = DispersionMethod::StandardDeviation;
        assert!(!head.is_significant(&tail, 1.0, stddev));
        assert!(head.is_significant(&tail, 0.9, stddev));
        assert!(!head.is_significant(&tail, 1.1, stddev));

        // |11.5 - 10| / 1.5 gives a z-score of exactly 1.0 with MAD.
        let head_mad = make_stats(11.5, 0.0, 0.0, 1);
        let mad = DispersionMethod::MedianAbsoluteDeviation;
        assert!(!head_mad.is_significant(&tail, 1.0, mad));
        assert!(head_mad.is_significant(&tail, 0.9, mad));
        assert!(!head_mad.is_significant(&tail, 1.1, mad));
    }
}