1use crate::stats::{mann_whitney_u, summarize_samples, welch_t};
16use crate::types::{
17 BenchDiff, BenchRun, CompareReport, CompareSide, CompareSummary, NeutralReason, RunReport,
18 Verdict,
19};
20use std::collections::{BTreeSet, HashMap, HashSet};
21
/// Tunable thresholds that decide how a base/head benchmark pair is classified.
#[derive(Clone, Copy, Debug)]
pub struct CompareOptions {
    /// Minimum absolute relative change of the median for a difference to count as
    /// meaningful. It is compared against a fractional delta (`(head - base) / base`),
    /// so `0.05` means 5%.
    pub threshold_pct: f64,
    /// Significance level: a difference is treated as statistically significant when the
    /// Mann-Whitney U p-value is strictly below this value.
    pub alpha: f64,
    /// Coefficient-of-variation cut-off. A pair whose larger CV exceeds this value is
    /// reported as too noisy unless the median delta is at least twice that CV.
    pub noisy_cv_threshold: f64,
}
37
38impl Default for CompareOptions {
39 fn default() -> Self {
40 Self {
41 threshold_pct: 0.05,
42 alpha: 0.05,
43 noisy_cv_threshold: 0.25,
44 }
45 }
46}
47
48pub fn compare_reports(
56 base: &RunReport,
57 head: &RunReport,
58 options: CompareOptions,
59) -> CompareReport {
60 let mut base_by_name: HashMap<&str, BenchRun> = HashMap::new();
61 for r in &base.runs {
62 base_by_name.insert(r.name.as_str(), normalize(r));
63 }
64 let mut head_by_name: HashMap<&str, BenchRun> = HashMap::new();
65 for r in &head.runs {
66 head_by_name.insert(r.name.as_str(), normalize(r));
67 }
68 let mut all_names: BTreeSet<&str> = BTreeSet::new();
69 all_names.extend(base_by_name.keys().copied());
70 all_names.extend(head_by_name.keys().copied());
71
72 let skipped_basenames: Option<HashSet<String>> = head.affected_scope.as_ref().map(|a| {
73 a.skipped_bench_files
74 .iter()
75 .map(|p| basename_of(p).to_string())
76 .collect()
77 });
78
79 let mut diffs: Vec<BenchDiff> = Vec::with_capacity(all_names.len());
80 for name in all_names {
81 let b = base_by_name.get(name).cloned();
82 let h = head_by_name.get(name).cloned();
83 diffs.push(diff_one(
84 name.to_string(),
85 b,
86 h,
87 options,
88 skipped_basenames.as_ref(),
89 ));
90 }
91
92 let summary = summarize(&diffs);
93
94 CompareReport {
95 base: CompareSide {
96 r#ref: base.r#ref.clone(),
97 service: base.service.clone(),
98 },
99 head: CompareSide {
100 r#ref: head.r#ref.clone(),
101 service: head.service.clone(),
102 },
103 language: head.language,
104 threshold_pct: options.threshold_pct,
105 alpha: options.alpha,
106 noisy_cv_threshold: options.noisy_cv_threshold,
107 diffs,
108 summary,
109 affected_scope: head.affected_scope.clone(),
110 }
111}
112
113pub fn has_regressions(cmp: &CompareReport) -> bool {
116 cmp.summary.regressions > 0
117}
118
119fn summarize(diffs: &[BenchDiff]) -> CompareSummary {
120 let mut s = CompareSummary::default();
121 for d in diffs {
122 match d.verdict {
123 Verdict::Regression => s.regressions += 1,
124 Verdict::Improvement => s.improvements += 1,
125 Verdict::Neutral => s.neutrals += 1,
126 Verdict::New => s.new += 1,
127 Verdict::Removed => s.removed += 1,
128 Verdict::OutOfScope => s.out_of_scope += 1,
129 }
130 }
131 s
132}
133
134fn normalize(r: &BenchRun) -> BenchRun {
140 if r.samples.is_empty() {
141 return r.clone();
142 }
143 let suspect = (r.mean == 0.0 && r.median == 0.0)
147 || (r.elapsed_ns == 0.0 && !r.samples.is_empty())
148 || (r.batch_size == 0);
149 if !suspect {
150 return r.clone();
151 }
152 let s = summarize_samples(&r.samples);
153 BenchRun {
154 name: r.name.clone(),
155 file: r.file.clone(),
156 iterations: r.samples.len() as u32,
157 batch_size: if r.batch_size == 0 { 1 } else { r.batch_size },
158 elapsed_ns: if r.elapsed_ns == 0.0 {
159 r.samples.iter().sum()
160 } else {
161 r.elapsed_ns
162 },
163 samples: r.samples.clone(),
164 mean: s.mean,
165 median: s.median,
166 trimmed_mean: s.trimmed_mean,
167 stddev: s.stddev,
168 cv: s.cv,
169 mad: s.mad,
170 iqr: s.iqr,
171 min: s.min,
172 max: s.max,
173 p50: s.p50,
174 p95: s.p95,
175 p99: s.p99,
176 metrics: r.metrics.clone(),
177 tags: r.tags.clone(),
178 }
179}
180
/// Returns the final component of `path`, treating both `/` and `\` as separators.
///
/// A path without any separator is returned unchanged; a path ending in a separator
/// yields the empty string.
fn basename_of(path: &str) -> &str {
    // `rsplit` always yields at least one item, so the fallback is never taken in practice.
    path.rsplit(['/', '\\']).next().unwrap_or(path)
}
185
186fn diff_one(
187 name: String,
188 base: Option<BenchRun>,
189 head: Option<BenchRun>,
190 opts: CompareOptions,
191 skipped_basenames: Option<&HashSet<String>>,
192) -> BenchDiff {
193 match (base, head) {
194 (None, Some(h)) => BenchDiff {
195 name,
196 base: None,
197 head: Some(h),
198 delta_pct: None,
199 mean_delta_pct: None,
200 p_value: None,
201 p_value_welch: None,
202 max_cv: None,
203 verdict: Verdict::New,
204 neutral_reason: None,
205 },
206 (Some(b), None) => {
207 let in_skipped = skipped_basenames
211 .map(|set| set.contains(basename_of(&b.file)))
212 .unwrap_or(false);
213 let verdict = if in_skipped {
214 Verdict::OutOfScope
215 } else {
216 Verdict::Removed
217 };
218 BenchDiff {
219 name,
220 base: Some(b),
221 head: None,
222 delta_pct: None,
223 mean_delta_pct: None,
224 p_value: None,
225 p_value_welch: None,
226 max_cv: None,
227 verdict,
228 neutral_reason: None,
229 }
230 }
231 (None, None) => unreachable!("at least one side must be present"),
232 (Some(b), Some(h)) => {
233 let delta_pct = if b.median == 0.0 {
234 0.0
235 } else {
236 (h.median - b.median) / b.median
237 };
238 let mean_delta_pct = if b.mean == 0.0 {
239 0.0
240 } else {
241 (h.mean - b.mean) / b.mean
242 };
243 let mw = mann_whitney_u(&h.samples, &b.samples);
244 let welch = welch_t(&h.samples, &b.samples);
245 let max_cv = b.cv.max(h.cv);
246
247 let significant = mw.p < opts.alpha;
248 let meaningful = delta_pct.abs() >= opts.threshold_pct;
249 let noisy = max_cv > opts.noisy_cv_threshold && delta_pct.abs() < 2.0 * max_cv;
250
251 let (verdict, neutral_reason) = if noisy {
252 (Verdict::Neutral, Some(NeutralReason::TooNoisy))
253 } else if significant && meaningful && delta_pct > 0.0 {
254 (Verdict::Regression, None)
255 } else if significant && meaningful && delta_pct < 0.0 {
256 (Verdict::Improvement, None)
257 } else if !meaningful {
258 (Verdict::Neutral, Some(NeutralReason::BelowThreshold))
259 } else {
260 (Verdict::Neutral, Some(NeutralReason::NotSignificant))
261 };
262
263 BenchDiff {
264 name,
265 base: Some(b),
266 head: Some(h),
267 delta_pct: Some(delta_pct),
268 mean_delta_pct: Some(mean_delta_pct),
269 p_value: Some(mw.p),
270 p_value_welch: Some(welch.p),
271 max_cv: Some(max_cv),
272 verdict,
273 neutral_reason,
274 }
275 }
276 }
277}