use crate::stats::{mann_whitney_u, summarize_samples, welch_t};
use crate::types::{
BenchDiff, BenchRun, CompareReport, CompareSide, CompareSummary, NeutralReason, RunReport,
Verdict,
};
use std::collections::{BTreeSet, HashMap, HashSet};
/// Thresholds that control how a base/head benchmark pair is classified.
///
/// All three values are plain fractions, not percentages: `0.05` means 5%.
#[derive(Debug, Clone, Copy)]
pub struct CompareOptions {
    /// Minimum absolute relative change of the median for a difference to
    /// count as meaningful.
    pub threshold_pct: f64,
    /// Significance level; the Mann-Whitney p-value must be strictly below
    /// it for a difference to count as significant.
    pub alpha: f64,
    /// Coefficient-of-variation level above which a pair may be treated as
    /// too noisy to judge.
    pub noisy_cv_threshold: f64,
}

impl Default for CompareOptions {
    /// Returns the default thresholds: a 5% change threshold, a 0.05
    /// significance level, and a 0.25 noise cut-off.
    fn default() -> Self {
        CompareOptions {
            alpha: 0.05,
            threshold_pct: 0.05,
            noisy_cv_threshold: 0.25,
        }
    }
}
/// Compares a base report against a head report, benchmark by benchmark.
///
/// Runs are matched by name. Every name present in either report produces
/// exactly one [`BenchDiff`], and the diffs are emitted in ascending name
/// order. Each run is passed through `normalize` before being compared.
/// If a report contains several runs with the same name, the last one wins.
///
/// # Parameters
/// - `base`: the reference report.
/// - `head`: the report under test. Its `language` and `affected_scope` are
///   copied into the result, and its skipped bench files decide whether a
///   benchmark missing from `head` is classified as out of scope.
/// - `options`: thresholds used for classification; they are echoed back in
///   the returned report.
///
/// # Returns
/// A [`CompareReport`] holding both sides' identity, the options used, the
/// per-benchmark diffs and their verdict counts.
pub fn compare_reports(
    base: &RunReport,
    head: &RunReport,
    options: CompareOptions,
) -> CompareReport {
    // Index the normalized runs by name. Collecting into a HashMap inserts
    // in iteration order, so a later duplicate replaces an earlier one.
    let mut base_by_name: HashMap<&str, BenchRun> = base
        .runs
        .iter()
        .map(|run| (run.name.as_str(), normalize(run)))
        .collect();
    let mut head_by_name: HashMap<&str, BenchRun> = head
        .runs
        .iter()
        .map(|run| (run.name.as_str(), normalize(run)))
        .collect();

    // A BTreeSet gives the union of both name sets in sorted order, which
    // keeps the output deterministic. The names borrow from the reports,
    // not from the maps, so the maps stay free to be mutated below.
    let all_names: BTreeSet<&str> = base_by_name
        .keys()
        .chain(head_by_name.keys())
        .copied()
        .collect();

    // Basenames of the bench files that the head run skipped, if the head
    // report carries an affected scope at all.
    let skipped_basenames: Option<HashSet<String>> = head.affected_scope.as_ref().map(|a| {
        a.skipped_bench_files
            .iter()
            .map(|p| basename_of(p).to_string())
            .collect()
    });

    // Each name is visited exactly once and the maps are discarded
    // afterwards, so move the runs out instead of cloning them a second
    // time (a clone would copy every samples vector again).
    let diffs: Vec<BenchDiff> = all_names
        .into_iter()
        .map(|name| {
            diff_one(
                name.to_string(),
                base_by_name.remove(name),
                head_by_name.remove(name),
                options,
                skipped_basenames.as_ref(),
            )
        })
        .collect();

    let summary = summarize(&diffs);
    CompareReport {
        base: CompareSide {
            r#ref: base.r#ref.clone(),
            service: base.service.clone(),
        },
        head: CompareSide {
            r#ref: head.r#ref.clone(),
            service: head.service.clone(),
        },
        language: head.language,
        threshold_pct: options.threshold_pct,
        alpha: options.alpha,
        noisy_cv_threshold: options.noisy_cv_threshold,
        diffs,
        summary,
        affected_scope: head.affected_scope.clone(),
    }
}
/// Reports whether the comparison recorded at least one regression.
///
/// Only the `regressions` counter of the report's summary is consulted.
pub fn has_regressions(cmp: &CompareReport) -> bool {
    let regression_count = cmp.summary.regressions;
    regression_count > 0
}
/// Tallies how many diffs fall under each verdict.
///
/// Starts from `CompareSummary::default()` and bumps exactly one counter
/// per diff, so the counters add up to `diffs.len()`.
fn summarize(diffs: &[BenchDiff]) -> CompareSummary {
    diffs
        .iter()
        .fold(CompareSummary::default(), |mut tally, diff| {
            // Exhaustive on purpose: adding a verdict variant must fail to
            // compile here until it gets its own counter.
            match &diff.verdict {
                Verdict::Regression => tally.regressions += 1,
                Verdict::Improvement => tally.improvements += 1,
                Verdict::Neutral => tally.neutrals += 1,
                Verdict::New => tally.new += 1,
                Verdict::Removed => tally.removed += 1,
                Verdict::OutOfScope => tally.out_of_scope += 1,
            }
            tally
        })
}
/// Returns a copy of `r`, recomputing its statistics from the raw samples
/// when the stored ones look unpopulated.
///
/// A run is rebuilt only if it has samples and at least one of these holds:
/// - both `mean` and `median` are zero,
/// - `elapsed_ns` is zero,
/// - `batch_size` is zero.
///
/// In every other case (including a run without samples) the result is a
/// plain clone of `r`.
///
/// When rebuilding, all summary statistics come from `summarize_samples`,
/// `iterations` becomes the sample count, a zero `batch_size` becomes 1 and
/// a zero `elapsed_ns` becomes the sum of the samples. Name, file, samples,
/// metrics and tags are carried over unchanged.
fn normalize(r: &BenchRun) -> BenchRun {
    let zeroed_stats = r.mean == 0.0 && r.median == 0.0;
    let missing_elapsed = r.elapsed_ns == 0.0;
    let missing_batch = r.batch_size == 0;
    let needs_rebuild =
        !r.samples.is_empty() && (zeroed_stats || missing_elapsed || missing_batch);

    let mut fixed = r.clone();
    if !needs_rebuild {
        return fixed;
    }

    let stats = summarize_samples(&r.samples);

    // Bookkeeping fields: only the ones that were missing are replaced,
    // except `iterations`, which always follows the sample count.
    fixed.iterations = r.samples.len() as u32;
    if missing_batch {
        fixed.batch_size = 1;
    }
    if missing_elapsed {
        fixed.elapsed_ns = r.samples.iter().sum();
    }

    // Summary statistics are always taken from the recomputed values.
    fixed.mean = stats.mean;
    fixed.median = stats.median;
    fixed.trimmed_mean = stats.trimmed_mean;
    fixed.stddev = stats.stddev;
    fixed.cv = stats.cv;
    fixed.mad = stats.mad;
    fixed.iqr = stats.iqr;
    fixed.min = stats.min;
    fixed.max = stats.max;
    fixed.p50 = stats.p50;
    fixed.p95 = stats.p95;
    fixed.p99 = stats.p99;
    fixed
}
/// Returns the part of `path` after its last `/` or `\` separator.
///
/// A path without any separator is returned unchanged; a path that ends in
/// a separator yields the empty string.
fn basename_of(path: &str) -> &str {
    // `rsplit` always yields at least one piece, so the fallback is only a
    // formality; the first piece from the right is the final component.
    path.rsplit(['/', '\\']).next().unwrap_or(path)
}
/// Builds the diff entry for a single benchmark name.
///
/// # Classification
/// - Present only in `head`: `Verdict::New`.
/// - Present only in `base`: `Verdict::OutOfScope` when the basename of the
///   base run's file is in `skipped_basenames`, otherwise `Verdict::Removed`.
/// - Present in both: the relative change of the median (and, for
///   reporting, of the mean) is computed against the base value, with a
///   zero base value giving a change of `0.0`. The verdict is then:
///   1. `Neutral`/`TooNoisy` if the larger of the two CVs exceeds
///      `opts.noisy_cv_threshold` and the absolute change is below twice
///      that CV;
///   2. `Neutral`/`BelowThreshold` if the absolute change is below
///      `opts.threshold_pct`;
///   3. `Regression` (positive change) or `Improvement` (negative change)
///      if the Mann-Whitney p-value is below `opts.alpha`;
///   4. `Neutral`/`NotSignificant` otherwise.
///
/// The Welch p-value is only reported; it does not affect the verdict.
///
/// # Panics
/// Panics if both `base` and `head` are `None`.
fn diff_one(
    name: String,
    base: Option<BenchRun>,
    head: Option<BenchRun>,
    opts: CompareOptions,
    skipped_basenames: Option<&HashSet<String>>,
) -> BenchDiff {
    // Deal with the one-sided cases first; only a full pair needs statistics.
    let (b, h) = match (base, head) {
        (Some(b), Some(h)) => (b, h),
        (None, Some(added)) => {
            return BenchDiff {
                name,
                base: None,
                head: Some(added),
                delta_pct: None,
                mean_delta_pct: None,
                p_value: None,
                p_value_welch: None,
                max_cv: None,
                verdict: Verdict::New,
                neutral_reason: None,
            };
        }
        (Some(gone), None) => {
            // A benchmark missing from head is not "removed" if its file
            // was skipped by the head run.
            let verdict = match skipped_basenames {
                Some(skipped) if skipped.contains(basename_of(&gone.file)) => {
                    Verdict::OutOfScope
                }
                _ => Verdict::Removed,
            };
            return BenchDiff {
                name,
                base: Some(gone),
                head: None,
                delta_pct: None,
                mean_delta_pct: None,
                p_value: None,
                p_value_welch: None,
                max_cv: None,
                verdict,
                neutral_reason: None,
            };
        }
        (None, None) => unreachable!("at least one side must be present"),
    };

    // Relative change against the base value; a zero base cannot be divided
    // by, so it is reported as no change.
    let delta_pct = if b.median != 0.0 {
        (h.median - b.median) / b.median
    } else {
        0.0
    };
    let mean_delta_pct = if b.mean != 0.0 {
        (h.mean - b.mean) / b.mean
    } else {
        0.0
    };

    let mw = mann_whitney_u(&h.samples, &b.samples);
    let welch = welch_t(&h.samples, &b.samples);
    let max_cv = b.cv.max(h.cv);

    let magnitude = delta_pct.abs();
    let noisy = max_cv > opts.noisy_cv_threshold && magnitude < 2.0 * max_cv;
    let meaningful = magnitude >= opts.threshold_pct;
    let significant = mw.p < opts.alpha;

    let (verdict, neutral_reason) = match (noisy, meaningful, significant) {
        (true, _, _) => (Verdict::Neutral, Some(NeutralReason::TooNoisy)),
        (false, false, _) => (Verdict::Neutral, Some(NeutralReason::BelowThreshold)),
        (false, true, true) if delta_pct > 0.0 => (Verdict::Regression, None),
        (false, true, true) if delta_pct < 0.0 => (Verdict::Improvement, None),
        // Meaningful but either not significant, or a change of exactly zero.
        (false, true, _) => (Verdict::Neutral, Some(NeutralReason::NotSignificant)),
    };

    BenchDiff {
        name,
        base: Some(b),
        head: Some(h),
        delta_pct: Some(delta_pct),
        mean_delta_pct: Some(mean_delta_pct),
        p_value: Some(mw.p),
        p_value_welch: Some(welch.p),
        max_cv: Some(max_cv),
        verdict,
        neutral_reason,
    }
}