1use crate::profile_compare::types::{
8 BenchmarkProfile, ChangeDirection, CompareError, CompareResult, ComparisonVerdict,
9 EffectMagnitude, EffectSizeResult, MetricComparison, MetricSamples, ProfileComparison,
10 WelchTestResult,
11};
12
/// Tunable parameters controlling how two benchmark profiles are compared.
#[derive(Debug, Clone)]
pub struct CompareConfig {
    /// Confidence level for significance testing (e.g. 0.95 => alpha = 0.05).
    pub confidence_level: f64,
    /// Metrics with fewer samples than this on either side are skipped.
    pub min_samples: usize,
    /// When true, alpha is divided by the number of common metrics tested.
    pub bonferroni_correction: bool,
    /// Absolute percent change at or above which a regression fails the run.
    pub regression_threshold_percent: f64,
    /// Substring patterns identifying metrics where larger values are better.
    pub higher_is_better: Vec<String>,
    /// Substring patterns for metrics where smaller values are better.
    /// NOTE(review): not currently consulted by `compare_metric`, which treats
    /// every metric not matching `higher_is_better` as lower-is-better.
    pub lower_is_better: Vec<String>,
}
29
30impl Default for CompareConfig {
31 fn default() -> Self {
32 Self {
33 confidence_level: 0.95,
34 min_samples: 5,
35 bonferroni_correction: true,
36 regression_threshold_percent: 5.0,
37 higher_is_better: vec![
38 "throughput".to_string(),
39 "ops_per_sec".to_string(),
40 "requests_per_sec".to_string(),
41 ],
42 lower_is_better: vec![
43 "latency".to_string(),
44 "latency_p50".to_string(),
45 "latency_p99".to_string(),
46 "memory".to_string(),
47 "cpu_usage".to_string(),
48 ],
49 }
50 }
51}
52
/// Compares two benchmark profiles metric-by-metric using Welch's t-test,
/// Cohen's d effect sizes, and configurable regression thresholds.
#[derive(Debug)]
pub struct ProfileComparator {
    /// Parameters (alpha, thresholds, metric direction hints) applied to
    /// every comparison performed by this instance.
    config: CompareConfig,
}
59
60impl ProfileComparator {
61 pub fn new(config: CompareConfig) -> Self {
63 Self { config }
64 }
65
66 pub fn compare(
68 &self,
69 baseline: &BenchmarkProfile,
70 comparison: &BenchmarkProfile,
71 ) -> CompareResult<ProfileComparison> {
72 let common_metrics: Vec<String> = baseline
74 .metric_names()
75 .filter(|name| comparison.metrics.contains_key(*name))
76 .cloned()
77 .collect();
78
79 if common_metrics.is_empty() {
80 return Err(CompareError::NoCommonMetrics);
81 }
82
83 let alpha = 1.0 - self.config.confidence_level;
85 let corrected_alpha = if self.config.bonferroni_correction {
86 alpha / common_metrics.len() as f64
87 } else {
88 alpha
89 };
90
91 let mut metric_comparisons = Vec::new();
92 let mut regressions = Vec::new();
93 let mut improvements = Vec::new();
94
95 for metric_name in &common_metrics {
96 let baseline_samples = baseline
97 .get_metric(metric_name)
98 .expect("metric should exist in baseline");
99 let comparison_samples = comparison
100 .get_metric(metric_name)
101 .expect("metric should exist in comparison");
102
103 if baseline_samples.count() < self.config.min_samples {
105 continue;
106 }
107 if comparison_samples.count() < self.config.min_samples {
108 continue;
109 }
110
111 let comparison_result = self.compare_metric(
112 metric_name,
113 baseline_samples,
114 comparison_samples,
115 corrected_alpha,
116 )?;
117
118 if comparison_result.is_regression {
119 regressions.push(metric_name.clone());
120 } else if comparison_result.t_test.significant
121 && comparison_result.direction == ChangeDirection::Improved
122 {
123 improvements.push(metric_name.clone());
124 }
125
126 metric_comparisons.push(comparison_result);
127 }
128
129 let verdict = if regressions.is_empty() {
131 ComparisonVerdict::Pass
132 } else {
133 let severe_regression =
135 metric_comparisons
136 .iter()
137 .filter(|m| m.is_regression)
138 .any(|m| {
139 m.effect_size.percent_change.abs()
140 >= self.config.regression_threshold_percent
141 });
142
143 if severe_regression {
144 ComparisonVerdict::Fail
145 } else {
146 ComparisonVerdict::Warning
147 }
148 };
149
150 Ok(ProfileComparison {
151 baseline_name: baseline.name.clone(),
152 comparison_name: comparison.name.clone(),
153 metrics: metric_comparisons,
154 regressions,
155 improvements,
156 verdict,
157 corrected_alpha,
158 })
159 }
160
161 pub(crate) fn compare_metric(
163 &self,
164 name: &str,
165 baseline: &MetricSamples,
166 comparison: &MetricSamples,
167 alpha: f64,
168 ) -> CompareResult<MetricComparison> {
169 if baseline.variance() == 0.0 && comparison.variance() == 0.0 {
171 return Err(CompareError::ZeroVariance {
172 metric: name.to_string(),
173 });
174 }
175
176 let t_test = self.welch_t_test(baseline, comparison, alpha)?;
178
179 let pooled_std = self.pooled_std(baseline, comparison);
181 let cohens_d = if pooled_std > 0.0 {
182 (comparison.mean() - baseline.mean()) / pooled_std
183 } else {
184 0.0
185 };
186
187 let percent_change = if baseline.mean().abs() > 1e-10 {
188 ((comparison.mean() - baseline.mean()) / baseline.mean()) * 100.0
189 } else {
190 0.0
191 };
192
193 let effect_size = EffectSizeResult {
194 cohens_d,
195 magnitude: EffectMagnitude::from_cohens_d(cohens_d),
196 percent_change,
197 };
198
199 let higher_is_better = self
201 .config
202 .higher_is_better
203 .iter()
204 .any(|m| name.contains(m));
205
206 let direction = if !t_test.significant {
207 ChangeDirection::NoChange
208 } else if higher_is_better {
209 if comparison.mean() > baseline.mean() {
210 ChangeDirection::Improved
211 } else {
212 ChangeDirection::Regressed
213 }
214 } else {
215 if comparison.mean() < baseline.mean() {
217 ChangeDirection::Improved
218 } else {
219 ChangeDirection::Regressed
220 }
221 };
222
223 let is_regression = direction == ChangeDirection::Regressed;
224
225 let (ci_lower, ci_upper) = self.confidence_interval(baseline, comparison, alpha);
227
228 Ok(MetricComparison {
229 name: name.to_string(),
230 baseline_mean: baseline.mean(),
231 baseline_std: baseline.std_dev(),
232 comparison_mean: comparison.mean(),
233 comparison_std: comparison.std_dev(),
234 t_test,
235 effect_size,
236 direction,
237 is_regression,
238 ci_lower,
239 ci_upper,
240 })
241 }
242
243 fn welch_t_test(
245 &self,
246 a: &MetricSamples,
247 b: &MetricSamples,
248 alpha: f64,
249 ) -> CompareResult<WelchTestResult> {
250 let n1 = a.count() as f64;
251 let n2 = b.count() as f64;
252 let var1 = a.variance();
253 let var2 = b.variance();
254
255 let mean_diff = b.mean() - a.mean();
257 let se = ((var1 / n1) + (var2 / n2)).sqrt();
258
259 let t_statistic = if se > 1e-10 { mean_diff / se } else { 0.0 };
260
261 let v1 = var1 / n1;
263 let v2 = var2 / n2;
264 let df = if (v1 + v2) > 1e-10 {
265 (v1 + v2).powi(2) / (v1.powi(2) / (n1 - 1.0) + v2.powi(2) / (n2 - 1.0))
266 } else {
267 n1 + n2 - 2.0
268 };
269
270 let p_value = self.t_distribution_p_value(t_statistic.abs(), df);
272
273 let significant = p_value < alpha;
274
275 Ok(WelchTestResult {
276 t_statistic,
277 degrees_of_freedom: df,
278 p_value,
279 significant,
280 confidence_level: 1.0 - alpha,
281 })
282 }
283
284 fn t_distribution_p_value(&self, t: f64, df: f64) -> f64 {
286 if df <= 0.0 {
290 return 1.0;
291 }
292
293 let x = df / (df + t * t);
294
295 if df > 100.0 {
298 let z = t;
300 2.0 * (1.0 - self.normal_cdf(z.abs()))
301 } else {
302 let a = df / 2.0;
304 let b = 0.5;
305 self.incomplete_beta(x, a, b)
306 }
307 }
308
    /// Standard normal CDF, using the Abramowitz & Stegun 7.1.26 rational
    /// polynomial approximation of erf (absolute error on the order of 1e-7).
    fn normal_cdf(&self, x: f64) -> f64 {
        // Polynomial coefficients from A&S 7.1.26.
        let a1 = 0.254829592;
        let a2 = -0.284496736;
        let a3 = 1.421413741;
        let a4 = -1.453152027;
        let a5 = 1.061405429;
        let p = 0.3275911;

        // Exploit symmetry: evaluate on |x|, fold the sign back in at the end.
        let sign = if x < 0.0 { -1.0 } else { 1.0 };
        let x = x.abs();

        let t = 1.0 / (1.0 + p * x);
        // Horner evaluation of the degree-5 polynomial times exp(-x^2 / 2).
        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x / 2.0).exp();

        0.5 * (1.0 + sign * y)
    }
327
328 fn incomplete_beta(&self, x: f64, a: f64, b: f64) -> f64 {
330 if !(0.0..=1.0).contains(&x) {
332 return 0.0;
333 }
334 if x == 0.0 {
335 return 0.0;
336 }
337 if x == 1.0 {
338 return 1.0;
339 }
340
341 let bt = if x == 0.0 || x == 1.0 {
343 0.0
344 } else {
345 (self.ln_gamma(a + b) - self.ln_gamma(a) - self.ln_gamma(b)
346 + a * x.ln()
347 + b * (1.0 - x).ln())
348 .exp()
349 };
350
351 if x < (a + 1.0) / (a + b + 2.0) {
352 bt * self.beta_cf(x, a, b) / a
353 } else {
354 1.0 - bt * self.beta_cf(1.0 - x, b, a) / b
355 }
356 }
357
358 fn beta_cf(&self, x: f64, a: f64, b: f64) -> f64 {
360 let max_iter = 100;
361 let eps: f64 = 1e-10;
362
363 let mut c: f64 = 1.0;
364 let mut d: f64 = 1.0 / (1.0 - (a + b) * x / (a + 1.0)).max(eps);
365 let mut h: f64 = d;
366
367 for m in 1..=max_iter {
368 let m = m as f64;
369
370 let aa = m * (b - m) * x / ((a + 2.0 * m - 1.0) * (a + 2.0 * m));
372 d = 1.0 / (1.0 + aa * d).max(eps);
373 c = 1.0 + aa / c.max(eps);
374 h *= d * c;
375
376 let aa = -(a + m) * (a + b + m) * x / ((a + 2.0 * m) * (a + 2.0 * m + 1.0));
378 d = 1.0 / (1.0 + aa * d).max(eps);
379 c = 1.0 + aa / c.max(eps);
380 let del = d * c;
381 h *= del;
382
383 if (del - 1.0).abs() < eps {
384 break;
385 }
386 }
387
388 h
389 }
390
391 #[allow(clippy::excessive_precision)]
393 fn ln_gamma(&self, x: f64) -> f64 {
394 if x <= 0.0 {
395 return f64::INFINITY;
396 }
397
398 let g = 7;
400 let c = [
401 0.99999999999980993,
402 676.5203681218851,
403 -1259.1392167224028,
404 771.32342877765313,
405 -176.61502916214059,
406 12.507343278686905,
407 -0.13857109526572012,
408 9.9843695780195716e-6,
409 1.5056327351493116e-7,
410 ];
411
412 if x < 0.5 {
413 std::f64::consts::PI.ln()
414 - (std::f64::consts::PI * x).sin().ln()
415 - self.ln_gamma(1.0 - x)
416 } else {
417 let x = x - 1.0;
418 let mut a = c[0];
419 for i in 1..=g {
420 a += c[i] / (x + i as f64);
421 }
422 let t = x + g as f64 + 0.5;
423 0.5 * (2.0 * std::f64::consts::PI).ln() + (t - 0.5) * t.ln() - t + a.ln()
424 }
425 }
426
427 fn pooled_std(&self, a: &MetricSamples, b: &MetricSamples) -> f64 {
429 let n1 = a.count() as f64;
430 let n2 = b.count() as f64;
431
432 if n1 + n2 <= 2.0 {
433 return 0.0;
434 }
435
436 let pooled_var = ((n1 - 1.0) * a.variance() + (n2 - 1.0) * b.variance()) / (n1 + n2 - 2.0);
437
438 pooled_var.sqrt()
439 }
440
441 fn confidence_interval(&self, a: &MetricSamples, b: &MetricSamples, alpha: f64) -> (f64, f64) {
443 let mean_diff = b.mean() - a.mean();
444 let se = ((a.variance() / a.count() as f64) + (b.variance() / b.count() as f64)).sqrt();
445
446 let z = self.normal_quantile(1.0 - alpha / 2.0);
448 let margin = z * se;
449
450 (mean_diff - margin, mean_diff + margin)
451 }
452
453 pub(crate) fn normal_quantile(&self, p: f64) -> f64 {
455 if p <= 0.0 {
457 return f64::NEG_INFINITY;
458 }
459 if p >= 1.0 {
460 return f64::INFINITY;
461 }
462 if p == 0.5 {
463 return 0.0;
464 }
465
466 let t = if p < 0.5 {
467 (-2.0 * p.ln()).sqrt()
468 } else {
469 (-2.0 * (1.0 - p).ln()).sqrt()
470 };
471
472 let c0 = 2.515517;
473 let c1 = 0.802853;
474 let c2 = 0.010328;
475 let d1 = 1.432788;
476 let d2 = 0.189269;
477 let d3 = 0.001308;
478
479 let x = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
480
481 if p < 0.5 {
482 -x
483 } else {
484 x
485 }
486 }
487
    /// Returns the configuration this comparator was constructed with.
    pub fn config(&self) -> &CompareConfig {
        &self.config
    }
492}