// cbtop/profile_compare/comparator.rs
1//! Profile comparator with Welch's t-test and statistical analysis (PMAT-045)
2//!
3//! Contains `CompareConfig` and `ProfileComparator` with all statistical
4//! methods: Welch's t-test, Cohen's d, confidence intervals, and
5//! distribution approximations.
6
7use crate::profile_compare::types::{
8    BenchmarkProfile, ChangeDirection, CompareError, CompareResult, ComparisonVerdict,
9    EffectMagnitude, EffectSizeResult, MetricComparison, MetricSamples, ProfileComparison,
10    WelchTestResult,
11};
12
/// Configuration for profile comparison.
///
/// Controls the significance level, sample requirements, the fail/warn
/// threshold for regressions, and how metric names are interpreted
/// (throughput-like vs latency-like).
#[derive(Debug, Clone)]
pub struct CompareConfig {
    /// Confidence level for significance tests and intervals (default: 0.95).
    pub confidence_level: f64,
    /// Minimum samples required on each side before a metric is compared;
    /// metrics with fewer samples are skipped.
    pub min_samples: usize,
    /// Apply Bonferroni correction (alpha divided by the number of common
    /// metrics) when comparing multiple metrics.
    pub bonferroni_correction: bool,
    /// Percent change at or above which a significant regression fails the
    /// comparison; smaller significant regressions only warn.
    pub regression_threshold_percent: f64,
    /// Substrings of metric names where higher values are better
    /// (throughput-like).
    pub higher_is_better: Vec<String>,
    /// Substrings of metric names where lower values are better
    /// (latency-like).
    // NOTE(review): this list is not consulted by the comparison code in
    // this file — any name not matching `higher_is_better` is treated as
    // lower-is-better. Confirm whether other call sites read it.
    pub lower_is_better: Vec<String>,
}
29
30impl Default for CompareConfig {
31    fn default() -> Self {
32        Self {
33            confidence_level: 0.95,
34            min_samples: 5,
35            bonferroni_correction: true,
36            regression_threshold_percent: 5.0,
37            higher_is_better: vec![
38                "throughput".to_string(),
39                "ops_per_sec".to_string(),
40                "requests_per_sec".to_string(),
41            ],
42            lower_is_better: vec![
43                "latency".to_string(),
44                "latency_p50".to_string(),
45                "latency_p99".to_string(),
46                "memory".to_string(),
47                "cpu_usage".to_string(),
48            ],
49        }
50    }
51}
52
/// Profile comparator.
///
/// Wraps a [`CompareConfig`] and performs statistical comparison of two
/// benchmark profiles (Welch's t-test, Cohen's d, confidence intervals).
#[derive(Debug)]
pub struct ProfileComparator {
    /// Comparison configuration supplied at construction.
    config: CompareConfig,
}
59
60impl ProfileComparator {
61    /// Create a new profile comparator
62    pub fn new(config: CompareConfig) -> Self {
63        Self { config }
64    }
65
66    /// Compare two benchmark profiles
67    pub fn compare(
68        &self,
69        baseline: &BenchmarkProfile,
70        comparison: &BenchmarkProfile,
71    ) -> CompareResult<ProfileComparison> {
72        // Find common metrics
73        let common_metrics: Vec<String> = baseline
74            .metric_names()
75            .filter(|name| comparison.metrics.contains_key(*name))
76            .cloned()
77            .collect();
78
79        if common_metrics.is_empty() {
80            return Err(CompareError::NoCommonMetrics);
81        }
82
83        // Calculate corrected alpha for multiple comparisons
84        let alpha = 1.0 - self.config.confidence_level;
85        let corrected_alpha = if self.config.bonferroni_correction {
86            alpha / common_metrics.len() as f64
87        } else {
88            alpha
89        };
90
91        let mut metric_comparisons = Vec::new();
92        let mut regressions = Vec::new();
93        let mut improvements = Vec::new();
94
95        for metric_name in &common_metrics {
96            let baseline_samples = baseline
97                .get_metric(metric_name)
98                .expect("metric should exist in baseline");
99            let comparison_samples = comparison
100                .get_metric(metric_name)
101                .expect("metric should exist in comparison");
102
103            // Check minimum samples
104            if baseline_samples.count() < self.config.min_samples {
105                continue;
106            }
107            if comparison_samples.count() < self.config.min_samples {
108                continue;
109            }
110
111            let comparison_result = self.compare_metric(
112                metric_name,
113                baseline_samples,
114                comparison_samples,
115                corrected_alpha,
116            )?;
117
118            if comparison_result.is_regression {
119                regressions.push(metric_name.clone());
120            } else if comparison_result.t_test.significant
121                && comparison_result.direction == ChangeDirection::Improved
122            {
123                improvements.push(metric_name.clone());
124            }
125
126            metric_comparisons.push(comparison_result);
127        }
128
129        // Determine verdict
130        let verdict = if regressions.is_empty() {
131            ComparisonVerdict::Pass
132        } else {
133            // Check if any regression exceeds threshold
134            let severe_regression =
135                metric_comparisons
136                    .iter()
137                    .filter(|m| m.is_regression)
138                    .any(|m| {
139                        m.effect_size.percent_change.abs()
140                            >= self.config.regression_threshold_percent
141                    });
142
143            if severe_regression {
144                ComparisonVerdict::Fail
145            } else {
146                ComparisonVerdict::Warning
147            }
148        };
149
150        Ok(ProfileComparison {
151            baseline_name: baseline.name.clone(),
152            comparison_name: comparison.name.clone(),
153            metrics: metric_comparisons,
154            regressions,
155            improvements,
156            verdict,
157            corrected_alpha,
158        })
159    }
160
161    /// Compare a single metric between two profiles
162    pub(crate) fn compare_metric(
163        &self,
164        name: &str,
165        baseline: &MetricSamples,
166        comparison: &MetricSamples,
167        alpha: f64,
168    ) -> CompareResult<MetricComparison> {
169        // Check for zero variance
170        if baseline.variance() == 0.0 && comparison.variance() == 0.0 {
171            return Err(CompareError::ZeroVariance {
172                metric: name.to_string(),
173            });
174        }
175
176        // Perform Welch's t-test
177        let t_test = self.welch_t_test(baseline, comparison, alpha)?;
178
179        // Calculate effect size (Cohen's d with pooled std)
180        let pooled_std = self.pooled_std(baseline, comparison);
181        let cohens_d = if pooled_std > 0.0 {
182            (comparison.mean() - baseline.mean()) / pooled_std
183        } else {
184            0.0
185        };
186
187        let percent_change = if baseline.mean().abs() > 1e-10 {
188            ((comparison.mean() - baseline.mean()) / baseline.mean()) * 100.0
189        } else {
190            0.0
191        };
192
193        let effect_size = EffectSizeResult {
194            cohens_d,
195            magnitude: EffectMagnitude::from_cohens_d(cohens_d),
196            percent_change,
197        };
198
199        // Determine direction and regression
200        let higher_is_better = self
201            .config
202            .higher_is_better
203            .iter()
204            .any(|m| name.contains(m));
205
206        let direction = if !t_test.significant {
207            ChangeDirection::NoChange
208        } else if higher_is_better {
209            if comparison.mean() > baseline.mean() {
210                ChangeDirection::Improved
211            } else {
212                ChangeDirection::Regressed
213            }
214        } else {
215            // Lower is better (latency-like)
216            if comparison.mean() < baseline.mean() {
217                ChangeDirection::Improved
218            } else {
219                ChangeDirection::Regressed
220            }
221        };
222
223        let is_regression = direction == ChangeDirection::Regressed;
224
225        // Calculate confidence interval for the difference
226        let (ci_lower, ci_upper) = self.confidence_interval(baseline, comparison, alpha);
227
228        Ok(MetricComparison {
229            name: name.to_string(),
230            baseline_mean: baseline.mean(),
231            baseline_std: baseline.std_dev(),
232            comparison_mean: comparison.mean(),
233            comparison_std: comparison.std_dev(),
234            t_test,
235            effect_size,
236            direction,
237            is_regression,
238            ci_lower,
239            ci_upper,
240        })
241    }
242
243    /// Perform Welch's t-test
244    fn welch_t_test(
245        &self,
246        a: &MetricSamples,
247        b: &MetricSamples,
248        alpha: f64,
249    ) -> CompareResult<WelchTestResult> {
250        let n1 = a.count() as f64;
251        let n2 = b.count() as f64;
252        let var1 = a.variance();
253        let var2 = b.variance();
254
255        // Calculate t-statistic
256        let mean_diff = b.mean() - a.mean();
257        let se = ((var1 / n1) + (var2 / n2)).sqrt();
258
259        let t_statistic = if se > 1e-10 { mean_diff / se } else { 0.0 };
260
261        // Welch-Satterthwaite degrees of freedom
262        let v1 = var1 / n1;
263        let v2 = var2 / n2;
264        let df = if (v1 + v2) > 1e-10 {
265            (v1 + v2).powi(2) / (v1.powi(2) / (n1 - 1.0) + v2.powi(2) / (n2 - 1.0))
266        } else {
267            n1 + n2 - 2.0
268        };
269
270        // Approximate p-value using t-distribution CDF
271        let p_value = self.t_distribution_p_value(t_statistic.abs(), df);
272
273        let significant = p_value < alpha;
274
275        Ok(WelchTestResult {
276            t_statistic,
277            degrees_of_freedom: df,
278            p_value,
279            significant,
280            confidence_level: 1.0 - alpha,
281        })
282    }
283
284    /// Approximate p-value from t-distribution using approximation
285    fn t_distribution_p_value(&self, t: f64, df: f64) -> f64 {
286        // Use approximation for two-tailed p-value
287        // Based on Hill's approximation for Student's t-distribution
288
289        if df <= 0.0 {
290            return 1.0;
291        }
292
293        let x = df / (df + t * t);
294
295        // Incomplete beta function approximation
296        // For large df, use normal approximation
297        if df > 100.0 {
298            // Normal approximation
299            let z = t;
300            2.0 * (1.0 - self.normal_cdf(z.abs()))
301        } else {
302            // Beta approximation
303            let a = df / 2.0;
304            let b = 0.5;
305            self.incomplete_beta(x, a, b)
306        }
307    }
308
309    /// Normal CDF approximation
310    fn normal_cdf(&self, x: f64) -> f64 {
311        // Abramowitz and Stegun approximation
312        let a1 = 0.254829592;
313        let a2 = -0.284496736;
314        let a3 = 1.421413741;
315        let a4 = -1.453152027;
316        let a5 = 1.061405429;
317        let p = 0.3275911;
318
319        let sign = if x < 0.0 { -1.0 } else { 1.0 };
320        let x = x.abs();
321
322        let t = 1.0 / (1.0 + p * x);
323        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x / 2.0).exp();
324
325        0.5 * (1.0 + sign * y)
326    }
327
328    /// Incomplete beta function approximation
329    fn incomplete_beta(&self, x: f64, a: f64, b: f64) -> f64 {
330        // Simple continued fraction approximation
331        if !(0.0..=1.0).contains(&x) {
332            return 0.0;
333        }
334        if x == 0.0 {
335            return 0.0;
336        }
337        if x == 1.0 {
338            return 1.0;
339        }
340
341        // Use continued fraction
342        let bt = if x == 0.0 || x == 1.0 {
343            0.0
344        } else {
345            (self.ln_gamma(a + b) - self.ln_gamma(a) - self.ln_gamma(b)
346                + a * x.ln()
347                + b * (1.0 - x).ln())
348            .exp()
349        };
350
351        if x < (a + 1.0) / (a + b + 2.0) {
352            bt * self.beta_cf(x, a, b) / a
353        } else {
354            1.0 - bt * self.beta_cf(1.0 - x, b, a) / b
355        }
356    }
357
358    /// Beta continued fraction
359    fn beta_cf(&self, x: f64, a: f64, b: f64) -> f64 {
360        let max_iter = 100;
361        let eps: f64 = 1e-10;
362
363        let mut c: f64 = 1.0;
364        let mut d: f64 = 1.0 / (1.0 - (a + b) * x / (a + 1.0)).max(eps);
365        let mut h: f64 = d;
366
367        for m in 1..=max_iter {
368            let m = m as f64;
369
370            // Even step
371            let aa = m * (b - m) * x / ((a + 2.0 * m - 1.0) * (a + 2.0 * m));
372            d = 1.0 / (1.0 + aa * d).max(eps);
373            c = 1.0 + aa / c.max(eps);
374            h *= d * c;
375
376            // Odd step
377            let aa = -(a + m) * (a + b + m) * x / ((a + 2.0 * m) * (a + 2.0 * m + 1.0));
378            d = 1.0 / (1.0 + aa * d).max(eps);
379            c = 1.0 + aa / c.max(eps);
380            let del = d * c;
381            h *= del;
382
383            if (del - 1.0).abs() < eps {
384                break;
385            }
386        }
387
388        h
389    }
390
391    /// Log gamma function approximation (Stirling)
392    #[allow(clippy::excessive_precision)]
393    fn ln_gamma(&self, x: f64) -> f64 {
394        if x <= 0.0 {
395            return f64::INFINITY;
396        }
397
398        // Lanczos approximation
399        let g = 7;
400        let c = [
401            0.99999999999980993,
402            676.5203681218851,
403            -1259.1392167224028,
404            771.32342877765313,
405            -176.61502916214059,
406            12.507343278686905,
407            -0.13857109526572012,
408            9.9843695780195716e-6,
409            1.5056327351493116e-7,
410        ];
411
412        if x < 0.5 {
413            std::f64::consts::PI.ln()
414                - (std::f64::consts::PI * x).sin().ln()
415                - self.ln_gamma(1.0 - x)
416        } else {
417            let x = x - 1.0;
418            let mut a = c[0];
419            for i in 1..=g {
420                a += c[i] / (x + i as f64);
421            }
422            let t = x + g as f64 + 0.5;
423            0.5 * (2.0 * std::f64::consts::PI).ln() + (t - 0.5) * t.ln() - t + a.ln()
424        }
425    }
426
427    /// Calculate pooled standard deviation
428    fn pooled_std(&self, a: &MetricSamples, b: &MetricSamples) -> f64 {
429        let n1 = a.count() as f64;
430        let n2 = b.count() as f64;
431
432        if n1 + n2 <= 2.0 {
433            return 0.0;
434        }
435
436        let pooled_var = ((n1 - 1.0) * a.variance() + (n2 - 1.0) * b.variance()) / (n1 + n2 - 2.0);
437
438        pooled_var.sqrt()
439    }
440
441    /// Calculate confidence interval for the difference in means
442    fn confidence_interval(&self, a: &MetricSamples, b: &MetricSamples, alpha: f64) -> (f64, f64) {
443        let mean_diff = b.mean() - a.mean();
444        let se = ((a.variance() / a.count() as f64) + (b.variance() / b.count() as f64)).sqrt();
445
446        // Use normal approximation for large samples
447        let z = self.normal_quantile(1.0 - alpha / 2.0);
448        let margin = z * se;
449
450        (mean_diff - margin, mean_diff + margin)
451    }
452
453    /// Normal quantile function (inverse CDF) approximation
454    pub(crate) fn normal_quantile(&self, p: f64) -> f64 {
455        // Rational approximation from Abramowitz and Stegun
456        if p <= 0.0 {
457            return f64::NEG_INFINITY;
458        }
459        if p >= 1.0 {
460            return f64::INFINITY;
461        }
462        if p == 0.5 {
463            return 0.0;
464        }
465
466        let t = if p < 0.5 {
467            (-2.0 * p.ln()).sqrt()
468        } else {
469            (-2.0 * (1.0 - p).ln()).sqrt()
470        };
471
472        let c0 = 2.515517;
473        let c1 = 0.802853;
474        let c2 = 0.010328;
475        let d1 = 1.432788;
476        let d2 = 0.189269;
477        let d3 = 0.001308;
478
479        let x = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
480
481        if p < 0.5 {
482            -x
483        } else {
484            x
485        }
486    }
487
488    /// Get configuration
489    pub fn config(&self) -> &CompareConfig {
490        &self.config
491    }
492}