use serde::{Deserialize, Serialize};
/// Outcome of comparing a current sample set against a baseline.
///
/// `RegressionDetector::compare` classifies the bootstrapped confidence interval of the
/// ratio `mean(current) / mean(baseline)`; the `Display` impl renders each variant as an
/// upper-case label.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Verdict {
/// The ratio's confidence interval lies entirely above `1.0 + threshold`
/// (the current mean is higher than the baseline mean).
Regression,
/// The ratio's confidence interval lies entirely below `1.0 - threshold`
/// (the current mean is lower than the baseline mean).
Improvement,
/// Neither bound was cleared, the effect-size gate rejected the change, or the inputs
/// were unusable (an empty slice, or a baseline mean of exactly zero).
NoChange,
}
/// Renders a verdict as its fixed upper-case report label.
impl std::fmt::Display for Verdict {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first so the formatter is touched in exactly one place.
        let label = match self {
            Verdict::Regression => "REGRESSION",
            Verdict::Improvement => "IMPROVED",
            Verdict::NoChange => "NO_CHANGE",
        };
        f.write_str(label)
    }
}
/// Statistics produced by `RegressionDetector::compare` for one baseline/current pair.
///
/// When `compare` bails out early (an empty slice, or a baseline mean of exactly zero) it
/// returns the neutral values `NoChange`, `0.0`, `1.0`, `0.0`, `1.0`, `1.0` for the
/// fields below, in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
/// Classification derived from the ratio confidence interval and the effect-size gate.
pub verdict: Verdict,
/// Relative change of the means in percent: `(mean(current) / mean(baseline) - 1) * 100`.
pub change_pct: f64,
/// Share of pooled bootstrap resamples whose mean ratio deviates from `1.0` at least as
/// much as the observed ratio. Reported only; `compare` does not use it for the verdict.
pub p_value: f64,
/// Cohen's d: `(mean(current) - mean(baseline))` divided by the pooled standard
/// deviation, or `0.0` when that deviation is zero.
pub effect_size_cohens_d: f64,
/// Lower percentile bound of the bootstrapped `current / baseline` mean ratio.
pub ci_lower: f64,
/// Upper percentile bound of the bootstrapped `current / baseline` mean ratio.
pub ci_upper: f64,
}
/// Configuration for the bootstrap-based comparison of two sample sets.
///
/// Derives `Debug` and `Clone` so a configuration can be logged and reused across
/// comparisons like the other public types in this file.
#[derive(Debug, Clone)]
pub struct RegressionDetector {
    /// Minimum number of samples per side the detector is configured for.
    /// NOTE(review): `compare` as written does not consult this field — confirm whether
    /// callers are expected to enforce it.
    pub min_samples: usize,
    /// Two-sided confidence level of the ratio interval; the tail mass is
    /// `1.0 - confidence`, split evenly between both ends.
    pub confidence: f64,
    /// Relative tolerance around a ratio of `1.0`: the interval must lie entirely above
    /// `1.0 + threshold` or entirely below `1.0 - threshold` to produce a verdict.
    pub threshold: f64,
    /// When `true`, a verdict other than `NoChange` additionally requires
    /// `|Cohen's d| >= 0.8`.
    pub require_large_effect: bool,
    /// Number of resampling rounds used for both the confidence interval and the p-value.
    pub bootstrap_iterations: usize,
}
impl Default for RegressionDetector {
fn default() -> Self {
Self {
min_samples: 30,
confidence: 0.99,
threshold: 0.05,
require_large_effect: true,
bootstrap_iterations: 10_000,
}
}
}
impl RegressionDetector {
    /// Magnitude of Cohen's d required for a verdict when `require_large_effect` is set.
    const LARGE_EFFECT_D: f64 = 0.8;

    /// Creates a detector with the default configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Compares `current` against `baseline` and classifies the change.
    ///
    /// The verdict is based on a bootstrapped percentile confidence interval of the ratio
    /// `mean(current) / mean(baseline)`:
    /// * `Regression` when the whole interval lies above `1.0 + threshold`,
    /// * `Improvement` when the whole interval lies below `1.0 - threshold`,
    /// * `NoChange` otherwise.
    ///
    /// With `require_large_effect` set, a verdict other than `NoChange` also needs
    /// `|Cohen's d| >= 0.8`. The p-value is reported but does not influence the verdict.
    ///
    /// Returns a neutral result (`NoChange`, 0% change, p = 1, interval `[1, 1]`) when
    /// either slice is empty or the baseline mean is exactly zero, because the ratio is
    /// undefined in those cases.
    ///
    /// Both bootstraps use fixed seeds, so identical inputs always produce identical
    /// results.
    ///
    /// NOTE(review): `min_samples` is not consulted here — confirm whether undersized
    /// inputs are meant to be rejected by this method or by its callers.
    pub fn compare(&self, baseline: &[f64], current: &[f64]) -> RegressionResult {
        if baseline.is_empty() || current.is_empty() {
            return Self::neutral_result();
        }
        let baseline_mean = mean(baseline);
        if baseline_mean == 0.0 {
            return Self::neutral_result();
        }

        let ratio = mean(current) / baseline_mean;
        let change_pct = (ratio - 1.0) * 100.0;
        let cohens_d = compute_cohens_d(baseline, current);
        let (ci_lower, ci_upper) = self.bootstrap_ratio_ci(baseline, current);
        let p_value = self.bootstrap_p_value(baseline, current);

        // The effect-size gate applies to both directions, so evaluate it once.
        let effect_ok = !self.require_large_effect || cohens_d.abs() >= Self::LARGE_EFFECT_D;
        let verdict = if effect_ok && ci_lower > 1.0 + self.threshold {
            Verdict::Regression
        } else if effect_ok && ci_upper < 1.0 - self.threshold {
            Verdict::Improvement
        } else {
            Verdict::NoChange
        };

        RegressionResult {
            verdict,
            change_pct,
            p_value,
            effect_size_cohens_d: cohens_d,
            ci_lower,
            ci_upper,
        }
    }

    /// Percentile bootstrap interval for `mean(current) / mean(baseline)`.
    ///
    /// Each round resamples both inputs with replacement. Rounds whose resampled baseline
    /// mean is not strictly positive are discarded; if none survive the interval
    /// collapses to `(1.0, 1.0)`.
    fn bootstrap_ratio_ci(&self, baseline: &[f64], current: &[f64]) -> (f64, f64) {
        let mut rng_state: u64 = 42;
        let mut ratios = Vec::with_capacity(self.bootstrap_iterations);
        for _ in 0..self.bootstrap_iterations {
            let b_mean = bootstrap_mean(baseline, &mut rng_state, &Self::lcg_next);
            let c_mean = bootstrap_mean(current, &mut rng_state, &Self::lcg_next);
            if b_mean > 0.0 {
                ratios.push(c_mean / b_mean);
            }
        }
        if ratios.is_empty() {
            return (1.0, 1.0);
        }

        // `total_cmp` is a genuine total order, so the sort stays well-defined even if a
        // NaN sample produced a NaN ratio.
        ratios.sort_by(f64::total_cmp);

        let alpha = 1.0 - self.confidence;
        let count = ratios.len() as f64;
        let last = ratios.len() - 1;
        // Float-to-integer casts truncate; clamp so the upper percentile stays in bounds.
        let lower_idx = (((alpha / 2.0) * count) as usize).min(last);
        let upper_idx = (((1.0 - alpha / 2.0) * count) as usize).min(last);
        (ratios[lower_idx], ratios[upper_idx])
    }

    /// Two-sided bootstrap p-value under the null hypothesis that both inputs come from
    /// the same pooled distribution.
    ///
    /// The result is the share of usable resamples whose ratio deviates from `1.0` at
    /// least as much as the observed ratio. A resample is usable when its baseline mean
    /// is strictly positive. When no resample is usable (zero iterations, or data that
    /// never yields a positive baseline mean) there is no evidence against the null and
    /// `1.0` is returned instead of NaN or a spurious `0.0`.
    fn bootstrap_p_value(&self, baseline: &[f64], current: &[f64]) -> f64 {
        let observed_ratio = mean(current) / mean(baseline).max(f64::EPSILON);
        let observed_deviation = (observed_ratio - 1.0).abs();
        let pooled: Vec<f64> = baseline.iter().chain(current).copied().collect();

        let mut rng_state: u64 = 123;
        let mut usable = 0usize;
        let mut extreme = 0usize;
        for _ in 0..self.bootstrap_iterations {
            let b_mean =
                bootstrap_mean_from_pool(&pooled, baseline.len(), &mut rng_state, &Self::lcg_next);
            let c_mean =
                bootstrap_mean_from_pool(&pooled, current.len(), &mut rng_state, &Self::lcg_next);
            if b_mean > 0.0 {
                usable += 1;
                if (c_mean / b_mean - 1.0).abs() >= observed_deviation {
                    extreme += 1;
                }
            }
        }

        if usable == 0 {
            return 1.0;
        }
        // Divide by the rounds actually evaluated, not the rounds requested, so skipped
        // rounds do not silently count as "not extreme".
        extreme as f64 / usable as f64
    }

    /// Result returned when the ratio cannot be computed.
    fn neutral_result() -> RegressionResult {
        RegressionResult {
            verdict: Verdict::NoChange,
            change_pct: 0.0,
            p_value: 1.0,
            effect_size_cohens_d: 0.0,
            ci_lower: 1.0,
            ci_upper: 1.0,
        }
    }

    /// Advances the 64-bit linear congruential generator and returns its upper 31 bits,
    /// which are the better-mixed bits of this kind of generator.
    fn lcg_next(state: &mut u64) -> usize {
        *state = state
            .wrapping_mul(6_364_136_223_846_793_005)
            .wrapping_add(1);
        (*state >> 33) as usize
    }
}
/// Arithmetic mean of `data`; an empty slice yields `0.0` instead of NaN.
fn mean(data: &[f64]) -> f64 {
    match data.len() {
        0 => 0.0,
        count => data.iter().sum::<f64>() / count as f64,
    }
}
/// Unbiased sample variance (divisor `n - 1`) of `data`.
///
/// Fewer than two values carry no spread information, so the result is `0.0`.
fn variance(data: &[f64]) -> f64 {
    let count = data.len();
    if count < 2 {
        return 0.0;
    }
    let center = mean(data);
    let squared_deviations: f64 = data.iter().map(|value| (value - center).powi(2)).sum();
    squared_deviations / (count - 1) as f64
}
/// Cohen's d effect size of `current` relative to `baseline`:
/// `(mean(current) - mean(baseline)) / pooled_sd`.
///
/// The pooled standard deviation weights each sample variance by its degrees of freedom.
/// The effect size is undefined, and reported as `0.0`, when
/// * either slice is empty,
/// * the combined degrees of freedom `n1 + n2 - 2` are not positive (for example one
///   value on each side, which would otherwise evaluate `0 / 0` and leak NaN), or
/// * the pooled standard deviation is zero.
fn compute_cohens_d(baseline: &[f64], current: &[f64]) -> f64 {
    if baseline.is_empty() || current.is_empty() {
        return 0.0;
    }
    let n1 = baseline.len() as f64;
    let n2 = current.len() as f64;
    let degrees_of_freedom = n1 + n2 - 2.0;
    if degrees_of_freedom <= 0.0 {
        return 0.0;
    }

    let pooled_var =
        ((n1 - 1.0) * variance(baseline) + (n2 - 1.0) * variance(current)) / degrees_of_freedom;
    let pooled_sd = pooled_var.sqrt();
    if pooled_sd == 0.0 {
        return 0.0;
    }
    (mean(current) - mean(baseline)) / pooled_sd
}
/// Mean of one bootstrap resample of `data`.
///
/// Draws `data.len()` values with replacement, taking each index from
/// `lcg_next(rng_state) % data.len()`. An empty slice performs no draws and evaluates
/// `0.0 / 0.0`, i.e. NaN.
fn bootstrap_mean(data: &[f64], rng_state: &mut u64, lcg_next: &dyn Fn(&mut u64) -> usize) -> f64 {
    let len = data.len();
    let total = (0..len).fold(0.0_f64, |running, _| {
        running + data[lcg_next(rng_state) % len]
    });
    total / len as f64
}
/// Mean of `sample_size` values drawn with replacement from `pool`.
///
/// Each index is `lcg_next(rng_state) % pool.len()`. A `sample_size` of zero performs no
/// draws and evaluates `0.0 / 0.0`, i.e. NaN.
fn bootstrap_mean_from_pool(
    pool: &[f64],
    sample_size: usize,
    rng_state: &mut u64,
    lcg_next: &dyn Fn(&mut u64) -> usize,
) -> f64 {
    let pool_len = pool.len();
    let total = std::iter::repeat_with(|| pool[lcg_next(rng_state) % pool_len])
        .take(sample_size)
        .fold(0.0_f64, |running, drawn| running + drawn);
    total / sample_size as f64
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A 12% upward shift on tightly clustered data must be flagged as a regression.
    #[test]
    fn test_detect_10pct_regression() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            require_large_effect: false,
            ..Default::default()
        };
        let baseline: Vec<f64> = (0..50).map(|i| 100.0 + (i as f64 * 0.1) - 2.5).collect();
        let current: Vec<f64> = (0..50).map(|i| 112.0 + (i as f64 * 0.1) - 2.5).collect();
        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::Regression);
        assert!(result.change_pct > 10.0);
    }

    /// A 0.5% shift stays inside the 5% tolerance band and must not be reported.
    #[test]
    fn test_no_false_positive_on_noise() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            ..Default::default()
        };
        let baseline: Vec<f64> = (0..50).map(|i| 100.0 + (i as f64 % 3.0) - 1.0).collect();
        let current: Vec<f64> = (0..50).map(|i| 100.5 + (i as f64 % 3.0) - 1.0).collect();
        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::NoChange);
    }

    /// A drop of roughly 35% must be flagged as an improvement.
    #[test]
    fn test_detect_improvement() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            require_large_effect: false,
            ..Default::default()
        };
        let baseline: Vec<f64> = (0..50).map(|i| 35.7 + (i as f64 * 0.01) - 0.25).collect();
        let current: Vec<f64> = (0..50).map(|i| 23.2 + (i as f64 * 0.01) - 0.25).collect();
        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::Improvement);
        assert!(result.change_pct < -30.0);
    }

    #[test]
    fn test_cohens_d_large_effect() {
        // Constant samples have zero pooled deviation; the effect size is reported as 0.
        let baseline: Vec<f64> = vec![10.0; 30];
        let current: Vec<f64> = vec![15.0; 30];
        let degenerate = compute_cohens_d(&baseline, &current);
        assert_eq!(degenerate, 0.0, "zero-variance inputs should yield d = 0");

        let baseline: Vec<f64> = (0..30).map(|i| 10.0 + (i as f64 * 0.1)).collect();
        let current: Vec<f64> = (0..30).map(|i| 15.0 + (i as f64 * 0.1)).collect();
        let d = compute_cohens_d(&baseline, &current);
        assert!(d.abs() > 0.8, "Cohen's d = {d:.2} should be large effect");
    }

    /// An empty baseline cannot be compared and must yield the neutral verdict.
    #[test]
    fn test_empty_samples() {
        let detector = RegressionDetector::new();
        let result = detector.compare(&[], &[1.0, 2.0]);
        assert_eq!(result.verdict, Verdict::NoChange);
    }
}