use serde::{Deserialize, Serialize};

8#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
10pub enum Verdict {
11 Regression,
12 Improvement,
13 NoChange,
14}
15
16impl std::fmt::Display for Verdict {
17 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18 match self {
19 Verdict::Regression => write!(f, "REGRESSION"),
20 Verdict::Improvement => write!(f, "IMPROVED"),
21 Verdict::NoChange => write!(f, "NO_CHANGE"),
22 }
23 }
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct RegressionResult {
29 pub verdict: Verdict,
30 pub change_pct: f64,
32 pub p_value: f64,
34 pub effect_size_cohens_d: f64,
36 pub ci_lower: f64,
38 pub ci_upper: f64,
40}
41
/// Configuration for bootstrap-based regression detection.
#[derive(Debug, Clone)]
pub struct RegressionDetector {
    /// Minimum number of samples expected per side.
    ///
    /// NOTE(review): no code in this file reads this field — confirm whether
    /// callers are expected to enforce it before calling `compare`.
    pub min_samples: usize,
    /// Confidence level of the bootstrap interval (e.g. `0.99` for 99%).
    pub confidence: f64,
    /// Relative change the ratio interval must clear, as a fraction of the
    /// baseline: the interval has to lie entirely beyond `1 ± threshold`.
    pub threshold: f64,
    /// When `true`, a change is only reported if `|Cohen's d| >= 0.8`.
    pub require_large_effect: bool,
    /// Number of bootstrap resamples drawn for the interval and the p-value.
    pub bootstrap_iterations: usize,
}

57impl Default for RegressionDetector {
58 fn default() -> Self {
59 Self {
60 min_samples: 30,
61 confidence: 0.99,
62 threshold: 0.05,
63 require_large_effect: true,
64 bootstrap_iterations: 10_000,
65 }
66 }
67}
68
69impl RegressionDetector {
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn compare(&self, baseline: &[f64], current: &[f64]) -> RegressionResult {
77 if baseline.is_empty() || current.is_empty() {
78 return RegressionResult {
79 verdict: Verdict::NoChange,
80 change_pct: 0.0,
81 p_value: 1.0,
82 effect_size_cohens_d: 0.0,
83 ci_lower: 1.0,
84 ci_upper: 1.0,
85 };
86 }
87
88 let baseline_mean = mean(baseline);
89 let current_mean = mean(current);
90
91 if baseline_mean == 0.0 {
92 return RegressionResult {
93 verdict: Verdict::NoChange,
94 change_pct: 0.0,
95 p_value: 1.0,
96 effect_size_cohens_d: 0.0,
97 ci_lower: 1.0,
98 ci_upper: 1.0,
99 };
100 }
101
102 let ratio = current_mean / baseline_mean;
103 let change_pct = (ratio - 1.0) * 100.0;
104
105 let cohens_d = compute_cohens_d(baseline, current);
107
108 let (ci_lower, ci_upper) = self.bootstrap_ratio_ci(baseline, current);
110
111 let p_value = self.bootstrap_p_value(baseline, current);
113
114 let verdict = if ci_lower > 1.0 + self.threshold {
116 if !self.require_large_effect || cohens_d.abs() >= 0.8 {
118 Verdict::Regression
119 } else {
120 Verdict::NoChange
121 }
122 } else if ci_upper < 1.0 - self.threshold {
123 if !self.require_large_effect || cohens_d.abs() >= 0.8 {
125 Verdict::Improvement
126 } else {
127 Verdict::NoChange
128 }
129 } else {
130 Verdict::NoChange
131 };
132
133 RegressionResult {
134 verdict,
135 change_pct,
136 p_value,
137 effect_size_cohens_d: cohens_d,
138 ci_lower,
139 ci_upper,
140 }
141 }
142
143 fn bootstrap_ratio_ci(&self, baseline: &[f64], current: &[f64]) -> (f64, f64) {
145 let mut ratios = Vec::with_capacity(self.bootstrap_iterations);
146 let alpha = 1.0 - self.confidence;
147
148 let mut rng_state: u64 = 42;
150 let lcg_next = |state: &mut u64| -> usize {
151 *state = state
152 .wrapping_mul(6_364_136_223_846_793_005)
153 .wrapping_add(1);
154 (*state >> 33) as usize
155 };
156
157 for _ in 0..self.bootstrap_iterations {
158 let b_mean = bootstrap_mean(baseline, &mut rng_state, &lcg_next);
159 let c_mean = bootstrap_mean(current, &mut rng_state, &lcg_next);
160 if b_mean > 0.0 {
161 ratios.push(c_mean / b_mean);
162 }
163 }
164
165 ratios.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
166
167 if ratios.is_empty() {
168 return (1.0, 1.0);
169 }
170
171 let lower_idx = ((alpha / 2.0) * ratios.len() as f64) as usize;
172 let upper_idx = ((1.0 - alpha / 2.0) * ratios.len() as f64) as usize;
173
174 let lower = ratios[lower_idx.min(ratios.len() - 1)];
175 let upper = ratios[upper_idx.min(ratios.len() - 1)];
176 (lower, upper)
177 }
178
179 fn bootstrap_p_value(&self, baseline: &[f64], current: &[f64]) -> f64 {
182 let observed_ratio = mean(current) / mean(baseline).max(f64::EPSILON);
183
184 let mut pooled = Vec::with_capacity(baseline.len() + current.len());
186 pooled.extend_from_slice(baseline);
187 pooled.extend_from_slice(current);
188
189 let mut rng_state: u64 = 123;
190 let lcg_next = |state: &mut u64| -> usize {
191 *state = state
192 .wrapping_mul(6_364_136_223_846_793_005)
193 .wrapping_add(1);
194 (*state >> 33) as usize
195 };
196
197 let mut extreme_count = 0;
198 for _ in 0..self.bootstrap_iterations {
199 let b_mean =
200 bootstrap_mean_from_pool(&pooled, baseline.len(), &mut rng_state, &lcg_next);
201 let c_mean =
202 bootstrap_mean_from_pool(&pooled, current.len(), &mut rng_state, &lcg_next);
203 if b_mean > 0.0 {
204 let null_ratio = c_mean / b_mean;
205 if (null_ratio - 1.0).abs() >= (observed_ratio - 1.0).abs() {
206 extreme_count += 1;
207 }
208 }
209 }
210
211 extreme_count as f64 / self.bootstrap_iterations as f64
212 }
213}
214
/// Arithmetic mean of `data`; returns `0.0` for an empty slice.
fn mean(data: &[f64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    data.iter().sum::<f64>() / data.len() as f64
}

222fn variance(data: &[f64]) -> f64 {
223 if data.len() < 2 {
224 return 0.0;
225 }
226 let m = mean(data);
227 data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (data.len() - 1) as f64
228}
229
230fn compute_cohens_d(baseline: &[f64], current: &[f64]) -> f64 {
232 let m1 = mean(baseline);
233 let m2 = mean(current);
234 let v1 = variance(baseline);
235 let v2 = variance(current);
236 let n1 = baseline.len() as f64;
237 let n2 = current.len() as f64;
238
239 let pooled_var = ((n1 - 1.0) * v1 + (n2 - 1.0) * v2) / (n1 + n2 - 2.0);
241 let pooled_sd = pooled_var.sqrt();
242
243 if pooled_sd == 0.0 {
244 return 0.0;
245 }
246
247 (m2 - m1) / pooled_sd
248}
249
/// Mean of one bootstrap resample of `data`.
///
/// Draws `data.len()` elements with replacement; each index is
/// `lcg_next(rng_state) % data.len()`, so `rng_state` is advanced once per
/// draw. Returns `0.0` for empty input, matching `mean`, instead of the NaN a
/// `0 / 0` division would yield.
fn bootstrap_mean(data: &[f64], rng_state: &mut u64, lcg_next: &dyn Fn(&mut u64) -> usize) -> f64 {
    let n = data.len();
    if n == 0 {
        return 0.0;
    }

    let mut sum = 0.0;
    for _ in 0..n {
        let idx = lcg_next(rng_state) % n;
        sum += data[idx];
    }
    sum / n as f64
}

/// Mean of `sample_size` elements drawn with replacement from `pool`.
///
/// Each index is `lcg_next(rng_state) % pool.len()`, so `rng_state` is
/// advanced once per draw. Returns `0.0` when `pool` is empty or
/// `sample_size` is zero; previously an empty pool panicked on the remainder
/// by zero and a zero `sample_size` produced NaN.
fn bootstrap_mean_from_pool(
    pool: &[f64],
    sample_size: usize,
    rng_state: &mut u64,
    lcg_next: &dyn Fn(&mut u64) -> usize,
) -> f64 {
    let n = pool.len();
    if n == 0 || sample_size == 0 {
        return 0.0;
    }

    let mut sum = 0.0;
    for _ in 0..sample_size {
        let idx = lcg_next(rng_state) % n;
        sum += pool[idx];
    }
    sum / sample_size as f64
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A shift of 12 on a baseline near 100 with a tight spread must be
    /// reported as a regression.
    #[test]
    fn test_detect_10pct_regression() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            require_large_effect: false,
            ..Default::default()
        };

        let baseline: Vec<f64> = (0..50).map(|i| 100.0 + (i as f64 * 0.1) - 2.5).collect();
        let current: Vec<f64> = (0..50).map(|i| 112.0 + (i as f64 * 0.1) - 2.5).collect();

        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::Regression);
        assert!(result.change_pct > 10.0);
    }

    /// A 0.5 shift on a baseline near 100 is far inside the 5% threshold and
    /// must not be flagged.
    #[test]
    fn test_no_false_positive_on_noise() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            ..Default::default()
        };

        let baseline: Vec<f64> = (0..50).map(|i| 100.0 + (i as f64 % 3.0) - 1.0).collect();
        let current: Vec<f64> = (0..50).map(|i| 100.5 + (i as f64 % 3.0) - 1.0).collect();

        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::NoChange);
    }

    /// A drop from about 35.7 to about 23.2 must be reported as an
    /// improvement.
    #[test]
    fn test_detect_improvement() {
        let detector = RegressionDetector {
            min_samples: 10,
            bootstrap_iterations: 5_000,
            require_large_effect: false,
            ..Default::default()
        };

        let baseline: Vec<f64> = (0..50).map(|i| 35.7 + (i as f64 * 0.01) - 0.25).collect();
        let current: Vec<f64> = (0..50).map(|i| 23.2 + (i as f64 * 0.01) - 0.25).collect();

        let result = detector.compare(&baseline, &current);
        assert_eq!(result.verdict, Verdict::Improvement);
        assert!(result.change_pct < -30.0);
    }

    #[test]
    fn test_cohens_d_large_effect() {
        // Constant samples have zero pooled standard deviation, for which the
        // effect size is reported as 0 rather than dividing by zero.
        let baseline: Vec<f64> = vec![10.0; 30];
        let current: Vec<f64> = vec![15.0; 30];
        let zero_variance_d = compute_cohens_d(&baseline, &current);
        assert_eq!(zero_variance_d, 0.0);

        // With some spread, a shift of 5 is a large effect.
        let baseline: Vec<f64> = (0..30).map(|i| 10.0 + (i as f64 * 0.1)).collect();
        let current: Vec<f64> = (0..30).map(|i| 15.0 + (i as f64 * 0.1)).collect();
        let d = compute_cohens_d(&baseline, &current);
        assert!(d.abs() > 0.8, "Cohen's d = {d:.2} should be large effect");
    }

    /// An empty baseline cannot be compared and yields the neutral verdict.
    #[test]
    fn test_empty_samples() {
        let detector = RegressionDetector::new();
        let result = detector.compare(&[], &[1.0, 2.0]);
        assert_eq!(result.verdict, Verdict::NoChange);
    }
}