// scirs2_core/benchmarking/regression.rs

use crate::benchmarking::{BenchmarkResult, BenchmarkRunner};
use crate::error::{CoreError, CoreResult, ErrorContext};
#[cfg(feature = "serialization")]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Configuration for performance regression detection.
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RegressionConfig {
    /// Ratio of current to baseline execution time above which a regression is flagged.
    pub regression_threshold: f64,
    /// Minimum number of historical samples required before regression analysis runs.
    pub min_historical_samples: usize,
    /// Confidence level used when assessing statistical significance.
    pub confidence_level: f64,
    /// Whether to automatically update the stored baseline with new results.
    pub auto_update_baseline: bool,
    /// Directory in which historical benchmark results are persisted as JSON.
    pub results_directory: PathBuf,
}

impl Default for RegressionConfig {
    fn default() -> Self {
        Self {
            regression_threshold: 1.1, // flag runs more than 10% slower than baseline
            min_historical_samples: 5,
            confidence_level: 0.95,
            auto_update_baseline: false,
            results_directory: PathBuf::from("benchmark_results"),
        }
    }
}

impl RegressionConfig {
    /// Creates a configuration with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the performance ratio above which a regression is flagged.
    pub fn with_regression_threshold(mut self, threshold: f64) -> Self {
        self.regression_threshold = threshold;
        self
    }

    /// Sets the minimum number of historical samples required for analysis.
    pub fn with_min_historical_samples(mut self, samples: usize) -> Self {
        self.min_historical_samples = samples;
        self
    }

    /// Sets the confidence level for statistical significance.
    pub fn with_confidence_level(mut self, level: f64) -> Self {
        self.confidence_level = level;
        self
    }

    /// Enables or disables automatic baseline updates.
    pub fn with_auto_update_baseline(mut self, enable: bool) -> Self {
        self.auto_update_baseline = enable;
        self
    }

    /// Sets the directory where historical results are stored.
    pub fn with_results_directory<P: AsRef<Path>>(mut self, dir: P) -> Self {
        self.results_directory = dir.as_ref().to_path_buf();
        self
    }
}

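// Sketch of a stricter configuration than the defaults. The 1.05 threshold
// (flag >5% slowdowns) and the "bench_history" directory are illustrative
// choices for this example, not crate conventions.
#[cfg(test)]
#[allow(dead_code)]
fn example_strict_config() -> RegressionConfig {
    RegressionConfig::new()
        .with_regression_threshold(1.05)
        .with_min_historical_samples(10)
        .with_confidence_level(0.99)
        .with_results_directory("bench_history")
}
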
/// A single benchmark result recorded for historical comparison.
#[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct HistoricalResult {
    /// Unix timestamp (seconds) at which the result was recorded.
    pub timestamp: u64,
    /// Git commit hash of the code under test, if available.
    pub commit_hash: Option<String>,
    /// Crate version at the time of the run.
    pub version: Option<String>,
    /// Name of the benchmark.
    pub benchmark_name: String,
    /// Mean execution time in nanoseconds.
    pub mean_execution_time_nanos: u64,
    /// Standard deviation of execution time in nanoseconds.
    pub std_dev_nanos: u64,
    /// Coefficient of variation (std dev / mean) of the measurements.
    pub coefficient_of_variation: f64,
    /// Mean memory usage of the run.
    pub mean_memory_usage: usize,
    /// Number of measurements in the sample.
    pub sample_count: usize,
    /// Free-form metadata attached to the run.
    pub metadata: HashMap<String, String>,
}

impl HistoricalResult {
    /// Converts a finalized `BenchmarkResult` into a historical record stamped
    /// with the current time, crate version, and (when available) commit hash.
    pub fn from_result(result: &BenchmarkResult) -> Self {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        Self {
            timestamp,
            commit_hash: Self::get_git_commit_hash(),
            version: Some(env!("CARGO_PKG_VERSION").to_string()),
            benchmark_name: result.name.clone(),
            mean_execution_time_nanos: result.statistics.mean_execution_time.as_nanos() as u64,
            std_dev_nanos: result.statistics.std_dev_execution_time.as_nanos() as u64,
            coefficient_of_variation: result.statistics.coefficient_of_variation,
            mean_memory_usage: result.statistics.mean_memory_usage,
            sample_count: result.statistics.sample_count,
            metadata: HashMap::new(),
        }
    }

    /// Stub: commit-hash discovery is not implemented, so records always carry `None`.
    fn get_git_commit_hash() -> Option<String> {
        None
    }

    /// Mean execution time as a `Duration`.
    pub fn execution_time(&self) -> Duration {
        Duration::from_nanos(self.mean_execution_time_nanos)
    }

    /// Standard deviation of execution time as a `Duration`.
    pub fn std_dev(&self) -> Duration {
        Duration::from_nanos(self.std_dev_nanos)
    }
}

/// Outcome of comparing a fresh benchmark result against its history.
#[derive(Debug, Clone)]
pub struct RegressionAnalysis {
    /// Name of the analyzed benchmark.
    pub benchmark_name: String,
    /// The result under analysis.
    pub current_result: HistoricalResult,
    /// Baseline the current result was compared against.
    pub baseline: HistoricalResult,
    /// Historical results used in the analysis.
    pub historical_results: Vec<HistoricalResult>,
    /// Whether the current result exceeded the regression threshold.
    pub regression_detected: bool,
    /// Ratio of current to baseline mean execution time (>1.0 means slower).
    pub performance_ratio: f64,
    /// Statistical significance of the observed difference (0.0 to 1.0).
    pub statistical_significance: f64,
    /// Direction of the long-term performance trend.
    pub trend: PerformanceTrend,
    /// Heuristic confidence in this analysis (0.0 to 1.0).
    pub confidence: f64,
}

/// Direction of a benchmark's performance over its recorded history.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PerformanceTrend {
    /// Execution times are decreasing over time.
    Improving,
    /// Execution times show no clear direction.
    Stable,
    /// Execution times are increasing over time.
    Degrading,
    /// Not enough data to judge.
    Unknown,
}

/// Detects performance regressions by comparing new results against stored history.
pub struct RegressionDetector {
    config: RegressionConfig,
}

impl RegressionDetector {
    /// Creates a detector with the given configuration.
    pub fn new(config: RegressionConfig) -> Self {
        Self { config }
    }

    /// Analyzes a benchmark result for regressions against stored history.
    pub fn analyze_regression(&self, result: &BenchmarkResult) -> CoreResult<RegressionAnalysis> {
        let current_result = HistoricalResult::from_result(result);

        let historical_results = self.load_historical_results(&result.name)?;

        // Without enough history, report an inconclusive analysis rather than guessing.
        if historical_results.len() < self.config.min_historical_samples {
            return Ok(RegressionAnalysis {
                benchmark_name: result.name.clone(),
                current_result: current_result.clone(),
                baseline: current_result.clone(),
                historical_results,
                regression_detected: false,
                performance_ratio: 1.0,
                statistical_significance: 0.0,
                trend: PerformanceTrend::Unknown,
                confidence: 0.0,
            });
        }

        let baseline = self.calculate_baseline(&historical_results)?;

        let performance_ratio = current_result.mean_execution_time_nanos as f64
            / baseline.mean_execution_time_nanos as f64;

        let regression_detected = performance_ratio > self.config.regression_threshold;

        let statistical_significance =
            self.calculate_statistical_significance(&current_result, &historical_results)?;

        let trend = self.analyze_trend(&historical_results)?;

        let confidence = self.calculate_confidence(&historical_results, &current_result)?;

        Ok(RegressionAnalysis {
            benchmark_name: result.name.clone(),
            current_result,
            baseline,
            historical_results,
            regression_detected,
            performance_ratio,
            statistical_significance,
            trend,
            confidence,
        })
    }

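    // Sketch of acting on an analysis (the 0.8 confidence cut-off is an
    // arbitrary example value, not a crate default).
    #[cfg(test)]
    #[allow(dead_code)]
    fn example_is_actionable(analysis: &RegressionAnalysis) -> bool {
        // Only treat the run as a real regression when the threshold was
        // breached and the heuristic confidence is reasonably high.
        analysis.regression_detected && analysis.confidence >= 0.8
    }
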
    /// Runs regression analysis over a batch of benchmark results.
    pub fn analyze_multiple_regressions(
        &self,
        results: &[BenchmarkResult],
    ) -> CoreResult<Vec<RegressionAnalysis>> {
        let mut analyses = Vec::new();

        for result in results {
            let analysis = self.analyze_regression(result)?;
            analyses.push(analysis);
        }

        Ok(analyses)
    }

    /// Appends a benchmark result to the stored history for its benchmark.
    pub fn store_result(&self, result: &BenchmarkResult) -> CoreResult<()> {
        let historical_result = HistoricalResult::from_result(result);

        fs::create_dir_all(&self.config.results_directory).map_err(|e| {
            CoreError::IoError(ErrorContext::new(format!(
                "Failed to create results directory: {e}"
            )))
        })?;

        let mut historical_results = self.load_historical_results(&result.name)?;

        historical_results.push(historical_result);

        historical_results.sort_by_key(|r| r.timestamp);

        // Cap retained history at the 1000 most recent entries.
        if historical_results.len() > 1000 {
            historical_results.drain(0..historical_results.len() - 1000);
        }

        let file_path = self.get_results_file_path(&result.name);
        let serialized = serde_json::to_string_pretty(&historical_results).map_err(|e| {
            CoreError::IoError(ErrorContext::new(format!(
                "Failed to serialize results: {e}"
            )))
        })?;

        fs::write(&file_path, serialized).map_err(|e| {
            CoreError::IoError(ErrorContext::new(format!(
                "Failed to write results file: {e}"
            )))
        })?;

        Ok(())
    }

    /// Loads the stored history for a benchmark, or an empty list if none exists.
    fn load_historical_results(&self, benchmark_name: &str) -> CoreResult<Vec<HistoricalResult>> {
        let file_path = self.get_results_file_path(benchmark_name);

        if !file_path.exists() {
            return Ok(Vec::new());
        }

        let content = fs::read_to_string(&file_path).map_err(|e| {
            CoreError::IoError(ErrorContext::new(format!(
                "Failed to read results file: {e}"
            )))
        })?;

        let results: Vec<HistoricalResult> = serde_json::from_str(&content).map_err(|e| {
            CoreError::IoError(ErrorContext::new(format!(
                "Failed to parse results file: {e}"
            )))
        })?;

        Ok(results)
    }

    /// Derives a baseline from the most recent third of the history (but at
    /// least `min_historical_samples` entries), using the median execution
    /// time so that a single outlier run cannot skew the baseline.
    fn calculate_baseline(
        &self,
        historical_results: &[HistoricalResult],
    ) -> CoreResult<HistoricalResult> {
        if historical_results.is_empty() {
            return Err(CoreError::ValidationError(crate::error::ErrorContext::new(
                "No historical results for baseline calculation",
            )));
        }

        // Clamped to the history length so the slice below cannot panic when
        // callers pass fewer results than `min_historical_samples`.
        let recent_count = (historical_results.len() / 3)
            .max(self.config.min_historical_samples)
            .min(historical_results.len());
        let recent_results = &historical_results[historical_results.len() - recent_count..];

        let mut execution_times: Vec<u64> = recent_results
            .iter()
            .map(|r| r.mean_execution_time_nanos)
            .collect();
        execution_times.sort();

        let median_time = if execution_times.len().is_multiple_of(2) {
            let mid = execution_times.len() / 2;
            (execution_times[mid - 1] + execution_times[mid]) / 2
        } else {
            execution_times[execution_times.len() / 2]
        };

        // Use the middle recent result as a template and substitute the median time.
        let mut baseline = recent_results[recent_results.len() / 2].clone();
        baseline.mean_execution_time_nanos = median_time;

        Ok(baseline)
    }

    /// Estimates how significant the current result's deviation from the
    /// historical mean is, via a z-test against the historical distribution.
    fn calculate_statistical_significance(
        &self,
        current: &HistoricalResult,
        historical: &[HistoricalResult],
    ) -> CoreResult<f64> {
        if historical.len() < 2 {
            return Ok(0.0);
        }

        let historical_times: Vec<f64> = historical
            .iter()
            .map(|r| r.mean_execution_time_nanos as f64)
            .collect();

        let historical_mean = historical_times.iter().sum::<f64>() / historical_times.len() as f64;
        let historical_variance = historical_times
            .iter()
            .map(|&x| (x - historical_mean).powi(2))
            .sum::<f64>()
            / (historical_times.len() - 1) as f64;
        let historical_std = historical_variance.sqrt();

        // z-score of the current time against the historical mean, using the
        // standard error of the mean.
        let current_time = current.mean_execution_time_nanos as f64;
        let z_score =
            (current_time - historical_mean) / (historical_std / (historical.len() as f64).sqrt());

        // One-sided p-value from the normal CDF, Phi(z) = (1 + erf(z / sqrt(2))) / 2.
        let p_value = if z_score > 0.0 {
            0.5 * (1.0 - erf(z_score / std::f64::consts::SQRT_2))
        } else {
            0.5 * (1.0 + erf(-z_score / std::f64::consts::SQRT_2))
        };

        // Values near 1.0 mean the current run is far above the historical mean.
        Ok(1.0 - p_value)
    }

    /// Classifies the long-term trend by fitting an ordinary least-squares
    /// line to execution time versus run index and inspecting its slope.
    fn analyze_trend(
        &self,
        historical_results: &[HistoricalResult],
    ) -> CoreResult<PerformanceTrend> {
        if historical_results.len() < 5 {
            return Ok(PerformanceTrend::Unknown);
        }

        let n = historical_results.len() as f64;
        let sum_x: f64 = (0..historical_results.len()).map(|i| i as f64).sum();
        let sum_y: f64 = historical_results
            .iter()
            .map(|r| r.mean_execution_time_nanos as f64)
            .sum();
        let sum_xy: f64 = historical_results
            .iter()
            .enumerate()
            .map(|(i, r)| i as f64 * r.mean_execution_time_nanos as f64)
            .sum();
        let sum_x_sq: f64 = (0..historical_results.len())
            .map(|i| (i as f64).powi(2))
            .sum();

        let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x_sq - sum_x.powi(2));

        // Normalize the slope by the mean execution time; a change of more
        // than 1% per run in either direction counts as a trend.
        let relative_slope = slope / (sum_y / n);
        if relative_slope > 0.01 {
            Ok(PerformanceTrend::Degrading)
        } else if relative_slope < -0.01 {
            Ok(PerformanceTrend::Improving)
        } else {
            Ok(PerformanceTrend::Stable)
        }
    }

    /// Heuristic confidence score: full confidence requires at least ten
    /// historical samples and a coefficient of variation below 10%.
    fn calculate_confidence(
        &self,
        historical_results: &[HistoricalResult],
        current: &HistoricalResult,
    ) -> CoreResult<f64> {
        let sample_size_factor = (historical_results.len() as f64 / 10.0).min(1.0);
        let variance_factor = if current.coefficient_of_variation < 0.1 {
            1.0
        } else {
            (0.1 / current.coefficient_of_variation).min(1.0)
        };

        Ok(sample_size_factor * variance_factor)
    }

    /// Maps a benchmark name to its JSON history file, replacing
    /// non-alphanumeric characters so the name is filesystem-safe.
    fn get_results_file_path(&self, benchmark_name: &str) -> PathBuf {
        let safe_name = benchmark_name.replace(|c: char| !c.is_alphanumeric(), "_");
        self.config
            .results_directory
            .join(format!("{safe_name}.json"))
    }
}

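// End-to-end usage sketch (hedged: the benchmark name and the empty workload
// are placeholders for this example): run a benchmark, persist the result,
// then ask the detector whether it regressed.
#[cfg(test)]
#[allow(dead_code)]
fn example_detect_regression() -> CoreResult<()> {
    let runner = BenchmarkRunner::new(crate::benchmarking::BenchmarkConfig::default());
    let detector = RegressionDetector::new(RegressionConfig::default());

    let result = runner.run("example_benchmark", || Ok(()))?;
    detector.store_result(&result)?;

    let analysis = detector.analyze_regression(&result)?;
    if analysis.regression_detected {
        eprintln!(
            "{} regressed: ratio {:.3}",
            analysis.benchmark_name, analysis.performance_ratio
        );
    }
    Ok(())
}
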
/// Utilities for running regression tests over named benchmarks.
pub struct RegressionTestUtils;

impl RegressionTestUtils {
    /// Runs each named benchmark, stores its result, and analyzes it for
    /// regressions. The benchmark body used here is a placeholder workload
    /// (a 100-microsecond sleep).
    pub fn run_regression_tests(benchmark_names: &[&str]) -> CoreResult<Vec<RegressionAnalysis>> {
        let mut analyses = Vec::new();
        let benchmark_runner =
            BenchmarkRunner::new(crate::benchmarking::BenchmarkConfig::default());
        let detector = RegressionDetector::new(RegressionConfig::default());

        for &name in benchmark_names {
            let result = benchmark_runner.run(name, || {
                std::thread::sleep(Duration::from_micros(100));
                Ok(())
            })?;

            detector.store_result(&result)?;

            let analysis = detector.analyze_regression(&result)?;
            analyses.push(analysis);
        }

        Ok(analyses)
    }

    /// Generates a Markdown report summarizing a set of regression analyses.
    pub fn generate_regression_report(analyses: &[RegressionAnalysis]) -> String {
        let mut report = String::new();

        report.push_str("# Performance Regression Report\n\n");

        let regressions: Vec<_> = analyses.iter().filter(|a| a.regression_detected).collect();

        if regressions.is_empty() {
            report.push_str("✅ No performance regressions detected.\n\n");
        } else {
            report.push_str(&format!(
                "⚠️ {} performance regression(s) detected:\n\n",
                regressions.len()
            ));

            for regression in &regressions {
                report.push_str(&format!(
                    "- **{}**: {:.1}% slower (ratio: {:.3}, confidence: {:.0}%)\n",
                    regression.benchmark_name,
                    (regression.performance_ratio - 1.0) * 100.0,
                    regression.performance_ratio,
                    regression.confidence * 100.0
                ));
            }
            report.push('\n');
        }

        report.push_str("## Summary\n\n");
        report.push_str(&format!("- Total benchmarks: {}\n", analyses.len()));
        report.push_str(&format!("- Regressions detected: {}\n", regressions.len()));

        let improving = analyses
            .iter()
            .filter(|a| a.trend == PerformanceTrend::Improving)
            .count();
        let stable = analyses
            .iter()
            .filter(|a| a.trend == PerformanceTrend::Stable)
            .count();
        let degrading = analyses
            .iter()
            .filter(|a| a.trend == PerformanceTrend::Degrading)
            .count();

        report.push_str(&format!("- Improving trends: {improving}\n"));
        report.push_str(&format!("- Stable trends: {stable}\n"));
        report.push_str(&format!("- Degrading trends: {degrading}\n"));

        report
    }
}

/// Error function approximation (Abramowitz & Stegun formula 7.1.26,
/// maximum absolute error about 1.5e-7).
fn erf(x: f64) -> f64 {
    let a1 = 0.254829592;
    let a2 = -0.284496736;
    let a3 = 1.421413741;
    let a4 = -1.453152027;
    let a5 = 1.061405429;
    let p = 0.3275911;

    // erf is odd, so compute on |x| and restore the sign at the end.
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x = x.abs();

    let t = 1.0 / (1.0 + p * x);
    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();

    sign * y
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_regression_config() {
        let config = RegressionConfig::new()
            .with_regression_threshold(1.2)
            .with_min_historical_samples(10)
            .with_confidence_level(0.99)
            .with_auto_update_baseline(true);

        assert_eq!(config.regression_threshold, 1.2);
        assert_eq!(config.min_historical_samples, 10);
        assert_eq!(config.confidence_level, 0.99);
        assert!(config.auto_update_baseline);
    }

    #[test]
    fn test_historical_result() {
        let benchmark_config = crate::benchmarking::BenchmarkConfig::default();
        let mut result = BenchmarkResult::new("test_benchmark".to_string(), benchmark_config);
        result.add_measurement(crate::benchmarking::BenchmarkMeasurement::new(
            Duration::from_millis(100),
        ));
        result.finalize().expect("failed to finalize result");

        let historical = HistoricalResult::from_result(&result);

        assert_eq!(historical.benchmark_name, "test_benchmark");
        assert!(historical.mean_execution_time_nanos > 0);
        assert_eq!(historical.sample_count, 1);
    }

    #[test]
    fn test_regression_detector() {
        let temp_dir = TempDir::new().expect("failed to create temp dir");
        let config = RegressionConfig::new()
            .with_results_directory(temp_dir.path())
            .with_min_historical_samples(1);

        let detector = RegressionDetector::new(config);

        let benchmark_config = crate::benchmarking::BenchmarkConfig::default();
        let mut result = BenchmarkResult::new("test_regression".to_string(), benchmark_config);
        result.add_measurement(crate::benchmarking::BenchmarkMeasurement::new(
            Duration::from_millis(100),
        ));
        result.finalize().expect("failed to finalize result");

        detector.store_result(&result).expect("failed to store result");
        let analysis = detector
            .analyze_regression(&result)
            .expect("analysis failed");

        assert_eq!(analysis.benchmark_name, "test_regression");
        assert!(!analysis.regression_detected);
    }

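    // Sanity check for the report generator (a sketch grounded in the public
    // fields above): all field values here are fabricated test inputs, not
    // recorded measurements.
    #[test]
    fn test_generate_regression_report() {
        let hist = |nanos: u64| HistoricalResult {
            timestamp: 0,
            commit_hash: None,
            version: None,
            benchmark_name: "synthetic_bench".to_string(),
            mean_execution_time_nanos: nanos,
            std_dev_nanos: 0,
            coefficient_of_variation: 0.05,
            mean_memory_usage: 0,
            sample_count: 10,
            metadata: HashMap::new(),
        };

        let analysis = RegressionAnalysis {
            benchmark_name: "synthetic_bench".to_string(),
            current_result: hist(125),
            baseline: hist(100),
            historical_results: vec![hist(100)],
            regression_detected: true,
            performance_ratio: 1.25,
            statistical_significance: 0.99,
            trend: PerformanceTrend::Degrading,
            confidence: 0.8,
        };

        let report = RegressionTestUtils::generate_regression_report(&[analysis]);
        assert!(report.contains("# Performance Regression Report"));
        assert!(report.contains("synthetic_bench"));
        assert!(report.contains("Regressions detected: 1"));
    }
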
    #[test]
    fn test_performance_trend() {
        assert_eq!(PerformanceTrend::Improving, PerformanceTrend::Improving);
        assert_ne!(PerformanceTrend::Improving, PerformanceTrend::Degrading);
    }

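    // Sanity check for the trend classifier (a sketch using fabricated
    // history): strictly increasing execution times should read as Degrading,
    // and flat times as Stable.
    #[test]
    fn test_analyze_trend_directions() {
        let hist = |nanos: u64| HistoricalResult {
            timestamp: 0,
            commit_hash: None,
            version: None,
            benchmark_name: "trend_bench".to_string(),
            mean_execution_time_nanos: nanos,
            std_dev_nanos: 0,
            coefficient_of_variation: 0.05,
            mean_memory_usage: 0,
            sample_count: 10,
            metadata: HashMap::new(),
        };
        let detector = RegressionDetector::new(RegressionConfig::default());

        // Roughly +10% per run: well above the 1% relative-slope cut-off.
        let degrading: Vec<_> = (0..6).map(|i| hist(100 + 10 * i)).collect();
        assert_eq!(
            detector.analyze_trend(&degrading).expect("trend failed"),
            PerformanceTrend::Degrading
        );

        let flat: Vec<_> = (0..6).map(|_| hist(100)).collect();
        assert_eq!(
            detector.analyze_trend(&flat).expect("trend failed"),
            PerformanceTrend::Stable
        );
    }
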
    #[test]
    fn test_erf_function() {
        assert!((erf(0.0) - 0.0).abs() < 1e-6);
        assert!((erf(1.0) - 0.8427007929).abs() < 1e-6);
        assert!((erf(-1.0) + 0.8427007929).abs() < 1e-6);
    }
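
    // Sanity check for the median baseline (a sketch using fabricated
    // history): with the default config, six runs keep the most recent five,
    // whose median execution time becomes the baseline.
    #[test]
    fn test_calculate_baseline_median() {
        let hist = |nanos: u64| HistoricalResult {
            timestamp: 0,
            commit_hash: None,
            version: None,
            benchmark_name: "baseline_bench".to_string(),
            mean_execution_time_nanos: nanos,
            std_dev_nanos: 0,
            coefficient_of_variation: 0.05,
            mean_memory_usage: 0,
            sample_count: 10,
            metadata: HashMap::new(),
        };
        let detector = RegressionDetector::new(RegressionConfig::default());

        // History: 100..600 ns. The recent window is the last five entries
        // (200..600), whose median is 400.
        let history: Vec<_> = (1..=6).map(|i| hist(i * 100)).collect();
        let baseline = detector
            .calculate_baseline(&history)
            .expect("baseline failed");
        assert_eq!(baseline.mean_execution_time_nanos, 400);
    }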
}