sklears_core/
advanced_benchmarking.rs

1//! Advanced Benchmarking Suite with Performance Regression Detection
2//!
3//! This module provides sophisticated benchmarking capabilities including
4//! statistical analysis, regression detection, and performance tracking over time.
5
6use crate::error::Result;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, VecDeque};
9use std::time::{Duration, Instant, SystemTime};
10
11/// Advanced benchmark runner with regression detection
12///
13/// Tracks performance metrics over time and detects statistical anomalies
14/// and performance regressions automatically.
15#[derive(Debug)]
16pub struct AdvancedBenchmarkRunner {
17    /// Configuration for benchmarking
18    pub config: BenchmarkConfig,
19    /// Historical benchmark results
20    pub history: BenchmarkHistory,
21    /// Statistical analyzer for detecting regressions
22    pub analyzer: RegressionAnalyzer,
23    /// Performance baselines
24    pub baselines: HashMap<String, PerformanceBaseline>,
25}
26
27/// Configuration for benchmark execution
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct BenchmarkConfig {
30    /// Number of warmup iterations
31    pub warmup_iterations: usize,
32    /// Number of measurement iterations
33    pub measurement_iterations: usize,
34    /// Confidence level for statistical tests (e.g., 0.95 for 95%)
35    pub confidence_level: f64,
36    /// Maximum acceptable performance degradation (as fraction, e.g., 0.10 for 10%)
37    pub max_degradation_threshold: f64,
38    /// Enable outlier detection and removal
39    pub enable_outlier_detection: bool,
40    /// Sample size for statistical analysis
41    pub sample_size: usize,
42}
43
44impl Default for BenchmarkConfig {
45    fn default() -> Self {
46        Self {
47            warmup_iterations: 10,
48            measurement_iterations: 100,
49            confidence_level: 0.95,
50            max_degradation_threshold: 0.10, // 10% degradation threshold
51            enable_outlier_detection: true,
52            sample_size: 50,
53        }
54    }
55}
56
57/// Historical benchmark results with time series data
58#[derive(Debug, Clone)]
59pub struct BenchmarkHistory {
60    /// Benchmark results indexed by benchmark name
61    pub results: HashMap<String, VecDeque<BenchmarkResult>>,
62    /// Maximum history length to keep
63    pub max_history_length: usize,
64}
65
66impl BenchmarkHistory {
67    /// Create a new benchmark history with specified capacity
68    pub fn new(max_history_length: usize) -> Self {
69        Self {
70            results: HashMap::new(),
71            max_history_length,
72        }
73    }
74
75    /// Add a benchmark result to history
76    pub fn add_result(&mut self, name: String, result: BenchmarkResult) {
77        let entry = self.results.entry(name).or_default();
78
79        entry.push_back(result);
80
81        // Maintain maximum history length
82        while entry.len() > self.max_history_length {
83            entry.pop_front();
84        }
85    }
86
87    /// Get historical results for a benchmark
88    pub fn get_history(&self, name: &str) -> Option<&VecDeque<BenchmarkResult>> {
89        self.results.get(name)
90    }
91
92    /// Get statistical summary of historical performance
93    pub fn get_summary(&self, name: &str) -> Option<HistoricalSummary> {
94        let history = self.get_history(name)?;
95
96        if history.is_empty() {
97            return None;
98        }
99
100        let durations: Vec<f64> = history
101            .iter()
102            .map(|r| r.median_duration.as_secs_f64())
103            .collect();
104
105        let mean = durations.iter().sum::<f64>() / durations.len() as f64;
106        let variance =
107            durations.iter().map(|d| (d - mean).powi(2)).sum::<f64>() / durations.len() as f64;
108        let std_dev = variance.sqrt();
109
110        let mut sorted = durations.clone();
111        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
112
113        Some(HistoricalSummary {
114            mean_duration: Duration::from_secs_f64(mean),
115            std_deviation: std_dev,
116            min_duration: Duration::from_secs_f64(sorted[0]),
117            max_duration: Duration::from_secs_f64(*sorted.last().expect("last should succeed")),
118            median_duration: Duration::from_secs_f64(sorted[sorted.len() / 2]),
119            sample_count: durations.len(),
120        })
121    }
122}
123
124/// Individual benchmark result
125#[derive(Debug, Clone, Serialize, Deserialize)]
126pub struct BenchmarkResult {
127    /// Benchmark name
128    pub name: String,
129    /// Timestamp of execution
130    pub timestamp: SystemTime,
131    /// All measured durations
132    pub durations: Vec<Duration>,
133    /// Median duration
134    pub median_duration: Duration,
135    /// Mean duration
136    pub mean_duration: Duration,
137    /// Standard deviation
138    pub std_deviation: f64,
139    /// Throughput (operations per second)
140    pub throughput: f64,
141    /// Memory usage statistics
142    pub memory_stats: Option<MemoryStats>,
143}
144
145/// Memory usage statistics
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct MemoryStats {
148    /// Peak memory usage in bytes
149    pub peak_bytes: usize,
150    /// Average memory usage in bytes
151    pub average_bytes: usize,
152    /// Number of allocations
153    pub allocation_count: usize,
154    /// Number of deallocations
155    pub deallocation_count: usize,
156}
157
158/// Historical summary statistics
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct HistoricalSummary {
161    /// Mean duration across all historical runs
162    pub mean_duration: Duration,
163    /// Standard deviation
164    pub std_deviation: f64,
165    /// Minimum observed duration
166    pub min_duration: Duration,
167    /// Maximum observed duration
168    pub max_duration: Duration,
169    /// Median duration
170    pub median_duration: Duration,
171    /// Number of samples
172    pub sample_count: usize,
173}
174
175/// Performance baseline for comparison
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct PerformanceBaseline {
178    /// Baseline name
179    pub name: String,
180    /// Baseline duration
181    pub baseline_duration: Duration,
182    /// Acceptable variance (as fraction)
183    pub acceptable_variance: f64,
184    /// When the baseline was established
185    pub established_at: SystemTime,
186    /// Git commit hash (if available)
187    pub git_commit: Option<String>,
188}
189
190/// Regression analyzer for detecting performance issues
191#[derive(Debug, Clone)]
192pub struct RegressionAnalyzer {
193    /// Configuration
194    pub config: AnalyzerConfig,
195    /// Detected regressions
196    pub detected_regressions: Vec<RegressionReport>,
197}
198
199/// Configuration for regression analysis
200#[derive(Debug, Clone, Serialize, Deserialize)]
201pub struct AnalyzerConfig {
202    /// Minimum sample size for analysis
203    pub min_sample_size: usize,
204    /// Sensitivity for detecting changes (lower = more sensitive)
205    pub sensitivity: f64,
206    /// Use statistical hypothesis testing
207    pub use_hypothesis_testing: bool,
208}
209
210impl Default for AnalyzerConfig {
211    fn default() -> Self {
212        Self {
213            min_sample_size: 10,
214            sensitivity: 0.05, // 5% significance level
215            use_hypothesis_testing: true,
216        }
217    }
218}
219
220/// Report of a detected regression
221#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct RegressionReport {
223    /// Benchmark name
224    pub benchmark_name: String,
225    /// Severity of the regression
226    pub severity: RegressionSeverity,
227    /// Performance degradation percentage
228    pub degradation_percent: f64,
229    /// Current performance
230    pub current_performance: Duration,
231    /// Expected performance based on baseline
232    pub expected_performance: Duration,
233    /// Statistical confidence of detection
234    pub confidence: f64,
235    /// Additional details
236    pub details: String,
237    /// Detected at
238    pub detected_at: SystemTime,
239}
240
241/// Severity level of performance regression
242#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
243pub enum RegressionSeverity {
244    /// Minor regression (<5% degradation)
245    Minor,
246    /// Moderate regression (5-15% degradation)
247    Moderate,
248    /// Major regression (15-30% degradation)
249    Major,
250    /// Critical regression (>30% degradation)
251    Critical,
252}
253
254impl AdvancedBenchmarkRunner {
255    /// Create a new benchmark runner
256    pub fn new() -> Self {
257        Self {
258            config: BenchmarkConfig::default(),
259            history: BenchmarkHistory::new(100),
260            analyzer: RegressionAnalyzer {
261                config: AnalyzerConfig::default(),
262                detected_regressions: Vec::new(),
263            },
264            baselines: HashMap::new(),
265        }
266    }
267
268    /// Create a runner with custom configuration
269    pub fn with_config(config: BenchmarkConfig) -> Self {
270        Self {
271            config,
272            history: BenchmarkHistory::new(100),
273            analyzer: RegressionAnalyzer {
274                config: AnalyzerConfig::default(),
275                detected_regressions: Vec::new(),
276            },
277            baselines: HashMap::new(),
278        }
279    }
280
281    /// Run a benchmark and analyze results
282    pub fn run_benchmark<F>(&mut self, name: &str, mut benchmark_fn: F) -> Result<BenchmarkResult>
283    where
284        F: FnMut(),
285    {
286        // Warmup phase
287        for _ in 0..self.config.warmup_iterations {
288            benchmark_fn();
289        }
290
291        // Measurement phase
292        let mut durations = Vec::new();
293        for _ in 0..self.config.measurement_iterations {
294            let start = Instant::now();
295            benchmark_fn();
296            durations.push(start.elapsed());
297        }
298
299        // Remove outliers if enabled
300        if self.config.enable_outlier_detection {
301            durations = self.remove_outliers(durations);
302        }
303
304        // Calculate statistics
305        let mut sorted_durations = durations.clone();
306        sorted_durations.sort();
307
308        let median = sorted_durations[sorted_durations.len() / 2];
309        let mean = Duration::from_secs_f64(
310            durations.iter().map(|d| d.as_secs_f64()).sum::<f64>() / durations.len() as f64,
311        );
312
313        let variance = durations
314            .iter()
315            .map(|d| (d.as_secs_f64() - mean.as_secs_f64()).powi(2))
316            .sum::<f64>()
317            / durations.len() as f64;
318        let std_dev = variance.sqrt();
319
320        let throughput = 1.0 / mean.as_secs_f64(); // ops/sec
321
322        let result = BenchmarkResult {
323            name: name.to_string(),
324            timestamp: SystemTime::now(),
325            durations,
326            median_duration: median,
327            mean_duration: mean,
328            std_deviation: std_dev,
329            throughput,
330            memory_stats: None, // Could be populated with actual memory tracking
331        };
332
333        // Add to history
334        self.history.add_result(name.to_string(), result.clone());
335
336        // Check for regressions
337        self.check_for_regression(name, &result)?;
338
339        Ok(result)
340    }
341
342    /// Remove statistical outliers from measurements
343    fn remove_outliers(&self, mut durations: Vec<Duration>) -> Vec<Duration> {
344        if durations.len() < 10 {
345            return durations; // Not enough data for outlier detection
346        }
347
348        // Convert to f64 for calculations
349        let values: Vec<f64> = durations.iter().map(|d| d.as_secs_f64()).collect();
350
351        // Calculate IQR (Interquartile Range) method
352        let mut sorted_values = values.clone();
353        sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
354
355        let q1_idx = sorted_values.len() / 4;
356        let q3_idx = (sorted_values.len() * 3) / 4;
357
358        let q1 = sorted_values[q1_idx];
359        let q3 = sorted_values[q3_idx];
360        let iqr = q3 - q1;
361
362        let lower_bound = q1 - 1.5 * iqr;
363        let upper_bound = q3 + 1.5 * iqr;
364
365        // Filter outliers
366        durations.retain(|d| {
367            let val = d.as_secs_f64();
368            val >= lower_bound && val <= upper_bound
369        });
370
371        durations
372    }
373
374    /// Check for performance regression
375    fn check_for_regression(&mut self, name: &str, current: &BenchmarkResult) -> Result<()> {
376        // Get baseline if it exists
377        if let Some(baseline) = self.baselines.get(name) {
378            let degradation = (current.median_duration.as_secs_f64()
379                - baseline.baseline_duration.as_secs_f64())
380                / baseline.baseline_duration.as_secs_f64();
381
382            if degradation > self.config.max_degradation_threshold {
383                let severity = match degradation {
384                    d if d < 0.05 => RegressionSeverity::Minor,
385                    d if d < 0.15 => RegressionSeverity::Moderate,
386                    d if d < 0.30 => RegressionSeverity::Major,
387                    _ => RegressionSeverity::Critical,
388                };
389
390                let report = RegressionReport {
391                    benchmark_name: name.to_string(),
392                    severity,
393                    degradation_percent: degradation * 100.0,
394                    current_performance: current.median_duration,
395                    expected_performance: baseline.baseline_duration,
396                    confidence: self.config.confidence_level,
397                    details: format!(
398                        "Performance degraded by {:.2}% compared to baseline",
399                        degradation * 100.0
400                    ),
401                    detected_at: SystemTime::now(),
402                };
403
404                self.analyzer.detected_regressions.push(report);
405            }
406        }
407
408        Ok(())
409    }
410
411    /// Set a performance baseline
412    pub fn set_baseline(&mut self, name: String, duration: Duration) {
413        let baseline = PerformanceBaseline {
414            name: name.clone(),
415            baseline_duration: duration,
416            acceptable_variance: self.config.max_degradation_threshold,
417            established_at: SystemTime::now(),
418            git_commit: None,
419        };
420
421        self.baselines.insert(name, baseline);
422    }
423
424    /// Get all detected regressions
425    pub fn get_regressions(&self) -> &[RegressionReport] {
426        &self.analyzer.detected_regressions
427    }
428
429    /// Generate comprehensive benchmark report
430    pub fn generate_report(&self) -> BenchmarkReport {
431        let mut benchmark_summaries = HashMap::new();
432
433        for name in self.history.results.keys() {
434            if let Some(summary) = self.history.get_summary(name) {
435                benchmark_summaries.insert(name.clone(), summary);
436            }
437        }
438
439        BenchmarkReport {
440            total_benchmarks: self.history.results.len(),
441            regressions_detected: self.analyzer.detected_regressions.len(),
442            benchmark_summaries,
443            regressions: self.analyzer.detected_regressions.clone(),
444            generated_at: SystemTime::now(),
445        }
446    }
447}
448
449impl Default for AdvancedBenchmarkRunner {
450    fn default() -> Self {
451        Self::new()
452    }
453}
454
455/// Comprehensive benchmark report
456#[derive(Debug, Clone, Serialize, Deserialize)]
457pub struct BenchmarkReport {
458    /// Total number of benchmarks
459    pub total_benchmarks: usize,
460    /// Number of regressions detected
461    pub regressions_detected: usize,
462    /// Summary statistics for each benchmark
463    pub benchmark_summaries: HashMap<String, HistoricalSummary>,
464    /// All detected regressions
465    pub regressions: Vec<RegressionReport>,
466    /// When the report was generated
467    pub generated_at: SystemTime,
468}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh runner carries the default iteration counts.
    #[test]
    fn test_benchmark_runner_creation() {
        let config = AdvancedBenchmarkRunner::new().config;
        assert_eq!(config.warmup_iterations, 10);
        assert_eq!(config.measurement_iterations, 100);
    }

    /// A runner built from a custom config keeps that config.
    #[test]
    fn test_custom_config() {
        let runner = AdvancedBenchmarkRunner::with_config(BenchmarkConfig {
            warmup_iterations: 5,
            measurement_iterations: 50,
            confidence_level: 0.99,
            max_degradation_threshold: 0.05,
            enable_outlier_detection: false,
            sample_size: 30,
        });

        assert_eq!(runner.config.warmup_iterations, 5);
        assert_eq!(runner.config.measurement_iterations, 50);
    }

    /// Running a benchmark yields a named result with non-zero timing.
    #[test]
    fn test_simple_benchmark() {
        let mut runner = AdvancedBenchmarkRunner::new();

        // Simulate work
        let workload = || {
            let _sum: u64 = (0..1000).sum();
        };
        let outcome = runner
            .run_benchmark("test_benchmark", workload)
            .expect("expected valid value");

        assert_eq!(outcome.name, "test_benchmark");
        assert!(outcome.median_duration > Duration::ZERO);
        assert!(outcome.throughput > 0.0);
    }

    /// Setting a baseline stores it under the given name.
    #[test]
    fn test_baseline_setting() {
        let expected = Duration::from_millis(10);
        let mut runner = AdvancedBenchmarkRunner::new();

        runner.set_baseline("test".to_string(), Duration::from_millis(10));

        assert!(runner.baselines.contains_key("test"));
        let stored = runner.baselines.get("test").expect("key should exist");
        assert_eq!(stored.baseline_duration, expected);
    }

    /// A run far slower than its baseline is reported as a regression.
    #[test]
    fn test_regression_detection() {
        let mut runner = AdvancedBenchmarkRunner::new();

        // Fast baseline, then a workload that sleeps for twice as long.
        runner.set_baseline("test".to_string(), Duration::from_micros(100));
        let slow_workload = || std::thread::sleep(Duration::from_micros(200));
        let _outcome = runner
            .run_benchmark("test", slow_workload)
            .expect("expected valid value");

        assert!(!runner.get_regressions().is_empty());
    }

    /// Every run of the same benchmark adds one history entry.
    #[test]
    fn test_history_tracking() {
        let mut runner = AdvancedBenchmarkRunner::new();

        for _ in 0..2 {
            runner
                .run_benchmark("test", || {
                    let _x = 1 + 1;
                })
                .expect("expected valid value");
        }

        let recorded = runner
            .history
            .get_history("test")
            .expect("get_history should succeed");
        assert_eq!(recorded.len(), 2);
    }

    /// The summary counts one sample per run and has a non-zero mean.
    #[test]
    fn test_historical_summary() {
        let mut runner = AdvancedBenchmarkRunner::new();

        // black_box keeps the optimizer from deleting the loop, so each call
        // takes a measurable amount of time.
        let workload = || {
            let mut sum = 0u64;
            for i in 0..100 {
                sum = std::hint::black_box(sum.wrapping_add(i));
            }
            std::hint::black_box(sum);
        };

        for _ in 0..5 {
            runner
                .run_benchmark("test", workload)
                .expect("expected valid value");
        }

        let summary = runner
            .history
            .get_summary("test")
            .expect("get_summary should succeed");
        assert_eq!(summary.sample_count, 5);
        assert!(summary.mean_duration > Duration::ZERO);
    }

    /// The report counts each distinct benchmark name once.
    #[test]
    fn test_report_generation() {
        let mut runner = AdvancedBenchmarkRunner::new();

        runner
            .run_benchmark("bench1", || {
                let x = std::hint::black_box(1 + 1);
                std::hint::black_box(x);
            })
            .expect("expected valid value");
        runner
            .run_benchmark("bench2", || {
                let y = std::hint::black_box(2 + 2);
                std::hint::black_box(y);
            })
            .expect("expected valid value");

        assert_eq!(runner.generate_report().total_benchmarks, 2);
    }

    /// A single extreme timing among ten is filtered out.
    #[test]
    fn test_outlier_removal() {
        let runner = AdvancedBenchmarkRunner::new();

        // The 100 ms entry is the outlier.
        let millis: [u64; 10] = [10, 11, 10, 100, 10, 11, 10, 11, 10, 11];
        let durations: Vec<Duration> = millis
            .iter()
            .map(|&ms| Duration::from_millis(ms))
            .collect();

        let kept = runner.remove_outliers(durations);
        assert!(kept.len() < 10); // Outlier should be removed
    }

    /// Severity variants order from least to most severe.
    #[test]
    fn test_regression_severity() {
        assert!(RegressionSeverity::Minor < RegressionSeverity::Moderate);
        assert!(RegressionSeverity::Moderate < RegressionSeverity::Major);
        assert!(RegressionSeverity::Major < RegressionSeverity::Critical);
    }
}
sklears_core/advanced_benchmarking.rs

sklears_core/
advanced_benchmarking.rs