sklears_utils/
performance_regression.rs

1//! Performance regression testing utilities
2//!
3//! This module provides utilities for tracking performance metrics over time
4//! and detecting performance regressions automatically.
5
6use crate::{UtilsError, UtilsResult};
7use std::collections::HashMap;
8use std::fs::{File, OpenOptions};
9use std::io::{BufRead, BufReader, BufWriter, Write};
10use std::path::{Path, PathBuf};
11use std::time::{Duration, Instant};
12
/// Performance regression testing framework
///
/// Records timing samples per test name, persists them to a CSV-style
/// baseline file, and compares fresh measurements against that baseline
/// to flag regressions.
pub struct PerformanceRegressionTester {
    // Path of the CSV-style baseline file (one test per line).
    baseline_file: PathBuf,
    // Samples recorded in this session, keyed by test name; values are
    // durations in milliseconds.
    current_results: HashMap<String, Vec<f64>>,
    // Thresholds used when deciding whether a change counts as a regression.
    thresholds: RegressionThresholds,
}
19
/// Thresholds for regression detection
#[derive(Clone, Debug)]
pub struct RegressionThresholds {
    /// Maximum allowed percentage increase in execution time
    pub time_increase_threshold: f64,
    /// Maximum allowed percentage increase in memory usage
    pub memory_increase_threshold: f64,
    /// Minimum number of samples required for statistical significance
    pub min_samples: usize,
    /// Confidence level for regression detection (e.g., 0.95 for 95%)
    pub confidence_level: f64,
}

impl Default for RegressionThresholds {
    /// Conservative defaults: flag runs more than 10% slower (or using more
    /// than 15% extra memory), requiring at least 10 samples and a 95%
    /// confidence level.
    fn default() -> Self {
        RegressionThresholds {
            confidence_level: 0.95,
            min_samples: 10,
            memory_increase_threshold: 15.0,
            time_increase_threshold: 10.0,
        }
    }
}
43
/// Result of a regression test
#[derive(Debug, Clone)]
pub struct RegressionTestResult {
    /// Name of the test this result refers to.
    pub test_name: String,
    /// Mean of the baseline samples, in milliseconds.
    pub baseline_mean: f64,
    /// Mean of the current samples, in milliseconds.
    pub current_mean: f64,
    /// Relative change of `current_mean` versus `baseline_mean`, in percent
    /// (positive means slower).
    pub percentage_change: f64,
    /// True when `percentage_change` exceeds the configured time threshold.
    pub is_regression: bool,
    /// Confidence interval around `current_mean` at the configured level.
    pub confidence_interval: (f64, f64),
    /// Approximate two-sided p-value from Welch's t-test; `None` when it
    /// could not be computed (too few samples or zero variance).
    pub p_value: Option<f64>,
}
55
56impl PerformanceRegressionTester {
57    /// Create a new performance regression tester
58    pub fn new<P: AsRef<Path>>(baseline_file: P) -> Self {
59        Self {
60            baseline_file: baseline_file.as_ref().to_path_buf(),
61            current_results: HashMap::new(),
62            thresholds: RegressionThresholds::default(),
63        }
64    }
65
66    /// Set custom regression thresholds
67    pub fn with_thresholds(mut self, thresholds: RegressionThresholds) -> Self {
68        self.thresholds = thresholds;
69        self
70    }
71
72    /// Record a performance measurement
73    pub fn record_measurement(&mut self, test_name: &str, duration: Duration) {
74        let duration_ms = duration.as_secs_f64() * 1000.0;
75        self.current_results
76            .entry(test_name.to_string())
77            .or_default()
78            .push(duration_ms);
79    }
80
81    /// Benchmark a function and record its performance
82    pub fn benchmark_function<F, R>(
83        &mut self,
84        test_name: &str,
85        iterations: usize,
86        mut func: F,
87    ) -> UtilsResult<R>
88    where
89        F: FnMut() -> R,
90    {
91        let mut result = None;
92        let mut measurements = Vec::with_capacity(iterations);
93
94        for _ in 0..iterations {
95            let start = Instant::now();
96            let r = func();
97            let duration = start.elapsed();
98
99            measurements.push(duration.as_secs_f64() * 1000.0);
100            if result.is_none() {
101                result = Some(r);
102            }
103        }
104
105        self.current_results
106            .insert(test_name.to_string(), measurements);
107
108        result.ok_or_else(|| UtilsError::InvalidParameter("No measurements recorded".to_string()))
109    }
110
111    /// Load baseline measurements from file
112    pub fn load_baseline(&self) -> UtilsResult<HashMap<String, Vec<f64>>> {
113        if !self.baseline_file.exists() {
114            return Ok(HashMap::new());
115        }
116
117        let file = File::open(&self.baseline_file).map_err(|e| {
118            UtilsError::InvalidParameter(format!("Failed to open baseline file: {e}"))
119        })?;
120
121        let reader = BufReader::new(file);
122        let mut baseline = HashMap::new();
123
124        for line in reader.lines() {
125            let line = line
126                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read line: {e}")))?;
127
128            if line.trim().is_empty() || line.starts_with('#') {
129                continue;
130            }
131
132            let parts: Vec<&str> = line.split(',').collect();
133            if parts.len() >= 2 {
134                let test_name = parts[0].trim().to_string();
135                let measurements: Result<Vec<f64>, _> =
136                    parts[1..].iter().map(|s| s.trim().parse::<f64>()).collect();
137
138                if let Ok(measurements) = measurements {
139                    baseline.insert(test_name, measurements);
140                }
141            }
142        }
143
144        Ok(baseline)
145    }
146
147    /// Save current measurements as new baseline
148    pub fn save_baseline(&self) -> UtilsResult<()> {
149        let file = OpenOptions::new()
150            .write(true)
151            .create(true)
152            .truncate(true)
153            .open(&self.baseline_file)
154            .map_err(|e| {
155                UtilsError::InvalidParameter(format!("Failed to create baseline file: {e}"))
156            })?;
157
158        let mut writer = BufWriter::new(file);
159
160        writeln!(writer, "# Performance baseline measurements")
161            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write header: {e}")))?;
162
163        for (test_name, measurements) in &self.current_results {
164            write!(writer, "{test_name}").map_err(|e| {
165                UtilsError::InvalidParameter(format!("Failed to write test name: {e}"))
166            })?;
167
168            for measurement in measurements {
169                write!(writer, ",{measurement}").map_err(|e| {
170                    UtilsError::InvalidParameter(format!("Failed to write measurement: {e}"))
171                })?;
172            }
173
174            writeln!(writer).map_err(|e| {
175                UtilsError::InvalidParameter(format!("Failed to write newline: {e}"))
176            })?;
177        }
178
179        writer
180            .flush()
181            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to flush writer: {e}")))?;
182
183        Ok(())
184    }
185
186    /// Run regression tests against baseline
187    pub fn run_regression_tests(&self) -> UtilsResult<Vec<RegressionTestResult>> {
188        let baseline = self.load_baseline()?;
189        let mut results = Vec::new();
190
191        for (test_name, current_measurements) in &self.current_results {
192            if let Some(baseline_measurements) = baseline.get(test_name) {
193                let result = self.analyze_regression(
194                    test_name,
195                    baseline_measurements,
196                    current_measurements,
197                )?;
198                results.push(result);
199            }
200        }
201
202        Ok(results)
203    }
204
205    /// Analyze regression for a specific test
206    fn analyze_regression(
207        &self,
208        test_name: &str,
209        baseline: &[f64],
210        current: &[f64],
211    ) -> UtilsResult<RegressionTestResult> {
212        if baseline.len() < self.thresholds.min_samples
213            || current.len() < self.thresholds.min_samples
214        {
215            return Err(UtilsError::InsufficientData {
216                min: self.thresholds.min_samples,
217                actual: baseline.len().min(current.len()),
218            });
219        }
220
221        let baseline_mean = baseline.iter().sum::<f64>() / baseline.len() as f64;
222        let current_mean = current.iter().sum::<f64>() / current.len() as f64;
223
224        let percentage_change = ((current_mean - baseline_mean) / baseline_mean) * 100.0;
225
226        // Calculate confidence interval for current measurements
227        let current_std = self.calculate_std_dev(current, current_mean);
228        let current_sem = current_std / (current.len() as f64).sqrt();
229        let t_value = self.get_t_value(current.len() - 1, self.thresholds.confidence_level);
230
231        let margin_of_error = t_value * current_sem;
232        let confidence_interval = (
233            current_mean - margin_of_error,
234            current_mean + margin_of_error,
235        );
236
237        // Perform t-test for statistical significance
238        let p_value = self.welch_t_test(baseline, current);
239
240        // Determine if this is a regression
241        let is_regression = percentage_change > self.thresholds.time_increase_threshold;
242
243        Ok(RegressionTestResult {
244            test_name: test_name.to_string(),
245            baseline_mean,
246            current_mean,
247            percentage_change,
248            is_regression,
249            confidence_interval,
250            p_value,
251        })
252    }
253
254    /// Calculate standard deviation
255    fn calculate_std_dev(&self, data: &[f64], mean: f64) -> f64 {
256        let variance =
257            data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64;
258        variance.sqrt()
259    }
260
261    /// Get t-value for given degrees of freedom and confidence level
262    fn get_t_value(&self, df: usize, confidence_level: f64) -> f64 {
263        // Simplified t-value lookup for common confidence levels
264        // In a real implementation, you'd use a proper t-distribution
265        match (df, (confidence_level * 1000.0) as usize) {
266            (_, 950) => 1.96, // 95% confidence, approximation
267            (_, 990) => 2.58, // 99% confidence, approximation
268            (_, 995) => 2.81, // 99.5% confidence, approximation
269            _ => 2.0,         // Default conservative estimate
270        }
271    }
272
273    /// Perform Welch's t-test
274    fn welch_t_test(&self, sample1: &[f64], sample2: &[f64]) -> Option<f64> {
275        if sample1.len() < 2 || sample2.len() < 2 {
276            return None;
277        }
278
279        let mean1 = sample1.iter().sum::<f64>() / sample1.len() as f64;
280        let mean2 = sample2.iter().sum::<f64>() / sample2.len() as f64;
281
282        let var1 =
283            sample1.iter().map(|x| (x - mean1).powi(2)).sum::<f64>() / (sample1.len() - 1) as f64;
284
285        let var2 =
286            sample2.iter().map(|x| (x - mean2).powi(2)).sum::<f64>() / (sample2.len() - 1) as f64;
287
288        let se1 = var1 / sample1.len() as f64;
289        let se2 = var2 / sample2.len() as f64;
290
291        let se_diff = (se1 + se2).sqrt();
292
293        if se_diff == 0.0 {
294            return None;
295        }
296
297        let t_stat = (mean1 - mean2) / se_diff;
298
299        // Simplified p-value calculation (in practice, use proper statistical library)
300        Some((2.0 * (1.0 - (t_stat.abs() / 3.0).min(1.0))).max(0.0))
301    }
302
303    /// Generate a performance report
304    pub fn generate_report(&self, results: &[RegressionTestResult]) -> String {
305        let mut report = String::new();
306
307        report.push_str("# Performance Regression Test Report\n\n");
308
309        let regression_count = results.iter().filter(|r| r.is_regression).count();
310        let total_tests = results.len();
311        report.push_str(&format!("Total tests: {total_tests}\n"));
312        report.push_str(&format!("Regressions detected: {regression_count}\n\n"));
313
314        if regression_count > 0 {
315            report.push_str("## ⚠️ Performance Regressions\n\n");
316            for result in results.iter().filter(|r| r.is_regression) {
317                report.push_str(&format!(
318                    "**{}**: {:.2}% slower ({:.2}ms → {:.2}ms)\n",
319                    result.test_name,
320                    result.percentage_change,
321                    result.baseline_mean,
322                    result.current_mean
323                ));
324            }
325            report.push('\n');
326        }
327
328        report.push_str("## 📊 All Test Results\n\n");
329        report.push_str("| Test Name | Baseline (ms) | Current (ms) | Change (%) | Status |\n");
330        report.push_str("|-----------|---------------|--------------|------------|---------|\n");
331
332        for result in results {
333            let status = if result.is_regression {
334                "🔴 REGRESSION"
335            } else {
336                "✅ OK"
337            };
338            report.push_str(&format!(
339                "| {} | {:.2} | {:.2} | {:+.2} | {} |\n",
340                result.test_name,
341                result.baseline_mean,
342                result.current_mean,
343                result.percentage_change,
344                status
345            ));
346        }
347
348        report
349    }
350
351    /// Clear current measurements
352    pub fn clear_measurements(&mut self) {
353        self.current_results.clear();
354    }
355}
356
/// Macro to easily benchmark functions
///
/// Expands to `$tester.benchmark_function($name, $iterations, || $func)?`,
/// wrapping `$func` in a closure. Note the trailing `?`: the macro may only
/// be invoked inside a function whose return type can absorb the
/// `UtilsResult` error.
#[macro_export]
macro_rules! benchmark_regression {
    ($tester:expr, $name:expr, $iterations:expr, $func:expr) => {
        $tester.benchmark_function($name, $iterations, || $func)?
    };
}
364
// NOTE: the previous `#[allow(non_snake_case)]` was removed — every
// identifier in this module is already snake_case, so the allow was dead.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Round-trips measurements through save_baseline/load_baseline.
    #[test]
    fn test_regression_tester_basic() {
        let temp_file = NamedTempFile::new().unwrap();
        let mut tester = PerformanceRegressionTester::new(temp_file.path());

        // Record some measurements
        tester.record_measurement("fast_function", Duration::from_millis(10));
        tester.record_measurement("fast_function", Duration::from_millis(12));
        tester.record_measurement("slow_function", Duration::from_millis(100));

        // Save as baseline
        tester.save_baseline().unwrap();

        // Clear and add new measurements (simulating regression)
        tester.clear_measurements();
        tester.record_measurement("fast_function", Duration::from_millis(15)); // slower
        tester.record_measurement("fast_function", Duration::from_millis(16));

        // Load baseline and compare
        let baseline = tester.load_baseline().unwrap();
        assert!(baseline.contains_key("fast_function"));
        assert!(baseline.contains_key("slow_function"));
    }

    /// benchmark_function returns the computation's value and records one
    /// sample per iteration.
    #[test]
    fn test_benchmark_function() {
        let temp_file = NamedTempFile::new().unwrap();
        let mut tester = PerformanceRegressionTester::new(temp_file.path());

        let result = tester
            .benchmark_function("test_computation", 5, || {
                // Simulate some work
                (0..1000).fold(0, |acc, x| acc + x)
            })
            .unwrap();

        assert_eq!(result, 499500); // Expected sum
        assert!(tester.current_results.contains_key("test_computation"));
        assert_eq!(tester.current_results["test_computation"].len(), 5);
    }

    /// A 20% slowdown against the baseline trips a 5% threshold.
    #[test]
    fn test_regression_detection() {
        let temp_file = NamedTempFile::new().unwrap();
        let mut tester = PerformanceRegressionTester::new(temp_file.path()).with_thresholds(
            RegressionThresholds {
                time_increase_threshold: 5.0, // 5% threshold
                memory_increase_threshold: 10.0,
                min_samples: 3,
                confidence_level: 0.95,
            },
        );

        // Create baseline with consistent measurements
        for _ in 0..10 {
            tester.record_measurement("stable_function", Duration::from_millis(100));
        }
        tester.save_baseline().unwrap();

        // Clear and add regressed measurements
        tester.clear_measurements();
        for _ in 0..10 {
            tester.record_measurement("stable_function", Duration::from_millis(120));
            // 20% slower
        }

        let results = tester.run_regression_tests().unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_regression);
        assert!(results[0].percentage_change > 5.0);
    }

    /// The Markdown report includes the summary counts and status markers.
    #[test]
    fn test_report_generation() {
        let temp_file = NamedTempFile::new().unwrap();
        let tester = PerformanceRegressionTester::new(temp_file.path());

        let results = vec![
            RegressionTestResult {
                test_name: "fast_function".to_string(),
                baseline_mean: 10.0,
                current_mean: 12.0,
                percentage_change: 20.0,
                is_regression: true,
                confidence_interval: (11.0, 13.0),
                p_value: Some(0.05),
            },
            RegressionTestResult {
                test_name: "stable_function".to_string(),
                baseline_mean: 50.0,
                current_mean: 48.0,
                percentage_change: -4.0,
                is_regression: false,
                confidence_interval: (47.0, 49.0),
                p_value: Some(0.3),
            },
        ];

        let report = tester.generate_report(&results);
        assert!(report.contains("Performance Regression Test Report"));
        assert!(report.contains("Regressions detected: 1"));
        assert!(report.contains("fast_function"));
        assert!(report.contains("🔴 REGRESSION"));
        assert!(report.contains("✅ OK"));
    }
}
476}