tensorlogic_infer/perfregression.rs

//! Performance regression testing framework.
//!
//! This module provides infrastructure for tracking performance over time
//! and detecting regressions. Backend developers can use this to ensure
//! optimizations don't regress and to track performance improvements.
//!
//! # Example
//!
//! ```ignore
//! use tensorlogic_infer::perfregression::{PerfRegression, BenchmarkConfig};
//!
//! let mut perf = PerfRegression::new("my_backend");
//!
//! // Run benchmarks
//! perf.benchmark("matmul_1000x1000", || {
//!     executor.matmul(&a, &b)
//! })?;
//!
//! // Save baseline
//! perf.save_baseline("baselines/")?;
//!
//! // Later, compare against baseline
//! let report = perf.compare_to_baseline("baselines/")?;
//! if report.has_regressions() {
//!     eprintln!("Performance regressions detected!");
//!     report.print_regressions();
//! }
//! ```

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::time::Instant;

/// Configuration for performance benchmarks
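///
/// # Example
///
/// A minimal sketch of tuning a config with the builder methods below;
/// the specific values are illustrative, not recommendations.
///
/// ```ignore
/// let config = BenchmarkConfig::quick()
///     .with_warmup(5)
///     .with_measurements(50)
///     .with_regression_threshold(10.0);
/// assert_eq!(config.measurement_iterations, 50);
/// ```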
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Number of warmup iterations
    pub warmup_iterations: usize,
    /// Number of measurement iterations
    pub measurement_iterations: usize,
    /// Regression threshold (as percentage, e.g., 10.0 means 10% slower is a regression)
    pub regression_threshold_percent: f64,
    /// Improvement threshold (as percentage)
    pub improvement_threshold_percent: f64,
    /// Minimum execution time to consider (filter out noise)
    pub min_time_ns: u64,
    /// Whether to save detailed timing distributions
    pub save_distribution: bool,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        BenchmarkConfig {
            warmup_iterations: 10,
            measurement_iterations: 100,
            regression_threshold_percent: 5.0,
            improvement_threshold_percent: 5.0,
            min_time_ns: 1000, // 1 microsecond
            save_distribution: false,
        }
    }
}

impl BenchmarkConfig {
    /// Create a quick config with fewer iterations
    pub fn quick() -> Self {
        BenchmarkConfig {
            warmup_iterations: 3,
            measurement_iterations: 20,
            ..Default::default()
        }
    }

    /// Create a thorough config with more iterations
    pub fn thorough() -> Self {
        BenchmarkConfig {
            warmup_iterations: 20,
            measurement_iterations: 200,
            save_distribution: true,
            ..Default::default()
        }
    }

    /// Set warmup iterations
    pub fn with_warmup(mut self, iterations: usize) -> Self {
        self.warmup_iterations = iterations;
        self
    }

    /// Set measurement iterations
    pub fn with_measurements(mut self, iterations: usize) -> Self {
        self.measurement_iterations = iterations;
        self
    }

    /// Set regression threshold
    pub fn with_regression_threshold(mut self, percent: f64) -> Self {
        self.regression_threshold_percent = percent;
        self
    }
}

/// Statistics for a single benchmark
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkStats {
    /// Benchmark name
    pub name: String,
    /// Number of samples
    pub samples: usize,
    /// Mean execution time (nanoseconds)
    pub mean_ns: f64,
    /// Median execution time
    pub median_ns: f64,
    /// Standard deviation
    pub std_dev_ns: f64,
    /// Minimum execution time
    pub min_ns: u64,
    /// Maximum execution time
    pub max_ns: u64,
    /// Timestamp when benchmark was run
    pub timestamp: String,
    /// Optional: full distribution of timings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub distribution: Option<Vec<u64>>,
}

impl BenchmarkStats {
    /// Create from a list of timing samples
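    ///
    /// A small sketch (the nanosecond timings are illustrative):
    ///
    /// ```ignore
    /// let stats = BenchmarkStats::from_samples("matmul".to_string(), vec![100, 110, 105]);
    /// assert_eq!(stats.samples, 3);
    /// assert_eq!(stats.min_ns, 100);
    /// println!("mean: {}", stats.format_mean());
    /// ```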
    pub fn from_samples(name: String, samples: Vec<u64>) -> Self {
        let n = samples.len() as f64;
        let mean = samples.iter().sum::<u64>() as f64 / n;

        let mut sorted = samples.clone();
        sorted.sort_unstable();
        let median = sorted[sorted.len() / 2] as f64;

        let variance = samples
            .iter()
            .map(|&x| {
                let diff = x as f64 - mean;
                diff * diff
            })
            .sum::<f64>()
            / n;
        let std_dev = variance.sqrt();

        let min = sorted[0];
        let max = sorted[sorted.len() - 1];

        BenchmarkStats {
            name,
            samples: samples.len(),
            mean_ns: mean,
            median_ns: median,
            std_dev_ns: std_dev,
            min_ns: min,
            max_ns: max,
            timestamp: chrono::Utc::now().to_rfc3339(),
            distribution: None,
        }
    }

    /// Calculate coefficient of variation (CV)
    pub fn coefficient_of_variation(&self) -> f64 {
        self.std_dev_ns / self.mean_ns
    }

    /// Check if measurements are stable (low CV)
    pub fn is_stable(&self, max_cv: f64) -> bool {
        self.coefficient_of_variation() < max_cv
    }

    /// Format duration in human-readable form
    pub fn format_mean(&self) -> String {
        format_duration_ns(self.mean_ns as u64)
    }

    /// Format median duration
    pub fn format_median(&self) -> String {
        format_duration_ns(self.median_ns as u64)
    }

    /// Calculate percentile (0.0 to 100.0)
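    ///
    /// Returns `None` unless `distribution` has been populated (see
    /// `BenchmarkConfig::save_distribution`) and `p` is within range. A sketch:
    ///
    /// ```ignore
    /// let mut stats = BenchmarkStats::from_samples("op".to_string(), vec![10, 20, 30, 40]);
    /// stats.distribution = Some(vec![10, 20, 30, 40]);
    /// assert!(stats.percentile(95.0).is_some());
    /// assert_eq!(stats.percentile(150.0), None); // out of range
    /// ```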
    pub fn percentile(&self, p: f64) -> Option<f64> {
        self.distribution.as_ref().and_then(|dist| {
            if dist.is_empty() || !(0.0..=100.0).contains(&p) {
                return None;
            }

            let mut sorted = dist.clone();
            sorted.sort_unstable();

            let index = (p / 100.0 * (sorted.len() - 1) as f64).round() as usize;
            Some(sorted[index] as f64)
        })
    }

    /// Get P50 (median) from distribution
    pub fn p50(&self) -> Option<f64> {
        self.percentile(50.0)
    }

    /// Get P95 (95th percentile)
    pub fn p95(&self) -> Option<f64> {
        self.percentile(95.0)
    }

    /// Get P99 (99th percentile)
    pub fn p99(&self) -> Option<f64> {
        self.percentile(99.0)
    }

    /// Calculate 95% confidence interval for the mean
    pub fn confidence_interval_95(&self) -> (f64, f64) {
        // Using t-distribution approximation for 95% CI
        // t ≈ 1.96 for large samples (normal approximation)
        let margin = 1.96 * (self.std_dev_ns / (self.samples as f64).sqrt());
        (self.mean_ns - margin, self.mean_ns + margin)
    }

    /// Detect outliers using IQR method
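    ///
    /// A sample `x` is flagged as an outlier when `x < Q1 - 1.5 * IQR` or
    /// `x > Q3 + 1.5 * IQR`, where `IQR = Q3 - Q1`. Returns `None` if no
    /// distribution was saved.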
    pub fn detect_outliers(&self) -> Option<Vec<u64>> {
        self.distribution.as_ref().map(|dist| {
            let mut sorted = dist.clone();
            sorted.sort_unstable();

            let q1_idx = sorted.len() / 4;
            let q3_idx = 3 * sorted.len() / 4;
            let q1 = sorted[q1_idx] as f64;
            let q3 = sorted[q3_idx] as f64;
            let iqr = q3 - q1;

            let lower_bound = q1 - 1.5 * iqr;
            let upper_bound = q3 + 1.5 * iqr;

            dist.iter()
                .filter(|&&x| {
                    let val = x as f64;
                    val < lower_bound || val > upper_bound
                })
                .copied()
                .collect()
        })
    }

    /// Create a new BenchmarkStats with outliers removed
    pub fn without_outliers(&self) -> Option<Self> {
        self.distribution.as_ref().map(|dist| {
            let mut sorted = dist.clone();
            sorted.sort_unstable();

            let q1_idx = sorted.len() / 4;
            let q3_idx = 3 * sorted.len() / 4;
            let q1 = sorted[q1_idx] as f64;
            let q3 = sorted[q3_idx] as f64;
            let iqr = q3 - q1;

            let lower_bound = q1 - 1.5 * iqr;
            let upper_bound = q3 + 1.5 * iqr;

            let filtered: Vec<u64> = dist
                .iter()
                .filter(|&&x| {
                    let val = x as f64;
                    val >= lower_bound && val <= upper_bound
                })
                .copied()
                .collect();

            if filtered.is_empty() {
                // If all data is outliers, return original
                return self.clone();
            }

            BenchmarkStats::from_samples(self.name.clone(), filtered)
        })
    }
}

/// Comparison between current and baseline benchmarks
#[derive(Debug, Clone)]
pub struct BenchmarkComparison {
    pub name: String,
    pub current: BenchmarkStats,
    pub baseline: BenchmarkStats,
    pub change_percent: f64,
    pub is_regression: bool,
    pub is_improvement: bool,
    /// Statistical significance (p-value from Mann-Whitney U test, if distributions available)
    pub p_value: Option<f64>,
    /// Effect size (Cohen's d)
    pub effect_size: f64,
    /// Whether the change is statistically significant (p < 0.05)
    pub is_significant: bool,
}

impl BenchmarkComparison {
    /// Create a comparison
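    ///
    /// A sketch of a direct comparison with a 5% threshold in each direction
    /// (the sample timings are illustrative):
    ///
    /// ```ignore
    /// let baseline = BenchmarkStats::from_samples("op".to_string(), vec![100, 100, 100]);
    /// let current = BenchmarkStats::from_samples("op".to_string(), vec![120, 120, 120]);
    /// let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);
    /// assert!(comp.is_regression); // ~20% slower than baseline
    /// println!("{}", comp.summary());
    /// ```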
    pub fn new(
        current: BenchmarkStats,
        baseline: BenchmarkStats,
        regression_threshold: f64,
        improvement_threshold: f64,
    ) -> Self {
        let change_percent = ((current.mean_ns - baseline.mean_ns) / baseline.mean_ns) * 100.0;

        // Calculate effect size (Cohen's d)
        let pooled_std = ((current.std_dev_ns.powi(2) + baseline.std_dev_ns.powi(2)) / 2.0).sqrt();
        let effect_size = if pooled_std > 0.0 {
            (current.mean_ns - baseline.mean_ns) / pooled_std
        } else {
            0.0
        };

        // Perform Mann-Whitney U test if distributions are available
        let p_value = match (&current.distribution, &baseline.distribution) {
            (Some(curr_dist), Some(base_dist)) => mann_whitney_u_test(curr_dist, base_dist),
            _ => None,
        };

        let is_significant = p_value.map(|p| p < 0.05).unwrap_or(false);

        BenchmarkComparison {
            name: current.name.clone(),
            is_regression: change_percent > regression_threshold,
            is_improvement: change_percent < -improvement_threshold,
            current,
            baseline,
            change_percent,
            p_value,
            effect_size,
            is_significant,
        }
    }

    /// Get effect size interpretation
    pub fn effect_size_interpretation(&self) -> &str {
        let abs_d = self.effect_size.abs();
        if abs_d < 0.2 {
            "negligible"
        } else if abs_d < 0.5 {
            "small"
        } else if abs_d < 0.8 {
            "medium"
        } else {
            "large"
        }
    }

    /// Get status symbol
    pub fn status_symbol(&self) -> &str {
        if self.is_regression {
            "⚠️"
        } else if self.is_improvement {
            "✨"
        } else {
            "✓"
        }
    }

    /// Summary line
    pub fn summary(&self) -> String {
        format!(
            "{} {}: {} -> {} ({:+.2}%)",
            self.status_symbol(),
            self.name,
            self.baseline.format_mean(),
            self.current.format_mean(),
            self.change_percent
        )
    }
}

/// Collection of benchmark results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkBaseline {
    /// Backend/system identifier
    pub backend_name: String,
    /// When baseline was created
    pub created_at: String,
    /// Benchmarks in this baseline
    pub benchmarks: HashMap<String, BenchmarkStats>,
    /// Metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,
}

impl BenchmarkBaseline {
    /// Create a new baseline
    pub fn new(backend_name: String) -> Self {
        BenchmarkBaseline {
            backend_name,
            created_at: chrono::Utc::now().to_rfc3339(),
            benchmarks: HashMap::new(),
            metadata: None,
        }
    }

    /// Add a benchmark result
    pub fn add(&mut self, stats: BenchmarkStats) {
        self.benchmarks.insert(stats.name.clone(), stats);
    }

    /// Save to JSON file
    pub fn save<P: AsRef<Path>>(&self, path: P) -> std::io::Result<()> {
        let json = serde_json::to_string_pretty(self)?;
        fs::write(path, json)?;
        Ok(())
    }

    /// Load from JSON file
    pub fn load<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
        let json = fs::read_to_string(path)?;
        let baseline = serde_json::from_str(&json)?;
        Ok(baseline)
    }
}

/// Performance regression testing framework
pub struct PerfRegression {
    backend_name: String,
    config: BenchmarkConfig,
    current_results: HashMap<String, BenchmarkStats>,
}

impl PerfRegression {
    /// Create a new performance regression tester
    pub fn new(backend_name: impl Into<String>) -> Self {
        PerfRegression {
            backend_name: backend_name.into(),
            config: BenchmarkConfig::default(),
            current_results: HashMap::new(),
        }
    }

    /// Create with custom config
    pub fn with_config(backend_name: impl Into<String>, config: BenchmarkConfig) -> Self {
        PerfRegression {
            backend_name: backend_name.into(),
            config,
            current_results: HashMap::new(),
        }
    }

    /// Run a benchmark
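    ///
    /// The closure runs `warmup_iterations` times untimed, then
    /// `measurement_iterations` times with timing. A minimal sketch:
    ///
    /// ```ignore
    /// let mut perf = PerfRegression::with_config("my_backend", BenchmarkConfig::quick());
    /// let stats = perf.benchmark("vec_sum", || {
    ///     (0..10_000u64).sum::<u64>()
    /// })?;
    /// println!("{}: mean {}", stats.name, stats.format_mean());
    /// ```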
    pub fn benchmark<F, R>(
        &mut self,
        name: impl Into<String>,
        mut f: F,
    ) -> Result<BenchmarkStats, String>
    where
        F: FnMut() -> R,
    {
        let name = name.into();

        // Warmup
        for _ in 0..self.config.warmup_iterations {
            let _ = f();
        }

        // Measurements
        let mut samples = Vec::with_capacity(self.config.measurement_iterations);
        for _ in 0..self.config.measurement_iterations {
            let start = Instant::now();
            let _ = f();
            let duration = start.elapsed();

            let ns = duration.as_nanos() as u64;
            if ns >= self.config.min_time_ns {
                samples.push(ns);
            }
        }

        if samples.is_empty() {
            return Err(format!(
                "No valid samples for benchmark '{}' (all below min_time_ns threshold)",
                name
            ));
        }

        let mut stats = BenchmarkStats::from_samples(name.clone(), samples.clone());
        if self.config.save_distribution {
            stats.distribution = Some(samples);
        }

        self.current_results.insert(name, stats.clone());
        Ok(stats)
    }

    /// Get current results
    pub fn results(&self) -> &HashMap<String, BenchmarkStats> {
        &self.current_results
    }

    /// Save current results as baseline
    pub fn save_baseline<P: AsRef<Path>>(&self, dir: P) -> std::io::Result<()> {
        let dir = dir.as_ref();
        fs::create_dir_all(dir)?;

        let filename = format!("{}_baseline.json", self.backend_name);
        let path = dir.join(filename);

        let mut baseline = BenchmarkBaseline::new(self.backend_name.clone());
        for stats in self.current_results.values() {
            baseline.add(stats.clone());
        }

        baseline.save(path)
    }

    /// Compare current results to baseline
    pub fn compare_to_baseline<P: AsRef<Path>>(&self, dir: P) -> std::io::Result<RegressionReport> {
        let dir = dir.as_ref();
        let filename = format!("{}_baseline.json", self.backend_name);
        let path = dir.join(filename);

        let baseline = BenchmarkBaseline::load(path)?;

        let mut comparisons = Vec::new();
        for (name, current_stats) in &self.current_results {
            if let Some(baseline_stats) = baseline.benchmarks.get(name) {
                let comparison = BenchmarkComparison::new(
                    current_stats.clone(),
                    baseline_stats.clone(),
                    self.config.regression_threshold_percent,
                    self.config.improvement_threshold_percent,
                );
                comparisons.push(comparison);
            }
        }

        Ok(RegressionReport {
            backend_name: self.backend_name.clone(),
            comparisons,
            regression_threshold: self.config.regression_threshold_percent,
        })
    }

    /// Clear current results
    pub fn clear(&mut self) {
        self.current_results.clear();
    }
}

/// Report of regression testing results
#[derive(Debug)]
pub struct RegressionReport {
    pub backend_name: String,
    pub comparisons: Vec<BenchmarkComparison>,
    pub regression_threshold: f64,
}

impl RegressionReport {
    /// Check if there are any regressions
    pub fn has_regressions(&self) -> bool {
        self.comparisons.iter().any(|c| c.is_regression)
    }

    /// Get all regressions
    pub fn regressions(&self) -> Vec<&BenchmarkComparison> {
        self.comparisons
            .iter()
            .filter(|c| c.is_regression)
            .collect()
    }

    /// Get all improvements
    pub fn improvements(&self) -> Vec<&BenchmarkComparison> {
        self.comparisons
            .iter()
            .filter(|c| c.is_improvement)
            .collect()
    }

    /// Get unchanged benchmarks
    pub fn unchanged(&self) -> Vec<&BenchmarkComparison> {
        self.comparisons
            .iter()
            .filter(|c| !c.is_regression && !c.is_improvement)
            .collect()
    }

    /// Print regressions
    pub fn print_regressions(&self) {
        let regressions = self.regressions();
        if regressions.is_empty() {
            println!("No performance regressions detected! ✓");
            return;
        }

        println!(
            "\n⚠️  Performance Regressions Detected (threshold: {:.1}%):",
            self.regression_threshold
        );
        for comp in regressions {
            println!("  {}", comp.summary());
        }
    }

    /// Print improvements
    pub fn print_improvements(&self) {
        let improvements = self.improvements();
        if improvements.is_empty() {
            return;
        }

        println!("\n✨ Performance Improvements:");
        for comp in improvements {
            println!("  {}", comp.summary());
        }
    }

    /// Print full report
    pub fn print_report(&self) {
        println!(
            "\n=== Performance Regression Report: {} ===",
            self.backend_name
        );
        println!("Total benchmarks: {}", self.comparisons.len());
        println!("Regressions: {}", self.regressions().len());
        println!("Improvements: {}", self.improvements().len());
        println!("Unchanged: {}", self.unchanged().len());

        self.print_regressions();
        self.print_improvements();

        if !self.unchanged().is_empty() {
            println!("\n✓ Unchanged:");
            for comp in self.unchanged() {
                println!("  {}", comp.summary());
            }
        }
    }

    /// Generate HTML report
    pub fn to_html(&self) -> String {
        let mut html = String::from("<html><head><title>Performance Report</title></head><body>");
        html.push_str(&format!(
            "<h1>Performance Report: {}</h1>",
            self.backend_name
        ));
        html.push_str(&format!(
            "<p>Total: {} | Regressions: {} | Improvements: {}</p>",
            self.comparisons.len(),
            self.regressions().len(),
            self.improvements().len()
        ));

        if !self.regressions().is_empty() {
            html.push_str("<h2>⚠️ Regressions</h2><ul>");
            for comp in self.regressions() {
                html.push_str(&format!("<li style='color:red'>{}</li>", comp.summary()));
            }
            html.push_str("</ul>");
        }

        if !self.improvements().is_empty() {
            html.push_str("<h2>✨ Improvements</h2><ul>");
            for comp in self.improvements() {
                html.push_str(&format!("<li style='color:green'>{}</li>", comp.summary()));
            }
            html.push_str("</ul>");
        }

        html.push_str("</body></html>");
        html
    }
}

/// Format duration in human-readable form
fn format_duration_ns(ns: u64) -> String {
    if ns < 1_000 {
        format!("{} ns", ns)
    } else if ns < 1_000_000 {
        format!("{:.2} μs", ns as f64 / 1_000.0)
    } else if ns < 1_000_000_000 {
        format!("{:.2} ms", ns as f64 / 1_000_000.0)
    } else {
        format!("{:.2} s", ns as f64 / 1_000_000_000.0)
    }
}

/// Mann-Whitney U test for non-parametric comparison of two distributions
///
/// Returns the p-value for the two-sided test.
/// Lower p-value indicates stronger evidence that the distributions are different.
/// p < 0.05 is typically considered statistically significant.
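///
/// Note that the normal approximation used here requires more than 20 samples
/// in each group; smaller inputs return `None`. A sketch:
///
/// ```ignore
/// let a: Vec<u64> = vec![100; 50];
/// let b: Vec<u64> = vec![150; 50];
/// if let Some(p) = mann_whitney_u_test(&a, &b) {
///     assert!(p < 0.05); // clearly different distributions
/// }
/// ```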
fn mann_whitney_u_test(sample1: &[u64], sample2: &[u64]) -> Option<f64> {
    let n1 = sample1.len();
    let n2 = sample2.len();

    if n1 == 0 || n2 == 0 {
        return None;
    }

    // Combine and rank all values
    let mut combined: Vec<(u64, usize)> = Vec::new();
    for &val in sample1 {
        combined.push((val, 1)); // 1 for sample1
    }
    for &val in sample2 {
        combined.push((val, 2)); // 2 for sample2
    }

    // Sort by value
    combined.sort_unstable_by_key(|(val, _)| *val);

    // Assign ranks (average rank for ties)
    let mut ranks = vec![0.0; combined.len()];
    let mut i = 0;
    while i < combined.len() {
        let mut j = i;
        let current_value = combined[i].0;

        // Find all tied values
        while j < combined.len() && combined[j].0 == current_value {
            j += 1;
        }

        // Average rank for tied values
        let avg_rank = ((i + 1) + j) as f64 / 2.0;
        for rank in ranks.iter_mut().take(j).skip(i) {
            *rank = avg_rank;
        }

        i = j;
    }

    // Calculate U statistic for sample1
    let r1: f64 = combined
        .iter()
        .zip(ranks.iter())
        .filter(|((_, sample), _)| *sample == 1)
        .map(|(_, &rank)| rank)
        .sum();

    let u1 = r1 - (n1 * (n1 + 1)) as f64 / 2.0;
    let u2 = (n1 * n2) as f64 - u1;

    // Use smaller U
    let u = u1.min(u2);

    // Calculate z-score for large samples (normal approximation)
    // Valid when both n1 and n2 > 20
    if n1 > 20 && n2 > 20 {
        let mean_u = (n1 * n2) as f64 / 2.0;
        let std_u = ((n1 * n2 * (n1 + n2 + 1)) as f64 / 12.0).sqrt();
        let z = (u - mean_u) / std_u;

        // Two-tailed p-value using normal distribution approximation
        // P(|Z| > |z|) = 2 * P(Z > |z|) = 2 * (1 - Φ(|z|))
        let abs_z = z.abs();
        let p = 2.0 * (1.0 - standard_normal_cdf(abs_z));
        Some(p)
    } else {
        // For small samples, we'd need exact tables or permutation tests
        // For now, return None (could be extended with exact tests)
        None
    }
}

/// Cumulative distribution function for standard normal distribution
/// Approximation using error function
fn standard_normal_cdf(x: f64) -> f64 {
    0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
}

/// Error function approximation (Abramowitz and Stegun formula 7.1.26)
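///
/// erf(x) ≈ 1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2),
/// with t = 1 / (1 + p*x), evaluated on |x| and then given the sign of x.
/// The maximum absolute error of this approximation is about 1.5e-7.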
fn erf(x: f64) -> f64 {
    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
    let x = x.abs();

    let a1 = 0.254829592;
    let a2 = -0.284496736;
    let a3 = 1.421413741;
    let a4 = -1.453152027;
    let a5 = 1.061405429;
    let p = 0.3275911;

    let t = 1.0 / (1.0 + p * x);
    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();

    sign * y
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_benchmark_config() {
        let config = BenchmarkConfig::default();
        assert!(config.warmup_iterations > 0);
        assert!(config.measurement_iterations > 0);
        assert!(config.regression_threshold_percent > 0.0);
    }

    #[test]
    fn test_benchmark_config_quick() {
        let quick = BenchmarkConfig::quick();
        let default = BenchmarkConfig::default();
        assert!(quick.measurement_iterations < default.measurement_iterations);
    }

    #[test]
    fn test_benchmark_config_builder() {
        let config = BenchmarkConfig::default()
            .with_warmup(5)
            .with_measurements(50)
            .with_regression_threshold(10.0);

        assert_eq!(config.warmup_iterations, 5);
        assert_eq!(config.measurement_iterations, 50);
        assert_eq!(config.regression_threshold_percent, 10.0);
    }

    #[test]
    fn test_benchmark_stats_from_samples() {
        let samples = vec![100, 110, 105, 108, 102];
        let stats = BenchmarkStats::from_samples("test".to_string(), samples);

        assert_eq!(stats.name, "test");
        assert_eq!(stats.samples, 5);
        assert!(stats.mean_ns > 100.0);
        assert!(stats.mean_ns < 110.0);
        assert_eq!(stats.min_ns, 100);
        assert_eq!(stats.max_ns, 110);
    }

    #[test]
    fn test_benchmark_stats_cv() {
        let samples = vec![100, 100, 100, 100, 100]; // No variation
        let stats = BenchmarkStats::from_samples("test".to_string(), samples);

        assert!(stats.coefficient_of_variation() < 0.01);
        assert!(stats.is_stable(0.1));
    }

    #[test]
    fn test_format_duration() {
        assert_eq!(format_duration_ns(500), "500 ns");
        assert_eq!(format_duration_ns(5_000), "5.00 μs");
        assert_eq!(format_duration_ns(5_000_000), "5.00 ms");
        assert_eq!(format_duration_ns(5_000_000_000), "5.00 s");
    }

    #[test]
    fn test_perf_regression_creation() {
        let perf = PerfRegression::new("test_backend");
        assert_eq!(perf.backend_name, "test_backend");
        assert!(perf.current_results.is_empty());
    }

    #[test]
    fn test_perf_regression_benchmark() {
        let mut perf = PerfRegression::with_config("test", BenchmarkConfig::quick());

        let stats = perf
            .benchmark("simple", || {
                std::thread::sleep(std::time::Duration::from_micros(10));
            })
            .unwrap();

        assert_eq!(stats.name, "simple");
        assert!(stats.samples > 0);
        assert!(stats.mean_ns > 10_000.0); // At least 10 microseconds
    }

    #[test]
    fn test_benchmark_comparison() {
        let baseline = BenchmarkStats::from_samples("test".to_string(), vec![100, 100, 100]);
        let current = BenchmarkStats::from_samples("test".to_string(), vec![110, 110, 110]);

        let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);

        assert!(comp.change_percent > 5.0); // 10% slower
        assert!(comp.is_regression);
        assert!(!comp.is_improvement);
    }

    #[test]
    fn test_benchmark_improvement() {
        let baseline = BenchmarkStats::from_samples("test".to_string(), vec![100, 100, 100]);
        let current = BenchmarkStats::from_samples("test".to_string(), vec![90, 90, 90]);

        let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);

        assert!(comp.change_percent < -5.0); // 10% faster
        assert!(!comp.is_regression);
        assert!(comp.is_improvement);
    }

    #[test]
    fn test_regression_report() {
        let baseline = BenchmarkStats::from_samples("test1".to_string(), vec![100, 100, 100]);
        let current = BenchmarkStats::from_samples("test1".to_string(), vec![110, 110, 110]);
        let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);

        let report = RegressionReport {
            backend_name: "test".to_string(),
            comparisons: vec![comp],
            regression_threshold: 5.0,
        };

        assert!(report.has_regressions());
        assert_eq!(report.regressions().len(), 1);
        assert_eq!(report.improvements().len(), 0);
    }

    #[test]
    fn test_clear_results() {
        let mut config = BenchmarkConfig::quick();
        config.min_time_ns = 0; // Accept all samples
        let mut perf = PerfRegression::with_config("test", config);
        // Use a non-empty function to ensure it takes some time
        perf.benchmark("test", || {
            let _x = (0..100).sum::<i32>();
        })
        .unwrap();
        assert!(!perf.results().is_empty());

        perf.clear();
        assert!(perf.results().is_empty());
    }

    #[test]
    fn test_percentile_calculation() {
        let samples = vec![10, 20, 30, 40, 50, 60, 70, 80, 90, 100];
        let mut stats = BenchmarkStats::from_samples("test".to_string(), samples.clone());
        stats.distribution = Some(samples);

        // Test various percentiles
        assert_eq!(stats.percentile(0.0), Some(10.0));
        // P50 falls in the 50-60 range (nearest-rank rounding picks index 5, i.e. 60)
        assert!(stats.percentile(50.0).unwrap() >= 50.0 && stats.percentile(50.0).unwrap() <= 60.0);
        assert_eq!(stats.percentile(100.0), Some(100.0));

        // Test P50, P95, P99 exist
        assert!(stats.p50().is_some());
        assert!(stats.p95().is_some());
        assert!(stats.p99().is_some());
    }

    #[test]
    fn test_percentile_without_distribution() {
        let samples = vec![10, 20, 30];
        let stats = BenchmarkStats::from_samples("test".to_string(), samples);
        // No distribution saved
        assert_eq!(stats.p50(), None);
        assert_eq!(stats.p95(), None);
    }

    #[test]
    fn test_confidence_interval() {
        let samples = vec![100, 105, 110, 95, 102, 108, 97, 103];
        let stats = BenchmarkStats::from_samples("test".to_string(), samples);

        let (lower, upper) = stats.confidence_interval_95();
        assert!(lower < stats.mean_ns);
        assert!(upper > stats.mean_ns);
        assert!(upper - lower > 0.0); // CI should have non-zero width
    }

    #[test]
    fn test_outlier_detection() {
        // Create data with clear outliers
        let mut samples = vec![100; 20]; // Most values around 100
        samples.push(1000); // Clear outlier
        samples.push(2000); // Another outlier

        let mut stats = BenchmarkStats::from_samples("test".to_string(), samples.clone());
        stats.distribution = Some(samples);

        let outliers = stats.detect_outliers().unwrap();
        assert!(!outliers.is_empty());
        assert!(outliers.contains(&1000));
        assert!(outliers.contains(&2000));
    }

    #[test]
    fn test_without_outliers() {
        let mut samples = vec![100, 102, 98, 101, 99, 103, 97];
        samples.push(1000); // Add outlier

        let mut stats = BenchmarkStats::from_samples("test".to_string(), samples.clone());
        stats.distribution = Some(samples);

        let filtered = stats.without_outliers().unwrap();
        assert!(filtered.mean_ns < stats.mean_ns); // Mean should be lower without outlier
        assert!(filtered.std_dev_ns < stats.std_dev_ns); // Std dev should be lower
    }

    #[test]
    fn test_effect_size_calculation() {
        // Use samples with variation for meaningful std dev
        let baseline =
            BenchmarkStats::from_samples("test".to_string(), vec![95, 100, 105, 98, 102]);
        let current =
            BenchmarkStats::from_samples("test".to_string(), vec![105, 110, 115, 108, 112]);

        let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);

        // Effect size should be positive (current is slower)
        assert!(comp.effect_size > 0.0);
    }

    #[test]
    fn test_effect_size_interpretation() {
        // Use samples with variation for meaningful effect size
        let baseline = BenchmarkStats::from_samples(
            "test".to_string(),
            vec![95, 98, 100, 102, 105, 97, 103, 99, 101, 104],
        );

        // Very small effect - minimal increase
        let current_small = BenchmarkStats::from_samples(
            "test".to_string(),
            vec![96, 99, 101, 103, 106, 98, 104, 100, 102, 105],
        );
        let comp_small = BenchmarkComparison::new(current_small, baseline.clone(), 5.0, 5.0);
        // Effect could be negligible or small depending on variation
        assert!(
            comp_small.effect_size.abs() < 1.0,
            "Effect size should be less than 1.0 for small differences"
        );

        // Large effect - significant increase (100 to 200 = doubling)
        let current_large = BenchmarkStats::from_samples(
            "test".to_string(),
            vec![195, 198, 200, 202, 205, 197, 203, 199, 201, 204],
        );
        let comp_large = BenchmarkComparison::new(current_large, baseline, 5.0, 5.0);
        assert_eq!(comp_large.effect_size_interpretation(), "large");
        assert!(comp_large.effect_size > 1.0);
    }

    #[test]
    fn test_mann_whitney_u_test_identical_distributions() {
        let sample1 = vec![100; 50];
        let sample2 = vec![100; 50];

        let p = mann_whitney_u_test(&sample1, &sample2);
        // Identical distributions should have high p-value (close to 1.0)
        assert!(p.is_some());
        assert!(p.unwrap() > 0.5);
    }

    #[test]
    fn test_mann_whitney_u_test_different_distributions() {
        let sample1 = vec![100; 50];
        let sample2 = vec![150; 50];

        let p = mann_whitney_u_test(&sample1, &sample2);
        // Very different distributions should have low p-value (close to 0.0)
        assert!(p.is_some());
        assert!(p.unwrap() < 0.05); // Statistically significant
    }

    #[test]
    fn test_mann_whitney_u_test_small_samples() {
        let sample1 = vec![100, 110, 105];
        let sample2 = vec![120, 125, 130];

        let p = mann_whitney_u_test(&sample1, &sample2);
        // Small samples (20 or fewer per group) should return None
        assert!(p.is_none());
    }

    #[test]
    fn test_statistical_significance() {
        // Create distributions with large difference
        let baseline_samples: Vec<u64> = (0..100).map(|_| 100).collect();
        let current_samples: Vec<u64> = (0..100).map(|_| 150).collect();

        let mut baseline =
            BenchmarkStats::from_samples("test".to_string(), baseline_samples.clone());
        baseline.distribution = Some(baseline_samples);

        let mut current =
            BenchmarkStats::from_samples("test".to_string(), current_samples.clone());
        current.distribution = Some(current_samples);

        let comp = BenchmarkComparison::new(current, baseline, 5.0, 5.0);

        assert!(comp.is_significant); // Should be statistically significant
        assert!(comp.p_value.is_some());
        assert!(comp.p_value.unwrap() < 0.05);
    }

    #[test]
    fn test_erf_function() {
        // Test error function with known values
        assert!((erf(0.0) - 0.0).abs() < 0.01);
        assert!((erf(1.0) - 0.8427).abs() < 0.01);
        assert!((erf(-1.0) - (-0.8427)).abs() < 0.01);
    }

    #[test]
    fn test_standard_normal_cdf() {
        // Test CDF with known values
        assert!((standard_normal_cdf(0.0) - 0.5).abs() < 0.01);
        assert!((standard_normal_cdf(1.96) - 0.975).abs() < 0.01);
        assert!((standard_normal_cdf(-1.96) - 0.025).abs() < 0.01);
    }
}