Skip to main content

tenflowers_core/
performance_gates.rs

//! Performance Regression Gates
//!
//! This module provides infrastructure for performance regression testing using
//! criterion-based thresholds. It ensures that performance-critical operations
//! maintain their speed characteristics across code changes.
//!
//! # Architecture
//!
//! The performance gate system consists of:
//! - Baseline measurements for critical operations
//! - Configurable regression thresholds
//! - Automatic validation against baselines
//! - CI-friendly pass/fail reporting
//!
//! # Usage
//!
//! ```rust,no_run
//! use tenflowers_core::performance_gates::{PerformanceGate, OperationBaseline};
//! use tenflowers_core::{Tensor, ops::matmul};
//!
//! // Define a baseline
//! let baseline = OperationBaseline::new(
//!     "matmul_64x64",
//!     100_000, // 100 microseconds baseline
//!     0.10,    // Allow 10% regression
//! );
//!
//! // Create test data
//! let size = 64;
//! let a = Tensor::<f32>::zeros(&[size, size]);
//! let b = Tensor::<f32>::zeros(&[size, size]);
//!
//! // Validate performance
//! let gate = PerformanceGate::new(baseline);
//! let passed = gate.validate(|| {
//!     matmul(&a, &b).expect("matmul should succeed");
//! });
//!
//! assert!(passed, "Performance regression detected!");
//! ```
41use std::collections::HashMap;
42use std::sync::{Arc, Mutex};
43use std::time::{Duration, Instant};
44
45lazy_static::lazy_static! {
46    /// Global registry of performance baselines
47    static ref PERFORMANCE_BASELINES: Arc<Mutex<HashMap<String, OperationBaseline>>> = {
48        Arc::new(Mutex::new(initialize_baselines()))
49    };
50}
51
/// Performance baseline for a specific operation
#[derive(Debug, Clone)]
pub struct OperationBaseline {
    /// Name of the operation
    pub name: String,
    /// Baseline time in nanoseconds
    pub baseline_ns: u64,
    /// Maximum allowed regression as a fraction (0.10 = 10%)
    pub max_regression: f64,
    /// Minimum number of measurement samples to collect
    pub min_samples: usize,
    /// Warmup iterations executed before measurement begins
    pub warmup_iters: usize,
}

impl OperationBaseline {
    /// Create a baseline with the default sampling configuration
    /// (10 measurement samples, 3 warmup iterations).
    pub fn new(name: &str, baseline_ns: u64, max_regression: f64) -> Self {
        Self::with_sampling(name, baseline_ns, max_regression, 10, 3)
    }

    /// Create a baseline with custom sampling configuration
    pub fn with_sampling(
        name: &str,
        baseline_ns: u64,
        max_regression: f64,
        min_samples: usize,
        warmup_iters: usize,
    ) -> Self {
        Self {
            name: name.to_string(),
            baseline_ns,
            max_regression,
            min_samples,
            warmup_iters,
        }
    }

    /// Check whether a measured time stays within the regression threshold.
    ///
    /// The threshold is `baseline_ns * (1 + max_regression)`, truncated to
    /// whole nanoseconds; times at or below the threshold pass.
    pub fn check_regression(&self, measured_ns: u64) -> bool {
        let allowed_factor = 1.0 + self.max_regression;
        let threshold_ns = (self.baseline_ns as f64 * allowed_factor) as u64;
        measured_ns <= threshold_ns
    }

    /// Regression relative to the baseline, in percent
    /// (positive = slower than baseline, negative = faster).
    pub fn regression_percentage(&self, measured_ns: u64) -> f64 {
        let base = self.baseline_ns as f64;
        (measured_ns as f64 - base) / base * 100.0
    }
}
107
108/// Performance gate validator
109pub struct PerformanceGate {
110    baseline: OperationBaseline,
111}
112
113impl PerformanceGate {
114    /// Create a new performance gate
115    pub fn new(baseline: OperationBaseline) -> Self {
116        Self { baseline }
117    }
118
119    /// Validate that operation meets performance baseline
120    ///
121    /// Returns true if performance is within acceptable regression threshold
122    pub fn validate<F>(&self, mut op: F) -> bool
123    where
124        F: FnMut(),
125    {
126        // Warmup iterations
127        for _ in 0..self.baseline.warmup_iters {
128            op();
129        }
130
131        // Measurement iterations
132        let mut times = Vec::with_capacity(self.baseline.min_samples);
133        for _ in 0..self.baseline.min_samples {
134            let start = Instant::now();
135            op();
136            let elapsed = start.elapsed();
137            times.push(elapsed.as_nanos() as u64);
138        }
139
140        // Calculate median time (more robust than mean for performance)
141        times.sort_unstable();
142        let median_ns = times[times.len() / 2];
143
144        self.baseline.check_regression(median_ns)
145    }
146
147    /// Validate and return detailed measurement
148    pub fn validate_detailed<F>(&self, mut op: F) -> PerformanceMeasurement
149    where
150        F: FnMut(),
151    {
152        // Warmup iterations
153        for _ in 0..self.baseline.warmup_iters {
154            op();
155        }
156
157        // Measurement iterations
158        let mut times = Vec::with_capacity(self.baseline.min_samples);
159        for _ in 0..self.baseline.min_samples {
160            let start = Instant::now();
161            op();
162            let elapsed = start.elapsed();
163            times.push(elapsed.as_nanos() as u64);
164        }
165
166        // Calculate statistics
167        times.sort_unstable();
168        let median_ns = times[times.len() / 2];
169        let min_ns = *times.first().expect("collection should not be empty");
170        let max_ns = *times.last().expect("collection should not be empty");
171        let mean_ns = times.iter().sum::<u64>() / times.len() as u64;
172
173        let passed = self.baseline.check_regression(median_ns);
174        let regression_pct = self.baseline.regression_percentage(median_ns);
175
176        PerformanceMeasurement {
177            operation: self.baseline.name.clone(),
178            baseline_ns: self.baseline.baseline_ns,
179            measured_ns: median_ns,
180            min_ns,
181            max_ns,
182            mean_ns,
183            regression_pct,
184            passed,
185            samples: times.len(),
186        }
187    }
188}
189
/// Detailed performance measurement result
#[derive(Debug, Clone)]
pub struct PerformanceMeasurement {
    pub operation: String,
    pub baseline_ns: u64,
    pub measured_ns: u64,
    pub min_ns: u64,
    pub max_ns: u64,
    pub mean_ns: u64,
    pub regression_pct: f64,
    pub passed: bool,
    pub samples: usize,
}

impl PerformanceMeasurement {
    /// Render this measurement as a single human-readable report line.
    pub fn report(&self) -> String {
        let status = match self.passed {
            true => "✓ PASS",
            false => "✗ FAIL",
        };
        // Non-negative regressions get an explicit '+' prefix.
        let regression_sign = match self.regression_pct >= 0.0 {
            true => "+",
            false => "",
        };

        format!(
            "{} | {} | baseline: {:>8}ns | measured: {:>8}ns | regression: {}{:>6.2}% | min: {:>8}ns | max: {:>8}ns | samples: {}",
            status,
            self.operation,
            self.baseline_ns,
            self.measured_ns,
            regression_sign,
            self.regression_pct,
            self.min_ns,
            self.max_ns,
            self.samples
        )
    }

    /// The baseline time as a [`Duration`].
    pub fn baseline_duration(&self) -> Duration {
        Duration::from_nanos(self.baseline_ns)
    }

    /// The measured time as a [`Duration`].
    pub fn measured_duration(&self) -> Duration {
        Duration::from_nanos(self.measured_ns)
    }
}
234
235/// Suite of performance gates for comprehensive validation
236pub struct PerformanceGateSuite {
237    gates: Vec<(String, PerformanceGate)>,
238}
239
240impl PerformanceGateSuite {
241    /// Create a new empty suite
242    pub fn new() -> Self {
243        Self { gates: Vec::new() }
244    }
245
246    /// Add a gate to the suite
247    pub fn add_gate(&mut self, name: String, gate: PerformanceGate) -> &mut Self {
248        self.gates.push((name, gate));
249        self
250    }
251
252    /// Run all gates and collect results
253    pub fn run_all<F>(&self, op_factory: F) -> Vec<PerformanceMeasurement>
254    where
255        F: Fn(&str) -> Box<dyn FnMut()>,
256    {
257        let mut results = Vec::new();
258        for (name, gate) in &self.gates {
259            let mut op = op_factory(name);
260            let measurement = gate.validate_detailed(&mut *op);
261            results.push(measurement);
262        }
263        results
264    }
265
266    /// Check if all gates pass
267    pub fn all_passed(&self, results: &[PerformanceMeasurement]) -> bool {
268        results.iter().all(|r| r.passed)
269    }
270
271    /// Print comprehensive report
272    pub fn print_report(&self, results: &[PerformanceMeasurement]) {
273        println!(
274            "\n╔════════════════════════════════════════════════════════════════════════════╗"
275        );
276        println!("║                    PERFORMANCE REGRESSION GATE REPORT                      ║");
277        println!(
278            "╚════════════════════════════════════════════════════════════════════════════╝\n"
279        );
280
281        for result in results {
282            println!("{}", result.report());
283        }
284
285        let total = results.len();
286        let passed = results.iter().filter(|r| r.passed).count();
287        let failed = total - passed;
288
289        println!("\n{}", "─".repeat(80));
290        println!(
291            "Summary: {} total | {} passed | {} failed",
292            total, passed, failed
293        );
294
295        if failed > 0 {
296            println!("\n⚠ WARNING: Performance regressions detected!");
297        } else {
298            println!("\n✓ All performance gates passed!");
299        }
300    }
301}
302
303impl Default for PerformanceGateSuite {
304    fn default() -> Self {
305        Self::new()
306    }
307}
308
309/// Initialize default performance baselines for critical operations
310fn initialize_baselines() -> HashMap<String, OperationBaseline> {
311    let mut baselines = HashMap::new();
312
313    // Matrix multiplication baselines (in nanoseconds)
314    // These are conservative estimates - adjust based on your hardware
315    baselines.insert(
316        "matmul_64x64_f32".to_string(),
317        OperationBaseline::new("matmul_64x64_f32", 50_000, 0.15),
318    );
319    baselines.insert(
320        "matmul_128x128_f32".to_string(),
321        OperationBaseline::new("matmul_128x128_f32", 400_000, 0.15),
322    );
323    baselines.insert(
324        "matmul_256x256_f32".to_string(),
325        OperationBaseline::new("matmul_256x256_f32", 3_000_000, 0.15),
326    );
327
328    // Binary operations baselines
329    baselines.insert(
330        "add_10k_f32".to_string(),
331        OperationBaseline::new("add_10k_f32", 5_000, 0.20),
332    );
333    baselines.insert(
334        "mul_10k_f32".to_string(),
335        OperationBaseline::new("mul_10k_f32", 5_000, 0.20),
336    );
337
338    // Reduction operations baselines
339    baselines.insert(
340        "sum_100k_f32".to_string(),
341        OperationBaseline::new("sum_100k_f32", 20_000, 0.20),
342    );
343    baselines.insert(
344        "mean_100k_f32".to_string(),
345        OperationBaseline::new("mean_100k_f32", 25_000, 0.20),
346    );
347
348    // Convolution baselines (conservative for CPU)
349    baselines.insert(
350        "conv2d_3x3_32ch".to_string(),
351        OperationBaseline::new("conv2d_3x3_32ch", 1_000_000, 0.15),
352    );
353
354    baselines
355}
356
357/// Register a custom baseline
358pub fn register_baseline(baseline: OperationBaseline) {
359    if let Ok(mut baselines) = PERFORMANCE_BASELINES.lock() {
360        baselines.insert(baseline.name.clone(), baseline);
361    }
362}
363
364/// Get a baseline by name
365pub fn get_baseline(name: &str) -> Option<OperationBaseline> {
366    PERFORMANCE_BASELINES
367        .lock()
368        .ok()
369        .and_then(|baselines| baselines.get(name).cloned())
370}
371
372/// List all registered baselines
373pub fn list_baselines() -> Vec<String> {
374    PERFORMANCE_BASELINES
375        .lock()
376        .ok()
377        .map(|baselines| baselines.keys().cloned().collect())
378        .unwrap_or_default()
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_baseline_creation() {
        let b = OperationBaseline::new("test_op", 1000, 0.10);
        assert_eq!(b.name, "test_op");
        assert_eq!(b.baseline_ns, 1000);
        assert_eq!(b.max_regression, 0.10);
    }

    #[test]
    fn test_regression_check() {
        let b = OperationBaseline::new("test_op", 1000, 0.10);

        // At or below the 10% threshold: passes.
        for &within in [1000, 1050, 1100].iter() {
            assert!(b.check_regression(within));
        }
        // Beyond the threshold: fails.
        for &too_slow in [1150, 2000].iter() {
            assert!(!b.check_regression(too_slow));
        }
    }

    #[test]
    fn test_regression_percentage() {
        let b = OperationBaseline::new("test_op", 1000, 0.10);

        let cases = [(1000, 0.0), (1100, 10.0), (1200, 20.0), (900, -10.0)];
        for &(measured, expected) in cases.iter() {
            assert_eq!(b.regression_percentage(measured), expected);
        }
    }

    #[test]
    fn test_performance_gate_validation() {
        // Generous baseline: accounts for closure-call and timer overhead.
        let gate = PerformanceGate::new(OperationBaseline::new("fast_op", 10_000, 0.50));

        // An essentially instant operation must pass the gate.
        let passed = gate.validate(|| {
            let _ = 1 + 1;
        });

        assert!(passed, "Fast operation should pass performance gate");
    }

    #[test]
    fn test_performance_measurement_report() {
        let measurement = PerformanceMeasurement {
            operation: "test_op".to_string(),
            baseline_ns: 1000,
            measured_ns: 1050,
            min_ns: 1000,
            max_ns: 1100,
            mean_ns: 1050,
            regression_pct: 5.0,
            passed: true,
            samples: 10,
        };

        let report = measurement.report();
        for needle in ["✓ PASS", "test_op", "1000", "1050"].iter() {
            assert!(report.contains(needle));
        }
    }

    #[test]
    fn test_baseline_registry() {
        register_baseline(OperationBaseline::new("custom_op", 5000, 0.15));

        let retrieved = get_baseline("custom_op").expect("baseline was just registered");
        assert_eq!(retrieved.name, "custom_op");
        assert_eq!(retrieved.baseline_ns, 5000);
    }

    #[test]
    fn test_gate_suite() {
        let mut suite = PerformanceGateSuite::new();

        // Generous baselines: account for closure-call and timer overhead.
        for &op in ["op1", "op2"].iter() {
            let gate = PerformanceGate::new(OperationBaseline::new(op, 10_000, 0.50));
            suite.add_gate(op.to_string(), gate);
        }

        let results = suite.run_all(|_name| {
            Box::new(|| {
                let _ = 1 + 1;
            })
        });

        assert_eq!(results.len(), 2);
        assert!(suite.all_passed(&results));
    }
}
485}