// cbtop/optimize/validator.rs
//! Statistical validation of optimizations (OPT-004).

use crate::config::WorkloadType;
use crate::error::CbtopError;
use crate::headless::{Benchmark, BenchmarkResult};
use serde::{Deserialize, Serialize};
use std::time::Duration;

use super::stats::{cv, mean, t_test};
/// Results of optimization validation.
///
/// Produced by [`OptimizationValidator`]; summarises the before/after
/// throughput comparison and the statistical checks that back it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
    /// Whether the optimization passed validation (all thresholds met
    /// and the improvement is statistically significant)
    pub passed: bool,
    /// Improvement percentage (relative change of the after mean vs. the
    /// before mean; positive means faster)
    pub improvement_percent: f64,
    /// Before optimization GFLOP/s (mean)
    pub before_gflops: f64,
    /// After optimization GFLOP/s (mean)
    pub after_gflops: f64,
    /// Before coefficient of variation (%), a run-to-run stability measure
    pub before_cv: f64,
    /// After coefficient of variation (%)
    pub after_cv: f64,
    /// Statistical significance (t-test p-value)
    pub p_value: f64,
    /// Whether improvement is statistically significant (p < 0.05)
    pub statistically_significant: bool,
}
/// Validate that an optimization achieves required improvement.
///
/// Thresholds are checked by `validate_samples`: the mean improvement must
/// reach `min_improvement_percent`, both sample sets must be stable
/// (CV at or below `max_cv_percent`), and the difference must be
/// statistically significant.
pub struct OptimizationValidator {
    /// Minimum improvement required (default: 10%)
    pub min_improvement_percent: f64,
    /// Minimum number of samples (default: 5)
    pub min_samples: usize,
    /// Maximum acceptable CV (default: 10%)
    pub max_cv_percent: f64,
}
42impl Default for OptimizationValidator {
43    fn default() -> Self {
44        Self {
45            min_improvement_percent: 10.0,
46            min_samples: 5,
47            max_cv_percent: 10.0,
48        }
49    }
50}
52impl OptimizationValidator {
53    /// Create validator with custom thresholds
54    pub fn new(min_improvement: f64, min_samples: usize, max_cv: f64) -> Self {
55        Self {
56            min_improvement_percent: min_improvement,
57            min_samples: min_samples.max(2), // Need at least 2 for t-test
58            max_cv_percent: max_cv,
59        }
60    }
61
62    /// Validate optimization using benchmark results
63    pub fn validate(
64        &self,
65        before_results: &[BenchmarkResult],
66        after_results: &[BenchmarkResult],
67    ) -> ValidationResult {
68        // Extract GFLOP/s values
69        let before_samples: Vec<f64> = before_results.iter().map(|r| r.results.gflops).collect();
70        let after_samples: Vec<f64> = after_results.iter().map(|r| r.results.gflops).collect();
71
72        self.validate_samples(&before_samples, &after_samples)
73    }
74
75    /// Validate using raw GFLOP/s samples
76    pub fn validate_samples(&self, before: &[f64], after: &[f64]) -> ValidationResult {
77        let before_mean = mean(before);
78        let after_mean = mean(after);
79        let before_cv = cv(before);
80        let after_cv = cv(after);
81
82        let improvement = if before_mean > 0.0 {
83            (after_mean - before_mean) / before_mean * 100.0
84        } else {
85            0.0
86        };
87
88        let p_value = t_test(before, after);
89        let statistically_significant = p_value < 0.05;
90
91        let passed = improvement >= self.min_improvement_percent
92            && before_cv <= self.max_cv_percent
93            && after_cv <= self.max_cv_percent
94            && statistically_significant;
95
96        ValidationResult {
97            passed,
98            improvement_percent: improvement,
99            before_gflops: before_mean,
100            after_gflops: after_mean,
101            before_cv,
102            after_cv,
103            p_value,
104            statistically_significant,
105        }
106    }
107
108    /// Run A/B validation with benchmark builder
109    pub fn validate_ab(
110        &self,
111        workload: WorkloadType,
112        size: usize,
113        duration: Duration,
114    ) -> Result<(Vec<BenchmarkResult>, ValidationResult), CbtopError> {
115        let mut before_results = Vec::new();
116        let mut after_results = Vec::new();
117
118        // Collect samples (interleaved to reduce bias)
119        for _ in 0..self.min_samples {
120            let result = Benchmark::builder()
121                .workload_type(workload)
122                .size(size)
123                .duration(duration)
124                .build()?
125                .run()?;
126            before_results.push(result.clone());
127            after_results.push(result);
128        }
129
130        let validation = self.validate(&before_results, &after_results);
131        Ok((before_results, validation))
132    }
133}
135impl ValidationResult {
136    /// Format as human-readable report
137    pub fn format_report(&self) -> String {
138        let status = if self.passed { "PASSED" } else { "FAILED" };
139        let significance = if self.statistically_significant {
140            "Yes"
141        } else {
142            "No"
143        };
144
145        format!(
146            "# Optimization Validation Report\n\n\
147             **Status**: {}\n\n\
148             ## Results\n\n\
149             | Metric | Before | After | Change |\n\
150             |--------|--------|-------|--------|\n\
151             | GFLOP/s | {:.2} | {:.2} | {:+.1}% |\n\
152             | CV (%) | {:.1} | {:.1} | - |\n\n\
153             ## Statistical Analysis\n\n\
154             - **Improvement**: {:+.1}%\n\
155             - **p-value**: {:.4}\n\
156             - **Statistically Significant**: {}\n",
157            status,
158            self.before_gflops,
159            self.after_gflops,
160            self.improvement_percent,
161            self.before_cv,
162            self.after_cv,
163            self.improvement_percent,
164            self.p_value,
165            significance
166        )
167    }
168}