cbtop/optimize/
validator.rs1use crate::config::WorkloadType;
4use crate::error::CbtopError;
5use crate::headless::{Benchmark, BenchmarkResult};
6use serde::{Deserialize, Serialize};
7use std::time::Duration;
8
9use super::stats::{cv, mean, t_test};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ValidationResult {
14 pub passed: bool,
16 pub improvement_percent: f64,
18 pub before_gflops: f64,
20 pub after_gflops: f64,
22 pub before_cv: f64,
24 pub after_cv: f64,
26 pub p_value: f64,
28 pub statistically_significant: bool,
30}
31
/// Acceptance thresholds used to decide whether an optimization is a real,
/// measurable improvement.
pub struct OptimizationValidator {
    /// Smallest improvement of the mean, in percent, that counts as a pass.
    pub min_improvement_percent: f64,
    /// Number of benchmark runs collected by `validate_ab`.
    pub min_samples: usize,
    /// Largest coefficient of variation tolerated for either sample set.
    pub max_cv_percent: f64,
}
41
42impl Default for OptimizationValidator {
43 fn default() -> Self {
44 Self {
45 min_improvement_percent: 10.0,
46 min_samples: 5,
47 max_cv_percent: 10.0,
48 }
49 }
50}
51
52impl OptimizationValidator {
53 pub fn new(min_improvement: f64, min_samples: usize, max_cv: f64) -> Self {
55 Self {
56 min_improvement_percent: min_improvement,
57 min_samples: min_samples.max(2), max_cv_percent: max_cv,
59 }
60 }
61
62 pub fn validate(
64 &self,
65 before_results: &[BenchmarkResult],
66 after_results: &[BenchmarkResult],
67 ) -> ValidationResult {
68 let before_samples: Vec<f64> = before_results.iter().map(|r| r.results.gflops).collect();
70 let after_samples: Vec<f64> = after_results.iter().map(|r| r.results.gflops).collect();
71
72 self.validate_samples(&before_samples, &after_samples)
73 }
74
75 pub fn validate_samples(&self, before: &[f64], after: &[f64]) -> ValidationResult {
77 let before_mean = mean(before);
78 let after_mean = mean(after);
79 let before_cv = cv(before);
80 let after_cv = cv(after);
81
82 let improvement = if before_mean > 0.0 {
83 (after_mean - before_mean) / before_mean * 100.0
84 } else {
85 0.0
86 };
87
88 let p_value = t_test(before, after);
89 let statistically_significant = p_value < 0.05;
90
91 let passed = improvement >= self.min_improvement_percent
92 && before_cv <= self.max_cv_percent
93 && after_cv <= self.max_cv_percent
94 && statistically_significant;
95
96 ValidationResult {
97 passed,
98 improvement_percent: improvement,
99 before_gflops: before_mean,
100 after_gflops: after_mean,
101 before_cv,
102 after_cv,
103 p_value,
104 statistically_significant,
105 }
106 }
107
108 pub fn validate_ab(
110 &self,
111 workload: WorkloadType,
112 size: usize,
113 duration: Duration,
114 ) -> Result<(Vec<BenchmarkResult>, ValidationResult), CbtopError> {
115 let mut before_results = Vec::new();
116 let mut after_results = Vec::new();
117
118 for _ in 0..self.min_samples {
120 let result = Benchmark::builder()
121 .workload_type(workload)
122 .size(size)
123 .duration(duration)
124 .build()?
125 .run()?;
126 before_results.push(result.clone());
127 after_results.push(result);
128 }
129
130 let validation = self.validate(&before_results, &after_results);
131 Ok((before_results, validation))
132 }
133}
134
135impl ValidationResult {
136 pub fn format_report(&self) -> String {
138 let status = if self.passed { "PASSED" } else { "FAILED" };
139 let significance = if self.statistically_significant {
140 "Yes"
141 } else {
142 "No"
143 };
144
145 format!(
146 "# Optimization Validation Report\n\n\
147 **Status**: {}\n\n\
148 ## Results\n\n\
149 | Metric | Before | After | Change |\n\
150 |--------|--------|-------|--------|\n\
151 | GFLOP/s | {:.2} | {:.2} | {:+.1}% |\n\
152 | CV (%) | {:.1} | {:.1} | - |\n\n\
153 ## Statistical Analysis\n\n\
154 - **Improvement**: {:+.1}%\n\
155 - **p-value**: {:.4}\n\
156 - **Statistically Significant**: {}\n",
157 status,
158 self.before_gflops,
159 self.after_gflops,
160 self.improvement_percent,
161 self.before_cv,
162 self.after_cv,
163 self.improvement_percent,
164 self.p_value,
165 significance
166 )
167 }
168}