// fluxbench_cli/executor/verification.rs
//! Verification and Comparison Execution
//!
//! Processes benchmark comparisons, synthetic metrics, and performance
//! verifications against computed statistics.
//!
//! ## Data Flow
//!
//! ```text
//! BenchExecutionResult + SummaryStatistics
//!           │
//!           ▼
//!    ┌──────────────────┐
//!    │  MetricContext   │ ◄── Collects all benchmark metrics
//!    └────────┬─────────┘     (mean, median, percentiles, allocations)
//!             │
//!    ┌────────┴────────┬─────────────────┐
//!    ▼                 ▼                 ▼
//! Comparisons     Synthetics      Verifications
//! (speedup vs     (computed        (pass/fail
//!  baseline)       metrics)         thresholds)
//! ```
//!
//! ## Key Components
//!
//! - **Comparisons**: Compare benchmarks against baselines (e.g., speedup tables)
//! - **ComparisonSeries**: Group comparisons for multi-point charts (scaling analysis)
//! - **Synthetics**: Computed metrics from expressions (e.g., `ops_per_byte = 1e9 / mean`)
//! - **Verifications**: Pass/fail checks with configurable severity levels

use super::execution::BenchExecutionResult;
use fluxbench_core::CompareDef;
use fluxbench_logic::{
    MetricContext, SyntheticDef, SyntheticResult, Verification, VerificationContext,
    VerificationResult, VerifyDef, compute_synthetics, run_verifications,
};
use fluxbench_report::{ComparisonEntry, ComparisonResult, ComparisonSeries};
use fluxbench_stats::SummaryStatistics;
use fxhash::FxHashSet;
39
40/// Run comparisons, synthetics, and verifications against computed metrics
41///
42/// # Arguments
43/// * `results` - Benchmark execution results
44/// * `stats` - Pre-computed summary statistics
45///
46/// # Returns
47/// Tuple of (comparisons, comparison_series, synthetics, verifications)
48pub fn execute_verifications(
49    results: &[BenchExecutionResult],
50    stats: &[(String, Option<SummaryStatistics>)],
51) -> (
52    Vec<ComparisonResult>,
53    Vec<ComparisonSeries>,
54    Vec<SyntheticResult>,
55    Vec<VerificationResult>,
56) {
57    // Build metric context with benchmark results
58    let mut context = MetricContext::new();
59    let mut unavailable = FxHashSet::default();
60
61    // Build stats lookup for comparison processing
62    let stats_lookup: std::collections::HashMap<_, _> = stats
63        .iter()
64        .filter_map(|(id, s)| s.as_ref().map(|s| (id.as_str(), s)))
65        .collect();
66
67    // Build a lookup for allocation data from results
68    let alloc_lookup: std::collections::HashMap<_, _> = results
69        .iter()
70        .map(|r| (r.benchmark_id.as_str(), (r.alloc_bytes, r.alloc_count)))
71        .collect();
72
73    for (bench_id, stats_opt) in stats {
74        if let Some(stats) = stats_opt {
75            // Add mean as the primary metric for each benchmark
76            context.set(bench_id, stats.mean);
77
78            // Central tendency
79            context.set(format!("{}_mean", bench_id), stats.mean);
80            context.set(format!("{}_median", bench_id), stats.median);
81            context.set(format!("{}_std_dev", bench_id), stats.std_dev);
82
83            // Extremes
84            context.set(format!("{}_min", bench_id), stats.min);
85            context.set(format!("{}_max", bench_id), stats.max);
86
87            // Percentiles
88            context.set(format!("{}_p50", bench_id), stats.p50);
89            context.set(format!("{}_p90", bench_id), stats.p90);
90            context.set(format!("{}_p95", bench_id), stats.p95);
91            context.set(format!("{}_p99", bench_id), stats.p99);
92            context.set(format!("{}_p999", bench_id), stats.p999);
93
94            // Sample info
95            context.set(format!("{}_samples", bench_id), stats.sample_count as f64);
96
97            // Allocation data (from results, not stats)
98            if let Some(&(alloc_bytes, alloc_count)) = alloc_lookup.get(bench_id.as_str()) {
99                context.set(format!("{}_alloc_bytes", bench_id), alloc_bytes as f64);
100                context.set(format!("{}_alloc_count", bench_id), alloc_count as f64);
101            }
102        } else {
103            unavailable.insert(bench_id.clone());
104        }
105    }
106
107    // Process comparison groups
108    let mut comparison_results: Vec<ComparisonResult> = Vec::new();
109    let mut grouped_comparisons: std::collections::BTreeMap<
110        String,
111        Vec<(&CompareDef, Vec<ComparisonEntry>)>,
112    > = std::collections::BTreeMap::new();
113
114    for cmp in inventory::iter::<CompareDef> {
115        // Get baseline (first benchmark if not specified)
116        let baseline_id = cmp
117            .baseline
118            .unwrap_or_else(|| cmp.benchmarks.first().copied().unwrap_or(""));
119        let baseline_stats = match stats_lookup.get(baseline_id) {
120            Some(s) => s,
121            None => continue,
122        };
123        let baseline_value = get_metric_value(baseline_stats, cmp.metric);
124
125        // Build entries for all benchmarks
126        let entries: Vec<ComparisonEntry> = cmp
127            .benchmarks
128            .iter()
129            .filter_map(|bench_id| {
130                let bench_stats = stats_lookup.get(bench_id)?;
131                let value = get_metric_value(bench_stats, cmp.metric);
132                let speedup = if value > 0.0 {
133                    baseline_value / value
134                } else {
135                    0.0
136                };
137
138                Some(ComparisonEntry {
139                    benchmark_id: bench_id.to_string(),
140                    value,
141                    speedup,
142                    is_baseline: *bench_id == baseline_id,
143                })
144            })
145            .collect();
146
147        // Only include comparison if we have at least 2 entries
148        if entries.len() >= 2 {
149            // If grouped, collect for chart generation
150            if let Some(group) = cmp.group {
151                grouped_comparisons
152                    .entry(group.to_string())
153                    .or_default()
154                    .push((cmp, entries.clone()));
155            }
156
157            // Always add to individual comparisons (for non-grouped display)
158            if cmp.group.is_none() {
159                comparison_results.push(ComparisonResult {
160                    id: cmp.id.to_string(),
161                    title: cmp.title.to_string(),
162                    baseline: baseline_id.to_string(),
163                    metric: cmp.metric.to_string(),
164                    entries,
165                });
166            }
167        }
168    }
169
170    // Build comparison series from grouped comparisons
171    let comparison_series: Vec<ComparisonSeries> = grouped_comparisons
172        .into_iter()
173        .filter_map(|(group, comparisons)| {
174            if comparisons.is_empty() {
175                return None;
176            }
177
178            // Get title and metric from first comparison
179            let (first_cmp, _) = &comparisons[0];
180            let title = first_cmp.title.to_string();
181            let metric = first_cmp.metric.to_string();
182
183            // Get series names: use series labels if provided, otherwise benchmark IDs
184            let series_names: Vec<String> = if let Some(labels) = first_cmp.series {
185                labels.iter().map(|s| s.to_string()).collect()
186            } else {
187                first_cmp.benchmarks.iter().map(|s| s.to_string()).collect()
188            };
189
190            // Sort comparisons by x value
191            let mut sorted_comparisons = comparisons;
192            sorted_comparisons.sort_by(|(a, _), (b, _)| {
193                let ax = a.x.unwrap_or("0");
194                let bx = b.x.unwrap_or("0");
195                ax.parse::<f64>()
196                    .unwrap_or(0.0)
197                    .partial_cmp(&bx.parse::<f64>().unwrap_or(0.0))
198                    .unwrap_or(std::cmp::Ordering::Equal)
199            });
200
201            // Build x_values
202            let x_values: Vec<String> = sorted_comparisons
203                .iter()
204                .map(|(cmp, _)| cmp.x.unwrap_or("").to_string())
205                .collect();
206
207            // Build series_data[series_idx][x_idx]
208            // When series labels are provided, match by position index
209            let mut series_data: Vec<Vec<f64>> =
210                vec![vec![0.0; x_values.len()]; series_names.len()];
211
212            for (x_idx, (cmp, entries)) in sorted_comparisons.iter().enumerate() {
213                // Build a map from benchmark_id to value for this x point
214                let entry_map: std::collections::HashMap<&str, f64> = entries
215                    .iter()
216                    .map(|e| (e.benchmark_id.as_str(), e.value))
217                    .collect();
218
219                // Match by position in the benchmarks array
220                for (series_idx, bench_id) in cmp.benchmarks.iter().enumerate() {
221                    if series_idx < series_names.len() {
222                        if let Some(&value) = entry_map.get(bench_id) {
223                            series_data[series_idx][x_idx] = value;
224                        }
225                    }
226                }
227            }
228
229            Some(ComparisonSeries {
230                group,
231                title,
232                x_values,
233                series_names,
234                series_data,
235                metric,
236            })
237        })
238        .collect();
239
240    // Collect and compute synthetic metrics
241    let synthetic_defs: Vec<SyntheticDef> = inventory::iter::<SyntheticDef>
242        .into_iter()
243        .cloned()
244        .collect();
245
246    let mut synthetic_results = Vec::new();
247    if !synthetic_defs.is_empty() {
248        let computed = compute_synthetics(&synthetic_defs, &context);
249        for result in computed {
250            match result {
251                Ok(sr) => {
252                    // Add synthetic metric to context for verifications to use
253                    context.set(&sr.id, sr.value);
254                    synthetic_results.push(sr);
255                }
256                Err(_) => {
257                    // Synthetic couldn't be computed (missing dependencies)
258                }
259            }
260        }
261    }
262
263    // Collect all registered verifications
264    let verifications: Vec<Verification> = inventory::iter::<VerifyDef>
265        .into_iter()
266        .map(|v| Verification {
267            id: v.id.to_string(),
268            expression: v.expression.to_string(),
269            severity: v.severity,
270            margin: v.margin,
271        })
272        .collect();
273
274    let verification_results = if verifications.is_empty() {
275        Vec::new()
276    } else {
277        // Run verifications
278        let mut verification_context = VerificationContext::new(&context, unavailable);
279        // Propagate unit info from synthetics
280        for sr in &synthetic_results {
281            if let Some(ref unit) = sr.unit {
282                verification_context.set_unit(&sr.id, unit);
283            }
284        }
285        run_verifications(&verifications, &verification_context)
286    };
287
288    (
289        comparison_results,
290        comparison_series,
291        synthetic_results,
292        verification_results,
293    )
294}
295
296/// Get metric value from stats based on metric name
297fn get_metric_value(stats: &SummaryStatistics, metric: &str) -> f64 {
298    match metric {
299        "mean" => stats.mean,
300        "median" => stats.median,
301        "min" => stats.min,
302        "max" => stats.max,
303        "p50" => stats.p50,
304        "p90" => stats.p90,
305        "p95" => stats.p95,
306        "p99" => stats.p99,
307        "p999" => stats.p999,
308        _ => stats.mean, // Default to mean
309    }
310}