// fluxbench_cli/executor/report.rs
//! Report Building
//!
//! Constructs the complete benchmark report from execution results,
//! including parallel computation of bootstrap confidence intervals.
//!
//! ## Pipeline
//!
//! ```text
//! BenchExecutionResult + SummaryStatistics
//!              │
//!              ▼
//!   ┌─────────────────────┐
//!   │ Parallel Bootstrap  │  Rayon-parallelized CI computation
//!   │   CI Computation    │  (100k iterations per benchmark)
//!   └──────────┬──────────┘
//!              │
//!              ▼
//!   ┌─────────────────────┐
//!   │  BenchmarkMetrics   │  All timing stats + allocations + cycles
//!   └──────────┬──────────┘
//!              │
//!              ▼
//!   ┌─────────────────────┐
//!   │      Report         │  Ready for JSON/HTML/CSV/GitHub output
//!   └─────────────────────┘
//! ```
//!
//! The expensive bootstrap computation is parallelized via Rayon, making
//! report generation scale with available CPU cores.
use super::execution::{BenchExecutionResult, ExecutionConfig};
use super::metadata::build_report_meta;
use fluxbench_report::{
    BenchmarkMetrics, BenchmarkReportResult, BenchmarkStatus, FailureInfo, Report, ReportSummary,
};
use fluxbench_stats::{
    BootstrapConfig, SummaryStatistics, compute_bootstrap, compute_cycles_stats,
};
use rayon::prelude::*;

41/// Build a complete Report from execution results
42///
43/// Uses parallel computation for bootstrap CI calculations (the expensive part).
44///
45/// # Arguments
46/// * `results` - Benchmark execution results
47/// * `stats` - Pre-computed summary statistics for each benchmark
48/// * `config` - Execution configuration (for bootstrap settings)
49/// * `total_duration_ms` - Total execution time in milliseconds
50///
51/// # Returns
52/// Complete Report structure ready for output
53pub fn build_report(
54    results: &[BenchExecutionResult],
55    stats: &[(String, Option<SummaryStatistics>)],
56    config: &ExecutionConfig,
57    total_duration_ms: f64,
58) -> Report {
59    // Build stats lookup
60    let stats_map: std::collections::HashMap<_, _> = stats.iter().cloned().collect();
61
62    // Compute metrics in parallel (bootstrap is expensive)
63    let metrics_vec: Vec<_> = results
64        .par_iter()
65        .map(|result| {
66            let stats_opt = stats_map.get(&result.benchmark_id).cloned().flatten();
67
68            stats_opt.as_ref().map(|s| {
69                // Compute bootstrap CI (expensive - parallelized)
70                let bootstrap_config = BootstrapConfig {
71                    iterations: config.bootstrap_iterations,
72                    confidence_level: config.confidence_level,
73                    ..Default::default()
74                };
75                let bootstrap_result = compute_bootstrap(&result.samples, &bootstrap_config);
76
77                let (ci_lower, ci_upper) = match bootstrap_result {
78                    Ok(br) => (br.confidence_interval.lower, br.confidence_interval.upper),
79                    Err(_) => (s.mean, s.mean), // Fallback to point estimate
80                };
81
82                let throughput = if s.mean > 0.0 {
83                    Some(1_000_000_000.0 / s.mean)
84                } else {
85                    None
86                };
87
88                // Compute CPU cycles statistics
89                let cycles_stats = compute_cycles_stats(&result.cpu_cycles, &result.samples);
90
91                BenchmarkMetrics {
92                    samples: s.sample_count,
93                    mean_ns: s.mean,
94                    median_ns: s.median,
95                    std_dev_ns: s.std_dev,
96                    min_ns: s.min,
97                    max_ns: s.max,
98                    p50_ns: s.p50,
99                    p90_ns: s.p90,
100                    p95_ns: s.p95,
101                    p99_ns: s.p99,
102                    p999_ns: s.p999,
103                    skewness: s.skewness,
104                    kurtosis: s.kurtosis,
105                    ci_lower_ns: ci_lower,
106                    ci_upper_ns: ci_upper,
107                    ci_level: config.confidence_level,
108                    throughput_ops_sec: throughput,
109                    alloc_bytes: result.alloc_bytes,
110                    alloc_count: result.alloc_count,
111                    // CPU cycles from RDTSC (x86_64 only, 0 on other platforms)
112                    mean_cycles: cycles_stats.mean_cycles,
113                    median_cycles: cycles_stats.median_cycles,
114                    min_cycles: cycles_stats.min_cycles,
115                    max_cycles: cycles_stats.max_cycles,
116                    cycles_per_ns: cycles_stats.cycles_per_ns,
117                }
118            })
119        })
120        .collect();
121
122    // Build final results sequentially (cheap - just aggregation)
123    let mut benchmark_results = Vec::with_capacity(results.len());
124    let mut summary = ReportSummary {
125        total_benchmarks: results.len(),
126        total_duration_ms,
127        ..Default::default()
128    };
129
130    for (result, metrics) in results.iter().zip(metrics_vec) {
131        let failure = result.error_message.as_ref().map(|msg| FailureInfo {
132            kind: result
133                .failure_kind
134                .clone()
135                .unwrap_or_else(|| "panic".to_string()),
136            message: msg.clone(),
137            backtrace: result.backtrace.clone(),
138        });
139
140        match result.status {
141            BenchmarkStatus::Passed => summary.passed += 1,
142            BenchmarkStatus::Failed => summary.failed += 1,
143            BenchmarkStatus::Crashed => summary.crashed += 1,
144            BenchmarkStatus::Skipped => summary.skipped += 1,
145        }
146
147        benchmark_results.push(BenchmarkReportResult {
148            id: result.benchmark_id.clone(),
149            name: result.benchmark_name.clone(),
150            group: result.group.clone(),
151            status: result.status,
152            severity: result.severity,
153            file: result.file.clone(),
154            line: result.line,
155            metrics,
156            threshold: result.threshold,
157            comparison: None, // Filled when comparing to baseline
158            failure,
159        });
160    }
161
162    Report {
163        meta: build_report_meta(config),
164        results: benchmark_results,
165        comparisons: Vec::new(),       // Filled by execute_verifications
166        comparison_series: Vec::new(), // Filled by execute_verifications
167        synthetics: Vec::new(),        // Filled by execute_verifications
168        verifications: Vec::new(),     // Filled by execute_verifications
169        summary,
170        baseline_meta: None,
171    }
172}