simplebench_runtime/lib.rs

//! SimpleBench Runtime - Core library for the SimpleBench microbenchmarking framework.
//!
//! This crate provides the runtime components for SimpleBench:
//! - Benchmark registration via the [`SimpleBench`] struct and `inventory` crate
//! - Timing and measurement with warmup phases
//! - Statistical analysis of benchmark results
//! - Baseline storage and regression detection
//!
//! # Usage
//!
//! This crate is typically used alongside `simplebench-macros`, which provides the
//! `#[bench]` attribute for easy benchmark registration:
//!
//! ```rust,ignore
//! use simplebench_macros::bench;
//!
//! #[bench]
//! fn my_benchmark() {
//!     // code to benchmark
//! }
//! ```
//!
//! The `cargo simplebench` CLI tool handles compilation and execution of benchmarks.
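//!
//! The runtime can also be driven directly by a generated runner binary. A minimal
//! sketch of such a runner's entry point (illustrative; how the `BenchmarkConfig` is
//! built is up to the generated code, and `Default::default()` is just a stand-in):
//!
//! ```rust,ignore
//! fn main() {
//!     // Build or load the measurement configuration (stand-in shown here).
//!     let config = simplebench_runtime::config::BenchmarkConfig::default();
//!     // Run every registered benchmark, streaming each result as it completes.
//!     simplebench_runtime::run_and_stream_benchmarks(&config);
//! }
//! ```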

use serde::{Deserialize, Serialize};
use std::time::Duration;

pub mod baseline;
pub mod changepoint;
pub mod config;
pub mod cpu_analysis;
pub mod cpu_monitor;
pub mod measurement;
pub mod output;
pub mod progress;
pub mod statistics;

pub use baseline::*;
pub use changepoint::*;
pub use config::*;
pub use cpu_analysis::*;
pub use cpu_monitor::*;
pub use measurement::*;
pub use output::*;
pub use progress::*;
pub use statistics::*;

// Re-export inventory for use by the macro
pub use inventory;

/// Percentile statistics for a benchmark run.
///
/// Contains the 50th, 90th, and 99th percentile timings along with the mean.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct Percentiles {
    /// 50th percentile (median) timing
    pub p50: Duration,
    /// 90th percentile timing
    pub p90: Duration,
    /// 99th percentile timing
    pub p99: Duration,
    /// Arithmetic mean of all timings
    pub mean: Duration,
}

/// Comprehensive statistics for a benchmark run.
///
/// All timing values are in nanoseconds for precision.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Statistics {
    /// Arithmetic mean in nanoseconds
    pub mean: u128,
    /// Median (50th percentile) in nanoseconds
    pub median: u128,
    /// 90th percentile in nanoseconds
    pub p90: u128,
    /// 99th percentile in nanoseconds
    pub p99: u128,
    /// Standard deviation in nanoseconds
    pub std_dev: f64,
    /// Variance in nanoseconds squared
    pub variance: f64,
    /// Minimum timing in nanoseconds
    pub min: u128,
    /// Maximum timing in nanoseconds
    pub max: u128,
    /// Number of samples collected
    pub sample_count: usize,
}

/// Complete result of a benchmark run.
///
/// Contains all timing data, statistics, and metadata for a single benchmark execution.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BenchResult {
    /// Benchmark function name
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
    /// Number of iterations per sample
    pub iterations: usize,
    /// Number of samples collected
    pub samples: usize,
    /// Percentile statistics computed from all timings
    pub percentiles: Percentiles,
    /// Raw timing data for each sample
    pub all_timings: Vec<Duration>,
    /// CPU state samples collected during the run
    #[serde(default)]
    pub cpu_samples: Vec<CpuSnapshot>,
    /// Total warmup duration in milliseconds
    #[serde(default)]
    pub warmup_ms: Option<u128>,
    /// Number of iterations performed during warmup
    #[serde(default)]
    pub warmup_iterations: Option<u64>,
}

/// Comparison between current benchmark run and baseline.
///
/// Contains statistical measures to determine if performance has regressed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Comparison {
    /// Mean timing from the current run
    pub current_mean: Duration,
    /// Mean timing from the baseline
    pub baseline_mean: Duration,
    /// Percentage change from baseline (positive = slower)
    pub percentage_change: f64,
    /// Number of baseline samples used for comparison
    #[serde(default)]
    pub baseline_count: usize,
    /// Z-score for statistical significance
    #[serde(skip_serializing_if = "Option::is_none")]
    pub z_score: Option<f64>,
    /// 95% confidence interval for the change
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence_interval: Option<(f64, f64)>,
    /// Probability that a real change occurred
    #[serde(skip_serializing_if = "Option::is_none")]
    pub change_probability: Option<f64>,
}

/// A registered benchmark function.
///
/// This struct is used by the `inventory` crate for compile-time benchmark registration.
/// The `#[bench]` macro from `simplebench-macros` generates these registrations automatically.
///
/// The `run` function encapsulates the entire measurement process: it receives the config,
/// performs warmup, runs measurement iterations, and returns a complete `BenchResult`.
/// This design lets a benchmark with setup code run that setup once before measurement begins.
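///
/// A hand-written registration is roughly what the `#[bench]` macro expands to. The
/// sketch below is illustrative (the real expansion lives in `simplebench-macros`, and
/// `my_bench_runner` is a made-up name):
///
/// ```rust,ignore
/// fn my_bench_runner(config: &simplebench_runtime::config::BenchmarkConfig)
///     -> simplebench_runtime::BenchResult {
///     // Perform warmup and measurement here, then build and return the BenchResult.
///     todo!()
/// }
///
/// simplebench_runtime::inventory::submit! {
///     simplebench_runtime::SimpleBench {
///         name: "my_bench",
///         module: module_path!(),
///         run: my_bench_runner,
///     }
/// }
/// ```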
pub struct SimpleBench {
    /// Name of the benchmark function
    pub name: &'static str,
    /// Module path where the benchmark is defined
    pub module: &'static str,
    /// The benchmark runner function that performs measurement and returns results
    pub run: fn(&crate::config::BenchmarkConfig) -> BenchResult,
}

inventory::collect!(SimpleBench);

/// Benchmark metadata for JSON listing.
///
/// A simplified representation of a benchmark for discovery/listing purposes.
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkInfo {
    /// Name of the benchmark function
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
}

/// List all registered benchmarks as JSON to stdout
///
/// Used by the orchestrator to discover benchmark names before execution.
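///
/// Output shape (values illustrative):
///
/// ```text
/// [{"name":"my_benchmark","module":"my_crate::benches"}]
/// ```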
pub fn list_benchmarks_json() {
    let benchmarks: Vec<BenchmarkInfo> = inventory::iter::<SimpleBench>()
        .map(|b| BenchmarkInfo {
            name: b.name.to_string(),
            module: b.module.to_string(),
        })
        .collect();
    println!("{}", serde_json::to_string(&benchmarks).unwrap());
}

/// Run a single benchmark and output its result as JSON to stdout.
///
/// The benchmark to run is specified via the `SIMPLEBENCH_BENCH_FILTER` env var (exact match).
/// The core to pin to is specified via the `SIMPLEBENCH_PIN_CORE` env var.
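///
/// Invocation sketch (illustrative; the orchestrator sets these variables when spawning
/// the runner, and the binary name below is made up):
///
/// ```text
/// SIMPLEBENCH_BENCH_FILTER=my_benchmark SIMPLEBENCH_PIN_CORE=2 ./bench-runner
/// ```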
pub fn run_single_benchmark_json(config: &crate::config::BenchmarkConfig) {
    let bench_name = std::env::var("SIMPLEBENCH_BENCH_FILTER")
        .expect("SIMPLEBENCH_BENCH_FILTER must be set for single benchmark execution");

    let pin_core: usize = std::env::var("SIMPLEBENCH_PIN_CORE")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1); // Default to core 1, not 0 (reserved)

    // Set CPU affinity
    if let Err(e) = affinity::set_thread_affinity([pin_core]) {
        eprintln!(
            "Warning: Failed to set affinity to core {}: {:?}",
            pin_core, e
        );
    }

    // Find and run the benchmark
    for bench in inventory::iter::<SimpleBench>() {
        if bench.name == bench_name {
            // The benchmark's run function handles warmup, measurement, and returns results
            let result = (bench.run)(config);
            println!("{}", serde_json::to_string(&result).unwrap());
            return;
        }
    }

    eprintln!("ERROR: Benchmark '{}' not found", bench_name);
    std::process::exit(1);
}

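/// Compute the mean and the 50th/90th/99th percentile timings from raw samples.
///
/// Assumes a non-empty `timings` slice; an empty slice would panic on the division
/// and indexing below.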
pub(crate) fn calculate_percentiles(timings: &[Duration]) -> Percentiles {
    let mut sorted_timings = timings.to_vec();
    sorted_timings.sort();

    let len = sorted_timings.len();
    let p50_idx = (len * 50) / 100;
    let p90_idx = (len * 90) / 100;
    let p99_idx = (len * 99) / 100;

    // Calculate mean
    let sum_nanos: u128 = timings.iter().map(|d| d.as_nanos()).sum();
    let mean_nanos = sum_nanos / (len as u128);
    let mean = Duration::from_nanos(mean_nanos as u64);

    Percentiles {
        p50: sorted_timings[p50_idx.min(len - 1)],
        p90: sorted_timings[p90_idx.min(len - 1)],
        p99: sorted_timings[p99_idx.min(len - 1)],
        mean,
    }
}

/// Calculate comprehensive statistics from raw timing samples
pub fn calculate_statistics(samples: &[u128]) -> Statistics {
    let sample_count = samples.len();

    if sample_count == 0 {
        return Statistics {
            mean: 0,
            median: 0,
            p90: 0,
            p99: 0,
            std_dev: 0.0,
            variance: 0.0,
            min: 0,
            max: 0,
            sample_count: 0,
        };
    }

    // Sort for percentile calculations
    let mut sorted = samples.to_vec();
    sorted.sort();

    // Calculate percentiles
    let p50_idx = (sample_count * 50) / 100;
    let p90_idx = (sample_count * 90) / 100;
    let p99_idx = (sample_count * 99) / 100;

    let median = sorted[p50_idx.min(sample_count - 1)];
    let p90 = sorted[p90_idx.min(sample_count - 1)];
    let p99 = sorted[p99_idx.min(sample_count - 1)];

    // Calculate mean
    let sum: u128 = samples.iter().sum();
    let mean = sum / (sample_count as u128);

    // Calculate variance and standard deviation
    let mean_f64 = mean as f64;
    let variance: f64 = samples
        .iter()
        .map(|&s| {
            let diff = s as f64 - mean_f64;
            diff * diff
        })
        .sum::<f64>()
        / (sample_count as f64);

    let std_dev = variance.sqrt();

    // Min and max
    let min = *sorted.first().unwrap();
    let max = *sorted.last().unwrap();

    Statistics {
        mean,
        median,
        p90,
        p99,
        std_dev,
        variance,
        min,
        max,
        sample_count,
    }
}

/// Run all benchmarks with configuration and stream results
///
/// This is the primary entry point for the generated runner.
/// Prints each benchmark result immediately as it completes.
pub fn run_and_stream_benchmarks(config: &crate::config::BenchmarkConfig) -> Vec<BenchResult> {
    use crate::baseline::{BaselineManager, ComparisonResult};
    use crate::output::{
        print_benchmark_result_line, print_comparison_line, print_new_baseline_line,
        print_streaming_summary,
    };
    use colored::*;

    match affinity::set_thread_affinity([0]) {
        Ok(_) => println!(
            "{} {}\n",
            "Set affinity to core".green().bold(),
            "0".cyan().bold()
        ),
        Err(e) => println!("Failed to set core affinity: {e:?}"),
    };

    // Verify benchmark environment
    crate::cpu_monitor::verify_benchmark_environment(0);

    let mut results = Vec::new();
    let mut comparisons = Vec::new();

    // Initialize baseline manager
    let baseline_manager = match BaselineManager::new() {
        Ok(bm) => Some(bm),
        Err(e) => {
            eprintln!("Warning: Could not initialize baseline manager: {}", e);
            eprintln!("Running without baseline comparison.");
            None
        }
    };

    // Get benchmark filter if specified
    let bench_filter = std::env::var("SIMPLEBENCH_BENCH_FILTER").ok();

    // Count how many benchmarks match the filter
    let total_benchmarks: usize = inventory::iter::<SimpleBench>().count();
    let filtered_count = if let Some(ref filter) = bench_filter {
        inventory::iter::<SimpleBench>()
            .filter(|b| b.name.contains(filter))
            .count()
    } else {
        total_benchmarks
    };

    println!(
        "{} {} {} {} {}",
        "Running benchmarks with".green().bold(),
        config.measurement.samples,
        "samples ×".green().bold(),
        config.measurement.iterations,
        "iterations".green().bold()
    );

    if let Some(ref filter) = bench_filter {
        println!(
            "{} {} {} (matched filter: \"{}\")\n",
            "Filtering to".dimmed(),
            filtered_count,
            if filtered_count == 1 {
                "benchmark"
            } else {
                "benchmarks"
            },
            filter
        );
    } else {
        println!();
    }

    // Run each benchmark and print immediately
    for bench in inventory::iter::<SimpleBench> {
        // Apply filter if specified
        if let Some(ref filter) = bench_filter {
            if !bench.name.contains(filter) {
                continue; // Skip this benchmark
            }
        }
        // Run benchmark - the run function handles warmup, measurement, and returns results
        let result = (bench.run)(config);

        // Print benchmark result immediately
        print_benchmark_result_line(&result);

        // Compare with baseline using CPD and print comparison
        if let Some(ref bm) = baseline_manager {
            let crate_name = result.module.split("::").next().unwrap_or("unknown");

            // Load recent baselines for window-based comparison
            let mut is_regression = false;
            if let Ok(historical) =
                bm.load_recent_baselines(crate_name, &result.name, config.comparison.window_size)
            {
                if !historical.is_empty() {
                    // Use CPD-based comparison
                    let comparison_result = crate::baseline::detect_regression_with_cpd(
                        &result,
                        &historical,
                        config.comparison.threshold,
                        config.comparison.confidence_level,
                        config.comparison.cp_threshold,
                        config.comparison.hazard_rate,
                    );

                    is_regression = comparison_result.is_regression;

                    if let Some(ref comparison) = comparison_result.comparison {
                        print_comparison_line(
                            comparison,
                            &result.name,
                            comparison_result.is_regression,
                        );
                    }

                    comparisons.push(comparison_result);
                } else {
                    // First run - no baseline
                    print_new_baseline_line(&result.name);

                    comparisons.push(ComparisonResult {
                        benchmark_name: result.name.clone(),
                        comparison: None,
                        is_regression: false,
                    });
                }
            }

            // Save new baseline with regression flag
            if let Err(e) = bm.save_baseline(crate_name, &result, is_regression) {
                eprintln!(
                    "Warning: Failed to save baseline for {}: {}",
                    result.name, e
                );
            }
        }

        results.push(result);
        println!(); // Blank line between benchmarks
    }

    // Print summary footer
    if !comparisons.is_empty() {
        print_streaming_summary(&comparisons, &config.comparison);

        // Show filter stats if filtering was applied
        if let Some(ref filter) = bench_filter {
            println!(
                "\n{} {} of {} total benchmarks (filter: \"{}\")",
                "Ran".dimmed(),
                filtered_count,
                total_benchmarks,
                filter
            );
        }
    }

    results
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_percentiles() {
        let timings = vec![
            Duration::from_millis(1),
            Duration::from_millis(2),
            Duration::from_millis(3),
            Duration::from_millis(4),
            Duration::from_millis(5),
            Duration::from_millis(6),
            Duration::from_millis(7),
            Duration::from_millis(8),
            Duration::from_millis(9),
            Duration::from_millis(10),
        ];

        let percentiles = calculate_percentiles(&timings);

        // For 10 samples: p50 at index 5 (6ms), p90 at index 9 (10ms), p99 at index 9 (10ms)
        // Mean: (1+2+3+4+5+6+7+8+9+10)/10 = 55/10 = 5.5ms
        assert_eq!(percentiles.p50, Duration::from_millis(6));
        assert_eq!(percentiles.p90, Duration::from_millis(10));
        assert_eq!(percentiles.p99, Duration::from_millis(10));
        assert_eq!(percentiles.mean, Duration::from_micros(5500));
    }

    #[test]
    fn test_calculate_percentiles_single_element() {
        let timings = vec![Duration::from_millis(5)];
        let percentiles = calculate_percentiles(&timings);

        assert_eq!(percentiles.p50, Duration::from_millis(5));
        assert_eq!(percentiles.p90, Duration::from_millis(5));
        assert_eq!(percentiles.p99, Duration::from_millis(5));
        assert_eq!(percentiles.mean, Duration::from_millis(5));
    }
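
    // Illustrative tests for `calculate_statistics` (test names are new here); the
    // expected values are worked out by hand from five evenly spaced samples.
    #[test]
    fn test_calculate_statistics_basic() {
        let samples: Vec<u128> = vec![1, 2, 3, 4, 5];
        let stats = calculate_statistics(&samples);

        // p50 index = (5 * 50) / 100 = 2; the p90/p99 indexes land on the last element.
        assert_eq!(stats.mean, 3);
        assert_eq!(stats.median, 3);
        assert_eq!(stats.p90, 5);
        assert_eq!(stats.p99, 5);
        assert_eq!(stats.min, 1);
        assert_eq!(stats.max, 5);
        assert_eq!(stats.sample_count, 5);
        // Variance: ((-2)^2 + (-1)^2 + 0 + 1^2 + 2^2) / 5 = 2.0
        assert_eq!(stats.variance, 2.0);
        assert_eq!(stats.std_dev, 2.0_f64.sqrt());
    }

    #[test]
    fn test_calculate_statistics_empty() {
        // The empty-input path returns an all-zero Statistics rather than panicking.
        let stats = calculate_statistics(&[]);
        assert_eq!(stats.sample_count, 0);
        assert_eq!(stats.mean, 0);
    }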
}