simplebench_runtime/lib.rs

//! SimpleBench Runtime - Core library for the SimpleBench microbenchmarking framework.
//!
//! This crate provides the runtime components for SimpleBench:
//! - Benchmark registration via the [`SimpleBench`] struct and `inventory` crate
//! - Timing and measurement with warmup phases
//! - Statistical analysis of benchmark results
//! - Baseline storage and regression detection
//!
//! # Usage
//!
//! This crate is typically used alongside `simplebench-macros` which provides the
//! `#[bench]` attribute for easy benchmark registration:
//!
//! ```rust,ignore
//! use simplebench_macros::bench;
//!
//! // Simple benchmark - measures single function calls
//! #[bench]
//! fn my_benchmark() {
//!     // code to benchmark
//! }
//!
//! // Setup runs once, benchmark receives reference
//! #[bench(setup = create_data)]
//! fn benchmark_with_setup(data: &Data) {
//!     process(data);
//! }
//!
//! // Setup runs before each sample - for mutations/consumption
//! #[bench(setup_each = || vec![3, 1, 4, 1, 5])]
//! fn bench_sort(mut data: Vec<i32>) {
//!     data.sort();
//! }
//! ```
//!
//! The `cargo simplebench` CLI tool handles compilation and execution of benchmarks.

use serde::{Deserialize, Serialize};
use std::time::Duration;

pub mod baseline;
pub mod changepoint;
pub mod config;
pub mod cpu_analysis;
pub mod cpu_monitor;
pub mod measurement;
pub mod output;
pub mod progress;
pub mod statistics;

pub use baseline::*;
pub use changepoint::*;
pub use config::*;
pub use cpu_analysis::*;
pub use cpu_monitor::*;
pub use measurement::*;
pub use output::*;
pub use progress::*;
pub use statistics::*;

// Re-export inventory for use by the macro
pub use inventory;

/// Percentile statistics for a benchmark run.
///
/// Contains the 50th, 90th, and 99th percentile timings along with the mean.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct Percentiles {
    /// 50th percentile (median) timing
    pub p50: Duration,
    /// 90th percentile timing
    pub p90: Duration,
    /// 99th percentile timing
    pub p99: Duration,
    /// Arithmetic mean of all timings
    pub mean: Duration,
}

/// Comprehensive statistics for a benchmark run.
///
/// All timing values are in nanoseconds for precision.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Statistics {
    /// Arithmetic mean in nanoseconds
    pub mean: u128,
    /// Median (50th percentile) in nanoseconds
    pub median: u128,
    /// 90th percentile in nanoseconds
    pub p90: u128,
    /// 99th percentile in nanoseconds
    pub p99: u128,
    /// Standard deviation in nanoseconds
    pub std_dev: f64,
    /// Variance in nanoseconds squared
    pub variance: f64,
    /// Minimum timing in nanoseconds
    pub min: u128,
    /// Maximum timing in nanoseconds
    pub max: u128,
    /// Number of samples collected
    pub sample_count: usize,
}

/// Complete result of a benchmark run.
///
/// Contains all timing data, statistics, and metadata for a single benchmark execution.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BenchResult {
    /// Benchmark function name
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
    /// Number of samples collected
    pub samples: usize,
    /// Percentile statistics computed from all timings
    pub percentiles: Percentiles,
    /// Raw timing data for each sample
    pub all_timings: Vec<Duration>,
    /// CPU state samples collected during the run
    #[serde(default)]
    pub cpu_samples: Vec<CpuSnapshot>,
    /// Total warmup duration in milliseconds
    #[serde(default)]
    pub warmup_ms: Option<u128>,
    /// Number of iterations performed during warmup
    #[serde(default)]
    pub warmup_iterations: Option<u64>,
}

/// Comparison between current benchmark run and baseline.
///
/// Contains statistical measures to determine if performance has regressed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Comparison {
    /// Mean timing from the current run
    pub current_mean: Duration,
    /// Mean timing from the baseline
    pub baseline_mean: Duration,
    /// Percentage change from baseline (positive = slower)
    pub percentage_change: f64,
    /// Number of baseline samples used for comparison
    #[serde(default)]
    pub baseline_count: usize,
    /// Z-score for statistical significance
    #[serde(skip_serializing_if = "Option::is_none")]
    pub z_score: Option<f64>,
    /// 95% confidence interval for the change
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence_interval: Option<(f64, f64)>,
    /// Probability that a real change occurred
    #[serde(skip_serializing_if = "Option::is_none")]
    pub change_probability: Option<f64>,
}

/// A registered benchmark function.
///
/// This struct is used by the `inventory` crate for compile-time benchmark registration.
/// The `#[bench]` macro from `simplebench-macros` generates these registrations automatically.
///
/// The `run` function encapsulates the entire measurement process: it receives config,
/// performs warmup, runs measurement iterations, and returns a complete `BenchResult`.
/// This design allows benchmarks with setup to run setup once before measurement begins.
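///
/// As a rough sketch, a manual registration (approximately what `#[bench]`
/// generates) might look like the following; the body of `run_my_bench` is
/// illustrative only, since the real macro delegates to this crate's
/// measurement code:
///
/// ```rust,ignore
/// fn run_my_bench(config: &simplebench_runtime::BenchmarkConfig) -> simplebench_runtime::BenchResult {
///     // Warmup and measurement would happen here, producing a populated result.
///     simplebench_runtime::BenchResult {
///         name: "my_bench".to_string(),
///         module: module_path!().to_string(),
///         ..Default::default()
///     }
/// }
///
/// inventory::submit! {
///     simplebench_runtime::SimpleBench {
///         name: "my_bench",
///         module: module_path!(),
///         run: run_my_bench,
///     }
/// }
/// ```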
pub struct SimpleBench {
    /// Name of the benchmark function
    pub name: &'static str,
    /// Module path where the benchmark is defined
    pub module: &'static str,
    /// The benchmark runner function that performs measurement and returns results
    pub run: fn(&crate::config::BenchmarkConfig) -> BenchResult,
}

inventory::collect!(SimpleBench);

/// Benchmark metadata for JSON listing.
///
/// A simplified representation of a benchmark for discovery/listing purposes.
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkInfo {
    /// Name of the benchmark function
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
}

/// List all registered benchmarks as JSON to stdout
///
/// Used by the orchestrator to discover benchmark names before execution.
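///
/// The output is a JSON array with one `{"name": ..., "module": ...}` object per
/// registered benchmark, mirroring the [`BenchmarkInfo`] fields.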
pub fn list_benchmarks_json() {
    let benchmarks: Vec<BenchmarkInfo> = inventory::iter::<SimpleBench>()
        .map(|b| BenchmarkInfo {
            name: b.name.to_string(),
            module: b.module.to_string(),
        })
        .collect();
    println!("{}", serde_json::to_string(&benchmarks).unwrap());
}

/// Run a single benchmark and output JSON result to stdout
///
/// The benchmark to run is specified via SIMPLEBENCH_BENCH_FILTER env var (exact match).
/// The core to pin to is specified via SIMPLEBENCH_PIN_CORE env var.
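///
/// As a sketch, an orchestrator process might launch the generated runner for a
/// single benchmark like this (the binary path and benchmark name are illustrative):
///
/// ```rust,ignore
/// std::process::Command::new("./target/release/my_bench_runner")
///     .env("SIMPLEBENCH_BENCH_FILTER", "my_benchmark")
///     .env("SIMPLEBENCH_PIN_CORE", "2")
///     .status()
///     .expect("failed to launch benchmark runner");
/// ```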
pub fn run_single_benchmark_json(config: &crate::config::BenchmarkConfig) {
    let bench_name = std::env::var("SIMPLEBENCH_BENCH_FILTER")
        .expect("SIMPLEBENCH_BENCH_FILTER must be set for single benchmark execution");

    let pin_core: usize = std::env::var("SIMPLEBENCH_PIN_CORE")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1); // Default to core 1, not 0 (reserved)

    // Set CPU affinity
    if let Err(e) = affinity::set_thread_affinity([pin_core]) {
        eprintln!(
            "Warning: Failed to set affinity to core {}: {:?}",
            pin_core, e
        );
    }

    // Find and run the benchmark
    for bench in inventory::iter::<SimpleBench>() {
        if bench.name == bench_name {
            // The benchmark's run function handles warmup, measurement, and returns results
            let result = (bench.run)(config);
            println!("{}", serde_json::to_string(&result).unwrap());
            return;
        }
    }

    eprintln!("ERROR: Benchmark '{}' not found", bench_name);
    std::process::exit(1);
}

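/// Compute p50/p90/p99 and the arithmetic mean from raw sample timings.
///
/// Percentiles use a nearest-rank-style lookup on the sorted samples: with `n`
/// samples, the p-th percentile is the element at index `n * p / 100`, clamped
/// to the last valid index.
///
/// Panics if `timings` is empty.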
pub(crate) fn calculate_percentiles(timings: &[Duration]) -> Percentiles {
    let mut sorted_timings = timings.to_vec();
    sorted_timings.sort();

    let len = sorted_timings.len();
    let p50_idx = (len * 50) / 100;
    let p90_idx = (len * 90) / 100;
    let p99_idx = (len * 99) / 100;

    // Calculate mean
    let sum_nanos: u128 = timings.iter().map(|d| d.as_nanos()).sum();
    let mean_nanos = sum_nanos / (len as u128);
    let mean = Duration::from_nanos(mean_nanos as u64);

    Percentiles {
        p50: sorted_timings[p50_idx.min(len - 1)],
        p90: sorted_timings[p90_idx.min(len - 1)],
        p99: sorted_timings[p99_idx.min(len - 1)],
        mean,
    }
}

/// Calculate comprehensive statistics from raw timing samples
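///
/// A minimal usage sketch (sample values chosen for illustration; the crate name is
/// assumed from the directory layout):
///
/// ```rust,ignore
/// use simplebench_runtime::calculate_statistics;
///
/// // Nanosecond samples: median and mean are both 200 here.
/// let stats = calculate_statistics(&[100, 200, 300]);
/// assert_eq!(stats.median, 200);
/// assert_eq!(stats.mean, 200);
/// assert_eq!(stats.sample_count, 3);
/// ```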
pub fn calculate_statistics(samples: &[u128]) -> Statistics {
    let sample_count = samples.len();

    if sample_count == 0 {
        return Statistics {
            mean: 0,
            median: 0,
            p90: 0,
            p99: 0,
            std_dev: 0.0,
            variance: 0.0,
            min: 0,
            max: 0,
            sample_count: 0,
        };
    }

    // Sort for percentile calculations
    let mut sorted = samples.to_vec();
    sorted.sort();

    // Calculate percentiles
    let p50_idx = (sample_count * 50) / 100;
    let p90_idx = (sample_count * 90) / 100;
    let p99_idx = (sample_count * 99) / 100;

    let median = sorted[p50_idx.min(sample_count - 1)];
    let p90 = sorted[p90_idx.min(sample_count - 1)];
    let p99 = sorted[p99_idx.min(sample_count - 1)];

    // Calculate mean
    let sum: u128 = samples.iter().sum();
    let mean = sum / (sample_count as u128);

    // Calculate variance and standard deviation
    let mean_f64 = mean as f64;
    let variance: f64 = samples
        .iter()
        .map(|&s| {
            let diff = s as f64 - mean_f64;
            diff * diff
        })
        .sum::<f64>()
        / (sample_count as f64);

    let std_dev = variance.sqrt();

    // Min and max
    let min = *sorted.first().unwrap();
    let max = *sorted.last().unwrap();

    Statistics {
        mean,
        median,
        p90,
        p99,
        std_dev,
        variance,
        min,
        max,
        sample_count,
    }
}

/// Run all benchmarks with configuration and stream results
///
/// This is the primary entry point for the generated runner.
/// Prints each benchmark result immediately as it completes.
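///
/// As a sketch, a generated runner's `main` might call this as follows (the config
/// constructor shown is an assumption; the real runner builds its configuration via
/// the `config` module):
///
/// ```rust,ignore
/// fn main() {
///     let config = simplebench_runtime::BenchmarkConfig::default();
///     let _results = simplebench_runtime::run_and_stream_benchmarks(&config);
/// }
/// ```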
pub fn run_and_stream_benchmarks(config: &crate::config::BenchmarkConfig) -> Vec<BenchResult> {
    use crate::baseline::{BaselineManager, ComparisonResult};
    use crate::output::{
        print_benchmark_result_line, print_comparison_line, print_new_baseline_line,
        print_streaming_summary,
    };
    use colored::*;

    match affinity::set_thread_affinity([0]) {
        Ok(_) => println!(
            "{} {}\n",
            "Set affinity to core".green().bold(),
            "0".cyan().bold()
        ),
        Err(e) => println!("Failed to set core affinity {e:?}"),
    };

    // Verify benchmark environment
    crate::cpu_monitor::verify_benchmark_environment(0);

    let mut results = Vec::new();
    let mut comparisons = Vec::new();

    // Initialize baseline manager
    let baseline_manager = match BaselineManager::new() {
        Ok(bm) => Some(bm),
        Err(e) => {
            eprintln!("Warning: Could not initialize baseline manager: {}", e);
            eprintln!("Running without baseline comparison.");
            None
        }
    };

    // Get benchmark filter if specified
    let bench_filter = std::env::var("SIMPLEBENCH_BENCH_FILTER").ok();

    // Count how many benchmarks match the filter
    let total_benchmarks: usize = inventory::iter::<SimpleBench>().count();
    let filtered_count = if let Some(ref filter) = bench_filter {
        inventory::iter::<SimpleBench>()
            .filter(|b| b.name.contains(filter))
            .count()
    } else {
        total_benchmarks
    };

    println!(
        "{} {} {}",
        "Running benchmarks with".green().bold(),
        config.measurement.samples,
        "samples".green().bold()
    );

    if let Some(ref filter) = bench_filter {
        println!(
            "{} {} ({} matched filter: \"{}\")\n",
            "Filtering to".dimmed(),
            filtered_count,
            if filtered_count == 1 {
                "benchmark"
            } else {
                "benchmarks"
            },
            filter
        );
    } else {
        println!();
    }

    // Run each benchmark and print immediately
    for bench in inventory::iter::<SimpleBench> {
        // Apply filter if specified
        if let Some(ref filter) = bench_filter {
            if !bench.name.contains(filter) {
                continue; // Skip this benchmark
            }
        }
        // Run benchmark - the run function handles warmup, measurement, and returns results
        let result = (bench.run)(config);

        // Print benchmark result immediately
        print_benchmark_result_line(&result);

        // Compare with baseline using CPD and print comparison
        if let Some(ref bm) = baseline_manager {
            let crate_name = result.module.split("::").next().unwrap_or("unknown");

            // Load recent baselines for window-based comparison
            let mut is_regression = false;
            if let Ok(historical) =
                bm.load_recent_baselines(crate_name, &result.name, config.comparison.window_size)
            {
                if !historical.is_empty() {
                    // Use CPD-based comparison
                    let comparison_result = crate::baseline::detect_regression_with_cpd(
                        &result,
                        &historical,
                        config.comparison.threshold,
                        config.comparison.confidence_level,
                        config.comparison.cp_threshold,
                        config.comparison.hazard_rate,
                    );

                    is_regression = comparison_result.is_regression;

                    if let Some(ref comparison) = comparison_result.comparison {
                        print_comparison_line(
                            comparison,
                            &result.name,
                            comparison_result.is_regression,
                        );
                    }

                    comparisons.push(comparison_result);
                } else {
                    // First run - no baseline
                    print_new_baseline_line(&result.name);

                    comparisons.push(ComparisonResult {
                        benchmark_name: result.name.clone(),
                        comparison: None,
                        is_regression: false,
                    });
                }
            }

            // Save new baseline with regression flag
            if let Err(e) = bm.save_baseline(crate_name, &result, is_regression) {
                eprintln!(
                    "Warning: Failed to save baseline for {}: {}",
                    result.name, e
                );
            }
        }

        results.push(result);
        println!(); // Blank line between benchmarks
    }

    // Print summary footer
    if !comparisons.is_empty() {
        print_streaming_summary(&comparisons, &config.comparison);

        // Show filter stats if filtering was applied
        if let Some(ref filter) = bench_filter {
            println!(
                "\n{} {} of {} total benchmarks (filter: \"{}\")",
                "Ran".dimmed(),
                filtered_count,
                total_benchmarks,
                filter
            );
        }
    }

    results
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_percentiles() {
        let timings = vec![
            Duration::from_millis(1),
            Duration::from_millis(2),
            Duration::from_millis(3),
            Duration::from_millis(4),
            Duration::from_millis(5),
            Duration::from_millis(6),
            Duration::from_millis(7),
            Duration::from_millis(8),
            Duration::from_millis(9),
            Duration::from_millis(10),
        ];

        let percentiles = calculate_percentiles(&timings);

        // For 10 samples: p50 at index 5 (6ms), p90 at index 9 (10ms), p99 at index 9 (10ms)
        // Mean: (1+2+3+4+5+6+7+8+9+10)/10 = 55/10 = 5.5ms
        assert_eq!(percentiles.p50, Duration::from_millis(6));
        assert_eq!(percentiles.p90, Duration::from_millis(10));
        assert_eq!(percentiles.p99, Duration::from_millis(10));
        assert_eq!(percentiles.mean, Duration::from_micros(5500));
    }

    #[test]
    fn test_calculate_percentiles_single_element() {
        let timings = vec![Duration::from_millis(5)];
        let percentiles = calculate_percentiles(&timings);

        assert_eq!(percentiles.p50, Duration::from_millis(5));
        assert_eq!(percentiles.p90, Duration::from_millis(5));
        assert_eq!(percentiles.p99, Duration::from_millis(5));
        assert_eq!(percentiles.mean, Duration::from_millis(5));
    }
}