simplebench_runtime/
lib.rs

//! SimpleBench Runtime - Core library for the SimpleBench microbenchmarking framework.
//!
//! This crate provides the runtime components for SimpleBench:
//! - Benchmark registration via the [`SimpleBench`] struct and `inventory` crate
//! - Timing and measurement with warmup phases
//! - Statistical analysis of benchmark results
//! - Baseline storage and regression detection
//!
//! # Usage
//!
//! This crate is typically used alongside `simplebench-macros`, which provides the
//! `#[bench]` attribute for easy benchmark registration:
//!
//! ```rust,ignore
//! use simplebench_macros::bench;
//!
//! #[bench]
//! fn my_benchmark() {
//!     // code to benchmark
//! }
//! ```
//!
//! The `cargo simplebench` CLI tool handles compilation and execution of benchmarks.

use serde::{Deserialize, Serialize};
use std::time::Duration;

pub mod baseline;
pub mod changepoint;
pub mod config;
pub mod cpu_analysis;
pub mod cpu_monitor;
pub mod measurement;
pub mod output;
pub mod statistics;

pub use baseline::*;
pub use changepoint::*;
pub use config::*;
pub use cpu_analysis::*;
pub use cpu_monitor::*;
pub use measurement::*;
pub use output::*;
pub use statistics::*;

// Re-export inventory for use by the macro
pub use inventory;

/// Percentile statistics for a benchmark run.
///
/// Contains the 50th, 90th, and 99th percentile timings along with the mean.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct Percentiles {
    /// 50th percentile (median) timing
    pub p50: Duration,
    /// 90th percentile timing
    pub p90: Duration,
    /// 99th percentile timing
    pub p99: Duration,
    /// Arithmetic mean of all timings
    pub mean: Duration,
}

/// Comprehensive statistics for a benchmark run.
///
/// All timing values are in nanoseconds for precision.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Statistics {
    /// Arithmetic mean in nanoseconds
    pub mean: u128,
    /// Median (50th percentile) in nanoseconds
    pub median: u128,
    /// 90th percentile in nanoseconds
    pub p90: u128,
    /// 99th percentile in nanoseconds
    pub p99: u128,
    /// Standard deviation in nanoseconds
    pub std_dev: f64,
    /// Variance in nanoseconds squared
    pub variance: f64,
    /// Minimum timing in nanoseconds
    pub min: u128,
    /// Maximum timing in nanoseconds
    pub max: u128,
    /// Number of samples collected
    pub sample_count: usize,
}

/// Complete result of a benchmark run.
///
/// Contains all timing data, statistics, and metadata for a single benchmark execution.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BenchResult {
    /// Benchmark function name
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
    /// Number of iterations per sample
    pub iterations: usize,
    /// Number of samples collected
    pub samples: usize,
    /// Percentile statistics computed from all timings
    pub percentiles: Percentiles,
    /// Raw timing data for each sample
    pub all_timings: Vec<Duration>,
    /// CPU state samples collected during the run
    #[serde(default)]
    pub cpu_samples: Vec<CpuSnapshot>,
    /// Total warmup duration in milliseconds
    #[serde(default)]
    pub warmup_ms: Option<u128>,
    /// Number of iterations performed during warmup
    #[serde(default)]
    pub warmup_iterations: Option<u64>,
}

/// Comparison between current benchmark run and baseline.
///
/// Contains statistical measures to determine if performance has regressed.
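///
/// A `percentage_change` of `+5.0` means the current run is about 5% slower than
/// the baseline; negative values indicate an improvement.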
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Comparison {
    /// Mean timing from the current run
    pub current_mean: Duration,
    /// Mean timing from the baseline
    pub baseline_mean: Duration,
    /// Percentage change from baseline (positive = slower)
    pub percentage_change: f64,
    /// Number of baseline samples used for comparison
    #[serde(default)]
    pub baseline_count: usize,
    /// Z-score for statistical significance
    #[serde(skip_serializing_if = "Option::is_none")]
    pub z_score: Option<f64>,
    /// 95% confidence interval for the change
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence_interval: Option<(f64, f64)>,
    /// Probability that a real change occurred
    #[serde(skip_serializing_if = "Option::is_none")]
    pub change_probability: Option<f64>,
}

/// A registered benchmark function.
///
/// This struct is used by the `inventory` crate for compile-time benchmark registration.
/// The `#[bench]` macro from `simplebench-macros` generates these registrations automatically.
///
/// The `run` function encapsulates the entire measurement process: it receives the config,
/// performs warmup, runs measurement iterations, and returns a complete `BenchResult`.
/// This design lets benchmarks with setup code run that setup once before measurement begins.
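///
/// # Example
///
/// A hand-written registration equivalent in spirit to what `#[bench]` generates
/// (a sketch only; the actual macro expansion may differ):
///
/// ```rust,ignore
/// fn run_my_benchmark(
///     config: &simplebench_runtime::config::BenchmarkConfig,
/// ) -> simplebench_runtime::BenchResult {
///     // Warm up, collect `config.measurement.samples` samples, and build a BenchResult.
///     unimplemented!()
/// }
///
/// simplebench_runtime::inventory::submit! {
///     simplebench_runtime::SimpleBench {
///         name: "my_benchmark",
///         module: module_path!(),
///         run: run_my_benchmark,
///     }
/// }
/// ```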
pub struct SimpleBench {
    /// Name of the benchmark function
    pub name: &'static str,
    /// Module path where the benchmark is defined
    pub module: &'static str,
    /// The benchmark runner function that performs measurement and returns results
    pub run: fn(&crate::config::BenchmarkConfig) -> BenchResult,
}

inventory::collect!(SimpleBench);

/// Benchmark metadata for JSON listing.
///
/// A simplified representation of a benchmark for discovery/listing purposes.
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkInfo {
    /// Name of the benchmark function
    pub name: String,
    /// Module path where the benchmark is defined
    pub module: String,
}

/// List all registered benchmarks as JSON to stdout
///
/// Used by the orchestrator to discover benchmark names before execution.
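///
/// The output is a JSON array of [`BenchmarkInfo`] entries, e.g.
/// `[{"name":"my_benchmark","module":"my_crate::benches"}]`.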
pub fn list_benchmarks_json() {
    let benchmarks: Vec<BenchmarkInfo> = inventory::iter::<SimpleBench>()
        .map(|b| BenchmarkInfo {
            name: b.name.to_string(),
            module: b.module.to_string(),
        })
        .collect();
    println!("{}", serde_json::to_string(&benchmarks).unwrap());
}

/// Run a single benchmark and output JSON result to stdout
///
/// The benchmark to run is specified via SIMPLEBENCH_BENCH_FILTER env var (exact match).
/// The core to pin to is specified via SIMPLEBENCH_PIN_CORE env var.
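///
/// ```rust,ignore
/// // Sketch: the orchestrator sets the env vars before spawning the runner, e.g.
/// //   SIMPLEBENCH_BENCH_FILTER=my_benchmark SIMPLEBENCH_PIN_CORE=2
/// // The config construction below is only a placeholder.
/// let config = simplebench_runtime::config::BenchmarkConfig::default();
/// simplebench_runtime::run_single_benchmark_json(&config);
/// ```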
pub fn run_single_benchmark_json(config: &crate::config::BenchmarkConfig) {
    let bench_name = std::env::var("SIMPLEBENCH_BENCH_FILTER")
        .expect("SIMPLEBENCH_BENCH_FILTER must be set for single benchmark execution");

    let pin_core: usize = std::env::var("SIMPLEBENCH_PIN_CORE")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1); // Default to core 1, not 0 (reserved)

    // Set CPU affinity
    if let Err(e) = affinity::set_thread_affinity([pin_core]) {
        eprintln!(
            "Warning: Failed to set affinity to core {}: {:?}",
            pin_core, e
        );
    }

    // Find and run the benchmark
    for bench in inventory::iter::<SimpleBench>() {
        if bench.name == bench_name {
            // The benchmark's run function handles warmup, measurement, and returns results
            let result = (bench.run)(config);
            println!("{}", serde_json::to_string(&result).unwrap());
            return;
        }
    }

    eprintln!("ERROR: Benchmark '{}' not found", bench_name);
    std::process::exit(1);
}

/// Compute the 50th, 90th, and 99th percentile timings and the arithmetic mean.
///
/// Panics if `timings` is empty.
pub(crate) fn calculate_percentiles(timings: &[Duration]) -> Percentiles {
    let mut sorted_timings = timings.to_vec();
    sorted_timings.sort();

    let len = sorted_timings.len();
    let p50_idx = (len * 50) / 100;
    let p90_idx = (len * 90) / 100;
    let p99_idx = (len * 99) / 100;

    // Calculate mean
    let sum_nanos: u128 = timings.iter().map(|d| d.as_nanos()).sum();
    let mean_nanos = sum_nanos / (len as u128);
    let mean = Duration::from_nanos(mean_nanos as u64);

    Percentiles {
        p50: sorted_timings[p50_idx.min(len - 1)],
        p90: sorted_timings[p90_idx.min(len - 1)],
        p99: sorted_timings[p99_idx.min(len - 1)],
        mean,
    }
}

/// Calculate comprehensive statistics from raw timing samples
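///
/// # Example
///
/// ```rust,ignore
/// // Five samples in nanoseconds; the expected values are easy to verify by hand.
/// let stats = simplebench_runtime::calculate_statistics(&[100, 200, 300, 400, 500]);
/// assert_eq!(stats.mean, 300);
/// assert_eq!(stats.median, 300);
/// assert_eq!(stats.min, 100);
/// assert_eq!(stats.max, 500);
/// ```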
pub fn calculate_statistics(samples: &[u128]) -> Statistics {
    let sample_count = samples.len();

    if sample_count == 0 {
        return Statistics {
            mean: 0,
            median: 0,
            p90: 0,
            p99: 0,
            std_dev: 0.0,
            variance: 0.0,
            min: 0,
            max: 0,
            sample_count: 0,
        };
    }

    // Sort for percentile calculations
    let mut sorted = samples.to_vec();
    sorted.sort();

    // Calculate percentiles
    let p50_idx = (sample_count * 50) / 100;
    let p90_idx = (sample_count * 90) / 100;
    let p99_idx = (sample_count * 99) / 100;

    let median = sorted[p50_idx.min(sample_count - 1)];
    let p90 = sorted[p90_idx.min(sample_count - 1)];
    let p99 = sorted[p99_idx.min(sample_count - 1)];

    // Calculate mean
    let sum: u128 = samples.iter().sum();
    let mean = sum / (sample_count as u128);

    // Calculate variance and standard deviation
    let mean_f64 = mean as f64;
    let variance: f64 = samples
        .iter()
        .map(|&s| {
            let diff = s as f64 - mean_f64;
            diff * diff
        })
        .sum::<f64>()
        / (sample_count as f64);

    let std_dev = variance.sqrt();

    // Min and max
    let min = *sorted.first().unwrap();
    let max = *sorted.last().unwrap();

    Statistics {
        mean,
        median,
        p90,
        p99,
        std_dev,
        variance,
        min,
        max,
        sample_count,
    }
}

/// Run all benchmarks with configuration and stream results
///
/// This is the primary entry point for the generated runner.
/// Prints each benchmark result immediately as it completes.
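///
/// ```rust,ignore
/// // Sketch of a generated runner's entry point; the config construction here is
/// // only a placeholder for however the runner actually loads its configuration.
/// fn main() {
///     let config = simplebench_runtime::config::BenchmarkConfig::default();
///     let _results = simplebench_runtime::run_and_stream_benchmarks(&config);
/// }
/// ```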
pub fn run_and_stream_benchmarks(config: &crate::config::BenchmarkConfig) -> Vec<BenchResult> {
    use crate::baseline::{BaselineManager, ComparisonResult};
    use crate::output::{
        print_benchmark_result_line, print_comparison_line, print_new_baseline_line,
        print_streaming_summary,
    };
    use colored::*;

    match affinity::set_thread_affinity([0]) {
        Ok(_) => println!(
            "{} {}\n",
            "Set affinity to core".green().bold(),
            "0".cyan().bold()
        ),
        Err(e) => println!("Failed to set core affinity: {e:?}"),
    };

    // Verify benchmark environment
    crate::cpu_monitor::verify_benchmark_environment(0);

    let mut results = Vec::new();
    let mut comparisons = Vec::new();

    // Initialize baseline manager
    let baseline_manager = match BaselineManager::new() {
        Ok(bm) => Some(bm),
        Err(e) => {
            eprintln!("Warning: Could not initialize baseline manager: {}", e);
            eprintln!("Running without baseline comparison.");
            None
        }
    };

    // Get benchmark filter if specified
    let bench_filter = std::env::var("SIMPLEBENCH_BENCH_FILTER").ok();

    // Count how many benchmarks match the filter
    let total_benchmarks: usize = inventory::iter::<SimpleBench>().count();
    let filtered_count = if let Some(ref filter) = bench_filter {
        inventory::iter::<SimpleBench>()
            .filter(|b| b.name.contains(filter))
            .count()
    } else {
        total_benchmarks
    };

    println!(
        "{} {} {} {} {}",
        "Running benchmarks with".green().bold(),
        config.measurement.samples,
        "samples ×".green().bold(),
        config.measurement.iterations,
        "iterations".green().bold()
    );

    if let Some(ref filter) = bench_filter {
        println!(
            "{} {} {} (filter: \"{}\")\n",
            "Filtering to".dimmed(),
            filtered_count,
            if filtered_count == 1 {
                "benchmark"
            } else {
                "benchmarks"
            },
            filter
        );
    } else {
        println!();
    }

    // Run each benchmark and print immediately
    for bench in inventory::iter::<SimpleBench> {
        // Apply filter if specified
        if let Some(ref filter) = bench_filter {
            if !bench.name.contains(filter) {
                continue; // Skip this benchmark
            }
        }
        // Run benchmark - the run function handles warmup, measurement, and returns results
        let result = (bench.run)(config);

        // Print benchmark result immediately
        print_benchmark_result_line(&result);

        // Compare with baseline using CPD and print comparison
        if let Some(ref bm) = baseline_manager {
            let crate_name = result.module.split("::").next().unwrap_or("unknown");

            // Load recent baselines for window-based comparison
            let mut is_regression = false;
            if let Ok(historical) =
                bm.load_recent_baselines(crate_name, &result.name, config.comparison.window_size)
            {
                if !historical.is_empty() {
                    // Use CPD-based comparison
                    let comparison_result = crate::baseline::detect_regression_with_cpd(
                        &result,
                        &historical,
                        config.comparison.threshold,
                        config.comparison.confidence_level,
                        config.comparison.cp_threshold,
                        config.comparison.hazard_rate,
                    );

                    is_regression = comparison_result.is_regression;

                    if let Some(ref comparison) = comparison_result.comparison {
                        print_comparison_line(
                            comparison,
                            &result.name,
                            comparison_result.is_regression,
                        );
                    }

                    comparisons.push(comparison_result);
                } else {
                    // First run - no baseline
                    print_new_baseline_line(&result.name);

                    comparisons.push(ComparisonResult {
                        benchmark_name: result.name.clone(),
                        comparison: None,
                        is_regression: false,
                    });
                }
            }

            // Save new baseline with regression flag
            if let Err(e) = bm.save_baseline(crate_name, &result, is_regression) {
                eprintln!(
                    "Warning: Failed to save baseline for {}: {}",
                    result.name, e
                );
            }
        }

        results.push(result);
        println!(); // Blank line between benchmarks
    }

    // Print summary footer
    if !comparisons.is_empty() {
        print_streaming_summary(&comparisons, &config.comparison);

        // Show filter stats if filtering was applied
        if let Some(ref filter) = bench_filter {
            println!(
                "\n{} {} of {} total benchmarks (filter: \"{}\")",
                "Ran".dimmed(),
                filtered_count,
                total_benchmarks,
                filter
            );
        }
    }

    results
}
470
471#[cfg(test)]
472mod tests {
473    use super::*;
474
475    #[test]
476    fn test_calculate_percentiles() {
477        let timings = vec![
478            Duration::from_millis(1),
479            Duration::from_millis(2),
480            Duration::from_millis(3),
481            Duration::from_millis(4),
482            Duration::from_millis(5),
483            Duration::from_millis(6),
484            Duration::from_millis(7),
485            Duration::from_millis(8),
486            Duration::from_millis(9),
487            Duration::from_millis(10),
488        ];
489
490        let percentiles = calculate_percentiles(&timings);
491
492        // For 10 samples: p50 at index 5 (6ms), p90 at index 9 (10ms), p99 at index 9 (10ms)
493        // Mean: (1+2+3+4+5+6+7+8+9+10)/10 = 55/10 = 5.5ms
494        assert_eq!(percentiles.p50, Duration::from_millis(6));
495        assert_eq!(percentiles.p90, Duration::from_millis(10));
496        assert_eq!(percentiles.p99, Duration::from_millis(10));
497        assert_eq!(percentiles.mean, Duration::from_micros(5500));
498    }
499
500    #[test]
501    fn test_calculate_percentiles_single_element() {
502        let timings = vec![Duration::from_millis(5)];
503        let percentiles = calculate_percentiles(&timings);
504
505        assert_eq!(percentiles.p50, Duration::from_millis(5));
506        assert_eq!(percentiles.p90, Duration::from_millis(5));
507        assert_eq!(percentiles.p99, Duration::from_millis(5));
508        assert_eq!(percentiles.mean, Duration::from_millis(5));
509    }
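
    #[test]
    fn test_calculate_statistics_basic() {
        // Five evenly spaced samples (in nanoseconds) with hand-computed expectations.
        let stats = calculate_statistics(&[100, 200, 300, 400, 500]);

        assert_eq!(stats.mean, 300);
        assert_eq!(stats.median, 300);
        assert_eq!(stats.p90, 500);
        assert_eq!(stats.p99, 500);
        assert_eq!(stats.min, 100);
        assert_eq!(stats.max, 500);
        assert_eq!(stats.sample_count, 5);
        // Variance around the mean of 300 is 20_000 ns^2, so std dev is sqrt(20_000) ~ 141.42 ns.
        assert!((stats.variance - 20_000.0).abs() < 1e-9);
        assert!((stats.std_dev - 20_000.0_f64.sqrt()).abs() < 1e-9);
    }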
}