// fluxbench_core/src/bencher.rs
1//! Bencher - The Benchmark Iteration API
2//!
3//! Provides the user-facing API for defining what to measure.
4//! Uses Criterion-style batched sampling: iterations are grouped into samples,
5//! with each sample being the average of many iterations.
6
7use crate::allocator::{current_allocation, reset_allocation_counter};
8use crate::measure::Timer;
9use fluxbench_ipc::Sample;
10
/// Default number of samples to collect per benchmark (matches Criterion's
/// default of 100 samples).
pub const DEFAULT_SAMPLE_COUNT: usize = 100;

/// Minimum samples required for statistical validity; `Bencher::with_config`
/// clamps any smaller request up to this floor.
pub const MIN_SAMPLE_COUNT: usize = 10;
16
17/// Mode of iteration for the benchmark
18#[derive(Debug, Clone, Copy, PartialEq, Eq)]
19pub enum IterationMode {
20    /// Simple iteration - measure entire closure
21    Simple,
22    /// Iteration with setup - separate setup from measurement
23    WithSetup,
24    /// Iteration with teardown
25    WithTeardown,
26}
27
/// Result of a single benchmark run, produced by [`Bencher::finish`].
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// All collected samples (each sample = average of many iterations)
    pub samples: Vec<Sample>,
    /// Total iterations performed across all samples (includes warmup
    /// iterations, since the counter is shared across both phases)
    pub iterations: u64,
    /// Total time spent in measurement (excluding warmup).
    /// NOTE(review): `finish` computes this as the sum of each sample's
    /// *mean* per-iteration duration, not the wall-clock measurement time —
    /// roughly `sample_count * avg_iter_time`, not `iterations * avg_iter_time`.
    pub total_time_ns: u64,
}
38
/// The Bencher provides iteration control for benchmarks.
///
/// Uses Criterion-style batched sampling:
/// - Warmup phase estimates iteration time
/// - Measurement phase batches iterations into samples
/// - Each sample = average of many iterations (reduces noise)
pub struct Bencher {
    // === Current sample accumulation ===
    // Running totals for the in-progress sample; flushed to `samples`
    // once `current_sample_iters` reaches `iters_per_sample`.
    current_sample_time_ns: u64,
    current_sample_cycles: u64,
    current_sample_iters: u64,
    current_sample_alloc_bytes: u64,
    current_sample_alloc_count: u64,

    // === Completed samples ===
    // Each entry holds per-iteration averages for one batch.
    samples: Vec<Sample>,

    // === Configuration ===
    // Desired number of samples; clamped to at least MIN_SAMPLE_COUNT.
    target_samples: usize,
    // Iterations averaged into each sample; derived from warmup timings.
    iters_per_sample: u64,
    // When true, allocator counters are reset/read around each iteration.
    track_allocations: bool,

    // === State ===
    // Counts every iteration, warmup included.
    total_iterations: u64,
    // True until `start_measurement` is called.
    is_warmup: bool,
    warmup_times: Vec<u64>, // Raw timings during warmup for estimation

    // === Cached runtime for iter_async fallback ===
    // Lazily built when `iter_async` runs outside an existing tokio runtime,
    // so the runtime isn't reconstructed on every iteration.
    cached_runtime: Option<tokio::runtime::Runtime>,
}
69
70impl Bencher {
71    /// Create a new Bencher
72    pub fn new(track_allocations: bool) -> Self {
73        Self::with_config(track_allocations, DEFAULT_SAMPLE_COUNT)
74    }
75
76    /// Create a Bencher with custom sample count
77    pub fn with_config(track_allocations: bool, target_samples: usize) -> Self {
78        let target_samples = target_samples.max(MIN_SAMPLE_COUNT);
79        Self {
80            current_sample_time_ns: 0,
81            current_sample_cycles: 0,
82            current_sample_iters: 0,
83            current_sample_alloc_bytes: 0,
84            current_sample_alloc_count: 0,
85            samples: Vec::with_capacity(target_samples),
86            target_samples,
87            iters_per_sample: 1, // Will be set after warmup
88            track_allocations,
89            total_iterations: 0,
90            is_warmup: true,
91            warmup_times: Vec::with_capacity(1000),
92            cached_runtime: None,
93        }
94    }
95
96    /// Set iterations per sample (called after warmup estimation)
97    pub fn set_iters_per_sample(&mut self, iters: u64) {
98        self.iters_per_sample = iters.max(1);
99    }
100
101    /// Get estimated iteration time from warmup (in nanoseconds)
102    pub fn estimated_iter_time_ns(&self) -> Option<u64> {
103        if self.warmup_times.is_empty() {
104            return None;
105        }
106        let sum: u64 = self.warmup_times.iter().sum();
107        Some(sum / self.warmup_times.len() as u64)
108    }
109
110    /// Transition from warmup to measurement phase
111    pub fn start_measurement(&mut self, measurement_time_ns: u64) {
112        self.is_warmup = false;
113
114        // Calculate iterations per sample based on warmup estimate
115        if let Some(iter_time) = self.estimated_iter_time_ns() {
116            let time_per_sample = measurement_time_ns / self.target_samples as u64;
117            self.iters_per_sample = (time_per_sample / iter_time).max(1);
118        } else {
119            self.iters_per_sample = 1;
120        }
121
122        // Clear warmup data
123        self.warmup_times.clear();
124        self.warmup_times.shrink_to_fit();
125
126        // Reset accumulation state
127        self.current_sample_time_ns = 0;
128        self.current_sample_cycles = 0;
129        self.current_sample_iters = 0;
130        self.current_sample_alloc_bytes = 0;
131        self.current_sample_alloc_count = 0;
132    }
133
134    /// Accumulate a single iteration's measurements into the current sample
135    ///
136    /// During warmup: records timing for iteration time estimation.
137    /// During measurement: accumulates into batched samples, flushing when complete.
138    ///
139    /// # Arguments
140    /// * `duration_nanos` - Iteration duration in nanoseconds
141    /// * `cpu_cycles` - CPU cycles consumed (x86_64 only, 0 otherwise)
142    /// * `alloc_bytes` - Bytes allocated during iteration
143    /// * `alloc_count` - Number of allocations during iteration
144    #[inline]
145    fn accumulate_sample(
146        &mut self,
147        duration_nanos: u64,
148        cpu_cycles: u64,
149        alloc_bytes: u64,
150        alloc_count: u64,
151    ) {
152        self.total_iterations += 1;
153
154        if self.is_warmup {
155            // During warmup: collect raw timings for estimation
156            self.warmup_times.push(duration_nanos);
157        } else {
158            // During measurement: accumulate into current sample
159            self.current_sample_time_ns += duration_nanos;
160            self.current_sample_cycles += cpu_cycles;
161            self.current_sample_iters += 1;
162            self.current_sample_alloc_bytes += alloc_bytes;
163            self.current_sample_alloc_count += alloc_count;
164
165            // Check if we've completed this sample batch
166            if self.current_sample_iters >= self.iters_per_sample {
167                self.flush_sample();
168            }
169        }
170    }
171
172    /// Run the benchmark closure for one iteration.
173    ///
174    /// During warmup: records individual timings for estimation.
175    /// During measurement: accumulates into batched samples.
176    #[inline]
177    pub fn iter<T, F>(&mut self, mut f: F)
178    where
179        F: FnMut() -> T,
180    {
181        // Reset allocation tracking
182        if self.track_allocations {
183            reset_allocation_counter();
184        }
185
186        // Start timing
187        let timer = Timer::start();
188
189        // Run the benchmark
190        let _ = std::hint::black_box(f());
191
192        // Stop timing
193        let (duration_nanos, cpu_cycles) = timer.stop();
194
195        // Collect allocation data
196        let (alloc_bytes, alloc_count) = if self.track_allocations {
197            current_allocation()
198        } else {
199            (0, 0)
200        };
201
202        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
203    }
204
205    /// Run the benchmark with separate setup phase
206    #[inline]
207    pub fn iter_with_setup<T, S, F, R>(&mut self, mut setup: S, mut routine: F)
208    where
209        S: FnMut() -> T,
210        F: FnMut(T) -> R,
211    {
212        // Run setup (not timed)
213        let input = setup();
214
215        // Reset allocation tracking after setup
216        if self.track_allocations {
217            reset_allocation_counter();
218        }
219
220        // Start timing
221        let timer = Timer::start();
222
223        // Run the benchmark
224        let _ = std::hint::black_box(routine(input));
225
226        // Stop timing
227        let (duration_nanos, cpu_cycles) = timer.stop();
228
229        // Collect allocation data
230        let (alloc_bytes, alloc_count) = if self.track_allocations {
231            current_allocation()
232        } else {
233            (0, 0)
234        };
235
236        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
237    }
238
239    /// Run benchmark with batched iterations (user-specified batch size)
240    #[inline]
241    pub fn iter_batched<T, S, F, R>(&mut self, batch_size: u64, mut setup: S, mut routine: F)
242    where
243        S: FnMut() -> T,
244        F: FnMut(&T) -> R,
245    {
246        // Run setup
247        let input = setup();
248
249        // Reset allocation tracking
250        if self.track_allocations {
251            reset_allocation_counter();
252        }
253
254        // Start timing
255        let timer = Timer::start();
256
257        // Run batched iterations
258        for _ in 0..batch_size {
259            let _ = std::hint::black_box(routine(std::hint::black_box(&input)));
260        }
261
262        // Stop timing
263        let (total_nanos, total_cycles) = timer.stop();
264
265        // Per-iteration values (use f64 to avoid integer truncation for fast ops)
266        let per_iter_nanos = ((total_nanos as f64) / (batch_size as f64)).round() as u64;
267        let per_iter_cycles = ((total_cycles as f64) / (batch_size as f64)).round() as u64;
268
269        // Collect allocation data (total for batch, then average)
270        let (alloc_bytes, alloc_count) = if self.track_allocations {
271            let (bytes, count) = current_allocation();
272            (bytes / batch_size, count / batch_size)
273        } else {
274            (0, 0)
275        };
276
277        // For batched iterations, we count the batch as batch_size iterations
278        // but accumulate as a single sample point
279        self.total_iterations += batch_size - 1; // -1 because accumulate_sample adds 1
280        self.accumulate_sample(per_iter_nanos, per_iter_cycles, alloc_bytes, alloc_count);
281    }
282
283    /// Run an async benchmark closure (standalone - creates its own runtime)
284    #[inline]
285    pub fn iter_async_standalone<T, F, Fut>(&mut self, mut f: F)
286    where
287        F: FnMut() -> Fut,
288        Fut: std::future::Future<Output = T>,
289    {
290        let rt = tokio::runtime::Builder::new_current_thread()
291            .enable_all()
292            .build()
293            .expect("Failed to create tokio runtime");
294
295        if self.track_allocations {
296            reset_allocation_counter();
297        }
298
299        let timer = Timer::start();
300        let _ = std::hint::black_box(rt.block_on(f()));
301        let (duration_nanos, cpu_cycles) = timer.stop();
302
303        let (alloc_bytes, alloc_count) = if self.track_allocations {
304            current_allocation()
305        } else {
306            (0, 0)
307        };
308
309        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
310    }
311
312    /// Run an async benchmark closure within an existing tokio runtime
313    #[inline]
314    pub fn iter_async<T, F, Fut>(&mut self, mut f: F)
315    where
316        F: FnMut() -> Fut,
317        Fut: std::future::Future<Output = T>,
318    {
319        if self.track_allocations {
320            reset_allocation_counter();
321        }
322
323        let handle = tokio::runtime::Handle::try_current();
324
325        let (duration_nanos, cpu_cycles, alloc_bytes, alloc_count) = if let Ok(handle) = handle {
326            tokio::task::block_in_place(|| {
327                let timer = Timer::start();
328                let _ = std::hint::black_box(handle.block_on(f()));
329                let (duration_nanos, cpu_cycles) = timer.stop();
330
331                let (alloc_bytes, alloc_count) = if self.track_allocations {
332                    current_allocation()
333                } else {
334                    (0, 0)
335                };
336
337                (duration_nanos, cpu_cycles, alloc_bytes, alloc_count)
338            })
339        } else {
340            // Cache runtime across iterations to avoid per-iteration construction overhead
341            let rt = self.cached_runtime.get_or_insert_with(|| {
342                tokio::runtime::Builder::new_current_thread()
343                    .enable_all()
344                    .build()
345                    .expect("Failed to create tokio runtime")
346            });
347
348            let timer = Timer::start();
349            let _ = std::hint::black_box(rt.block_on(f()));
350            let (duration_nanos, cpu_cycles) = timer.stop();
351
352            let (alloc_bytes, alloc_count) = if self.track_allocations {
353                current_allocation()
354            } else {
355                (0, 0)
356            };
357
358            (duration_nanos, cpu_cycles, alloc_bytes, alloc_count)
359        };
360
361        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
362    }
363
364    /// Flush current accumulated iterations as a single sample
365    fn flush_sample(&mut self) {
366        if self.current_sample_iters == 0 || self.samples.len() >= self.target_samples {
367            return;
368        }
369
370        let n = self.current_sample_iters;
371
372        // Average values for this sample
373        let avg_time_ns = self.current_sample_time_ns / n;
374        let avg_cycles = self.current_sample_cycles / n;
375        let avg_alloc_bytes = self.current_sample_alloc_bytes / n;
376        let avg_alloc_count = (self.current_sample_alloc_count / n) as u32;
377
378        self.samples.push(Sample::new(
379            avg_time_ns,
380            avg_alloc_bytes,
381            avg_alloc_count,
382            avg_cycles,
383        ));
384
385        // Reset for next sample
386        self.current_sample_time_ns = 0;
387        self.current_sample_cycles = 0;
388        self.current_sample_iters = 0;
389        self.current_sample_alloc_bytes = 0;
390        self.current_sample_alloc_count = 0;
391    }
392
393    /// Check if we've collected enough samples
394    pub fn has_enough_samples(&self) -> bool {
395        self.samples.len() >= self.target_samples
396    }
397
398    /// Get collected samples
399    pub fn samples(&self) -> &[Sample] {
400        &self.samples
401    }
402
403    /// Take ownership of collected samples (clears warmup data)
404    pub fn take_samples(&mut self) -> Vec<Sample> {
405        self.warmup_times.clear();
406        std::mem::take(&mut self.samples)
407    }
408
409    /// Get total iteration count
410    pub fn iteration_count(&self) -> u64 {
411        self.total_iterations
412    }
413
414    /// Get target sample count
415    pub fn target_samples(&self) -> usize {
416        self.target_samples
417    }
418
419    /// Finalize and return results
420    pub fn finish(mut self) -> BenchmarkResult {
421        // Flush any remaining accumulated iterations
422        self.flush_sample();
423
424        let total_time_ns: u64 = self.samples.iter().map(|s| s.duration_nanos).sum();
425
426        BenchmarkResult {
427            samples: self.samples,
428            iterations: self.total_iterations,
429            total_time_ns,
430        }
431    }
432}
433
434/// Run the full benchmark loop: warmup → measurement → finish
435///
436/// This is the shared implementation used by both in-process and isolated execution.
437/// Extracts common logic to avoid duplication between `worker.rs` and `executor.rs`.
438///
439/// # Arguments
440/// * `bencher` - The Bencher instance (takes ownership)
441/// * `runner_fn` - Function that calls the benchmark under test
442/// * `warmup_time_ns` - How long to run warmup phase (nanoseconds)
443/// * `measurement_time_ns` - How long to run measurement phase (nanoseconds)
444/// * `min_iterations` - Optional minimum measurement iterations before completion
445/// * `max_iterations` - Optional cap on measurement iterations
446pub fn run_benchmark_loop<F>(
447    mut bencher: Bencher,
448    mut runner_fn: F,
449    warmup_time_ns: u64,
450    measurement_time_ns: u64,
451    min_iterations: Option<u64>,
452    max_iterations: Option<u64>,
453) -> BenchmarkResult
454where
455    F: FnMut(&mut Bencher),
456{
457    use crate::Instant;
458
459    // Warmup phase - Bencher starts in warmup mode
460    // This collects timing data to estimate iterations per sample
461    let warmup_start = Instant::now();
462    while warmup_start.elapsed().as_nanos() < warmup_time_ns as u128 {
463        runner_fn(&mut bencher);
464    }
465
466    // Transition to measurement phase
467    // This calculates iters_per_sample based on warmup timings
468    bencher.start_measurement(measurement_time_ns);
469
470    // Measurement phase - run until we have enough samples or time runs out
471    let measure_start = Instant::now();
472    let measurement_start_iterations = bencher.iteration_count();
473    let min_iterations = min_iterations.unwrap_or(0);
474    let max_iterations = max_iterations.unwrap_or(u64::MAX).max(min_iterations);
475
476    loop {
477        let measurement_iterations = bencher
478            .iteration_count()
479            .saturating_sub(measurement_start_iterations);
480        let min_iterations_met = measurement_iterations >= min_iterations;
481        let max_iterations_reached = measurement_iterations >= max_iterations;
482        let has_enough_samples = bencher.has_enough_samples();
483        let time_limit_reached = measure_start.elapsed().as_nanos() >= measurement_time_ns as u128;
484
485        if max_iterations_reached {
486            break;
487        }
488
489        // Respect both quality controls:
490        // - stop when sample target reached AND minimum iterations satisfied, or
491        // - stop on time budget only after minimum iterations are satisfied.
492        if (has_enough_samples || time_limit_reached) && min_iterations_met {
493            break;
494        }
495
496        runner_fn(&mut bencher);
497    }
498
499    bencher.finish()
500}
501
#[cfg(test)]
mod tests {
    use super::*;

    /// Drives a full warmup → measurement → finish cycle through `iter`.
    #[test]
    fn test_basic_iter() {
        let mut bencher = Bencher::new(false);

        // Warmup phase: collect 100 raw timings.
        (0..100).for_each(|_| {
            bencher.iter(|| {
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            });
        });

        // Switch to measurement with a 1-second budget.
        bencher.start_measurement(1_000_000_000);

        // Measurement phase: 1000 iterations batched into samples.
        (0..1000).for_each(|_| {
            bencher.iter(|| {
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            });
        });

        let result = bencher.finish();
        assert!(!result.samples.is_empty());
        assert!(result.samples.len() <= DEFAULT_SAMPLE_COUNT);
    }

    /// Setup output feeds the routine; warmup records timings, not samples.
    #[test]
    fn test_iter_with_setup() {
        let mut bencher = Bencher::new(false);

        let mut remaining = 5;
        while remaining > 0 {
            bencher.iter_with_setup(
                || vec![1, 2, 3, 4, 5],    // Setup: create vec
                |v| v.iter().sum::<i32>(), // Measure: sum
            );
            remaining -= 1;
        }

        // During warmup, samples aren't recorded
        assert_eq!(bencher.samples().len(), 0);
        assert_eq!(bencher.warmup_times.len(), 5);
    }

    /// 50 iterations at 5 per sample should yield exactly 10 samples.
    #[test]
    fn test_sample_batching() {
        let mut bencher = Bencher::with_config(false, 10);

        // Bypass warmup entirely and fix the batch size directly.
        bencher.is_warmup = false;
        bencher.iters_per_sample = 5;

        for _ in 0..50 {
            bencher.iter(|| 42);
        }

        let result = bencher.finish();
        assert_eq!(result.samples.len(), 10);
        assert_eq!(result.iterations, 50);
    }

    /// With zero time budgets, the loop still runs until min_iterations.
    #[test]
    fn test_run_loop_respects_min_iterations() {
        let bencher = Bencher::with_config(false, 10);
        let result = run_benchmark_loop(bencher, |b| b.iter(|| 42_u64), 0, 0, Some(100), Some(100));

        assert_eq!(result.iterations, 100);
    }

    /// A max below min is raised to min, so 200 iterations run.
    #[test]
    fn test_run_loop_clamps_min_to_max() {
        let bencher = Bencher::with_config(false, 10);
        let result = run_benchmark_loop(bencher, |b| b.iter(|| 7_u64), 0, 0, Some(200), Some(50));

        assert_eq!(result.iterations, 200);
    }
}