use crate::allocator::{current_allocation, reset_allocation_counter};
use crate::measure::Timer;
use fluxbench_ipc::Sample;

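/// Default number of samples collected per benchmark.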
pub const DEFAULT_SAMPLE_COUNT: usize = 100;

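/// Lower bound on the sample count; smaller requests are clamped up to this value.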
pub const MIN_SAMPLE_COUNT: usize = 10;

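/// How a benchmark body is driven: as a plain closure, with a setup step, or with a
/// teardown step.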
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IterationMode {
    Simple,
    WithSetup,
    WithTeardown,
}

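/// Final output of a benchmark run: the collected samples plus iteration and timing totals.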
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    pub samples: Vec<Sample>,
    pub iterations: u64,
    pub total_time_ns: u64,
}

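/// Drives benchmark iterations and accumulates timing, CPU-cycle, and allocation
/// measurements into [`Sample`]s.
///
/// A `Bencher` starts in a warmup phase, where raw per-iteration times are recorded to
/// estimate iteration cost. After [`Bencher::start_measurement`] it folds
/// `iters_per_sample` iterations into each emitted sample.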
pub struct Bencher {
    current_sample_time_ns: u64,
    current_sample_cycles: u64,
    current_sample_iters: u64,
    current_sample_alloc_bytes: u64,
    current_sample_alloc_count: u64,

    samples: Vec<Sample>,

    target_samples: usize,
    iters_per_sample: u64,
    track_allocations: bool,

    total_iterations: u64,
    is_warmup: bool,
    warmup_times: Vec<u64>,
    cached_runtime: Option<tokio::runtime::Runtime>,
}

impl Bencher {
    /// Creates a bencher that targets [`DEFAULT_SAMPLE_COUNT`] samples.
    pub fn new(track_allocations: bool) -> Self {
        Self::with_config(track_allocations, DEFAULT_SAMPLE_COUNT)
    }

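    /// Creates a bencher targeting `target_samples` samples (clamped to [`MIN_SAMPLE_COUNT`]).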
    pub fn with_config(track_allocations: bool, target_samples: usize) -> Self {
        let target_samples = target_samples.max(MIN_SAMPLE_COUNT);
        Self {
            current_sample_time_ns: 0,
            current_sample_cycles: 0,
            current_sample_iters: 0,
            current_sample_alloc_bytes: 0,
            current_sample_alloc_count: 0,
            samples: Vec::with_capacity(target_samples),
            target_samples,
            iters_per_sample: 1,
            track_allocations,
            total_iterations: 0,
            is_warmup: true,
            warmup_times: Vec::with_capacity(1000),
            cached_runtime: None,
        }
    }

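    /// Overrides how many iterations are folded into each sample (clamped to at least 1).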
    pub fn set_iters_per_sample(&mut self, iters: u64) {
        self.iters_per_sample = iters.max(1);
    }

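    /// Mean per-iteration time observed during warmup, or `None` if no warmup iterations ran.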
    pub fn estimated_iter_time_ns(&self) -> Option<u64> {
        if self.warmup_times.is_empty() {
            return None;
        }
        let sum: u64 = self.warmup_times.iter().sum();
        Some(sum / self.warmup_times.len() as u64)
    }

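    /// Ends the warmup phase and switches to measurement.
    ///
    /// Uses the warmup timings to size `iters_per_sample` so that `target_samples` samples
    /// roughly fill `measurement_time_ns`, then discards the warmup data and resets the
    /// in-progress sample accumulators.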
    pub fn start_measurement(&mut self, measurement_time_ns: u64) {
        self.is_warmup = false;

        if let Some(iter_time) = self.estimated_iter_time_ns() {
            // Guard against a zero estimate (sub-nanosecond iterations on a coarse timer)
            // so the division below cannot panic.
            let iter_time = iter_time.max(1);
            let time_per_sample = measurement_time_ns / self.target_samples as u64;
            self.iters_per_sample = (time_per_sample / iter_time).max(1);
        } else {
            self.iters_per_sample = 1;
        }

        self.warmup_times.clear();
        self.warmup_times.shrink_to_fit();

        self.current_sample_time_ns = 0;
        self.current_sample_cycles = 0;
        self.current_sample_iters = 0;
        self.current_sample_alloc_bytes = 0;
        self.current_sample_alloc_count = 0;
    }

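    /// Records one completed iteration.
    ///
    /// During warmup the raw duration is stored for later estimation. During measurement
    /// the counters are added to the in-progress sample, which is flushed once
    /// `iters_per_sample` iterations have accumulated.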
    #[inline]
    fn accumulate_sample(
        &mut self,
        duration_nanos: u64,
        cpu_cycles: u64,
        alloc_bytes: u64,
        alloc_count: u64,
    ) {
        self.total_iterations += 1;

        if self.is_warmup {
            self.warmup_times.push(duration_nanos);
        } else {
            self.current_sample_time_ns += duration_nanos;
            self.current_sample_cycles += cpu_cycles;
            self.current_sample_iters += 1;
            self.current_sample_alloc_bytes += alloc_bytes;
            self.current_sample_alloc_count += alloc_count;

            if self.current_sample_iters >= self.iters_per_sample {
                self.flush_sample();
            }
        }
    }

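    /// Times a single execution of `f`.
    ///
    /// The return value is passed through `std::hint::black_box` so the compiler cannot
    /// optimize the measured work away. A minimal usage sketch:
    ///
    /// ```ignore
    /// let mut bencher = Bencher::new(false);
    /// bencher.iter(|| (0..1000u64).sum::<u64>());
    /// ```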
    #[inline]
    pub fn iter<T, F>(&mut self, mut f: F)
    where
        F: FnMut() -> T,
    {
        if self.track_allocations {
            reset_allocation_counter();
        }

        let timer = Timer::start();

        let _ = std::hint::black_box(f());

        let (duration_nanos, cpu_cycles) = timer.stop();

        let (alloc_bytes, alloc_count) = if self.track_allocations {
            current_allocation()
        } else {
            (0, 0)
        };

        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
    }

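    /// Times one execution of `routine`, excluding the `setup` closure from the timed
    /// (and allocation-tracked) region.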
    #[inline]
    pub fn iter_with_setup<T, S, F, R>(&mut self, mut setup: S, mut routine: F)
    where
        S: FnMut() -> T,
        F: FnMut(T) -> R,
    {
        let input = setup();

        if self.track_allocations {
            reset_allocation_counter();
        }

        let timer = Timer::start();

        let _ = std::hint::black_box(routine(input));

        let (duration_nanos, cpu_cycles) = timer.stop();

        let (alloc_bytes, alloc_count) = if self.track_allocations {
            current_allocation()
        } else {
            (0, 0)
        };

        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
    }

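    /// Runs `routine` `batch_size` times against a single shared input from `setup` and
    /// records the per-iteration averages of time, cycles, and allocations.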
    #[inline]
    pub fn iter_batched<T, S, F, R>(&mut self, batch_size: u64, mut setup: S, mut routine: F)
    where
        S: FnMut() -> T,
        F: FnMut(&T) -> R,
    {
        // Mirror `set_iters_per_sample`: treat a zero batch as a single iteration so the
        // per-iteration averages below never divide by zero.
        let batch_size = batch_size.max(1);

        let input = setup();

        if self.track_allocations {
            reset_allocation_counter();
        }

        let timer = Timer::start();

        for _ in 0..batch_size {
            let _ = std::hint::black_box(routine(std::hint::black_box(&input)));
        }

        let (total_nanos, total_cycles) = timer.stop();

        let per_iter_nanos = ((total_nanos as f64) / (batch_size as f64)).round() as u64;
        let per_iter_cycles = ((total_cycles as f64) / (batch_size as f64)).round() as u64;

        let (alloc_bytes, alloc_count) = if self.track_allocations {
            let (bytes, count) = current_allocation();
            (bytes / batch_size, count / batch_size)
        } else {
            (0, 0)
        };

        // `accumulate_sample` adds one iteration itself, so only the remaining
        // `batch_size - 1` are counted here to keep `total_iterations` accurate.
        self.total_iterations += batch_size - 1;
        self.accumulate_sample(per_iter_nanos, per_iter_cycles, alloc_bytes, alloc_count);
    }

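    /// Times one execution of the future produced by `f`, building a fresh current-thread
    /// Tokio runtime for each call. Runtime construction happens before the timer starts,
    /// so it is not included in the measurement.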
    #[inline]
    pub fn iter_async_standalone<T, F, Fut>(&mut self, mut f: F)
    where
        F: FnMut() -> Fut,
        Fut: std::future::Future<Output = T>,
    {
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("Failed to create tokio runtime");

        if self.track_allocations {
            reset_allocation_counter();
        }

        let timer = Timer::start();
        let _ = std::hint::black_box(rt.block_on(f()));
        let (duration_nanos, cpu_cycles) = timer.stop();

        let (alloc_bytes, alloc_count) = if self.track_allocations {
            current_allocation()
        } else {
            (0, 0)
        };

        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
    }

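    /// Times one execution of the future produced by `f`.
    ///
    /// If called from inside a Tokio runtime, the future is driven on that runtime via
    /// `block_in_place`; otherwise a current-thread runtime is lazily created once and
    /// cached on the bencher for reuse.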
    #[inline]
    pub fn iter_async<T, F, Fut>(&mut self, mut f: F)
    where
        F: FnMut() -> Fut,
        Fut: std::future::Future<Output = T>,
    {
        if self.track_allocations {
            reset_allocation_counter();
        }

        let handle = tokio::runtime::Handle::try_current();

        let (duration_nanos, cpu_cycles, alloc_bytes, alloc_count) = if let Ok(handle) = handle {
            tokio::task::block_in_place(|| {
                let timer = Timer::start();
                let _ = std::hint::black_box(handle.block_on(f()));
                let (duration_nanos, cpu_cycles) = timer.stop();

                let (alloc_bytes, alloc_count) = if self.track_allocations {
                    current_allocation()
                } else {
                    (0, 0)
                };

                (duration_nanos, cpu_cycles, alloc_bytes, alloc_count)
            })
        } else {
            let rt = self.cached_runtime.get_or_insert_with(|| {
                tokio::runtime::Builder::new_current_thread()
                    .enable_all()
                    .build()
                    .expect("Failed to create tokio runtime")
            });

            let timer = Timer::start();
            let _ = std::hint::black_box(rt.block_on(f()));
            let (duration_nanos, cpu_cycles) = timer.stop();

            let (alloc_bytes, alloc_count) = if self.track_allocations {
                current_allocation()
            } else {
                (0, 0)
            };

            (duration_nanos, cpu_cycles, alloc_bytes, alloc_count)
        };

        self.accumulate_sample(duration_nanos, cpu_cycles, alloc_bytes, alloc_count);
    }

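    /// Averages the accumulated counters into a [`Sample`] and resets them.
    ///
    /// Does nothing if no iterations have accumulated or the target sample count has
    /// already been reached.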
    fn flush_sample(&mut self) {
        if self.current_sample_iters == 0 || self.samples.len() >= self.target_samples {
            return;
        }

        let n = self.current_sample_iters;

        let avg_time_ns = self.current_sample_time_ns / n;
        let avg_cycles = self.current_sample_cycles / n;
        let avg_alloc_bytes = self.current_sample_alloc_bytes / n;
        let avg_alloc_count = (self.current_sample_alloc_count / n) as u32;

        self.samples.push(Sample::new(
            avg_time_ns,
            avg_alloc_bytes,
            avg_alloc_count,
            avg_cycles,
        ));

        self.current_sample_time_ns = 0;
        self.current_sample_cycles = 0;
        self.current_sample_iters = 0;
        self.current_sample_alloc_bytes = 0;
        self.current_sample_alloc_count = 0;
    }

    /// Returns `true` once the target number of samples has been collected.
    pub fn has_enough_samples(&self) -> bool {
        self.samples.len() >= self.target_samples
    }

    /// Returns the samples collected so far.
    pub fn samples(&self) -> &[Sample] {
        &self.samples
    }

    /// Takes ownership of the collected samples, clearing warmup data as well.
    pub fn take_samples(&mut self) -> Vec<Sample> {
        self.warmup_times.clear();
        std::mem::take(&mut self.samples)
    }

    /// Total number of iterations executed, including warmup.
    pub fn iteration_count(&self) -> u64 {
        self.total_iterations
    }

    /// The number of samples this bencher aims to collect.
    pub fn target_samples(&self) -> usize {
        self.target_samples
    }

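    /// Flushes any partially accumulated sample and returns the final [`BenchmarkResult`].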
    pub fn finish(mut self) -> BenchmarkResult {
        self.flush_sample();

        let total_time_ns: u64 = self.samples.iter().map(|s| s.duration_nanos).sum();

        BenchmarkResult {
            samples: self.samples,
            iterations: self.total_iterations,
            total_time_ns,
        }
    }
}

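/// Runs a complete benchmark: a timed warmup phase followed by a measurement phase that
/// stops once enough samples are collected or the time budget is spent, subject to the
/// optional `min_iterations` / `max_iterations` bounds (`max` is clamped up to `min`).
///
/// A minimal usage sketch:
///
/// ```ignore
/// let bencher = Bencher::new(false);
/// let result = run_benchmark_loop(
///     bencher,
///     |b| b.iter(|| (0..1000u64).sum::<u64>()),
///     500_000_000,   // 0.5 s warmup
///     2_000_000_000, // 2 s measurement
///     None,
///     None,
/// );
/// assert!(!result.samples.is_empty());
/// ```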
pub fn run_benchmark_loop<F>(
    mut bencher: Bencher,
    mut runner_fn: F,
    warmup_time_ns: u64,
    measurement_time_ns: u64,
    min_iterations: Option<u64>,
    max_iterations: Option<u64>,
) -> BenchmarkResult
where
    F: FnMut(&mut Bencher),
{
    use crate::Instant;

    let warmup_start = Instant::now();
    while warmup_start.elapsed().as_nanos() < warmup_time_ns as u128 {
        runner_fn(&mut bencher);
    }

    bencher.start_measurement(measurement_time_ns);

    let measure_start = Instant::now();
    let measurement_start_iterations = bencher.iteration_count();
    let min_iterations = min_iterations.unwrap_or(0);
    let max_iterations = max_iterations.unwrap_or(u64::MAX).max(min_iterations);

    loop {
        let measurement_iterations = bencher
            .iteration_count()
            .saturating_sub(measurement_start_iterations);
        let min_iterations_met = measurement_iterations >= min_iterations;
        let max_iterations_reached = measurement_iterations >= max_iterations;
        let has_enough_samples = bencher.has_enough_samples();
        let time_limit_reached = measure_start.elapsed().as_nanos() >= measurement_time_ns as u128;

        if max_iterations_reached {
            break;
        }

        if (has_enough_samples || time_limit_reached) && min_iterations_met {
            break;
        }

        runner_fn(&mut bencher);
    }

    bencher.finish()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_iter() {
        let mut bencher = Bencher::new(false);

        // Warmup iterations.
        for _ in 0..100 {
            bencher.iter(|| {
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            });
        }

        // Switch to measurement and keep iterating.
        bencher.start_measurement(1_000_000_000);
        for _ in 0..1000 {
            bencher.iter(|| {
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            });
        }

        let result = bencher.finish();
        assert!(!result.samples.is_empty());
        assert!(result.samples.len() <= DEFAULT_SAMPLE_COUNT);
    }

    #[test]
    fn test_iter_with_setup() {
        let mut bencher = Bencher::new(false);

        for _ in 0..5 {
            bencher.iter_with_setup(
                || vec![1, 2, 3, 4, 5],
                |v| v.iter().sum::<i32>(),
            );
        }

        // All iterations so far ran during warmup, so nothing has been flushed to samples.
        assert_eq!(bencher.samples().len(), 0);
        assert_eq!(bencher.warmup_times.len(), 5);
    }

    #[test]
    fn test_sample_batching() {
        let mut bencher = Bencher::with_config(false, 10);
        // Skip warmup and fold 5 iterations into each sample.
        bencher.is_warmup = false;
        bencher.iters_per_sample = 5;

        for _ in 0..50 {
            bencher.iter(|| 42);
        }

        let result = bencher.finish();
        assert_eq!(result.samples.len(), 10);
        assert_eq!(result.iterations, 50);
    }

    #[test]
    fn test_run_loop_respects_min_iterations() {
        let bencher = Bencher::with_config(false, 10);
        let result = run_benchmark_loop(bencher, |b| b.iter(|| 42_u64), 0, 0, Some(100), Some(100));

        assert_eq!(result.iterations, 100);
    }

    #[test]
    fn test_run_loop_clamps_min_to_max() {
        let bencher = Bencher::with_config(false, 10);
        let result = run_benchmark_loop(bencher, |b| b.iter(|| 7_u64), 0, 0, Some(200), Some(50));

        assert_eq!(result.iterations, 200);
    }
}