// dev_bench/lib.rs
1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
17//! b.iter(|| {
18//! std::hint::black_box(40 + 2);
19//! });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0); // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//! alongside time, using `dhat`. See the `alloc` module
37//! (visible in rustdoc when the feature is enabled).
38
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![warn(missing_docs)]
41#![warn(rust_2018_idioms)]
42
43use std::time::{Duration, Instant};
44
45use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
46
47#[cfg(feature = "alloc-tracking")]
48#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
49pub mod alloc;
50
51/// Re-export of `dhat` for use by [`install_global_allocator!`].
52///
53/// Hidden from rustdoc; consumers should use the macro, not this path.
54#[cfg(feature = "alloc-tracking")]
55#[doc(hidden)]
56pub use ::dhat as __dhat;
57
58/// Install `dhat::Alloc` as the global allocator.
59///
60/// Available with the `alloc-tracking` feature. Invoke at module scope
61/// in your binary or test target — the macro expands to a
62/// `#[global_allocator] static` declaration that consumers cannot
63/// otherwise express without depending on `dhat` directly.
64///
65/// # Example
66///
67/// ```ignore
68/// // in main.rs or a test target's top level:
69/// dev_bench::install_global_allocator!();
70///
71/// fn main() {
72/// let _profiler = dhat::Profiler::new_heap();
73/// // ... benchmarked code ...
74/// }
75/// ```
76#[cfg(feature = "alloc-tracking")]
77#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
78#[macro_export]
macro_rules! install_global_allocator {
    () => {
        // Expands at the call site into a crate-level static; the name is
        // prefixed with `__DEV_BENCH_` to avoid colliding with user items.
        // `$crate::__dhat` is the hidden re-export declared above, so the
        // caller does not need a direct `dhat` dependency.
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
85
86pub mod baseline;
87
88pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
89
90/// A single benchmark run.
91///
92/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
93/// to produce a [`BenchmarkResult`].
94///
95/// # Example
96///
97/// ```
98/// use dev_bench::Benchmark;
99///
100/// let mut b = Benchmark::new("noop");
101/// for _ in 0..10 {
102/// b.iter(|| std::hint::black_box(42));
103/// }
104/// let r = b.finish();
105/// assert_eq!(r.samples.len(), 10);
106/// ```
pub struct Benchmark {
    // Stable benchmark name; becomes the `bench::<name>` check identifier
    // when the finished result is compared against a baseline.
    name: String,
    // One entry per recorded sample. A sample may cover a whole batch of
    // iterations when recorded via `iter_with_count`.
    samples: Vec<Duration>,
    // Total iterations across all samples; diverges from `samples.len()`
    // only for batched sampling.
    iterations_recorded: u64,
}
112
113impl Benchmark {
114 /// Begin a new benchmark with a stable name.
115 pub fn new(name: impl Into<String>) -> Self {
116 Self {
117 name: name.into(),
118 samples: Vec::new(),
119 iterations_recorded: 0,
120 }
121 }
122
123 /// Run one iteration of the benchmark, capturing the duration.
124 ///
125 /// Each call records exactly one sample.
126 ///
127 /// # Example
128 ///
129 /// ```
130 /// use dev_bench::Benchmark;
131 ///
132 /// let mut b = Benchmark::new("noop");
133 /// b.iter(|| std::hint::black_box(1 + 1));
134 /// let r = b.finish();
135 /// assert_eq!(r.samples.len(), 1);
136 /// ```
137 pub fn iter<F, R>(&mut self, f: F) -> R
138 where
139 F: FnOnce() -> R,
140 {
141 let start = Instant::now();
142 let r = f();
143 let elapsed = start.elapsed();
144 self.samples.push(elapsed);
145 self.iterations_recorded += 1;
146 r
147 }
148
149 /// Run a closure `n` times and record ONE sample for the entire batch.
150 ///
151 /// Use for sub-microsecond operations where per-iteration timing
152 /// would be dominated by `Instant::now()` overhead. The reported
153 /// per-iteration mean is `batch_duration / n`.
154 ///
155 /// # Example
156 ///
157 /// ```
158 /// use dev_bench::Benchmark;
159 ///
160 /// let mut b = Benchmark::new("hot");
161 /// b.iter_with_count(1000, || {
162 /// std::hint::black_box(40 + 2);
163 /// });
164 /// let r = b.finish();
165 /// assert_eq!(r.samples.len(), 1);
166 /// assert_eq!(r.iterations_recorded, 1000);
167 /// ```
168 pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
169 where
170 F: FnMut(),
171 {
172 let start = Instant::now();
173 for _ in 0..n {
174 f();
175 }
176 let elapsed = start.elapsed();
177 self.samples.push(elapsed);
178 self.iterations_recorded += n;
179 }
180
181 /// Run a closure repeatedly for at most `budget` wall-clock time,
182 /// recording one sample per iteration.
183 ///
184 /// Stops as soon as the elapsed time crosses `budget`. The
185 /// closure may run slightly past the budget (the in-flight
186 /// iteration completes); the recorded sample count reflects what
187 /// was actually executed.
188 ///
189 /// Useful when you want a benchmark to run "for N seconds" rather
190 /// than "for N iterations" — the per-iter cost is unknown and you
191 /// just want a bounded run.
192 ///
193 /// # Example
194 ///
195 /// ```
196 /// use dev_bench::Benchmark;
197 /// use std::time::Duration;
198 ///
199 /// let mut b = Benchmark::new("hot");
200 /// b.run_for(Duration::from_millis(20), || {
201 /// std::hint::black_box(1 + 1);
202 /// });
203 /// let r = b.finish();
204 /// // At least one sample was collected.
205 /// assert!(!r.samples.is_empty());
206 /// ```
207 pub fn run_for<F>(&mut self, budget: Duration, mut f: F)
208 where
209 F: FnMut(),
210 {
211 let deadline = Instant::now() + budget;
212 while Instant::now() < deadline {
213 let start = Instant::now();
214 f();
215 let elapsed = start.elapsed();
216 self.samples.push(elapsed);
217 self.iterations_recorded += 1;
218 }
219 }
220
221 /// Finalize the benchmark and produce a [`BenchmarkResult`].
222 pub fn finish(self) -> BenchmarkResult {
223 let n = self.samples.len();
224 let mean = if n == 0 {
225 Duration::ZERO
226 } else {
227 let total: Duration = self.samples.iter().copied().sum();
228 total / n as u32
229 };
230 let mut sorted = self.samples.clone();
231 sorted.sort();
232 let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
233 let p99 = sorted
234 .get((n as f64 * 0.99).floor() as usize)
235 .copied()
236 .unwrap_or(Duration::ZERO);
237 let cv = compute_cv(&self.samples, mean);
238 let total_elapsed: Duration = self.samples.iter().copied().sum();
239 BenchmarkResult {
240 name: self.name,
241 samples: self.samples,
242 iterations_recorded: self.iterations_recorded,
243 total_elapsed,
244 mean,
245 p50,
246 p99,
247 cv,
248 }
249 }
250}
251
/// Population coefficient of variation (stddev / mean) of `samples`.
///
/// Returns `0.0` when there are no samples or the mean is zero (both
/// cases would otherwise divide by zero). Uses the population variance
/// (divisor `n`), matching how `Benchmark::finish` reports `cv`.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    let mean_secs = mean.as_secs_f64();
    if samples.is_empty() || mean_secs == 0.0 {
        return 0.0;
    }
    let sum_sq: f64 = samples
        .iter()
        .map(|d| {
            let delta = d.as_secs_f64() - mean_secs;
            delta * delta
        })
        .sum();
    let variance = sum_sq / samples.len() as f64;
    variance.sqrt() / mean_secs
}
268
269/// One bin of a sample-distribution histogram.
270///
271/// Returned by [`BenchmarkResult::histogram`]. Bins are ordered, the
272/// first bin's `lower` equals `BenchmarkResult::min` and the last
273/// bin's `upper` equals `BenchmarkResult::max`.
274///
275/// # Example
276///
277/// ```
278/// use dev_bench::Benchmark;
279///
280/// let mut b = Benchmark::new("h");
281/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
282/// let bins = b.finish().histogram(4);
283/// assert!(bins.iter().all(|b| b.lower <= b.upper));
284/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HistogramBin {
    /// Inclusive lower bound of this bin.
    pub lower: Duration,
    /// Inclusive upper bound (for the last bin) or exclusive upper
    /// bound (for all other bins).
    ///
    /// [`BenchmarkResult::histogram`] pins the last bin's `upper`
    /// exactly to the sample maximum, so the bins jointly cover
    /// `[min, max]` with no gap at the top.
    pub upper: Duration,
    /// Number of samples falling into this bin. Summed over all bins,
    /// this equals the total sample count.
    pub count: usize,
}
295
296/// The result of a finished benchmark.
297///
298/// Statistics are computed losslessly from the raw `samples`.
299///
300/// # Example
301///
302/// ```
303/// use dev_bench::Benchmark;
304///
305/// let mut b = Benchmark::new("noop");
306/// for _ in 0..10 {
307/// b.iter(|| std::hint::black_box(42));
308/// }
309/// let r = b.finish();
310/// assert!(r.mean.as_nanos() > 0);
311/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration, precomputed by [`Benchmark::finish`].
    pub mean: Duration,
    /// 50th percentile sample duration (nearest-rank).
    pub p50: Duration,
    /// 99th percentile sample duration (nearest-rank).
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
338
339impl BenchmarkResult {
340 /// Effective throughput in operations per second.
341 ///
342 /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
343 /// `0.0` for an empty result or zero elapsed time.
344 ///
345 /// # Example
346 ///
347 /// ```
348 /// use dev_bench::Benchmark;
349 ///
350 /// let mut b = Benchmark::new("hot");
351 /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
352 /// let r = b.finish();
353 /// assert!(r.ops_per_sec() > 0.0);
354 /// ```
355 pub fn ops_per_sec(&self) -> f64 {
356 if self.total_elapsed.is_zero() {
357 return 0.0;
358 }
359 self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
360 }
361
362 /// Smallest sample. Returns `Duration::ZERO` for an empty result.
363 pub fn min(&self) -> Duration {
364 self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
365 }
366
367 /// Largest sample. Returns `Duration::ZERO` for an empty result.
368 pub fn max(&self) -> Duration {
369 self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
370 }
371
372 /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
373 ///
374 /// Uses `n-1` (Bessel's correction) for the sample variance.
375 pub fn stddev(&self) -> f64 {
376 let n = self.samples.len();
377 if n < 2 {
378 return 0.0;
379 }
380 let mean_s = self.mean.as_secs_f64();
381 let var = self
382 .samples
383 .iter()
384 .map(|d| (d.as_secs_f64() - mean_s).powi(2))
385 .sum::<f64>()
386 / (n as f64 - 1.0);
387 var.sqrt()
388 }
389
390 /// Median absolute deviation, in seconds. `0.0` for empty results.
391 ///
392 /// `MAD = median(|x_i - median(x)|)`. Less affected by outliers than
393 /// standard deviation; useful for noisy measurements.
394 pub fn mad(&self) -> f64 {
395 if self.samples.is_empty() {
396 return 0.0;
397 }
398 let p50_s = self.p50.as_secs_f64();
399 let mut deviations: Vec<f64> = self
400 .samples
401 .iter()
402 .map(|d| (d.as_secs_f64() - p50_s).abs())
403 .collect();
404 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
405 let mid = deviations.len() / 2;
406 deviations[mid]
407 }
408
409 /// 90th percentile sample duration. `Duration::ZERO` for empty results.
410 pub fn p90(&self) -> Duration {
411 self.percentile(0.90)
412 }
413
414 /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
415 ///
416 /// At least 1000 samples are required to be meaningful; with fewer
417 /// samples this returns the largest sample.
418 pub fn p999(&self) -> Duration {
419 self.percentile(0.999)
420 }
421
422 /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
423 /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
424 pub fn percentile(&self, q: f64) -> Duration {
425 if self.samples.is_empty() {
426 return Duration::ZERO;
427 }
428 let q = q.clamp(0.0, 1.0);
429 let mut sorted = self.samples.clone();
430 sorted.sort();
431 let n = sorted.len();
432 let idx = ((n as f64) * q).floor() as usize;
433 let idx = idx.min(n - 1);
434 sorted[idx]
435 }
436
437 /// Compute a uniform-width histogram over the sample distribution.
438 ///
439 /// Returns `bucket_count` bins covering `[min, max]`, each with
440 /// the count of samples falling into that bin. The returned
441 /// `Vec<HistogramBin>` is in ascending order; the first bin's
442 /// `lower` equals `min()`, the last bin's `upper` equals `max()`.
443 ///
444 /// For an empty result or `bucket_count == 0`, returns `vec![]`.
445 /// When `min == max` (all samples equal), returns one bin with
446 /// the full sample count.
447 ///
448 /// Useful for spotting bimodality, outlier tails, and warmup
449 /// effects that mean/percentile alone hide.
450 ///
451 /// # Example
452 ///
453 /// ```
454 /// use dev_bench::Benchmark;
455 ///
456 /// let mut b = Benchmark::new("h");
457 /// for _ in 0..50 { b.iter(|| std::hint::black_box(1 + 1)); }
458 /// let r = b.finish();
459 /// let hist = r.histogram(8);
460 /// assert!(hist.len() <= 8);
461 /// let total: usize = hist.iter().map(|h| h.count).sum();
462 /// assert_eq!(total, r.samples.len());
463 /// ```
464 pub fn histogram(&self, bucket_count: usize) -> Vec<HistogramBin> {
465 if bucket_count == 0 || self.samples.is_empty() {
466 return Vec::new();
467 }
468 let min = self.min();
469 let max = self.max();
470 if min == max {
471 return vec![HistogramBin {
472 lower: min,
473 upper: max,
474 count: self.samples.len(),
475 }];
476 }
477 let total_ns = (max.as_nanos() - min.as_nanos()) as f64;
478 let bucket_ns = total_ns / bucket_count as f64;
479 let mut counts = vec![0usize; bucket_count];
480 for s in &self.samples {
481 let offset = (s.as_nanos() - min.as_nanos()) as f64;
482 let mut idx = (offset / bucket_ns).floor() as usize;
483 if idx >= bucket_count {
484 idx = bucket_count - 1;
485 }
486 counts[idx] += 1;
487 }
488 let min_ns = min.as_nanos() as u64;
489 let mut bins = Vec::with_capacity(bucket_count);
490 for (i, count) in counts.into_iter().enumerate() {
491 let lower_ns = min_ns + (bucket_ns * i as f64) as u64;
492 let upper_ns = if i + 1 == bucket_count {
493 max.as_nanos() as u64
494 } else {
495 min_ns + (bucket_ns * (i + 1) as f64) as u64
496 };
497 bins.push(HistogramBin {
498 lower: Duration::from_nanos(lower_ns),
499 upper: Duration::from_nanos(upper_ns),
500 count,
501 });
502 }
503 bins
504 }
505
506 /// Compare this result against a baseline using a default-tuned
507 /// [`CompareOptions`].
508 ///
509 /// `baseline_mean` is the previous mean duration. If `None`, the
510 /// verdict is `Skip` and no comparison is made.
511 ///
512 /// # Example
513 ///
514 /// ```
515 /// use dev_bench::{Benchmark, Threshold};
516 /// use std::time::Duration;
517 ///
518 /// let mut b = Benchmark::new("x");
519 /// b.iter(|| std::hint::black_box(1 + 1));
520 /// let r = b.finish();
521 /// let _ = r.compare_against_baseline(
522 /// Some(Duration::from_nanos(1)),
523 /// Threshold::regression_pct(10.0),
524 /// );
525 /// ```
526 pub fn compare_against_baseline(
527 &self,
528 baseline_mean: Option<Duration>,
529 threshold: Threshold,
530 ) -> CheckResult {
531 self.compare_with_options(&CompareOptions {
532 baseline_mean,
533 threshold,
534 ..CompareOptions::default()
535 })
536 }
537
538 /// Compare this result against a baseline using full options.
539 ///
540 /// Behavior:
541 /// - No baseline -> `Skip`.
542 /// - Sample count below `min_samples` -> `Skip` with detail.
543 /// - Within threshold -> `Pass` with numeric evidence.
544 /// - Over threshold but within CV noise band -> `Warn`.
545 /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
546 ///
547 /// In every non-`Skip` case, the returned [`CheckResult`] carries
548 /// a `bench` tag and numeric `Evidence` for `mean_ns`,
549 /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
550 ///
551 /// # Example
552 ///
553 /// ```
554 /// use dev_bench::{Benchmark, CompareOptions, Threshold};
555 /// use std::time::Duration;
556 ///
557 /// let mut b = Benchmark::new("x");
558 /// b.iter(|| std::hint::black_box(1 + 1));
559 /// let r = b.finish();
560 /// let opts = CompareOptions {
561 /// baseline_mean: Some(Duration::from_nanos(1)),
562 /// threshold: Threshold::regression_pct(20.0),
563 /// min_samples: 1,
564 /// allow_cv_noise_band: true,
565 /// };
566 /// let _check = r.compare_with_options(&opts);
567 /// ```
568 pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
569 let name = format!("bench::{}", self.name);
570 let mut evidence = self.numeric_evidence();
571 let tags = vec!["bench".to_string()];
572
573 let Some(baseline) = opts.baseline_mean else {
574 let mut c = CheckResult::skip(name).with_detail("no baseline available");
575 c.tags = tags;
576 c.evidence = evidence;
577 return c;
578 };
579
580 if (self.samples.len() as u64) < opts.min_samples {
581 let mut c = CheckResult::skip(name).with_detail(format!(
582 "fewer samples than min_samples ({} < {})",
583 self.samples.len(),
584 opts.min_samples
585 ));
586 c.tags = tags;
587 c.evidence = evidence;
588 return c;
589 }
590
591 let current_ns = self.mean.as_nanos();
592 let baseline_ns = baseline.as_nanos();
593 evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));
594
595 let regressed = match opts.threshold {
596 Threshold::RegressionPct(pct) => {
597 let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
598 current_ns as f64 > allowed
599 }
600 Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
601 Threshold::ThroughputDropPct(pct) => {
602 // Throughput-based; convert via mean.
603 let baseline_ops = if baseline.is_zero() {
604 0.0
605 } else {
606 1.0 / baseline.as_secs_f64()
607 };
608 let drop_floor = baseline_ops * (1.0 - pct / 100.0);
609 self.ops_per_sec() < drop_floor
610 }
611 };
612
613 let detail = format!(
614 "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
615 current_ns,
616 baseline_ns,
617 self.cv,
618 self.ops_per_sec()
619 );
620
621 if !regressed {
622 let mut c = CheckResult::pass(name).with_detail(detail);
623 c.tags = tags;
624 c.evidence = evidence;
625 return c;
626 }
627
628 // Regression detected. Decide Fail vs Warn based on CV noise band.
629 let in_noise_band = opts.allow_cv_noise_band && {
630 let allowed_noise_ns = baseline_ns as f64 * self.cv;
631 let delta_ns = (current_ns as f64) - (baseline_ns as f64);
632 delta_ns <= allowed_noise_ns
633 };
634 let mut tags = tags;
635 tags.push("regression".to_string());
636 if in_noise_band {
637 let mut c = CheckResult::warn(name, Severity::Warning)
638 .with_detail(format!("{} (within CV noise band)", detail));
639 c.tags = tags;
640 c.evidence = evidence;
641 c
642 } else {
643 let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
644 c.tags = tags;
645 c.evidence = evidence;
646 c
647 }
648 }
649
650 /// Build a one-check `Report` containing the comparison result.
651 ///
652 /// Convenience for producers that want a complete `Report` rather
653 /// than a single `CheckResult`. Sets `subject = self.name`,
654 /// `producer = "dev-bench"`.
655 ///
656 /// # Example
657 ///
658 /// ```
659 /// use dev_bench::{Benchmark, Threshold};
660 ///
661 /// let mut b = Benchmark::new("x");
662 /// b.iter(|| std::hint::black_box(1 + 1));
663 /// let r = b.finish();
664 /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
665 /// assert_eq!(report.checks.len(), 1);
666 /// ```
667 pub fn into_report(
668 self,
669 subject_version: impl Into<String>,
670 baseline_mean: Option<Duration>,
671 threshold: Threshold,
672 ) -> Report {
673 let name = self.name.clone();
674 let check = self.compare_against_baseline(baseline_mean, threshold);
675 let mut r = Report::new(name, subject_version).with_producer("dev-bench");
676 r.push(check);
677 r.finish();
678 r
679 }
680
681 fn numeric_evidence(&self) -> Vec<Evidence> {
682 vec![
683 Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
684 // baseline_ns inserted at index 1 by callers when available.
685 Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
686 Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
687 Evidence::numeric("cv", self.cv),
688 Evidence::numeric("ops_per_sec", self.ops_per_sec()),
689 Evidence::numeric("samples", self.samples.len() as f64),
690 Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
691 ]
692 }
693}
694
695/// A threshold defining how much slower-than-baseline is acceptable.
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    ///
    /// The subtraction saturates at zero, so a result faster than the
    /// baseline can never trip this threshold.
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}
712
713impl Threshold {
714 /// Build a percent-based duration regression threshold.
715 ///
716 /// # Example
717 ///
718 /// ```
719 /// use dev_bench::Threshold;
720 /// let t = Threshold::regression_pct(20.0);
721 /// assert!(matches!(t, Threshold::RegressionPct(_)));
722 /// ```
723 pub fn regression_pct(pct: f64) -> Self {
724 Threshold::RegressionPct(pct)
725 }
726
727 /// Build an absolute duration regression threshold in nanoseconds.
728 ///
729 /// # Example
730 ///
731 /// ```
732 /// use dev_bench::Threshold;
733 /// let t = Threshold::regression_abs_ns(500);
734 /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
735 /// ```
736 pub fn regression_abs_ns(nanos: u128) -> Self {
737 Threshold::RegressionAbsoluteNs(nanos)
738 }
739
740 /// Build a percent-based throughput drop threshold.
741 ///
742 /// # Example
743 ///
744 /// ```
745 /// use dev_bench::Threshold;
746 /// let t = Threshold::throughput_drop_pct(10.0);
747 /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
748 /// ```
749 pub fn throughput_drop_pct(pct: f64) -> Self {
750 Threshold::ThroughputDropPct(pct)
751 }
752}
753
754/// Options for [`BenchmarkResult::compare_with_options`].
755///
756/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
757/// `allow_cv_noise_band = true`.
758///
759/// # Example
760///
761/// ```
762/// use dev_bench::{CompareOptions, Threshold};
763/// use std::time::Duration;
764///
765/// let opts = CompareOptions {
766/// baseline_mean: Some(Duration::from_nanos(1000)),
767/// threshold: Threshold::regression_pct(20.0),
768/// min_samples: 30,
769/// allow_cv_noise_band: true,
770/// };
771/// assert_eq!(opts.min_samples, 30);
772/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply. See [`Threshold`] for the
    /// available kinds and their semantics.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn`.
    pub allow_cv_noise_band: bool,
}
786
787impl Default for CompareOptions {
788 fn default() -> Self {
789 Self {
790 baseline_mean: None,
791 threshold: Threshold::regression_pct(10.0),
792 min_samples: 1,
793 allow_cv_noise_band: true,
794 }
795 }
796}
797
/// A trait for any object that can run a benchmark and produce a result.
///
/// `run` takes `&mut self`, so implementations may mutate internal
/// state (e.g. reusable buffers) across invocations.
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}
803
804/// Producer wrapper that runs a benchmark and emits a single-check
805/// [`Report`] via [`Producer::produce`].
806///
807/// # Example
808///
809/// ```no_run
810/// use dev_bench::{Benchmark, BenchProducer, Threshold};
811/// use dev_report::Producer;
812///
813/// fn run_bench() -> dev_bench::BenchmarkResult {
814/// let mut b = Benchmark::new("hot_path");
815/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
816/// b.finish()
817/// }
818///
819/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
820/// let report = producer.produce();
821/// assert_eq!(report.checks.len(), 1);
822/// ```
pub struct BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    // Closure that executes the benchmark and yields its result.
    run: F,
    // Version string recorded on the emitted `Report`.
    subject_version: String,
    // Optional baseline mean forwarded to the comparison.
    baseline_mean: Option<Duration>,
    // Regression threshold forwarded to the comparison.
    threshold: Threshold,
}
832
833impl<F> BenchProducer<F>
834where
835 F: Fn() -> BenchmarkResult,
836{
837 /// Build a new producer.
838 pub fn new(
839 run: F,
840 subject_version: impl Into<String>,
841 baseline_mean: Option<Duration>,
842 threshold: Threshold,
843 ) -> Self {
844 Self {
845 run,
846 subject_version: subject_version.into(),
847 baseline_mean,
848 threshold,
849 }
850 }
851}
852
853impl<F> Producer for BenchProducer<F>
854where
855 F: Fn() -> BenchmarkResult,
856{
857 fn produce(&self) -> Report {
858 let result = (self.run)();
859 result.into_report(
860 self.subject_version.clone(),
861 self.baseline_mean,
862 self.threshold,
863 )
864 }
865}
866
#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    // Per-iteration sampling: every `iter` call records one sample.
    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    // Batched sampling: one sample covers all `n` iterations.
    #[test]
    fn iter_with_count_records_one_sample() {
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    // Comparing a result against its own mean cannot regress.
    #[test]
    fn small_regression_under_threshold_passes() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
        let mut b = Benchmark::new("x");
        // Inject controlled samples by running noop iterations.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Force a known mean and cv for deterministic comparison
        // (fields are public, so the stats can be overwritten).
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn extra_stats_are_consistent() {
        let mut b = Benchmark::new("uniform");
        for _ in 0..20 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Bounds.
        assert!(r.min() <= r.mean);
        assert!(r.mean <= r.max());
        assert!(r.p50 <= r.p90());
        assert!(r.p90() <= r.p99);
        assert!(r.p99 <= r.p999());
        // Numbers are non-negative finite.
        assert!(r.stddev() >= 0.0);
        assert!(r.mad() >= 0.0);
    }

    #[test]
    fn percentile_clamps_to_bounds() {
        let mut b = Benchmark::new("p");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1));
        }
        let r = b.finish();
        // q < 0.0 -> first sample; q > 1.0 -> last sample.
        let lo = r.percentile(-0.5);
        let hi = r.percentile(1.5);
        assert!(lo <= hi);
    }

    #[test]
    fn empty_result_stats_are_zero() {
        let r = Benchmark::new("empty").finish();
        assert_eq!(r.min(), Duration::ZERO);
        assert_eq!(r.max(), Duration::ZERO);
        assert_eq!(r.p90(), Duration::ZERO);
        assert_eq!(r.p999(), Duration::ZERO);
        assert_eq!(r.stddev(), 0.0);
        assert_eq!(r.mad(), 0.0);
    }

    #[test]
    fn run_for_collects_at_least_one_sample() {
        let mut b = Benchmark::new("budget");
        b.run_for(Duration::from_millis(10), || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert!(!r.samples.is_empty());
        assert_eq!(r.iterations_recorded, r.samples.len() as u64);
    }

    #[test]
    fn run_for_zero_budget_collects_no_samples() {
        let mut b = Benchmark::new("zero");
        b.run_for(Duration::ZERO, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        // With zero budget, deadline has already passed; no iterations.
        // (Allow one sample in case the clock reads equal instants.)
        assert!(r.samples.is_empty() || r.samples.len() <= 1);
    }

    #[test]
    fn histogram_total_count_equals_samples() {
        let mut b = Benchmark::new("h");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let bins = r.histogram(8);
        assert!(!bins.is_empty());
        let total: usize = bins.iter().map(|b| b.count).sum();
        assert_eq!(total, r.samples.len());
    }

    #[test]
    fn histogram_zero_buckets_returns_empty() {
        let mut b = Benchmark::new("h");
        b.iter(|| std::hint::black_box(1));
        let r = b.finish();
        assert!(r.histogram(0).is_empty());
    }

    #[test]
    fn histogram_empty_result_returns_empty() {
        let r = Benchmark::new("e").finish();
        assert!(r.histogram(8).is_empty());
    }

    #[test]
    fn histogram_bins_are_ordered() {
        let mut b = Benchmark::new("h");
        for _ in 0..30 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let bins = b.finish().histogram(5);
        for win in bins.windows(2) {
            assert!(win[0].lower <= win[1].lower);
            assert!(win[0].lower <= win[0].upper);
        }
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}