dev_bench/lib.rs
1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
//! `dev-bench` answers the question: did this change make the code
//! faster, make it slower, or leave it unchanged? It compares current
//! measurements against a stored baseline and emits verdicts via
//! `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
17//! b.iter(|| {
18//! std::hint::black_box(40 + 2);
19//! });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0); // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//! alongside time, using `mod-alloc`'s `dhat_compat` surface
37//! (drop-in for `dhat-rs`). See the `alloc` module (visible
38//! in rustdoc when the feature is enabled).
39
40#![cfg_attr(docsrs, feature(doc_cfg))]
41#![warn(missing_docs)]
42#![warn(rust_2018_idioms)]
43
44use std::time::{Duration, Instant};
45
46use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
47
48#[cfg(feature = "alloc-tracking")]
49#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
50pub mod alloc;
51
52/// Re-export of `mod-alloc`'s `dhat_compat` surface for use by
53/// [`install_global_allocator!`].
54///
55/// Kept under the historical `__dhat` name so the macro expansion
56/// stays compatible across the v0.9.6 → v0.9.7 backend swap. The
57/// `dhat_compat::Alloc` / `Profiler` / `HeapStats` shapes mirror
58/// `dhat-rs`'s public surface field-for-field; users following
59/// dhat-rs's documentation pattern in their own code continue to
60/// work via `use mod_alloc::dhat_compat as dhat;`.
61///
62/// Hidden from rustdoc; consumers should use the macro, not this path.
63#[cfg(feature = "alloc-tracking")]
64#[doc(hidden)]
65pub use ::mod_alloc::dhat_compat as __dhat;
66
/// Install the allocation-tracking global allocator.
///
/// Available with the `alloc-tracking` feature. Invoke at module
/// scope in your binary or test target — the macro expands to a
/// `#[global_allocator] static` declaration that consumers cannot
/// otherwise express without depending on `mod-alloc` directly.
///
/// Because the expansion declares a `#[global_allocator]`, invoke the
/// macro at most once per final binary: Rust permits only a single
/// global allocator in a crate graph.
///
/// The backend is `mod-alloc`'s `dhat_compat::Alloc` (drop-in for
/// `dhat-rs`'s `dhat::Alloc`); behaviour, API surface, and JSON
/// output remain DHAT-viewer-compatible.
///
/// # Example
///
/// ```ignore
/// // in main.rs or a test target's top level:
/// dev_bench::install_global_allocator!();
///
/// // Optionally pull the same compat surface into your own code:
/// use mod_alloc::dhat_compat as dhat;
///
/// fn main() {
///     let _profiler = dhat::Profiler::new_heap();
///     // ... benchmarked code ...
/// }
/// ```
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
#[macro_export]
macro_rules! install_global_allocator {
    // Takes no arguments; the allocator type is reached through the
    // hidden `$crate::__dhat` re-export so the caller needs no direct
    // dependency on the backend crate.
    () => {
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
101
102pub mod baseline;
103
104pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
105
/// A single benchmark run.
///
/// Accumulates duration samples as the measured closure is executed
/// through [`Benchmark::iter`], [`Benchmark::iter_with_count`] or
/// [`Benchmark::run_for`]. Call [`Benchmark::finish`] to turn the
/// collected samples into a [`BenchmarkResult`].
///
/// Implements `Debug` and `Clone` so an in-progress run can be
/// inspected in logs or snapshotted.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
#[derive(Debug, Clone)]
pub struct Benchmark {
    // Stable identifier carried over into the result.
    name: String,
    // One entry per recorded sample, in recording order.
    samples: Vec<Duration>,
    // Number of closure invocations covered by `samples`; exceeds
    // `samples.len()` when batches are recorded.
    iterations_recorded: u64,
}
128
129impl Benchmark {
130 /// Begin a new benchmark with a stable name.
131 pub fn new(name: impl Into<String>) -> Self {
132 Self {
133 name: name.into(),
134 samples: Vec::new(),
135 iterations_recorded: 0,
136 }
137 }
138
139 /// Run one iteration of the benchmark, capturing the duration.
140 ///
141 /// Each call records exactly one sample.
142 ///
143 /// # Example
144 ///
145 /// ```
146 /// use dev_bench::Benchmark;
147 ///
148 /// let mut b = Benchmark::new("noop");
149 /// b.iter(|| std::hint::black_box(1 + 1));
150 /// let r = b.finish();
151 /// assert_eq!(r.samples.len(), 1);
152 /// ```
153 pub fn iter<F, R>(&mut self, f: F) -> R
154 where
155 F: FnOnce() -> R,
156 {
157 let start = Instant::now();
158 let r = f();
159 let elapsed = start.elapsed();
160 self.samples.push(elapsed);
161 self.iterations_recorded += 1;
162 r
163 }
164
165 /// Run a closure `n` times and record ONE sample for the entire batch.
166 ///
167 /// Use for sub-microsecond operations where per-iteration timing
168 /// would be dominated by `Instant::now()` overhead. The reported
169 /// per-iteration mean is `batch_duration / n`.
170 ///
171 /// # Example
172 ///
173 /// ```
174 /// use dev_bench::Benchmark;
175 ///
176 /// let mut b = Benchmark::new("hot");
177 /// b.iter_with_count(1000, || {
178 /// std::hint::black_box(40 + 2);
179 /// });
180 /// let r = b.finish();
181 /// assert_eq!(r.samples.len(), 1);
182 /// assert_eq!(r.iterations_recorded, 1000);
183 /// ```
184 pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
185 where
186 F: FnMut(),
187 {
188 let start = Instant::now();
189 for _ in 0..n {
190 f();
191 }
192 let elapsed = start.elapsed();
193 self.samples.push(elapsed);
194 self.iterations_recorded += n;
195 }
196
197 /// Run a closure repeatedly for at most `budget` wall-clock time,
198 /// recording one sample per iteration.
199 ///
200 /// Stops as soon as the elapsed time crosses `budget`. The
201 /// closure may run slightly past the budget (the in-flight
202 /// iteration completes); the recorded sample count reflects what
203 /// was actually executed.
204 ///
205 /// Useful when you want a benchmark to run "for N seconds" rather
206 /// than "for N iterations" — the per-iter cost is unknown and you
207 /// just want a bounded run.
208 ///
209 /// # Example
210 ///
211 /// ```
212 /// use dev_bench::Benchmark;
213 /// use std::time::Duration;
214 ///
215 /// let mut b = Benchmark::new("hot");
216 /// b.run_for(Duration::from_millis(20), || {
217 /// std::hint::black_box(1 + 1);
218 /// });
219 /// let r = b.finish();
220 /// // At least one sample was collected.
221 /// assert!(!r.samples.is_empty());
222 /// ```
223 pub fn run_for<F>(&mut self, budget: Duration, mut f: F)
224 where
225 F: FnMut(),
226 {
227 let deadline = Instant::now() + budget;
228 while Instant::now() < deadline {
229 let start = Instant::now();
230 f();
231 let elapsed = start.elapsed();
232 self.samples.push(elapsed);
233 self.iterations_recorded += 1;
234 }
235 }
236
237 /// Finalize the benchmark and produce a [`BenchmarkResult`].
238 pub fn finish(self) -> BenchmarkResult {
239 let n = self.samples.len();
240 let mean = if n == 0 {
241 Duration::ZERO
242 } else {
243 let total: Duration = self.samples.iter().copied().sum();
244 total / n as u32
245 };
246 let mut sorted = self.samples.clone();
247 sorted.sort();
248 let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
249 let p99 = sorted
250 .get((n as f64 * 0.99).floor() as usize)
251 .copied()
252 .unwrap_or(Duration::ZERO);
253 let cv = compute_cv(&self.samples, mean);
254 let total_elapsed: Duration = self.samples.iter().copied().sum();
255 BenchmarkResult {
256 name: self.name,
257 samples: self.samples,
258 iterations_recorded: self.iterations_recorded,
259 total_elapsed,
260 mean,
261 p50,
262 p99,
263 cv,
264 }
265 }
266}
267
/// Coefficient of variation of `samples` around `mean`.
///
/// Computed as the population standard deviation (divisor `n`)
/// divided by the mean, both in seconds. Returns `0.0` when there are
/// no samples or when the mean is zero, since the ratio is undefined
/// in those cases.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    let mean_secs = mean.as_secs_f64();
    // Either condition would otherwise lead to a division by zero.
    if samples.is_empty() || mean_secs == 0.0 {
        return 0.0;
    }
    let squared_deviation = |sample: &Duration| (sample.as_secs_f64() - mean_secs).powi(2);
    let count = samples.len() as f64;
    let variance = samples.iter().map(squared_deviation).sum::<f64>() / count;
    variance.sqrt() / mean_secs
}
284
/// A single bucket of a sample-duration histogram.
///
/// Produced by [`BenchmarkResult::histogram`]. Bins come back in
/// ascending order: the first bin starts at `BenchmarkResult::min`
/// and the last bin ends at `BenchmarkResult::max`.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("h");
/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
/// let bins = b.finish().histogram(4);
/// assert!(bins.iter().all(|b| b.lower <= b.upper));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HistogramBin {
    /// Start of the bin; always inclusive.
    pub lower: Duration,
    /// End of the bin: exclusive for every bin except the last one,
    /// whose upper bound is inclusive.
    pub upper: Duration,
    /// How many samples landed in this bin.
    pub count: usize,
}
311
/// Outcome of a finished [`Benchmark`].
///
/// Holds the raw samples together with summary statistics derived
/// from them when the benchmark was finished. All fields are public,
/// so the summary fields reflect the samples only as long as neither
/// is modified afterwards.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name the benchmark was created with.
    pub name: String,
    /// Every recorded sample duration, in recording order.
    pub samples: Vec<Duration>,
    /// Number of closure invocations behind the samples. Equal to
    /// `samples.len()` for per-iteration sampling; for batched
    /// sampling it is the sum of `n` over all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Arithmetic mean of the sample durations.
    pub mean: Duration,
    /// 50th percentile of the sample durations.
    pub p50: Duration,
    /// 99th percentile of the sample durations.
    pub p99: Duration,
    /// Coefficient of variation of the samples (stddev / mean).
    ///
    /// Larger values mean noisier measurements: `0.05` says the
    /// standard deviation is 5% of the mean. Regressions that stay
    /// within the CV are downgraded from `Fail` to `Warn` by
    /// [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
354
355impl BenchmarkResult {
356 /// Effective throughput in operations per second.
357 ///
358 /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
359 /// `0.0` for an empty result or zero elapsed time.
360 ///
361 /// # Example
362 ///
363 /// ```
364 /// use dev_bench::Benchmark;
365 ///
366 /// let mut b = Benchmark::new("hot");
367 /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
368 /// let r = b.finish();
369 /// assert!(r.ops_per_sec() > 0.0);
370 /// ```
371 pub fn ops_per_sec(&self) -> f64 {
372 if self.total_elapsed.is_zero() {
373 return 0.0;
374 }
375 self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
376 }
377
378 /// Smallest sample. Returns `Duration::ZERO` for an empty result.
379 pub fn min(&self) -> Duration {
380 self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
381 }
382
383 /// Largest sample. Returns `Duration::ZERO` for an empty result.
384 pub fn max(&self) -> Duration {
385 self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
386 }
387
388 /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
389 ///
390 /// Uses `n-1` (Bessel's correction) for the sample variance.
391 pub fn stddev(&self) -> f64 {
392 let n = self.samples.len();
393 if n < 2 {
394 return 0.0;
395 }
396 let mean_s = self.mean.as_secs_f64();
397 let var = self
398 .samples
399 .iter()
400 .map(|d| (d.as_secs_f64() - mean_s).powi(2))
401 .sum::<f64>()
402 / (n as f64 - 1.0);
403 var.sqrt()
404 }
405
406 /// Median absolute deviation, in seconds. `0.0` for empty results.
407 ///
408 /// `MAD = median(|x_i - median(x)|)`. Less affected by outliers than
409 /// standard deviation; useful for noisy measurements.
410 pub fn mad(&self) -> f64 {
411 if self.samples.is_empty() {
412 return 0.0;
413 }
414 let p50_s = self.p50.as_secs_f64();
415 let mut deviations: Vec<f64> = self
416 .samples
417 .iter()
418 .map(|d| (d.as_secs_f64() - p50_s).abs())
419 .collect();
420 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
421 let mid = deviations.len() / 2;
422 deviations[mid]
423 }
424
425 /// 90th percentile sample duration. `Duration::ZERO` for empty results.
426 pub fn p90(&self) -> Duration {
427 self.percentile(0.90)
428 }
429
430 /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
431 ///
432 /// At least 1000 samples are required to be meaningful; with fewer
433 /// samples this returns the largest sample.
434 pub fn p999(&self) -> Duration {
435 self.percentile(0.999)
436 }
437
438 /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
439 /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
440 pub fn percentile(&self, q: f64) -> Duration {
441 if self.samples.is_empty() {
442 return Duration::ZERO;
443 }
444 let q = q.clamp(0.0, 1.0);
445 let mut sorted = self.samples.clone();
446 sorted.sort();
447 let n = sorted.len();
448 let idx = ((n as f64) * q).floor() as usize;
449 let idx = idx.min(n - 1);
450 sorted[idx]
451 }
452
453 /// Compute a uniform-width histogram over the sample distribution.
454 ///
455 /// Returns `bucket_count` bins covering `[min, max]`, each with
456 /// the count of samples falling into that bin. The returned
457 /// `Vec<HistogramBin>` is in ascending order; the first bin's
458 /// `lower` equals `min()`, the last bin's `upper` equals `max()`.
459 ///
460 /// For an empty result or `bucket_count == 0`, returns `vec![]`.
461 /// When `min == max` (all samples equal), returns one bin with
462 /// the full sample count.
463 ///
464 /// Useful for spotting bimodality, outlier tails, and warmup
465 /// effects that mean/percentile alone hide.
466 ///
467 /// # Example
468 ///
469 /// ```
470 /// use dev_bench::Benchmark;
471 ///
472 /// let mut b = Benchmark::new("h");
473 /// for _ in 0..50 { b.iter(|| std::hint::black_box(1 + 1)); }
474 /// let r = b.finish();
475 /// let hist = r.histogram(8);
476 /// assert!(hist.len() <= 8);
477 /// let total: usize = hist.iter().map(|h| h.count).sum();
478 /// assert_eq!(total, r.samples.len());
479 /// ```
480 pub fn histogram(&self, bucket_count: usize) -> Vec<HistogramBin> {
481 if bucket_count == 0 || self.samples.is_empty() {
482 return Vec::new();
483 }
484 let min = self.min();
485 let max = self.max();
486 if min == max {
487 return vec![HistogramBin {
488 lower: min,
489 upper: max,
490 count: self.samples.len(),
491 }];
492 }
493 let total_ns = (max.as_nanos() - min.as_nanos()) as f64;
494 let bucket_ns = total_ns / bucket_count as f64;
495 let mut counts = vec![0usize; bucket_count];
496 for s in &self.samples {
497 let offset = (s.as_nanos() - min.as_nanos()) as f64;
498 let mut idx = (offset / bucket_ns).floor() as usize;
499 if idx >= bucket_count {
500 idx = bucket_count - 1;
501 }
502 counts[idx] += 1;
503 }
504 let min_ns = min.as_nanos() as u64;
505 let mut bins = Vec::with_capacity(bucket_count);
506 for (i, count) in counts.into_iter().enumerate() {
507 let lower_ns = min_ns + (bucket_ns * i as f64) as u64;
508 let upper_ns = if i + 1 == bucket_count {
509 max.as_nanos() as u64
510 } else {
511 min_ns + (bucket_ns * (i + 1) as f64) as u64
512 };
513 bins.push(HistogramBin {
514 lower: Duration::from_nanos(lower_ns),
515 upper: Duration::from_nanos(upper_ns),
516 count,
517 });
518 }
519 bins
520 }
521
522 /// Compare this result against a baseline using a default-tuned
523 /// [`CompareOptions`].
524 ///
525 /// `baseline_mean` is the previous mean duration. If `None`, the
526 /// verdict is `Skip` and no comparison is made.
527 ///
528 /// # Example
529 ///
530 /// ```
531 /// use dev_bench::{Benchmark, Threshold};
532 /// use std::time::Duration;
533 ///
534 /// let mut b = Benchmark::new("x");
535 /// b.iter(|| std::hint::black_box(1 + 1));
536 /// let r = b.finish();
537 /// let _ = r.compare_against_baseline(
538 /// Some(Duration::from_nanos(1)),
539 /// Threshold::regression_pct(10.0),
540 /// );
541 /// ```
542 pub fn compare_against_baseline(
543 &self,
544 baseline_mean: Option<Duration>,
545 threshold: Threshold,
546 ) -> CheckResult {
547 self.compare_with_options(&CompareOptions {
548 baseline_mean,
549 threshold,
550 ..CompareOptions::default()
551 })
552 }
553
554 /// Compare this result against a baseline using full options.
555 ///
556 /// Behavior:
557 /// - No baseline -> `Skip`.
558 /// - Sample count below `min_samples` -> `Skip` with detail.
559 /// - Within threshold -> `Pass` with numeric evidence.
560 /// - Over threshold but within CV noise band -> `Warn`.
561 /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
562 ///
563 /// In every non-`Skip` case, the returned [`CheckResult`] carries
564 /// a `bench` tag and numeric `Evidence` for `mean_ns`,
565 /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
566 ///
567 /// # Example
568 ///
569 /// ```
570 /// use dev_bench::{Benchmark, CompareOptions, Threshold};
571 /// use std::time::Duration;
572 ///
573 /// let mut b = Benchmark::new("x");
574 /// b.iter(|| std::hint::black_box(1 + 1));
575 /// let r = b.finish();
576 /// let opts = CompareOptions {
577 /// baseline_mean: Some(Duration::from_nanos(1)),
578 /// threshold: Threshold::regression_pct(20.0),
579 /// min_samples: 1,
580 /// allow_cv_noise_band: true,
581 /// };
582 /// let _check = r.compare_with_options(&opts);
583 /// ```
584 pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
585 let name = format!("bench::{}", self.name);
586 let mut evidence = self.numeric_evidence();
587 let tags = vec!["bench".to_string()];
588
589 let Some(baseline) = opts.baseline_mean else {
590 let mut c = CheckResult::skip(name).with_detail("no baseline available");
591 c.tags = tags;
592 c.evidence = evidence;
593 return c;
594 };
595
596 if (self.samples.len() as u64) < opts.min_samples {
597 let mut c = CheckResult::skip(name).with_detail(format!(
598 "fewer samples than min_samples ({} < {})",
599 self.samples.len(),
600 opts.min_samples
601 ));
602 c.tags = tags;
603 c.evidence = evidence;
604 return c;
605 }
606
607 let current_ns = self.mean.as_nanos();
608 let baseline_ns = baseline.as_nanos();
609 evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));
610
611 let regressed = match opts.threshold {
612 Threshold::RegressionPct(pct) => {
613 let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
614 current_ns as f64 > allowed
615 }
616 Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
617 Threshold::ThroughputDropPct(pct) => {
618 // Throughput-based; convert via mean.
619 let baseline_ops = if baseline.is_zero() {
620 0.0
621 } else {
622 1.0 / baseline.as_secs_f64()
623 };
624 let drop_floor = baseline_ops * (1.0 - pct / 100.0);
625 self.ops_per_sec() < drop_floor
626 }
627 };
628
629 let detail = format!(
630 "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
631 current_ns,
632 baseline_ns,
633 self.cv,
634 self.ops_per_sec()
635 );
636
637 if !regressed {
638 let mut c = CheckResult::pass(name).with_detail(detail);
639 c.tags = tags;
640 c.evidence = evidence;
641 return c;
642 }
643
644 // Regression detected. Decide Fail vs Warn based on CV noise band.
645 let in_noise_band = opts.allow_cv_noise_band && {
646 let allowed_noise_ns = baseline_ns as f64 * self.cv;
647 let delta_ns = (current_ns as f64) - (baseline_ns as f64);
648 delta_ns <= allowed_noise_ns
649 };
650 let mut tags = tags;
651 tags.push("regression".to_string());
652 if in_noise_band {
653 let mut c = CheckResult::warn(name, Severity::Warning)
654 .with_detail(format!("{} (within CV noise band)", detail));
655 c.tags = tags;
656 c.evidence = evidence;
657 c
658 } else {
659 let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
660 c.tags = tags;
661 c.evidence = evidence;
662 c
663 }
664 }
665
666 /// Build a one-check `Report` containing the comparison result.
667 ///
668 /// Convenience for producers that want a complete `Report` rather
669 /// than a single `CheckResult`. Sets `subject = self.name`,
670 /// `producer = "dev-bench"`.
671 ///
672 /// # Example
673 ///
674 /// ```
675 /// use dev_bench::{Benchmark, Threshold};
676 ///
677 /// let mut b = Benchmark::new("x");
678 /// b.iter(|| std::hint::black_box(1 + 1));
679 /// let r = b.finish();
680 /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
681 /// assert_eq!(report.checks.len(), 1);
682 /// ```
683 pub fn into_report(
684 self,
685 subject_version: impl Into<String>,
686 baseline_mean: Option<Duration>,
687 threshold: Threshold,
688 ) -> Report {
689 let name = self.name.clone();
690 let check = self.compare_against_baseline(baseline_mean, threshold);
691 let mut r = Report::new(name, subject_version).with_producer("dev-bench");
692 r.push(check);
693 r.finish();
694 r
695 }
696
697 fn numeric_evidence(&self) -> Vec<Evidence> {
698 vec![
699 Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
700 // baseline_ns inserted at index 1 by callers when available.
701 Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
702 Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
703 Evidence::numeric("cv", self.cv),
704 Evidence::numeric("ops_per_sec", self.ops_per_sec()),
705 Evidence::numeric("samples", self.samples.len() as f64),
706 Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
707 ]
708 }
709}
710
/// How much worse than the baseline a result may be before it counts
/// as a regression.
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Regression when the new mean is more than this many percent
    /// above the baseline mean.
    RegressionPct(f64),
    /// Regression when `current_mean - baseline_mean` exceeds this
    /// many nanoseconds.
    RegressionAbsoluteNs(u128),
    /// Regression when throughput falls more than this many percent
    /// below the baseline throughput.
    ///
    /// Baseline ops/sec is taken as `1.0 / baseline_mean_secs`, i.e.
    /// the baseline duration is assumed to be a per-operation
    /// duration. That holds for per-iteration sampling
    /// (`Benchmark::iter`), where `mean` is the per-op duration. With
    /// batched sampling (`Benchmark::iter_with_count`), prefer a
    /// duration-based threshold or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}

impl Threshold {
    /// Percent-based limit on how much the mean duration may grow.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_pct(20.0);
    /// assert!(matches!(t, Threshold::RegressionPct(_)));
    /// ```
    pub fn regression_pct(pct: f64) -> Self {
        Self::RegressionPct(pct)
    }

    /// Absolute limit, in nanoseconds, on how much the mean duration
    /// may grow.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_abs_ns(500);
    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
    /// ```
    pub fn regression_abs_ns(nanos: u128) -> Self {
        Self::RegressionAbsoluteNs(nanos)
    }

    /// Percent-based limit on how much throughput may drop.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::throughput_drop_pct(10.0);
    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
    /// ```
    pub fn throughput_drop_pct(pct: f64) -> Self {
        Self::ThroughputDropPct(pct)
    }
}
769
770/// Options for [`BenchmarkResult::compare_with_options`].
771///
772/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
773/// `allow_cv_noise_band = true`.
774///
775/// # Example
776///
777/// ```
778/// use dev_bench::{CompareOptions, Threshold};
779/// use std::time::Duration;
780///
781/// let opts = CompareOptions {
782/// baseline_mean: Some(Duration::from_nanos(1000)),
783/// threshold: Threshold::regression_pct(20.0),
784/// min_samples: 30,
785/// allow_cv_noise_band: true,
786/// };
787/// assert_eq!(opts.min_samples, 30);
788/// ```
789#[derive(Debug, Clone)]
790pub struct CompareOptions {
791 /// Baseline mean to compare against. `None` -> verdict is `Skip`.
792 pub baseline_mean: Option<Duration>,
793 /// Regression threshold to apply.
794 pub threshold: Threshold,
795 /// Minimum sample count required before a comparison can be made.
796 /// Below this, the verdict is `Skip` with a `min_samples` detail.
797 pub min_samples: u64,
798 /// If `true`, regressions within `baseline_ns * cv` are downgraded
799 /// from `Fail` to `Warn`.
800 pub allow_cv_noise_band: bool,
801}
802
803impl Default for CompareOptions {
804 fn default() -> Self {
805 Self {
806 baseline_mean: None,
807 threshold: Threshold::regression_pct(10.0),
808 min_samples: 1,
809 allow_cv_noise_band: true,
810 }
811 }
812}
813
814/// A trait for any object that can run a benchmark and produce a result.
815pub trait Bench {
816 /// Run the benchmark and return its result.
817 fn run(&mut self) -> BenchmarkResult;
818}
819
820/// Producer wrapper that runs a benchmark and emits a single-check
821/// [`Report`] via [`Producer::produce`].
822///
823/// # Example
824///
825/// ```no_run
826/// use dev_bench::{Benchmark, BenchProducer, Threshold};
827/// use dev_report::Producer;
828///
829/// fn run_bench() -> dev_bench::BenchmarkResult {
830/// let mut b = Benchmark::new("hot_path");
831/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
832/// b.finish()
833/// }
834///
835/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
836/// let report = producer.produce();
837/// assert_eq!(report.checks.len(), 1);
838/// ```
839pub struct BenchProducer<F>
840where
841 F: Fn() -> BenchmarkResult,
842{
843 run: F,
844 subject_version: String,
845 baseline_mean: Option<Duration>,
846 threshold: Threshold,
847}
848
849impl<F> BenchProducer<F>
850where
851 F: Fn() -> BenchmarkResult,
852{
853 /// Build a new producer.
854 pub fn new(
855 run: F,
856 subject_version: impl Into<String>,
857 baseline_mean: Option<Duration>,
858 threshold: Threshold,
859 ) -> Self {
860 Self {
861 run,
862 subject_version: subject_version.into(),
863 baseline_mean,
864 threshold,
865 }
866 }
867}
868
869impl<F> Producer for BenchProducer<F>
870where
871 F: Fn() -> BenchmarkResult,
872{
873 fn produce(&self) -> Report {
874 let result = (self.run)();
875 result.into_report(
876 self.subject_version.clone(),
877 self.baseline_mean,
878 self.threshold,
879 )
880 }
881}
882
#[cfg(test)]
mod tests {
    //! Unit tests for sampling, statistics, histogramming and
    //! baseline comparison.

    use super::*;
    use dev_report::Verdict;

    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    #[test]
    fn iter_with_count_records_one_sample() {
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    #[test]
    fn small_regression_under_threshold_passes() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Comparing a result against its own mean can never regress.
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
        let mut b = Benchmark::new("x");
        // Inject controlled samples by running noop iterations.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Force a known mean and cv for deterministic comparison.
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn extra_stats_are_consistent() {
        let mut b = Benchmark::new("uniform");
        for _ in 0..20 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Bounds.
        assert!(r.min() <= r.mean);
        assert!(r.mean <= r.max());
        assert!(r.p50 <= r.p90());
        assert!(r.p90() <= r.p99);
        assert!(r.p99 <= r.p999());
        // Numbers are non-negative finite.
        assert!(r.stddev() >= 0.0);
        assert!(r.mad() >= 0.0);
    }

    #[test]
    fn percentile_clamps_to_bounds() {
        let mut b = Benchmark::new("p");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1));
        }
        let r = b.finish();
        // q < 0.0 is clamped to 0.0 -> smallest sample;
        // q > 1.0 is clamped to 1.0 -> largest sample.
        let lo = r.percentile(-0.5);
        let hi = r.percentile(1.5);
        assert_eq!(lo, r.min());
        assert_eq!(hi, r.max());
        assert!(lo <= hi);
    }

    #[test]
    fn empty_result_stats_are_zero() {
        let r = Benchmark::new("empty").finish();
        assert_eq!(r.min(), Duration::ZERO);
        assert_eq!(r.max(), Duration::ZERO);
        assert_eq!(r.p90(), Duration::ZERO);
        assert_eq!(r.p999(), Duration::ZERO);
        assert_eq!(r.stddev(), 0.0);
        assert_eq!(r.mad(), 0.0);
    }

    #[test]
    fn run_for_collects_at_least_one_sample() {
        let mut b = Benchmark::new("budget");
        b.run_for(Duration::from_millis(10), || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert!(!r.samples.is_empty());
        assert_eq!(r.iterations_recorded, r.samples.len() as u64);
    }

    #[test]
    fn run_for_zero_budget_collects_no_samples() {
        let mut b = Benchmark::new("zero");
        b.run_for(Duration::ZERO, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        // `Instant` is monotonic, so by the first check the elapsed
        // time is already >= the zero budget: no iteration may start.
        assert!(r.samples.is_empty());
        assert_eq!(r.iterations_recorded, 0);
    }

    #[test]
    fn histogram_total_count_equals_samples() {
        let mut b = Benchmark::new("h");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let bins = r.histogram(8);
        assert!(!bins.is_empty());
        let total: usize = bins.iter().map(|b| b.count).sum();
        assert_eq!(total, r.samples.len());
    }

    #[test]
    fn histogram_zero_buckets_returns_empty() {
        let mut b = Benchmark::new("h");
        b.iter(|| std::hint::black_box(1));
        let r = b.finish();
        assert!(r.histogram(0).is_empty());
    }

    #[test]
    fn histogram_empty_result_returns_empty() {
        let r = Benchmark::new("e").finish();
        assert!(r.histogram(8).is_empty());
    }

    #[test]
    fn histogram_bins_are_ordered() {
        let mut b = Benchmark::new("h");
        for _ in 0..30 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let bins = b.finish().histogram(5);
        for win in bins.windows(2) {
            assert!(win[0].lower <= win[1].lower);
            assert!(win[0].lower <= win[0].upper);
        }
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}