// dev_bench/lib.rs
1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
//! ```no_run
//! use dev_bench::{Benchmark, Threshold};
//!
//! let mut b = Benchmark::new("parse_query");
//! for _ in 0..1000 {
//!     b.iter(|| {
//!         std::hint::black_box(40 + 2);
//!     });
//! }
//!
//! let result = b.finish();
//! let threshold = Threshold::regression_pct(10.0); // fail on +10%
//! let _check = result.compare_against_baseline(None, threshold);
//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//! alongside time, using `dhat`. See the `alloc` module
37//! (visible in rustdoc when the feature is enabled).
38
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![warn(missing_docs)]
41#![warn(rust_2018_idioms)]
42
43use std::time::{Duration, Instant};
44
45use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
46
/// Allocation tracking measured alongside time (see crate docs: counts
/// and bytes, via `dhat`). Compiled only with the `alloc-tracking` feature.
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
pub mod alloc;
50
51/// Re-export of `dhat` for use by [`install_global_allocator!`].
52///
53/// Hidden from rustdoc; consumers should use the macro, not this path.
54#[cfg(feature = "alloc-tracking")]
55#[doc(hidden)]
56pub use ::dhat as __dhat;
57
/// Install `dhat::Alloc` as the global allocator.
///
/// Available with the `alloc-tracking` feature. Invoke at module scope
/// in your binary or test target — the macro expands to a
/// `#[global_allocator] static` declaration that consumers cannot
/// otherwise express without depending on `dhat` directly.
///
/// # Example
///
/// ```ignore
/// // in main.rs or a test target's top level:
/// dev_bench::install_global_allocator!();
///
/// fn main() {
///     let _profiler = dhat::Profiler::new_heap();
///     // ... benchmarked code ...
/// }
/// ```
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
#[macro_export]
macro_rules! install_global_allocator {
    () => {
        // `$crate::__dhat` resolves through this crate's hidden re-export,
        // so the caller does not need its own `dhat` dependency. The
        // double-underscore name keeps the static out of the way of user
        // identifiers.
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
85
/// Baseline types and stores; see [`Baseline`], [`BaselineStore`], and
/// [`JsonFileBaselineStore`] (re-exported below for convenience).
pub mod baseline;

pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
89
/// A single benchmark run.
///
/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
/// to produce a [`BenchmarkResult`].
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
pub struct Benchmark {
    // Stable name; surfaced as `bench::<name>` in check results.
    name: String,
    // One entry per `iter` call, or one per `iter_with_count` batch.
    samples: Vec<Duration>,
    // Total iterations across all samples; exceeds `samples.len()` when
    // batched sampling (`iter_with_count`) is used.
    iterations_recorded: u64,
}
112
113impl Benchmark {
114 /// Begin a new benchmark with a stable name.
115 pub fn new(name: impl Into<String>) -> Self {
116 Self {
117 name: name.into(),
118 samples: Vec::new(),
119 iterations_recorded: 0,
120 }
121 }
122
123 /// Run one iteration of the benchmark, capturing the duration.
124 ///
125 /// Each call records exactly one sample.
126 ///
127 /// # Example
128 ///
129 /// ```
130 /// use dev_bench::Benchmark;
131 ///
132 /// let mut b = Benchmark::new("noop");
133 /// b.iter(|| std::hint::black_box(1 + 1));
134 /// let r = b.finish();
135 /// assert_eq!(r.samples.len(), 1);
136 /// ```
137 pub fn iter<F, R>(&mut self, f: F) -> R
138 where
139 F: FnOnce() -> R,
140 {
141 let start = Instant::now();
142 let r = f();
143 let elapsed = start.elapsed();
144 self.samples.push(elapsed);
145 self.iterations_recorded += 1;
146 r
147 }
148
149 /// Run a closure `n` times and record ONE sample for the entire batch.
150 ///
151 /// Use for sub-microsecond operations where per-iteration timing
152 /// would be dominated by `Instant::now()` overhead. The reported
153 /// per-iteration mean is `batch_duration / n`.
154 ///
155 /// # Example
156 ///
157 /// ```
158 /// use dev_bench::Benchmark;
159 ///
160 /// let mut b = Benchmark::new("hot");
161 /// b.iter_with_count(1000, || {
162 /// std::hint::black_box(40 + 2);
163 /// });
164 /// let r = b.finish();
165 /// assert_eq!(r.samples.len(), 1);
166 /// assert_eq!(r.iterations_recorded, 1000);
167 /// ```
168 pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
169 where
170 F: FnMut(),
171 {
172 let start = Instant::now();
173 for _ in 0..n {
174 f();
175 }
176 let elapsed = start.elapsed();
177 self.samples.push(elapsed);
178 self.iterations_recorded += n;
179 }
180
181 /// Finalize the benchmark and produce a [`BenchmarkResult`].
182 pub fn finish(self) -> BenchmarkResult {
183 let n = self.samples.len();
184 let mean = if n == 0 {
185 Duration::ZERO
186 } else {
187 let total: Duration = self.samples.iter().copied().sum();
188 total / n as u32
189 };
190 let mut sorted = self.samples.clone();
191 sorted.sort();
192 let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
193 let p99 = sorted
194 .get((n as f64 * 0.99).floor() as usize)
195 .copied()
196 .unwrap_or(Duration::ZERO);
197 let cv = compute_cv(&self.samples, mean);
198 let total_elapsed: Duration = self.samples.iter().copied().sum();
199 BenchmarkResult {
200 name: self.name,
201 samples: self.samples,
202 iterations_recorded: self.iterations_recorded,
203 total_elapsed,
204 mean,
205 p50,
206 p99,
207 cv,
208 }
209 }
210}
211
/// Coefficient of variation (population stddev / mean) of `samples`.
///
/// Returns `0.0` for an empty slice or a zero mean, so callers never
/// divide by zero. Uses the population variance (divide by `n`), unlike
/// `BenchmarkResult::stddev`, which applies Bessel's correction.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    let mean_s = mean.as_secs_f64();
    if samples.is_empty() || mean_s == 0.0 {
        return 0.0;
    }
    let sum_sq: f64 = samples
        .iter()
        .map(|d| {
            let delta = d.as_secs_f64() - mean_s;
            delta * delta
        })
        .sum();
    (sum_sq / samples.len() as f64).sqrt() / mean_s
}
228
/// The result of a finished benchmark.
///
/// Statistics are computed losslessly from the raw `samples`, which are
/// retained in full so further statistics (see [`BenchmarkResult::percentile`])
/// can be derived after the fact.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration.
    pub mean: Duration,
    /// 50th percentile sample duration.
    pub p50: Duration,
    /// 99th percentile sample duration.
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
271
impl BenchmarkResult {
    /// Effective throughput in operations per second.
    ///
    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
    /// `0.0` for an empty result or zero elapsed time (never divides by
    /// zero).
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("hot");
    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
    /// let r = b.finish();
    /// assert!(r.ops_per_sec() > 0.0);
    /// ```
    pub fn ops_per_sec(&self) -> f64 {
        // Duration stores integer nanoseconds, so `as_secs_f64() == 0.0`
        // exactly when the duration is zero.
        let secs = self.total_elapsed.as_secs_f64();
        if secs == 0.0 {
            0.0
        } else {
            self.iterations_recorded as f64 / secs
        }
    }
294
295 /// Smallest sample. Returns `Duration::ZERO` for an empty result.
296 pub fn min(&self) -> Duration {
297 self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
298 }
299
300 /// Largest sample. Returns `Duration::ZERO` for an empty result.
301 pub fn max(&self) -> Duration {
302 self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
303 }
304
305 /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
306 ///
307 /// Uses `n-1` (Bessel's correction) for the sample variance.
308 pub fn stddev(&self) -> f64 {
309 let n = self.samples.len();
310 if n < 2 {
311 return 0.0;
312 }
313 let mean_s = self.mean.as_secs_f64();
314 let var = self
315 .samples
316 .iter()
317 .map(|d| (d.as_secs_f64() - mean_s).powi(2))
318 .sum::<f64>()
319 / (n as f64 - 1.0);
320 var.sqrt()
321 }
322
323 /// Median absolute deviation, in seconds. `0.0` for empty results.
324 ///
325 /// `MAD = median(|x_i - median(x)|)`. More robust to outliers than
326 /// standard deviation; useful for noisy measurements.
327 pub fn mad(&self) -> f64 {
328 if self.samples.is_empty() {
329 return 0.0;
330 }
331 let p50_s = self.p50.as_secs_f64();
332 let mut deviations: Vec<f64> = self
333 .samples
334 .iter()
335 .map(|d| (d.as_secs_f64() - p50_s).abs())
336 .collect();
337 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
338 let mid = deviations.len() / 2;
339 deviations[mid]
340 }
341
342 /// 90th percentile sample duration. `Duration::ZERO` for empty results.
343 pub fn p90(&self) -> Duration {
344 self.percentile(0.90)
345 }
346
347 /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
348 ///
349 /// At least 1000 samples are required to be meaningful; with fewer
350 /// samples this returns the largest sample.
351 pub fn p999(&self) -> Duration {
352 self.percentile(0.999)
353 }
354
355 /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
356 /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
357 pub fn percentile(&self, q: f64) -> Duration {
358 if self.samples.is_empty() {
359 return Duration::ZERO;
360 }
361 let q = q.clamp(0.0, 1.0);
362 let mut sorted = self.samples.clone();
363 sorted.sort();
364 let n = sorted.len();
365 let idx = ((n as f64) * q).floor() as usize;
366 let idx = idx.min(n - 1);
367 sorted[idx]
368 }
369
370 /// Compare this result against a baseline using a default-tuned
371 /// [`CompareOptions`].
372 ///
373 /// `baseline_mean` is the previous mean duration. If `None`, the
374 /// verdict is `Skip` and no comparison is made.
375 ///
376 /// # Example
377 ///
378 /// ```
379 /// use dev_bench::{Benchmark, Threshold};
380 /// use std::time::Duration;
381 ///
382 /// let mut b = Benchmark::new("x");
383 /// b.iter(|| std::hint::black_box(1 + 1));
384 /// let r = b.finish();
385 /// let _ = r.compare_against_baseline(
386 /// Some(Duration::from_nanos(1)),
387 /// Threshold::regression_pct(10.0),
388 /// );
389 /// ```
390 pub fn compare_against_baseline(
391 &self,
392 baseline_mean: Option<Duration>,
393 threshold: Threshold,
394 ) -> CheckResult {
395 self.compare_with_options(&CompareOptions {
396 baseline_mean,
397 threshold,
398 ..CompareOptions::default()
399 })
400 }
401
    /// Compare this result against a baseline using full options.
    ///
    /// Behavior:
    /// - No baseline -> `Skip`.
    /// - Sample count below `min_samples` -> `Skip` with detail.
    /// - Within threshold -> `Pass` with numeric evidence.
    /// - Over threshold but within CV noise band -> `Warn`.
    /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
    ///
    /// In every non-`Skip` case, the returned [`CheckResult`] carries
    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let opts = CompareOptions {
    ///     baseline_mean: Some(Duration::from_nanos(1)),
    ///     threshold: Threshold::regression_pct(20.0),
    ///     min_samples: 1,
    ///     allow_cv_noise_band: true,
    /// };
    /// let _check = r.compare_with_options(&opts);
    /// ```
    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
        let name = format!("bench::{}", self.name);
        let mut evidence = self.numeric_evidence();
        let tags = vec!["bench".to_string()];

        // No baseline: nothing to compare against, so the verdict is Skip.
        // Evidence is still attached so consumers can record the run.
        let Some(baseline) = opts.baseline_mean else {
            let mut c = CheckResult::skip(name).with_detail("no baseline available");
            c.tags = tags;
            c.evidence = evidence;
            return c;
        };

        // Too few samples for a meaningful comparison: Skip with detail.
        if (self.samples.len() as u64) < opts.min_samples {
            let mut c = CheckResult::skip(name).with_detail(format!(
                "fewer samples than min_samples ({} < {})",
                self.samples.len(),
                opts.min_samples
            ));
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        let current_ns = self.mean.as_nanos();
        let baseline_ns = baseline.as_nanos();
        // Index 1 keeps baseline_ns adjacent to mean_ns in the evidence
        // list; `numeric_evidence` leaves that slot for this insertion.
        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));

        let regressed = match opts.threshold {
            Threshold::RegressionPct(pct) => {
                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
                current_ns as f64 > allowed
            }
            // saturating_sub: an improvement (current < baseline) yields 0
            // rather than underflowing.
            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
            Threshold::ThroughputDropPct(pct) => {
                // Throughput-based; convert via mean. Assumes the baseline
                // is a per-operation duration (see `Threshold` docs).
                let baseline_ops = if baseline.is_zero() {
                    0.0
                } else {
                    1.0 / baseline.as_secs_f64()
                };
                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
                self.ops_per_sec() < drop_floor
            }
        };

        let detail = format!(
            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
            current_ns,
            baseline_ns,
            self.cv,
            self.ops_per_sec()
        );

        if !regressed {
            let mut c = CheckResult::pass(name).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        // Regression detected. Decide Fail vs Warn based on CV noise band:
        // a delta no larger than baseline * cv is within observed
        // run-to-run noise and only warrants a warning.
        let in_noise_band = opts.allow_cv_noise_band && {
            let allowed_noise_ns = baseline_ns as f64 * self.cv;
            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
            delta_ns <= allowed_noise_ns
        };
        let mut tags = tags;
        tags.push("regression".to_string());
        if in_noise_band {
            let mut c = CheckResult::warn(name, Severity::Warning)
                .with_detail(format!("{} (within CV noise band)", detail));
            c.tags = tags;
            c.evidence = evidence;
            c
        } else {
            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            c
        }
    }
513
514 /// Build a one-check `Report` containing the comparison result.
515 ///
516 /// Convenience for producers that want a complete `Report` rather
517 /// than a single `CheckResult`. Sets `subject = self.name`,
518 /// `producer = "dev-bench"`.
519 ///
520 /// # Example
521 ///
522 /// ```
523 /// use dev_bench::{Benchmark, Threshold};
524 ///
525 /// let mut b = Benchmark::new("x");
526 /// b.iter(|| std::hint::black_box(1 + 1));
527 /// let r = b.finish();
528 /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
529 /// assert_eq!(report.checks.len(), 1);
530 /// ```
531 pub fn into_report(
532 self,
533 subject_version: impl Into<String>,
534 baseline_mean: Option<Duration>,
535 threshold: Threshold,
536 ) -> Report {
537 let name = self.name.clone();
538 let check = self.compare_against_baseline(baseline_mean, threshold);
539 let mut r = Report::new(name, subject_version).with_producer("dev-bench");
540 r.push(check);
541 r.finish();
542 r
543 }
544
545 fn numeric_evidence(&self) -> Vec<Evidence> {
546 vec![
547 Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
548 // baseline_ns inserted at index 1 by callers when available.
549 Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
550 Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
551 Evidence::numeric("cv", self.cv),
552 Evidence::numeric("ops_per_sec", self.ops_per_sec()),
553 Evidence::numeric("samples", self.samples.len() as f64),
554 Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
555 ]
556 }
557}
558
/// A threshold defining how much slower-than-baseline is acceptable.
///
/// Interpreted by
/// [`compare_with_options`](BenchmarkResult::compare_with_options).
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}
576
577impl Threshold {
578 /// Build a percent-based duration regression threshold.
579 ///
580 /// # Example
581 ///
582 /// ```
583 /// use dev_bench::Threshold;
584 /// let t = Threshold::regression_pct(20.0);
585 /// assert!(matches!(t, Threshold::RegressionPct(_)));
586 /// ```
587 pub fn regression_pct(pct: f64) -> Self {
588 Threshold::RegressionPct(pct)
589 }
590
591 /// Build an absolute duration regression threshold in nanoseconds.
592 ///
593 /// # Example
594 ///
595 /// ```
596 /// use dev_bench::Threshold;
597 /// let t = Threshold::regression_abs_ns(500);
598 /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
599 /// ```
600 pub fn regression_abs_ns(nanos: u128) -> Self {
601 Threshold::RegressionAbsoluteNs(nanos)
602 }
603
604 /// Build a percent-based throughput drop threshold.
605 ///
606 /// # Example
607 ///
608 /// ```
609 /// use dev_bench::Threshold;
610 /// let t = Threshold::throughput_drop_pct(10.0);
611 /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
612 /// ```
613 pub fn throughput_drop_pct(pct: f64) -> Self {
614 Threshold::ThroughputDropPct(pct)
615 }
616}
617
/// Options for [`BenchmarkResult::compare_with_options`].
///
/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
/// `allow_cv_noise_band = true`.
///
/// # Example
///
/// ```
/// use dev_bench::{CompareOptions, Threshold};
/// use std::time::Duration;
///
/// let opts = CompareOptions {
///     baseline_mean: Some(Duration::from_nanos(1000)),
///     threshold: Threshold::regression_pct(20.0),
///     min_samples: 30,
///     allow_cv_noise_band: true,
/// };
/// assert_eq!(opts.min_samples, 30);
/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn` (measured change is within run-to-run noise).
    pub allow_cv_noise_band: bool,
}
650
651impl Default for CompareOptions {
652 fn default() -> Self {
653 Self {
654 baseline_mean: None,
655 threshold: Threshold::regression_pct(10.0),
656 min_samples: 1,
657 allow_cv_noise_band: true,
658 }
659 }
660}
661
/// A trait for any object that can run a benchmark and produce a result.
///
/// Implementors own their setup and measurement; callers see only the
/// finished [`BenchmarkResult`].
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}
667
/// Producer wrapper that runs a benchmark and emits a single-check
/// [`Report`] via [`Producer::produce`].
///
/// # Example
///
/// ```no_run
/// use dev_bench::{Benchmark, BenchProducer, Threshold};
/// use dev_report::Producer;
///
/// fn run_bench() -> dev_bench::BenchmarkResult {
///     let mut b = Benchmark::new("hot_path");
///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
///     b.finish()
/// }
///
/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
/// let report = producer.produce();
/// assert_eq!(report.checks.len(), 1);
/// ```
pub struct BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    // Closure that performs the measurement; invoked on every `produce`.
    run: F,
    // Version string recorded as the report's subject version.
    subject_version: String,
    // Baseline and threshold forwarded to `compare_against_baseline`.
    baseline_mean: Option<Duration>,
    threshold: Threshold,
}
696
697impl<F> BenchProducer<F>
698where
699 F: Fn() -> BenchmarkResult,
700{
701 /// Build a new producer.
702 pub fn new(
703 run: F,
704 subject_version: impl Into<String>,
705 baseline_mean: Option<Duration>,
706 threshold: Threshold,
707 ) -> Self {
708 Self {
709 run,
710 subject_version: subject_version.into(),
711 baseline_mean,
712 threshold,
713 }
714 }
715}
716
717impl<F> Producer for BenchProducer<F>
718where
719 F: Fn() -> BenchmarkResult,
720{
721 fn produce(&self) -> Report {
722 let result = (self.run)();
723 result.into_report(
724 self.subject_version.clone(),
725 self.baseline_mean,
726 self.threshold,
727 )
728 }
729}
730
#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    #[test]
    fn iter_with_count_records_one_sample() {
        // Batched sampling: one sample, n iterations.
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    #[test]
    fn small_regression_under_threshold_passes() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline equals the current mean: 0% regression, well under 50%.
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
        let mut b = Benchmark::new("x");
        // Inject controlled samples by running noop iterations.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Force a known mean and cv for deterministic comparison
        // (fields are public, so tests can override measured values).
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn extra_stats_are_consistent() {
        let mut b = Benchmark::new("uniform");
        for _ in 0..20 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Bounds: min <= mean <= max, and percentiles are monotone.
        assert!(r.min() <= r.mean);
        assert!(r.mean <= r.max());
        assert!(r.p50 <= r.p90());
        assert!(r.p90() <= r.p99);
        assert!(r.p99 <= r.p999());
        // Numbers are non-negative finite.
        assert!(r.stddev() >= 0.0);
        assert!(r.mad() >= 0.0);
    }

    #[test]
    fn percentile_clamps_to_bounds() {
        let mut b = Benchmark::new("p");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1));
        }
        let r = b.finish();
        // q < 0.0 -> first sample; q > 1.0 -> last sample.
        let lo = r.percentile(-0.5);
        let hi = r.percentile(1.5);
        assert!(lo <= hi);
    }

    #[test]
    fn empty_result_stats_are_zero() {
        // A finish() with no samples must not panic; all stats are zero.
        let r = Benchmark::new("empty").finish();
        assert_eq!(r.min(), Duration::ZERO);
        assert_eq!(r.max(), Duration::ZERO);
        assert_eq!(r.p90(), Duration::ZERO);
        assert_eq!(r.p999(), Duration::ZERO);
        assert_eq!(r.stddev(), 0.0);
        assert_eq!(r.mad(), 0.0);
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}
945}