// dev_bench/lib.rs
1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
17//!     b.iter(|| {
18//!         std::hint::black_box(40 + 2);
19//!     });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0);   // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//!   alongside time, using `dhat`. See the `alloc` module
37//!   (visible in rustdoc when the feature is enabled).
38
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![warn(missing_docs)]
41#![warn(rust_2018_idioms)]
42
43use std::time::{Duration, Instant};
44
45use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
46
47#[cfg(feature = "alloc-tracking")]
48#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
49pub mod alloc;
50
51/// Re-export of `dhat` for use by [`install_global_allocator!`].
52///
53/// Hidden from rustdoc; consumers should use the macro, not this path.
54#[cfg(feature = "alloc-tracking")]
55#[doc(hidden)]
56pub use ::dhat as __dhat;
57
58/// Install `dhat::Alloc` as the global allocator.
59///
60/// Available with the `alloc-tracking` feature. Invoke at module scope
61/// in your binary or test target — the macro expands to a
62/// `#[global_allocator] static` declaration that consumers cannot
63/// otherwise express without depending on `dhat` directly.
64///
65/// # Example
66///
67/// ```ignore
68/// // in main.rs or a test target's top level:
69/// dev_bench::install_global_allocator!();
70///
71/// fn main() {
72///     let _profiler = dhat::Profiler::new_heap();
73///     // ... benchmarked code ...
74/// }
75/// ```
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
#[macro_export]
macro_rules! install_global_allocator {
    () => {
        // Expands in the *caller's* crate; `$crate::__dhat` resolves through
        // the hidden re-export above so the caller needs no direct `dhat`
        // dependency. The double-underscore name marks the static as
        // macro-internal and avoids collisions with user items.
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
85
86pub mod baseline;
87
88pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
89
90/// A single benchmark run.
91///
92/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
93/// to produce a [`BenchmarkResult`].
94///
95/// # Example
96///
97/// ```
98/// use dev_bench::Benchmark;
99///
100/// let mut b = Benchmark::new("noop");
101/// for _ in 0..10 {
102///     b.iter(|| std::hint::black_box(42));
103/// }
104/// let r = b.finish();
105/// assert_eq!(r.samples.len(), 10);
106/// ```
pub struct Benchmark {
    // Stable benchmark name; becomes the `bench::<name>` check id when the
    // result is compared against a baseline.
    name: String,
    // One wall-clock duration per recorded sample (one per `iter` call, or
    // one per whole batch for `iter_with_count`).
    samples: Vec<Duration>,
    // Total closure invocations across all samples; exceeds `samples.len()`
    // when batched sampling is used.
    iterations_recorded: u64,
}
112
113impl Benchmark {
114    /// Begin a new benchmark with a stable name.
115    pub fn new(name: impl Into<String>) -> Self {
116        Self {
117            name: name.into(),
118            samples: Vec::new(),
119            iterations_recorded: 0,
120        }
121    }
122
123    /// Run one iteration of the benchmark, capturing the duration.
124    ///
125    /// Each call records exactly one sample.
126    ///
127    /// # Example
128    ///
129    /// ```
130    /// use dev_bench::Benchmark;
131    ///
132    /// let mut b = Benchmark::new("noop");
133    /// b.iter(|| std::hint::black_box(1 + 1));
134    /// let r = b.finish();
135    /// assert_eq!(r.samples.len(), 1);
136    /// ```
137    pub fn iter<F, R>(&mut self, f: F) -> R
138    where
139        F: FnOnce() -> R,
140    {
141        let start = Instant::now();
142        let r = f();
143        let elapsed = start.elapsed();
144        self.samples.push(elapsed);
145        self.iterations_recorded += 1;
146        r
147    }
148
149    /// Run a closure `n` times and record ONE sample for the entire batch.
150    ///
151    /// Use for sub-microsecond operations where per-iteration timing
152    /// would be dominated by `Instant::now()` overhead. The reported
153    /// per-iteration mean is `batch_duration / n`.
154    ///
155    /// # Example
156    ///
157    /// ```
158    /// use dev_bench::Benchmark;
159    ///
160    /// let mut b = Benchmark::new("hot");
161    /// b.iter_with_count(1000, || {
162    ///     std::hint::black_box(40 + 2);
163    /// });
164    /// let r = b.finish();
165    /// assert_eq!(r.samples.len(), 1);
166    /// assert_eq!(r.iterations_recorded, 1000);
167    /// ```
168    pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
169    where
170        F: FnMut(),
171    {
172        let start = Instant::now();
173        for _ in 0..n {
174            f();
175        }
176        let elapsed = start.elapsed();
177        self.samples.push(elapsed);
178        self.iterations_recorded += n;
179    }
180
181    /// Run a closure repeatedly for at most `budget` wall-clock time,
182    /// recording one sample per iteration.
183    ///
184    /// Stops as soon as the elapsed time crosses `budget`. The
185    /// closure may run slightly past the budget (the in-flight
186    /// iteration completes); the recorded sample count reflects what
187    /// was actually executed.
188    ///
189    /// Useful when you want a benchmark to run "for N seconds" rather
190    /// than "for N iterations" — the per-iter cost is unknown and you
191    /// just want a bounded run.
192    ///
193    /// # Example
194    ///
195    /// ```
196    /// use dev_bench::Benchmark;
197    /// use std::time::Duration;
198    ///
199    /// let mut b = Benchmark::new("hot");
200    /// b.run_for(Duration::from_millis(20), || {
201    ///     std::hint::black_box(1 + 1);
202    /// });
203    /// let r = b.finish();
204    /// // At least one sample was collected.
205    /// assert!(!r.samples.is_empty());
206    /// ```
207    pub fn run_for<F>(&mut self, budget: Duration, mut f: F)
208    where
209        F: FnMut(),
210    {
211        let deadline = Instant::now() + budget;
212        while Instant::now() < deadline {
213            let start = Instant::now();
214            f();
215            let elapsed = start.elapsed();
216            self.samples.push(elapsed);
217            self.iterations_recorded += 1;
218        }
219    }
220
221    /// Finalize the benchmark and produce a [`BenchmarkResult`].
222    pub fn finish(self) -> BenchmarkResult {
223        let n = self.samples.len();
224        let mean = if n == 0 {
225            Duration::ZERO
226        } else {
227            let total: Duration = self.samples.iter().copied().sum();
228            total / n as u32
229        };
230        let mut sorted = self.samples.clone();
231        sorted.sort();
232        let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
233        let p99 = sorted
234            .get((n as f64 * 0.99).floor() as usize)
235            .copied()
236            .unwrap_or(Duration::ZERO);
237        let cv = compute_cv(&self.samples, mean);
238        let total_elapsed: Duration = self.samples.iter().copied().sum();
239        BenchmarkResult {
240            name: self.name,
241            samples: self.samples,
242            iterations_recorded: self.iterations_recorded,
243            total_elapsed,
244            mean,
245            p50,
246            p99,
247            cv,
248        }
249    }
250}
251
/// Coefficient of variation of `samples`: population standard deviation
/// divided by `mean` (both in seconds).
///
/// Returns `0.0` when there are no samples or the mean is zero, so the
/// caller never divides by zero. Uses the population variance (`n`
/// divisor), unlike `BenchmarkResult::stddev` which applies Bessel's
/// correction.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    let mean_s = mean.as_secs_f64();
    if samples.is_empty() || mean_s == 0.0 {
        return 0.0;
    }
    let sum_sq: f64 = samples
        .iter()
        .map(|d| {
            let delta = d.as_secs_f64() - mean_s;
            delta * delta
        })
        .sum();
    let population_var = sum_sq / samples.len() as f64;
    population_var.sqrt() / mean_s
}
268
/// One bin of a sample-distribution histogram.
///
/// Returned by [`BenchmarkResult::histogram`]. Bins are ordered, the
/// first bin's `lower` equals `BenchmarkResult::min` and the last
/// bin's `upper` equals `BenchmarkResult::max`. Every sample lands in
/// exactly one bin, so the bin counts sum to the total sample count.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("h");
/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
/// let bins = b.finish().histogram(4);
/// assert!(bins.iter().all(|b| b.lower <= b.upper));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HistogramBin {
    /// Inclusive lower bound of this bin.
    pub lower: Duration,
    /// Inclusive upper bound (for the last bin) or exclusive upper
    /// bound (for all other bins).
    pub upper: Duration,
    /// Number of samples falling into this bin.
    pub count: usize,
}
295
/// The result of a finished benchmark.
///
/// Statistics are computed losslessly from the raw `samples`.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration.
    ///
    /// With batched sampling each sample covers a whole batch, so this
    /// is the mean *batch* duration, not the per-iteration mean.
    pub mean: Duration,
    /// 50th percentile sample duration.
    pub p50: Duration,
    /// 99th percentile sample duration.
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
338
impl BenchmarkResult {
    /// Effective throughput in operations per second.
    ///
    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
    /// `0.0` for an empty result or zero elapsed time.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("hot");
    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
    /// let r = b.finish();
    /// assert!(r.ops_per_sec() > 0.0);
    /// ```
    pub fn ops_per_sec(&self) -> f64 {
        if self.total_elapsed.is_zero() {
            return 0.0;
        }
        self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
    }

    /// Smallest sample. Returns `Duration::ZERO` for an empty result.
    pub fn min(&self) -> Duration {
        self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
    }

    /// Largest sample. Returns `Duration::ZERO` for an empty result.
    pub fn max(&self) -> Duration {
        self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
    }

    /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
    ///
    /// Uses `n-1` (Bessel's correction) for the sample variance. Note
    /// that the stored `cv` is computed by `compute_cv` with the
    /// population variance (`n` divisor), so `stddev() / mean` and `cv`
    /// can differ slightly for small sample counts.
    pub fn stddev(&self) -> f64 {
        let n = self.samples.len();
        if n < 2 {
            return 0.0;
        }
        let mean_s = self.mean.as_secs_f64();
        let var = self
            .samples
            .iter()
            .map(|d| (d.as_secs_f64() - mean_s).powi(2))
            .sum::<f64>()
            / (n as f64 - 1.0);
        var.sqrt()
    }

    /// Median absolute deviation, in seconds. `0.0` for empty results.
    ///
    /// `MAD = median(|x_i - median(x)|)`. Less affected by outliers than
    /// standard deviation; useful for noisy measurements.
    pub fn mad(&self) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        let p50_s = self.p50.as_secs_f64();
        let mut deviations: Vec<f64> = self
            .samples
            .iter()
            .map(|d| (d.as_secs_f64() - p50_s).abs())
            .collect();
        // total_cmp is not used here; deviations are finite absolute values,
        // so partial_cmp only falls back to Equal for NaN inputs.
        deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        // Nearest-rank median: for even counts this picks the upper-middle
        // element, matching the `p50` index convention (`n / 2`).
        let mid = deviations.len() / 2;
        deviations[mid]
    }

    /// 90th percentile sample duration. `Duration::ZERO` for empty results.
    pub fn p90(&self) -> Duration {
        self.percentile(0.90)
    }

    /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
    ///
    /// At least 1000 samples are required to be meaningful; with fewer
    /// samples this returns the largest sample.
    pub fn p999(&self) -> Duration {
        self.percentile(0.999)
    }

    /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
    /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
    pub fn percentile(&self, q: f64) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }
        let q = q.clamp(0.0, 1.0);
        // Each call clones and sorts; acceptable because results hold the
        // full sample vector and percentile queries are post-processing.
        let mut sorted = self.samples.clone();
        sorted.sort();
        let n = sorted.len();
        let idx = ((n as f64) * q).floor() as usize;
        // Clamp so q = 1.0 (idx == n) maps onto the largest sample.
        let idx = idx.min(n - 1);
        sorted[idx]
    }

    /// Compute a uniform-width histogram over the sample distribution.
    ///
    /// Returns `bucket_count` bins covering `[min, max]`, each with
    /// the count of samples falling into that bin. The returned
    /// `Vec<HistogramBin>` is in ascending order; the first bin's
    /// `lower` equals `min()`, the last bin's `upper` equals `max()`.
    ///
    /// For an empty result or `bucket_count == 0`, returns `vec![]`.
    /// When `min == max` (all samples equal), returns one bin with
    /// the full sample count.
    ///
    /// Useful for spotting bimodality, outlier tails, and warmup
    /// effects that mean/percentile alone hide.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("h");
    /// for _ in 0..50 { b.iter(|| std::hint::black_box(1 + 1)); }
    /// let r = b.finish();
    /// let hist = r.histogram(8);
    /// assert!(hist.len() <= 8);
    /// let total: usize = hist.iter().map(|h| h.count).sum();
    /// assert_eq!(total, r.samples.len());
    /// ```
    pub fn histogram(&self, bucket_count: usize) -> Vec<HistogramBin> {
        if bucket_count == 0 || self.samples.is_empty() {
            return Vec::new();
        }
        let min = self.min();
        let max = self.max();
        // Degenerate distribution: a single bin holds every sample.
        if min == max {
            return vec![HistogramBin {
                lower: min,
                upper: max,
                count: self.samples.len(),
            }];
        }
        // Bin widths are computed in f64 nanoseconds; `min != max`
        // guarantees total_ns >= 1, so bucket_ns is nonzero.
        let total_ns = (max.as_nanos() - min.as_nanos()) as f64;
        let bucket_ns = total_ns / bucket_count as f64;
        let mut counts = vec![0usize; bucket_count];
        for s in &self.samples {
            let offset = (s.as_nanos() - min.as_nanos()) as f64;
            let mut idx = (offset / bucket_ns).floor() as usize;
            // The max sample would compute idx == bucket_count; fold it
            // (and any float rounding overshoot) into the last bin.
            if idx >= bucket_count {
                idx = bucket_count - 1;
            }
            counts[idx] += 1;
        }
        let min_ns = min.as_nanos() as u64;
        let mut bins = Vec::with_capacity(bucket_count);
        for (i, count) in counts.into_iter().enumerate() {
            let lower_ns = min_ns + (bucket_ns * i as f64) as u64;
            // Pin the final bin's upper bound to the exact max so the
            // documented `last.upper == max()` invariant holds despite
            // float truncation in intermediate bounds.
            let upper_ns = if i + 1 == bucket_count {
                max.as_nanos() as u64
            } else {
                min_ns + (bucket_ns * (i + 1) as f64) as u64
            };
            bins.push(HistogramBin {
                lower: Duration::from_nanos(lower_ns),
                upper: Duration::from_nanos(upper_ns),
                count,
            });
        }
        bins
    }

    /// Compare this result against a baseline using a default-tuned
    /// [`CompareOptions`].
    ///
    /// `baseline_mean` is the previous mean duration. If `None`, the
    /// verdict is `Skip` and no comparison is made.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let _ = r.compare_against_baseline(
    ///     Some(Duration::from_nanos(1)),
    ///     Threshold::regression_pct(10.0),
    /// );
    /// ```
    pub fn compare_against_baseline(
        &self,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> CheckResult {
        self.compare_with_options(&CompareOptions {
            baseline_mean,
            threshold,
            ..CompareOptions::default()
        })
    }

    /// Compare this result against a baseline using full options.
    ///
    /// Behavior:
    /// - No baseline -> `Skip`.
    /// - Sample count below `min_samples` -> `Skip` with detail.
    /// - Within threshold -> `Pass` with numeric evidence.
    /// - Over threshold but within CV noise band -> `Warn`.
    /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
    ///
    /// In every non-`Skip` case, the returned [`CheckResult`] carries
    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let opts = CompareOptions {
    ///     baseline_mean: Some(Duration::from_nanos(1)),
    ///     threshold: Threshold::regression_pct(20.0),
    ///     min_samples: 1,
    ///     allow_cv_noise_band: true,
    /// };
    /// let _check = r.compare_with_options(&opts);
    /// ```
    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
        let name = format!("bench::{}", self.name);
        let mut evidence = self.numeric_evidence();
        let tags = vec!["bench".to_string()];

        // Skip path 1: nothing to compare against.
        let Some(baseline) = opts.baseline_mean else {
            let mut c = CheckResult::skip(name).with_detail("no baseline available");
            c.tags = tags;
            c.evidence = evidence;
            return c;
        };

        // Skip path 2: not enough samples for a trustworthy comparison.
        if (self.samples.len() as u64) < opts.min_samples {
            let mut c = CheckResult::skip(name).with_detail(format!(
                "fewer samples than min_samples ({} < {})",
                self.samples.len(),
                opts.min_samples
            ));
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        let current_ns = self.mean.as_nanos();
        let baseline_ns = baseline.as_nanos();
        // Splice baseline next to mean_ns (index 0 of numeric_evidence) so
        // the two values sit adjacent in the emitted evidence list.
        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));

        let regressed = match opts.threshold {
            Threshold::RegressionPct(pct) => {
                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
                current_ns as f64 > allowed
            }
            // saturating_sub: a faster-than-baseline run yields 0, never wraps.
            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
            Threshold::ThroughputDropPct(pct) => {
                // Throughput-based; convert via mean.
                let baseline_ops = if baseline.is_zero() {
                    0.0
                } else {
                    1.0 / baseline.as_secs_f64()
                };
                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
                self.ops_per_sec() < drop_floor
            }
        };

        let detail = format!(
            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
            current_ns,
            baseline_ns,
            self.cv,
            self.ops_per_sec()
        );

        if !regressed {
            let mut c = CheckResult::pass(name).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        // Regression detected. Decide Fail vs Warn based on CV noise band:
        // a delta of no more than `baseline * cv` (one coefficient of
        // variation) is treated as measurement noise.
        let in_noise_band = opts.allow_cv_noise_band && {
            let allowed_noise_ns = baseline_ns as f64 * self.cv;
            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
            delta_ns <= allowed_noise_ns
        };
        let mut tags = tags;
        tags.push("regression".to_string());
        if in_noise_band {
            let mut c = CheckResult::warn(name, Severity::Warning)
                .with_detail(format!("{} (within CV noise band)", detail));
            c.tags = tags;
            c.evidence = evidence;
            c
        } else {
            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            c
        }
    }

    /// Build a one-check `Report` containing the comparison result.
    ///
    /// Convenience for producers that want a complete `Report` rather
    /// than a single `CheckResult`. Sets `subject = self.name`,
    /// `producer = "dev-bench"`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
    /// assert_eq!(report.checks.len(), 1);
    /// ```
    pub fn into_report(
        self,
        subject_version: impl Into<String>,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> Report {
        // Clone the name before `self` is consumed by the comparison.
        let name = self.name.clone();
        let check = self.compare_against_baseline(baseline_mean, threshold);
        let mut r = Report::new(name, subject_version).with_producer("dev-bench");
        r.push(check);
        r.finish();
        r
    }

    // Baseline-independent numeric evidence shared by all comparison paths.
    fn numeric_evidence(&self) -> Vec<Evidence> {
        vec![
            Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
            // baseline_ns inserted at index 1 by callers when available.
            Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
            Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
            Evidence::numeric("cv", self.cv),
            Evidence::numeric("ops_per_sec", self.ops_per_sec()),
            Evidence::numeric("samples", self.samples.len() as f64),
            Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
        ]
    }
}
694
/// A threshold defining how much slower-than-baseline is acceptable.
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    ///
    /// The comparison uses a saturating subtraction, so a result that is
    /// faster than baseline can never trip this threshold.
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}
712
713impl Threshold {
714    /// Build a percent-based duration regression threshold.
715    ///
716    /// # Example
717    ///
718    /// ```
719    /// use dev_bench::Threshold;
720    /// let t = Threshold::regression_pct(20.0);
721    /// assert!(matches!(t, Threshold::RegressionPct(_)));
722    /// ```
723    pub fn regression_pct(pct: f64) -> Self {
724        Threshold::RegressionPct(pct)
725    }
726
727    /// Build an absolute duration regression threshold in nanoseconds.
728    ///
729    /// # Example
730    ///
731    /// ```
732    /// use dev_bench::Threshold;
733    /// let t = Threshold::regression_abs_ns(500);
734    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
735    /// ```
736    pub fn regression_abs_ns(nanos: u128) -> Self {
737        Threshold::RegressionAbsoluteNs(nanos)
738    }
739
740    /// Build a percent-based throughput drop threshold.
741    ///
742    /// # Example
743    ///
744    /// ```
745    /// use dev_bench::Threshold;
746    /// let t = Threshold::throughput_drop_pct(10.0);
747    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
748    /// ```
749    pub fn throughput_drop_pct(pct: f64) -> Self {
750        Threshold::ThroughputDropPct(pct)
751    }
752}
753
/// Options for [`BenchmarkResult::compare_with_options`].
///
/// Defaults (see the `Default` impl): no baseline, percent threshold of
/// 10%, `min_samples = 1`, `allow_cv_noise_band = true`.
///
/// # Example
///
/// ```
/// use dev_bench::{CompareOptions, Threshold};
/// use std::time::Duration;
///
/// let opts = CompareOptions {
///     baseline_mean: Some(Duration::from_nanos(1000)),
///     threshold: Threshold::regression_pct(20.0),
///     min_samples: 30,
///     allow_cv_noise_band: true,
/// };
/// assert_eq!(opts.min_samples, 30);
/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn`.
    pub allow_cv_noise_band: bool,
}
786
787impl Default for CompareOptions {
788    fn default() -> Self {
789        Self {
790            baseline_mean: None,
791            threshold: Threshold::regression_pct(10.0),
792            min_samples: 1,
793            allow_cv_noise_band: true,
794        }
795    }
796}
797
/// A trait for any object that can run a benchmark and produce a result.
///
/// Implementors own their measurement loop; callers interact only with
/// the finished [`BenchmarkResult`].
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}
803
804/// Producer wrapper that runs a benchmark and emits a single-check
805/// [`Report`] via [`Producer::produce`].
806///
807/// # Example
808///
809/// ```no_run
810/// use dev_bench::{Benchmark, BenchProducer, Threshold};
811/// use dev_report::Producer;
812///
813/// fn run_bench() -> dev_bench::BenchmarkResult {
814///     let mut b = Benchmark::new("hot_path");
815///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
816///     b.finish()
817/// }
818///
819/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
820/// let report = producer.produce();
821/// assert_eq!(report.checks.len(), 1);
822/// ```
823pub struct BenchProducer<F>
824where
825    F: Fn() -> BenchmarkResult,
826{
827    run: F,
828    subject_version: String,
829    baseline_mean: Option<Duration>,
830    threshold: Threshold,
831}
832
833impl<F> BenchProducer<F>
834where
835    F: Fn() -> BenchmarkResult,
836{
837    /// Build a new producer.
838    pub fn new(
839        run: F,
840        subject_version: impl Into<String>,
841        baseline_mean: Option<Duration>,
842        threshold: Threshold,
843    ) -> Self {
844        Self {
845            run,
846            subject_version: subject_version.into(),
847            baseline_mean,
848            threshold,
849        }
850    }
851}
852
853impl<F> Producer for BenchProducer<F>
854where
855    F: Fn() -> BenchmarkResult,
856{
857    fn produce(&self) -> Report {
858        let result = (self.run)();
859        result.into_report(
860            self.subject_version.clone(),
861            self.baseline_mean,
862            self.threshold,
863        )
864    }
865}
866
867#[cfg(test)]
868mod tests {
869    use super::*;
870    use dev_report::Verdict;
871
872    #[test]
873    fn benchmark_runs_and_finishes() {
874        let mut b = Benchmark::new("noop");
875        for _ in 0..10 {
876            b.iter(|| std::hint::black_box(42));
877        }
878        let r = b.finish();
879        assert_eq!(r.samples.len(), 10);
880        assert_eq!(r.iterations_recorded, 10);
881        assert!(r.mean > Duration::ZERO);
882    }
883
884    #[test]
885    fn iter_with_count_records_one_sample() {
886        let mut b = Benchmark::new("hot");
887        b.iter_with_count(1000, || {
888            std::hint::black_box(1 + 1);
889        });
890        let r = b.finish();
891        assert_eq!(r.samples.len(), 1);
892        assert_eq!(r.iterations_recorded, 1000);
893        assert!(r.ops_per_sec() > 0.0);
894    }
895
896    #[test]
897    fn comparison_without_baseline_is_skip() {
898        let mut b = Benchmark::new("x");
899        b.iter(|| ());
900        let r = b.finish();
901        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
902        assert_eq!(v.verdict, Verdict::Skip);
903        assert!(v.has_tag("bench"));
904    }
905
906    #[test]
907    fn min_samples_skip() {
908        let mut b = Benchmark::new("x");
909        b.iter(|| ());
910        let r = b.finish();
911        let opts = CompareOptions {
912            baseline_mean: Some(Duration::from_nanos(100)),
913            threshold: Threshold::regression_pct(5.0),
914            min_samples: 100,
915            allow_cv_noise_band: true,
916        };
917        let v = r.compare_with_options(&opts);
918        assert_eq!(v.verdict, Verdict::Skip);
919        assert!(v.detail.unwrap().contains("min_samples"));
920    }
921
922    #[test]
923    fn small_regression_under_threshold_passes() {
924        let mut b = Benchmark::new("x");
925        for _ in 0..5 {
926            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
927        }
928        let r = b.finish();
929        let baseline = r.mean;
930        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
931        assert_eq!(v.verdict, Verdict::Pass);
932        assert!(v.has_tag("bench"));
933        // Numeric evidence is attached.
934        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
935        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
936        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
937    }
938
939    #[test]
940    fn regression_outside_cv_band_fails() {
941        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
942        let mut b = Benchmark::new("x");
943        // Inject controlled samples by running noop iterations.
944        for _ in 0..50 {
945            b.iter(|| std::hint::black_box(1 + 1));
946        }
947        let mut r = b.finish();
948        // Force a known mean and cv for deterministic comparison.
949        r.mean = Duration::from_nanos(200);
950        r.cv = 0.0;
951        let opts = CompareOptions {
952            baseline_mean: Some(Duration::from_nanos(100)),
953            threshold: Threshold::regression_pct(10.0),
954            min_samples: 1,
955            allow_cv_noise_band: true,
956        };
957        let v = r.compare_with_options(&opts);
958        assert_eq!(v.verdict, Verdict::Fail);
959        assert!(v.has_tag("regression"));
960    }
961
962    #[test]
963    fn regression_inside_cv_band_warns() {
964        let mut b = Benchmark::new("x");
965        for _ in 0..50 {
966            b.iter(|| std::hint::black_box(1 + 1));
967        }
968        let mut r = b.finish();
969        // Current is 12% over baseline but cv is 30% -> within noise band.
970        r.mean = Duration::from_nanos(112);
971        r.cv = 0.30;
972        let opts = CompareOptions {
973            baseline_mean: Some(Duration::from_nanos(100)),
974            threshold: Threshold::regression_pct(10.0),
975            min_samples: 1,
976            allow_cv_noise_band: true,
977        };
978        let v = r.compare_with_options(&opts);
979        assert_eq!(v.verdict, Verdict::Warn);
980        assert!(v.has_tag("regression"));
981        assert!(v.detail.unwrap().contains("CV noise band"));
982    }
983
984    #[test]
985    fn throughput_threshold_detects_drop() {
986        // ThroughputDropPct expects a per-op baseline duration. Use
987        // per-iter sampling so mean == per-op duration.
988        let mut b = Benchmark::new("x");
989        for _ in 0..10 {
990            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
991        }
992        let r = b.finish();
993        // Baseline 10x faster (per-op duration is 1/10 of current);
994        // current throughput is 90% lower than baseline -> regression.
995        let baseline = r.mean / 10;
996        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
997        assert_eq!(v.verdict, Verdict::Fail);
998    }
999
1000    #[test]
1001    fn extra_stats_are_consistent() {
1002        let mut b = Benchmark::new("uniform");
1003        for _ in 0..20 {
1004            b.iter(|| std::hint::black_box(1 + 1));
1005        }
1006        let r = b.finish();
1007        // Bounds.
1008        assert!(r.min() <= r.mean);
1009        assert!(r.mean <= r.max());
1010        assert!(r.p50 <= r.p90());
1011        assert!(r.p90() <= r.p99);
1012        assert!(r.p99 <= r.p999());
1013        // Numbers are non-negative finite.
1014        assert!(r.stddev() >= 0.0);
1015        assert!(r.mad() >= 0.0);
1016    }
1017
1018    #[test]
1019    fn percentile_clamps_to_bounds() {
1020        let mut b = Benchmark::new("p");
1021        for _ in 0..10 {
1022            b.iter(|| std::hint::black_box(1));
1023        }
1024        let r = b.finish();
1025        // q < 0.0 -> first sample; q > 1.0 -> last sample.
1026        let lo = r.percentile(-0.5);
1027        let hi = r.percentile(1.5);
1028        assert!(lo <= hi);
1029    }
1030
1031    #[test]
1032    fn empty_result_stats_are_zero() {
1033        let r = Benchmark::new("empty").finish();
1034        assert_eq!(r.min(), Duration::ZERO);
1035        assert_eq!(r.max(), Duration::ZERO);
1036        assert_eq!(r.p90(), Duration::ZERO);
1037        assert_eq!(r.p999(), Duration::ZERO);
1038        assert_eq!(r.stddev(), 0.0);
1039        assert_eq!(r.mad(), 0.0);
1040    }
1041
1042    #[test]
1043    fn run_for_collects_at_least_one_sample() {
1044        let mut b = Benchmark::new("budget");
1045        b.run_for(Duration::from_millis(10), || {
1046            std::hint::black_box(1 + 1);
1047        });
1048        let r = b.finish();
1049        assert!(!r.samples.is_empty());
1050        assert_eq!(r.iterations_recorded, r.samples.len() as u64);
1051    }
1052
1053    #[test]
1054    fn run_for_zero_budget_collects_no_samples() {
1055        let mut b = Benchmark::new("zero");
1056        b.run_for(Duration::ZERO, || {
1057            std::hint::black_box(1 + 1);
1058        });
1059        let r = b.finish();
1060        // With zero budget, deadline has already passed; no iterations.
1061        assert!(r.samples.is_empty() || r.samples.len() <= 1);
1062    }
1063
1064    #[test]
1065    fn histogram_total_count_equals_samples() {
1066        let mut b = Benchmark::new("h");
1067        for _ in 0..50 {
1068            b.iter(|| std::hint::black_box(1 + 1));
1069        }
1070        let r = b.finish();
1071        let bins = r.histogram(8);
1072        assert!(!bins.is_empty());
1073        let total: usize = bins.iter().map(|b| b.count).sum();
1074        assert_eq!(total, r.samples.len());
1075    }
1076
1077    #[test]
1078    fn histogram_zero_buckets_returns_empty() {
1079        let mut b = Benchmark::new("h");
1080        b.iter(|| std::hint::black_box(1));
1081        let r = b.finish();
1082        assert!(r.histogram(0).is_empty());
1083    }
1084
1085    #[test]
1086    fn histogram_empty_result_returns_empty() {
1087        let r = Benchmark::new("e").finish();
1088        assert!(r.histogram(8).is_empty());
1089    }
1090
1091    #[test]
1092    fn histogram_bins_are_ordered() {
1093        let mut b = Benchmark::new("h");
1094        for _ in 0..30 {
1095            b.iter(|| std::hint::black_box(1 + 1));
1096        }
1097        let bins = b.finish().histogram(5);
1098        for win in bins.windows(2) {
1099            assert!(win[0].lower <= win[1].lower);
1100            assert!(win[0].lower <= win[0].upper);
1101        }
1102    }
1103
1104    #[test]
1105    fn cv_is_zero_for_uniform_samples() {
1106        // Samples are nearly identical -> cv near 0.
1107        let mut b = Benchmark::new("x");
1108        for _ in 0..10 {
1109            b.iter(|| std::hint::black_box(1 + 1));
1110        }
1111        let r = b.finish();
1112        // Not strictly zero on real machines, just bounded.
1113        assert!(r.cv >= 0.0);
1114    }
1115
1116    #[test]
1117    fn into_report_emits_one_check() {
1118        let mut b = Benchmark::new("x");
1119        for _ in 0..5 {
1120            b.iter(|| std::hint::black_box(1 + 1));
1121        }
1122        let r = b.finish();
1123        let baseline = r.mean;
1124        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
1125        assert_eq!(report.checks.len(), 1);
1126        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
1127        assert_eq!(report.overall_verdict(), Verdict::Pass);
1128    }
1129
1130    #[test]
1131    fn bench_producer_implements_producer_trait() {
1132        fn run() -> BenchmarkResult {
1133            let mut b = Benchmark::new("noop");
1134            for _ in 0..5 {
1135                b.iter(|| std::hint::black_box(1 + 1));
1136            }
1137            b.finish()
1138        }
1139        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
1140        let report = p.produce();
1141        assert_eq!(report.checks.len(), 1);
1142    }
1143}