Skip to main content

dev_bench/
lib.rs

1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
17//!     b.iter(|| {
18//!         std::hint::black_box(40 + 2);
19//!     });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0);   // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//!   alongside time, using `mod-alloc`'s `dhat_compat` surface
37//!   (drop-in for `dhat-rs`). See the `alloc` module (visible
38//!   in rustdoc when the feature is enabled).
39
40#![cfg_attr(docsrs, feature(doc_cfg))]
41#![warn(missing_docs)]
42#![warn(rust_2018_idioms)]
43
44use std::time::{Duration, Instant};
45
46use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
47
48#[cfg(feature = "alloc-tracking")]
49#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
50pub mod alloc;
51
52/// Re-export of `mod-alloc`'s `dhat_compat` surface for use by
53/// [`install_global_allocator!`].
54///
55/// Kept under the historical `__dhat` name so the macro expansion
56/// stays compatible across the v0.9.6 → v0.9.7 backend swap. The
57/// `dhat_compat::Alloc` / `Profiler` / `HeapStats` shapes mirror
58/// `dhat-rs`'s public surface field-for-field; users following
59/// dhat-rs's documentation pattern in their own code continue to
60/// work via `use mod_alloc::dhat_compat as dhat;`.
61///
62/// Hidden from rustdoc; consumers should use the macro, not this path.
63#[cfg(feature = "alloc-tracking")]
64#[doc(hidden)]
65pub use ::mod_alloc::dhat_compat as __dhat;
66
/// Declare the allocation-tracking allocator as the program's global allocator.
///
/// Only compiled when the `alloc-tracking` feature is enabled. The macro
/// takes no arguments and expands to a single `#[global_allocator] static`
/// whose type and value are the `Alloc` item re-exported from `mod-alloc`'s
/// `dhat_compat` module. Going through the macro lets a consumer install the
/// allocator without adding its own dependency on `mod-alloc`.
///
/// Invoke it at module scope of a binary or test target. The static has a
/// fixed name, so the macro can be expanded at most once per module, and
/// Rust itself permits only one `#[global_allocator]` per program.
///
/// The `dhat_compat` surface is described by this crate as a drop-in for
/// `dhat-rs`, so code written against the `dhat-rs` documentation can alias
/// it (`use mod_alloc::dhat_compat as dhat;`) and keep working.
///
/// # Example
///
/// ```ignore
/// // At the top level of main.rs or of a test target:
/// dev_bench::install_global_allocator!();
///
/// // Optional: use the same compat surface from your own code.
/// use mod_alloc::dhat_compat as dhat;
///
/// fn main() {
///     let _profiler = dhat::Profiler::new_heap();
///     // ... benchmarked code ...
/// }
/// ```
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
#[macro_export]
macro_rules! install_global_allocator {
    () => {
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
101
102pub mod baseline;
103
104pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
105
/// Accumulator for the timing samples of one named benchmark.
///
/// Each measuring method ([`Benchmark::iter`],
/// [`Benchmark::iter_with_count`], [`Benchmark::run_for`]) appends
/// wall-clock samples. [`Benchmark::finish`] consumes the accumulator
/// and turns the samples into a [`BenchmarkResult`].
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
pub struct Benchmark {
    /// Name given at construction; copied into the result unchanged.
    name: String,
    /// One entry per recorded sample, in recording order.
    samples: Vec<Duration>,
    /// Number of closure invocations covered by `samples`.
    iterations_recorded: u64,
}
128
129impl Benchmark {
130    /// Begin a new benchmark with a stable name.
131    pub fn new(name: impl Into<String>) -> Self {
132        Self {
133            name: name.into(),
134            samples: Vec::new(),
135            iterations_recorded: 0,
136        }
137    }
138
139    /// Run one iteration of the benchmark, capturing the duration.
140    ///
141    /// Each call records exactly one sample.
142    ///
143    /// # Example
144    ///
145    /// ```
146    /// use dev_bench::Benchmark;
147    ///
148    /// let mut b = Benchmark::new("noop");
149    /// b.iter(|| std::hint::black_box(1 + 1));
150    /// let r = b.finish();
151    /// assert_eq!(r.samples.len(), 1);
152    /// ```
153    pub fn iter<F, R>(&mut self, f: F) -> R
154    where
155        F: FnOnce() -> R,
156    {
157        let start = Instant::now();
158        let r = f();
159        let elapsed = start.elapsed();
160        self.samples.push(elapsed);
161        self.iterations_recorded += 1;
162        r
163    }
164
165    /// Run a closure `n` times and record ONE sample for the entire batch.
166    ///
167    /// Use for sub-microsecond operations where per-iteration timing
168    /// would be dominated by `Instant::now()` overhead. The reported
169    /// per-iteration mean is `batch_duration / n`.
170    ///
171    /// # Example
172    ///
173    /// ```
174    /// use dev_bench::Benchmark;
175    ///
176    /// let mut b = Benchmark::new("hot");
177    /// b.iter_with_count(1000, || {
178    ///     std::hint::black_box(40 + 2);
179    /// });
180    /// let r = b.finish();
181    /// assert_eq!(r.samples.len(), 1);
182    /// assert_eq!(r.iterations_recorded, 1000);
183    /// ```
184    pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
185    where
186        F: FnMut(),
187    {
188        let start = Instant::now();
189        for _ in 0..n {
190            f();
191        }
192        let elapsed = start.elapsed();
193        self.samples.push(elapsed);
194        self.iterations_recorded += n;
195    }
196
197    /// Run a closure repeatedly for at most `budget` wall-clock time,
198    /// recording one sample per iteration.
199    ///
200    /// Stops as soon as the elapsed time crosses `budget`. The
201    /// closure may run slightly past the budget (the in-flight
202    /// iteration completes); the recorded sample count reflects what
203    /// was actually executed.
204    ///
205    /// Useful when you want a benchmark to run "for N seconds" rather
206    /// than "for N iterations" — the per-iter cost is unknown and you
207    /// just want a bounded run.
208    ///
209    /// # Example
210    ///
211    /// ```
212    /// use dev_bench::Benchmark;
213    /// use std::time::Duration;
214    ///
215    /// let mut b = Benchmark::new("hot");
216    /// b.run_for(Duration::from_millis(20), || {
217    ///     std::hint::black_box(1 + 1);
218    /// });
219    /// let r = b.finish();
220    /// // At least one sample was collected.
221    /// assert!(!r.samples.is_empty());
222    /// ```
223    pub fn run_for<F>(&mut self, budget: Duration, mut f: F)
224    where
225        F: FnMut(),
226    {
227        let deadline = Instant::now() + budget;
228        while Instant::now() < deadline {
229            let start = Instant::now();
230            f();
231            let elapsed = start.elapsed();
232            self.samples.push(elapsed);
233            self.iterations_recorded += 1;
234        }
235    }
236
237    /// Finalize the benchmark and produce a [`BenchmarkResult`].
238    pub fn finish(self) -> BenchmarkResult {
239        let n = self.samples.len();
240        let mean = if n == 0 {
241            Duration::ZERO
242        } else {
243            let total: Duration = self.samples.iter().copied().sum();
244            total / n as u32
245        };
246        let mut sorted = self.samples.clone();
247        sorted.sort();
248        let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
249        let p99 = sorted
250            .get((n as f64 * 0.99).floor() as usize)
251            .copied()
252            .unwrap_or(Duration::ZERO);
253        let cv = compute_cv(&self.samples, mean);
254        let total_elapsed: Duration = self.samples.iter().copied().sum();
255        BenchmarkResult {
256            name: self.name,
257            samples: self.samples,
258            iterations_recorded: self.iterations_recorded,
259            total_elapsed,
260            mean,
261            p50,
262            p99,
263            cv,
264        }
265    }
266}
267
/// Coefficient of variation of `samples` around `mean`.
///
/// Computes the population standard deviation (divisor `n`, not
/// `n - 1`) in seconds and divides it by `mean` in seconds. Returns
/// `0.0` when there are no samples or when `mean` is zero, so the
/// result is never a division by zero.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    if samples.is_empty() || mean.is_zero() {
        return 0.0;
    }
    let center = mean.as_secs_f64();
    let squared_deviations: f64 = samples
        .iter()
        .map(|sample| {
            let delta = sample.as_secs_f64() - center;
            delta.powi(2)
        })
        .sum();
    let variance = squared_deviations / samples.len() as f64;
    variance.sqrt() / center
}
284
/// A single bucket of a sample-duration histogram.
///
/// Produced by [`BenchmarkResult::histogram`], which returns bins in
/// ascending order. The first bin starts at the smallest sample
/// (`BenchmarkResult::min`) and the last bin ends at the largest
/// (`BenchmarkResult::max`).
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("h");
/// for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
/// let bins = b.finish().histogram(4);
/// assert!(bins.iter().all(|b| b.lower <= b.upper));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HistogramBin {
    /// Start of the bin's range (inclusive).
    pub lower: Duration,
    /// End of the bin's range: exclusive for every bin except the
    /// last one, where it is inclusive.
    pub upper: Duration,
    /// How many samples were counted in this bin.
    pub count: usize,
}
311
/// The result of a finished benchmark.
///
/// Summary statistics are derived from the raw `samples`, which are
/// kept alongside them so callers can recompute anything they need.
/// All fields are public; the derived helper methods read them as
/// they are at call time.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// // A no-op may measure as zero on a coarse clock, so assert on
/// // structure rather than on a strictly positive mean.
/// assert_eq!(r.samples.len(), 10);
/// assert!(r.p50 <= r.p99);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations, in recording order.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration (`total_elapsed / samples.len()`).
    pub mean: Duration,
    /// 50th percentile sample duration.
    pub p50: Duration,
    /// 99th percentile sample duration.
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. When the noise
    /// band is enabled, regressions no larger than `baseline * cv` are
    /// downgraded from `Fail` to `Warn` by
    /// [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
354
355impl BenchmarkResult {
356    /// Effective throughput in operations per second.
357    ///
358    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
359    /// `0.0` for an empty result or zero elapsed time.
360    ///
361    /// # Example
362    ///
363    /// ```
364    /// use dev_bench::Benchmark;
365    ///
366    /// let mut b = Benchmark::new("hot");
367    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
368    /// let r = b.finish();
369    /// assert!(r.ops_per_sec() > 0.0);
370    /// ```
371    pub fn ops_per_sec(&self) -> f64 {
372        if self.total_elapsed.is_zero() {
373            return 0.0;
374        }
375        self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
376    }
377
378    /// Smallest sample. Returns `Duration::ZERO` for an empty result.
379    pub fn min(&self) -> Duration {
380        self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
381    }
382
383    /// Largest sample. Returns `Duration::ZERO` for an empty result.
384    pub fn max(&self) -> Duration {
385        self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
386    }
387
388    /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
389    ///
390    /// Uses `n-1` (Bessel's correction) for the sample variance.
391    pub fn stddev(&self) -> f64 {
392        let n = self.samples.len();
393        if n < 2 {
394            return 0.0;
395        }
396        let mean_s = self.mean.as_secs_f64();
397        let var = self
398            .samples
399            .iter()
400            .map(|d| (d.as_secs_f64() - mean_s).powi(2))
401            .sum::<f64>()
402            / (n as f64 - 1.0);
403        var.sqrt()
404    }
405
406    /// Median absolute deviation, in seconds. `0.0` for empty results.
407    ///
408    /// `MAD = median(|x_i - median(x)|)`. Less affected by outliers than
409    /// standard deviation; useful for noisy measurements.
410    pub fn mad(&self) -> f64 {
411        if self.samples.is_empty() {
412            return 0.0;
413        }
414        let p50_s = self.p50.as_secs_f64();
415        let mut deviations: Vec<f64> = self
416            .samples
417            .iter()
418            .map(|d| (d.as_secs_f64() - p50_s).abs())
419            .collect();
420        deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
421        let mid = deviations.len() / 2;
422        deviations[mid]
423    }
424
425    /// 90th percentile sample duration. `Duration::ZERO` for empty results.
426    pub fn p90(&self) -> Duration {
427        self.percentile(0.90)
428    }
429
430    /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
431    ///
432    /// At least 1000 samples are required to be meaningful; with fewer
433    /// samples this returns the largest sample.
434    pub fn p999(&self) -> Duration {
435        self.percentile(0.999)
436    }
437
438    /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
439    /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
440    pub fn percentile(&self, q: f64) -> Duration {
441        if self.samples.is_empty() {
442            return Duration::ZERO;
443        }
444        let q = q.clamp(0.0, 1.0);
445        let mut sorted = self.samples.clone();
446        sorted.sort();
447        let n = sorted.len();
448        let idx = ((n as f64) * q).floor() as usize;
449        let idx = idx.min(n - 1);
450        sorted[idx]
451    }
452
453    /// Compute a uniform-width histogram over the sample distribution.
454    ///
455    /// Returns `bucket_count` bins covering `[min, max]`, each with
456    /// the count of samples falling into that bin. The returned
457    /// `Vec<HistogramBin>` is in ascending order; the first bin's
458    /// `lower` equals `min()`, the last bin's `upper` equals `max()`.
459    ///
460    /// For an empty result or `bucket_count == 0`, returns `vec![]`.
461    /// When `min == max` (all samples equal), returns one bin with
462    /// the full sample count.
463    ///
464    /// Useful for spotting bimodality, outlier tails, and warmup
465    /// effects that mean/percentile alone hide.
466    ///
467    /// # Example
468    ///
469    /// ```
470    /// use dev_bench::Benchmark;
471    ///
472    /// let mut b = Benchmark::new("h");
473    /// for _ in 0..50 { b.iter(|| std::hint::black_box(1 + 1)); }
474    /// let r = b.finish();
475    /// let hist = r.histogram(8);
476    /// assert!(hist.len() <= 8);
477    /// let total: usize = hist.iter().map(|h| h.count).sum();
478    /// assert_eq!(total, r.samples.len());
479    /// ```
480    pub fn histogram(&self, bucket_count: usize) -> Vec<HistogramBin> {
481        if bucket_count == 0 || self.samples.is_empty() {
482            return Vec::new();
483        }
484        let min = self.min();
485        let max = self.max();
486        if min == max {
487            return vec![HistogramBin {
488                lower: min,
489                upper: max,
490                count: self.samples.len(),
491            }];
492        }
493        let total_ns = (max.as_nanos() - min.as_nanos()) as f64;
494        let bucket_ns = total_ns / bucket_count as f64;
495        let mut counts = vec![0usize; bucket_count];
496        for s in &self.samples {
497            let offset = (s.as_nanos() - min.as_nanos()) as f64;
498            let mut idx = (offset / bucket_ns).floor() as usize;
499            if idx >= bucket_count {
500                idx = bucket_count - 1;
501            }
502            counts[idx] += 1;
503        }
504        let min_ns = min.as_nanos() as u64;
505        let mut bins = Vec::with_capacity(bucket_count);
506        for (i, count) in counts.into_iter().enumerate() {
507            let lower_ns = min_ns + (bucket_ns * i as f64) as u64;
508            let upper_ns = if i + 1 == bucket_count {
509                max.as_nanos() as u64
510            } else {
511                min_ns + (bucket_ns * (i + 1) as f64) as u64
512            };
513            bins.push(HistogramBin {
514                lower: Duration::from_nanos(lower_ns),
515                upper: Duration::from_nanos(upper_ns),
516                count,
517            });
518        }
519        bins
520    }
521
522    /// Compare this result against a baseline using a default-tuned
523    /// [`CompareOptions`].
524    ///
525    /// `baseline_mean` is the previous mean duration. If `None`, the
526    /// verdict is `Skip` and no comparison is made.
527    ///
528    /// # Example
529    ///
530    /// ```
531    /// use dev_bench::{Benchmark, Threshold};
532    /// use std::time::Duration;
533    ///
534    /// let mut b = Benchmark::new("x");
535    /// b.iter(|| std::hint::black_box(1 + 1));
536    /// let r = b.finish();
537    /// let _ = r.compare_against_baseline(
538    ///     Some(Duration::from_nanos(1)),
539    ///     Threshold::regression_pct(10.0),
540    /// );
541    /// ```
542    pub fn compare_against_baseline(
543        &self,
544        baseline_mean: Option<Duration>,
545        threshold: Threshold,
546    ) -> CheckResult {
547        self.compare_with_options(&CompareOptions {
548            baseline_mean,
549            threshold,
550            ..CompareOptions::default()
551        })
552    }
553
554    /// Compare this result against a baseline using full options.
555    ///
556    /// Behavior:
557    /// - No baseline -> `Skip`.
558    /// - Sample count below `min_samples` -> `Skip` with detail.
559    /// - Within threshold -> `Pass` with numeric evidence.
560    /// - Over threshold but within CV noise band -> `Warn`.
561    /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
562    ///
563    /// In every non-`Skip` case, the returned [`CheckResult`] carries
564    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
565    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
566    ///
567    /// # Example
568    ///
569    /// ```
570    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
571    /// use std::time::Duration;
572    ///
573    /// let mut b = Benchmark::new("x");
574    /// b.iter(|| std::hint::black_box(1 + 1));
575    /// let r = b.finish();
576    /// let opts = CompareOptions {
577    ///     baseline_mean: Some(Duration::from_nanos(1)),
578    ///     threshold: Threshold::regression_pct(20.0),
579    ///     min_samples: 1,
580    ///     allow_cv_noise_band: true,
581    /// };
582    /// let _check = r.compare_with_options(&opts);
583    /// ```
584    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
585        let name = format!("bench::{}", self.name);
586        let mut evidence = self.numeric_evidence();
587        let tags = vec!["bench".to_string()];
588
589        let Some(baseline) = opts.baseline_mean else {
590            let mut c = CheckResult::skip(name).with_detail("no baseline available");
591            c.tags = tags;
592            c.evidence = evidence;
593            return c;
594        };
595
596        if (self.samples.len() as u64) < opts.min_samples {
597            let mut c = CheckResult::skip(name).with_detail(format!(
598                "fewer samples than min_samples ({} < {})",
599                self.samples.len(),
600                opts.min_samples
601            ));
602            c.tags = tags;
603            c.evidence = evidence;
604            return c;
605        }
606
607        let current_ns = self.mean.as_nanos();
608        let baseline_ns = baseline.as_nanos();
609        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));
610
611        let regressed = match opts.threshold {
612            Threshold::RegressionPct(pct) => {
613                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
614                current_ns as f64 > allowed
615            }
616            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
617            Threshold::ThroughputDropPct(pct) => {
618                // Throughput-based; convert via mean.
619                let baseline_ops = if baseline.is_zero() {
620                    0.0
621                } else {
622                    1.0 / baseline.as_secs_f64()
623                };
624                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
625                self.ops_per_sec() < drop_floor
626            }
627        };
628
629        let detail = format!(
630            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
631            current_ns,
632            baseline_ns,
633            self.cv,
634            self.ops_per_sec()
635        );
636
637        if !regressed {
638            let mut c = CheckResult::pass(name).with_detail(detail);
639            c.tags = tags;
640            c.evidence = evidence;
641            return c;
642        }
643
644        // Regression detected. Decide Fail vs Warn based on CV noise band.
645        let in_noise_band = opts.allow_cv_noise_band && {
646            let allowed_noise_ns = baseline_ns as f64 * self.cv;
647            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
648            delta_ns <= allowed_noise_ns
649        };
650        let mut tags = tags;
651        tags.push("regression".to_string());
652        if in_noise_band {
653            let mut c = CheckResult::warn(name, Severity::Warning)
654                .with_detail(format!("{} (within CV noise band)", detail));
655            c.tags = tags;
656            c.evidence = evidence;
657            c
658        } else {
659            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
660            c.tags = tags;
661            c.evidence = evidence;
662            c
663        }
664    }
665
666    /// Build a one-check `Report` containing the comparison result.
667    ///
668    /// Convenience for producers that want a complete `Report` rather
669    /// than a single `CheckResult`. Sets `subject = self.name`,
670    /// `producer = "dev-bench"`.
671    ///
672    /// # Example
673    ///
674    /// ```
675    /// use dev_bench::{Benchmark, Threshold};
676    ///
677    /// let mut b = Benchmark::new("x");
678    /// b.iter(|| std::hint::black_box(1 + 1));
679    /// let r = b.finish();
680    /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
681    /// assert_eq!(report.checks.len(), 1);
682    /// ```
683    pub fn into_report(
684        self,
685        subject_version: impl Into<String>,
686        baseline_mean: Option<Duration>,
687        threshold: Threshold,
688    ) -> Report {
689        let name = self.name.clone();
690        let check = self.compare_against_baseline(baseline_mean, threshold);
691        let mut r = Report::new(name, subject_version).with_producer("dev-bench");
692        r.push(check);
693        r.finish();
694        r
695    }
696
697    fn numeric_evidence(&self) -> Vec<Evidence> {
698        vec![
699            Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
700            // baseline_ns inserted at index 1 by callers when available.
701            Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
702            Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
703            Evidence::numeric("cv", self.cv),
704            Evidence::numeric("ops_per_sec", self.ops_per_sec()),
705            Evidence::numeric("samples", self.samples.len() as f64),
706            Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
707        ]
708    }
709}
710
/// A threshold defining how much slower-than-baseline is acceptable.
///
/// Implements `PartialEq` so thresholds can be compared in tests and
/// configuration checks. The percentage variants hold an `f64`, so the
/// usual floating-point rule applies: a `NaN` percentage is not equal
/// to itself.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}
728
729impl Threshold {
730    /// Build a percent-based duration regression threshold.
731    ///
732    /// # Example
733    ///
734    /// ```
735    /// use dev_bench::Threshold;
736    /// let t = Threshold::regression_pct(20.0);
737    /// assert!(matches!(t, Threshold::RegressionPct(_)));
738    /// ```
739    pub fn regression_pct(pct: f64) -> Self {
740        Threshold::RegressionPct(pct)
741    }
742
743    /// Build an absolute duration regression threshold in nanoseconds.
744    ///
745    /// # Example
746    ///
747    /// ```
748    /// use dev_bench::Threshold;
749    /// let t = Threshold::regression_abs_ns(500);
750    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
751    /// ```
752    pub fn regression_abs_ns(nanos: u128) -> Self {
753        Threshold::RegressionAbsoluteNs(nanos)
754    }
755
756    /// Build a percent-based throughput drop threshold.
757    ///
758    /// # Example
759    ///
760    /// ```
761    /// use dev_bench::Threshold;
762    /// let t = Threshold::throughput_drop_pct(10.0);
763    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
764    /// ```
765    pub fn throughput_drop_pct(pct: f64) -> Self {
766        Threshold::ThroughputDropPct(pct)
767    }
768}
769
770/// Options for [`BenchmarkResult::compare_with_options`].
771///
772/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
773/// `allow_cv_noise_band = true`.
774///
775/// # Example
776///
777/// ```
778/// use dev_bench::{CompareOptions, Threshold};
779/// use std::time::Duration;
780///
781/// let opts = CompareOptions {
782///     baseline_mean: Some(Duration::from_nanos(1000)),
783///     threshold: Threshold::regression_pct(20.0),
784///     min_samples: 30,
785///     allow_cv_noise_band: true,
786/// };
787/// assert_eq!(opts.min_samples, 30);
788/// ```
789#[derive(Debug, Clone)]
790pub struct CompareOptions {
791    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
792    pub baseline_mean: Option<Duration>,
793    /// Regression threshold to apply.
794    pub threshold: Threshold,
795    /// Minimum sample count required before a comparison can be made.
796    /// Below this, the verdict is `Skip` with a `min_samples` detail.
797    pub min_samples: u64,
798    /// If `true`, regressions within `baseline_ns * cv` are downgraded
799    /// from `Fail` to `Warn`.
800    pub allow_cv_noise_band: bool,
801}
802
803impl Default for CompareOptions {
804    fn default() -> Self {
805        Self {
806            baseline_mean: None,
807            threshold: Threshold::regression_pct(10.0),
808            min_samples: 1,
809            allow_cv_noise_band: true,
810        }
811    }
812}
813
814/// A trait for any object that can run a benchmark and produce a result.
815pub trait Bench {
816    /// Run the benchmark and return its result.
817    fn run(&mut self) -> BenchmarkResult;
818}
819
820/// Producer wrapper that runs a benchmark and emits a single-check
821/// [`Report`] via [`Producer::produce`].
822///
823/// # Example
824///
825/// ```no_run
826/// use dev_bench::{Benchmark, BenchProducer, Threshold};
827/// use dev_report::Producer;
828///
829/// fn run_bench() -> dev_bench::BenchmarkResult {
830///     let mut b = Benchmark::new("hot_path");
831///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
832///     b.finish()
833/// }
834///
835/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
836/// let report = producer.produce();
837/// assert_eq!(report.checks.len(), 1);
838/// ```
839pub struct BenchProducer<F>
840where
841    F: Fn() -> BenchmarkResult,
842{
843    run: F,
844    subject_version: String,
845    baseline_mean: Option<Duration>,
846    threshold: Threshold,
847}
848
849impl<F> BenchProducer<F>
850where
851    F: Fn() -> BenchmarkResult,
852{
853    /// Build a new producer.
854    pub fn new(
855        run: F,
856        subject_version: impl Into<String>,
857        baseline_mean: Option<Duration>,
858        threshold: Threshold,
859    ) -> Self {
860        Self {
861            run,
862            subject_version: subject_version.into(),
863            baseline_mean,
864            threshold,
865        }
866    }
867}
868
869impl<F> Producer for BenchProducer<F>
870where
871    F: Fn() -> BenchmarkResult,
872{
873    fn produce(&self) -> Report {
874        let result = (self.run)();
875        result.into_report(
876            self.subject_version.clone(),
877            self.baseline_mean,
878            self.threshold,
879        )
880    }
881}
882
#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    /// Record `iterations` samples of a trivial `black_box` workload under
    /// `name` and finalize the benchmark.
    fn noop_result(name: &'static str, iterations: usize) -> BenchmarkResult {
        let mut b = Benchmark::new(name);
        for _ in 0..iterations {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        b.finish()
    }

    /// Record `iterations` samples that each sleep for one microsecond, so
    /// every sample has a clearly non-zero duration.
    fn sleepy_result(name: &'static str, iterations: usize) -> BenchmarkResult {
        let mut b = Benchmark::new(name);
        for _ in 0..iterations {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        b.finish()
    }

    /// Comparison options shared by the CV-band tests: 100ns baseline,
    /// 10% regression threshold, no minimum sample gate, noise band enabled.
    fn tight_options() -> CompareOptions {
        CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        }
    }

    #[test]
    fn benchmark_runs_and_finishes() {
        let r = noop_result("noop", 10);
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    #[test]
    fn iter_with_count_records_one_sample() {
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        // One timed sample covering all 1000 iterations.
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let r = noop_result("x", 1);
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        // A single sample against a 100-sample minimum.
        let r = noop_result("x", 1);
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        let detail = v.detail.expect("skip verdict should carry a detail message");
        assert!(detail.contains("min_samples"));
    }

    #[test]
    fn small_regression_under_threshold_passes() {
        let r = sleepy_result("x", 5);
        // Comparing a result against its own mean is a 0% change.
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        for &label in &["mean_ns", "baseline_ns", "ops_per_sec"] {
            assert!(
                v.evidence.iter().any(|e| e.label == label),
                "missing evidence labelled {}",
                label
            );
        }
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv 0.
        let mut r = noop_result("x", 50);
        // Overwrite the measured statistics so the comparison is deterministic.
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let v = r.compare_with_options(&tight_options());
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut r = noop_result("x", 50);
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let v = r.compare_with_options(&tight_options());
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        let detail = v.detail.expect("warn verdict should carry a detail message");
        assert!(detail.contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let r = sleepy_result("x", 10);
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn extra_stats_are_consistent() {
        let r = noop_result("uniform", 20);
        // Bounds.
        assert!(r.min() <= r.mean);
        assert!(r.mean <= r.max());
        assert!(r.p50 <= r.p90());
        assert!(r.p90() <= r.p99);
        assert!(r.p99 <= r.p999());
        // Numbers are non-negative and finite.
        let (stddev, mad) = (r.stddev(), r.mad());
        assert!(stddev.is_finite() && stddev >= 0.0);
        assert!(mad.is_finite() && mad >= 0.0);
    }

    #[test]
    fn percentile_clamps_to_bounds() {
        let r = noop_result("p", 10);
        // q < 0.0 -> first sample; q > 1.0 -> last sample.
        let lo = r.percentile(-0.5);
        let hi = r.percentile(1.5);
        assert!(lo <= hi);
        // Out-of-range quantiles must agree with the nearest in-range bound.
        assert!(lo == r.percentile(0.0));
        assert!(hi == r.percentile(1.0));
    }

    #[test]
    fn empty_result_stats_are_zero() {
        let r = Benchmark::new("empty").finish();
        assert_eq!(r.min(), Duration::ZERO);
        assert_eq!(r.max(), Duration::ZERO);
        assert_eq!(r.p90(), Duration::ZERO);
        assert_eq!(r.p999(), Duration::ZERO);
        assert_eq!(r.stddev(), 0.0);
        assert_eq!(r.mad(), 0.0);
    }

    #[test]
    fn run_for_collects_at_least_one_sample() {
        let mut b = Benchmark::new("budget");
        b.run_for(Duration::from_millis(10), || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert!(!r.samples.is_empty());
        // Each recorded sample accounts for exactly one iteration.
        assert_eq!(r.iterations_recorded, r.samples.len() as u64);
    }

    #[test]
    fn run_for_zero_budget_collects_no_samples() {
        let mut b = Benchmark::new("zero");
        b.run_for(Duration::ZERO, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        // With a zero budget the deadline has already passed; at most one
        // sample is tolerated.
        assert!(r.samples.len() <= 1);
    }

    #[test]
    fn histogram_total_count_equals_samples() {
        let r = noop_result("h", 50);
        let bins = r.histogram(8);
        assert!(!bins.is_empty());
        // Every sample must land in exactly one bin.
        let total: usize = bins.iter().map(|bin| bin.count).sum();
        assert_eq!(total, r.samples.len());
    }

    #[test]
    fn histogram_zero_buckets_returns_empty() {
        let r = noop_result("h", 1);
        assert!(r.histogram(0).is_empty());
    }

    #[test]
    fn histogram_empty_result_returns_empty() {
        let r = Benchmark::new("e").finish();
        assert!(r.histogram(8).is_empty());
    }

    #[test]
    fn histogram_bins_are_ordered() {
        let bins = noop_result("h", 30).histogram(5);
        for pair in bins.windows(2) {
            let (current, next) = (&pair[0], &pair[1]);
            assert!(current.lower <= next.lower);
            assert!(current.lower <= current.upper);
        }
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let r = noop_result("x", 10);
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    #[test]
    fn into_report_emits_one_check() {
        let r = noop_result("x", 5);
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        let p = BenchProducer::new(
            || noop_result("noop", 5),
            "0.1.0",
            None,
            Threshold::regression_pct(10.0),
        );
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}