Skip to main content

dev_bench/
lib.rs

1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
17//!     b.iter(|| {
18//!         std::hint::black_box(40 + 2);
19//!     });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0);   // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//!   alongside time, using `dhat`. See the `alloc` module
37//!   (visible in rustdoc when the feature is enabled).
38
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![warn(missing_docs)]
41#![warn(rust_2018_idioms)]
42
43use std::time::{Duration, Instant};
44
45use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
46
47#[cfg(feature = "alloc-tracking")]
48#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
49pub mod alloc;
50
51/// Re-export of `dhat` for use by [`install_global_allocator!`].
52///
53/// Hidden from rustdoc; consumers should use the macro, not this path.
54#[cfg(feature = "alloc-tracking")]
55#[doc(hidden)]
56pub use ::dhat as __dhat;
57
/// Install `dhat::Alloc` as the global allocator.
///
/// Available with the `alloc-tracking` feature. Invoke at module scope
/// in your binary or test target — the macro expands to a
/// `#[global_allocator] static` declaration that consumers cannot
/// otherwise express without depending on `dhat` directly.
///
/// Invoke at most once per binary: the expansion declares a static
/// with a fixed name and Rust permits only one `#[global_allocator]`,
/// so a second expansion fails to compile.
///
/// # Example
///
/// ```ignore
/// // in main.rs or a test target's top level:
/// dev_bench::install_global_allocator!();
///
/// fn main() {
///     let _profiler = dhat::Profiler::new_heap();
///     // ... benchmarked code ...
/// }
/// ```
#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
#[macro_export]
macro_rules! install_global_allocator {
    () => {
        // `$crate::__dhat` is this crate's hidden re-export of `dhat`,
        // so the expansion resolves even when the consumer does not
        // list `dhat` in its own dependencies.
        #[global_allocator]
        static __DEV_BENCH_DHAT_ALLOC: $crate::__dhat::Alloc = $crate::__dhat::Alloc;
    };
}
85
86pub mod baseline;
87
88pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
89
/// A single benchmark run.
///
/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
/// to produce a [`BenchmarkResult`].
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
pub struct Benchmark {
    /// Stable benchmark name; becomes `BenchmarkResult::name`.
    name: String,
    /// One wall-clock duration per recorded sample (`iter` pushes one
    /// per call; `iter_with_count` pushes one per batch).
    samples: Vec<Duration>,
    /// Total iterations across all samples; diverges from
    /// `samples.len()` only when batched via `iter_with_count`.
    iterations_recorded: u64,
}
112
113impl Benchmark {
114    /// Begin a new benchmark with a stable name.
115    pub fn new(name: impl Into<String>) -> Self {
116        Self {
117            name: name.into(),
118            samples: Vec::new(),
119            iterations_recorded: 0,
120        }
121    }
122
123    /// Run one iteration of the benchmark, capturing the duration.
124    ///
125    /// Each call records exactly one sample.
126    ///
127    /// # Example
128    ///
129    /// ```
130    /// use dev_bench::Benchmark;
131    ///
132    /// let mut b = Benchmark::new("noop");
133    /// b.iter(|| std::hint::black_box(1 + 1));
134    /// let r = b.finish();
135    /// assert_eq!(r.samples.len(), 1);
136    /// ```
137    pub fn iter<F, R>(&mut self, f: F) -> R
138    where
139        F: FnOnce() -> R,
140    {
141        let start = Instant::now();
142        let r = f();
143        let elapsed = start.elapsed();
144        self.samples.push(elapsed);
145        self.iterations_recorded += 1;
146        r
147    }
148
149    /// Run a closure `n` times and record ONE sample for the entire batch.
150    ///
151    /// Use for sub-microsecond operations where per-iteration timing
152    /// would be dominated by `Instant::now()` overhead. The reported
153    /// per-iteration mean is `batch_duration / n`.
154    ///
155    /// # Example
156    ///
157    /// ```
158    /// use dev_bench::Benchmark;
159    ///
160    /// let mut b = Benchmark::new("hot");
161    /// b.iter_with_count(1000, || {
162    ///     std::hint::black_box(40 + 2);
163    /// });
164    /// let r = b.finish();
165    /// assert_eq!(r.samples.len(), 1);
166    /// assert_eq!(r.iterations_recorded, 1000);
167    /// ```
168    pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
169    where
170        F: FnMut(),
171    {
172        let start = Instant::now();
173        for _ in 0..n {
174            f();
175        }
176        let elapsed = start.elapsed();
177        self.samples.push(elapsed);
178        self.iterations_recorded += n;
179    }
180
181    /// Finalize the benchmark and produce a [`BenchmarkResult`].
182    pub fn finish(self) -> BenchmarkResult {
183        let n = self.samples.len();
184        let mean = if n == 0 {
185            Duration::ZERO
186        } else {
187            let total: Duration = self.samples.iter().copied().sum();
188            total / n as u32
189        };
190        let mut sorted = self.samples.clone();
191        sorted.sort();
192        let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
193        let p99 = sorted
194            .get((n as f64 * 0.99).floor() as usize)
195            .copied()
196            .unwrap_or(Duration::ZERO);
197        let cv = compute_cv(&self.samples, mean);
198        let total_elapsed: Duration = self.samples.iter().copied().sum();
199        BenchmarkResult {
200            name: self.name,
201            samples: self.samples,
202            iterations_recorded: self.iterations_recorded,
203            total_elapsed,
204            mean,
205            p50,
206            p99,
207            cv,
208        }
209    }
210}
211
/// Coefficient of variation of `samples`: population standard
/// deviation divided by the mean, both in seconds.
///
/// Returns `0.0` for an empty sample set or a zero mean, so the ratio
/// is always well-defined. Uses the population variance (divide by
/// `n`), unlike [`BenchmarkResult::stddev`], which uses `n - 1`.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    let mean_s = mean.as_secs_f64();
    // Guard both degenerate cases up front: no data, or a zero mean
    // (which would otherwise divide by zero below).
    if samples.is_empty() || mean_s == 0.0 {
        return 0.0;
    }
    let sum_sq: f64 = samples
        .iter()
        .map(|d| {
            let diff = d.as_secs_f64() - mean_s;
            diff * diff
        })
        .sum();
    let variance = sum_sq / samples.len() as f64;
    variance.sqrt() / mean_s
}
228
/// The result of a finished benchmark.
///
/// Statistics are computed losslessly from the raw `samples`.
/// Percentiles (`p50`, `p99`) use the nearest-rank method over the
/// sorted samples.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration.
    pub mean: Duration,
    /// 50th percentile sample duration (nearest-rank).
    pub p50: Duration,
    /// 99th percentile sample duration (nearest-rank).
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
271
272impl BenchmarkResult {
273    /// Effective throughput in operations per second.
274    ///
275    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
276    /// `0.0` for an empty result or zero elapsed time.
277    ///
278    /// # Example
279    ///
280    /// ```
281    /// use dev_bench::Benchmark;
282    ///
283    /// let mut b = Benchmark::new("hot");
284    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
285    /// let r = b.finish();
286    /// assert!(r.ops_per_sec() > 0.0);
287    /// ```
288    pub fn ops_per_sec(&self) -> f64 {
289        if self.total_elapsed.is_zero() {
290            return 0.0;
291        }
292        self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
293    }
294
295    /// Smallest sample. Returns `Duration::ZERO` for an empty result.
296    pub fn min(&self) -> Duration {
297        self.samples.iter().copied().min().unwrap_or(Duration::ZERO)
298    }
299
300    /// Largest sample. Returns `Duration::ZERO` for an empty result.
301    pub fn max(&self) -> Duration {
302        self.samples.iter().copied().max().unwrap_or(Duration::ZERO)
303    }
304
305    /// Sample standard deviation, in seconds. `0.0` for fewer than 2 samples.
306    ///
307    /// Uses `n-1` (Bessel's correction) for the sample variance.
308    pub fn stddev(&self) -> f64 {
309        let n = self.samples.len();
310        if n < 2 {
311            return 0.0;
312        }
313        let mean_s = self.mean.as_secs_f64();
314        let var = self
315            .samples
316            .iter()
317            .map(|d| (d.as_secs_f64() - mean_s).powi(2))
318            .sum::<f64>()
319            / (n as f64 - 1.0);
320        var.sqrt()
321    }
322
323    /// Median absolute deviation, in seconds. `0.0` for empty results.
324    ///
325    /// `MAD = median(|x_i - median(x)|)`. More robust to outliers than
326    /// standard deviation; useful for noisy measurements.
327    pub fn mad(&self) -> f64 {
328        if self.samples.is_empty() {
329            return 0.0;
330        }
331        let p50_s = self.p50.as_secs_f64();
332        let mut deviations: Vec<f64> = self
333            .samples
334            .iter()
335            .map(|d| (d.as_secs_f64() - p50_s).abs())
336            .collect();
337        deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
338        let mid = deviations.len() / 2;
339        deviations[mid]
340    }
341
342    /// 90th percentile sample duration. `Duration::ZERO` for empty results.
343    pub fn p90(&self) -> Duration {
344        self.percentile(0.90)
345    }
346
347    /// 99.9th percentile sample duration. `Duration::ZERO` for empty results.
348    ///
349    /// At least 1000 samples are required to be meaningful; with fewer
350    /// samples this returns the largest sample.
351    pub fn p999(&self) -> Duration {
352        self.percentile(0.999)
353    }
354
355    /// Compute an arbitrary percentile (0.0..=1.0). Returns `Duration::ZERO`
356    /// for empty results. Uses nearest-rank, the same as `p50`/`p99`.
357    pub fn percentile(&self, q: f64) -> Duration {
358        if self.samples.is_empty() {
359            return Duration::ZERO;
360        }
361        let q = q.clamp(0.0, 1.0);
362        let mut sorted = self.samples.clone();
363        sorted.sort();
364        let n = sorted.len();
365        let idx = ((n as f64) * q).floor() as usize;
366        let idx = idx.min(n - 1);
367        sorted[idx]
368    }
369
370    /// Compare this result against a baseline using a default-tuned
371    /// [`CompareOptions`].
372    ///
373    /// `baseline_mean` is the previous mean duration. If `None`, the
374    /// verdict is `Skip` and no comparison is made.
375    ///
376    /// # Example
377    ///
378    /// ```
379    /// use dev_bench::{Benchmark, Threshold};
380    /// use std::time::Duration;
381    ///
382    /// let mut b = Benchmark::new("x");
383    /// b.iter(|| std::hint::black_box(1 + 1));
384    /// let r = b.finish();
385    /// let _ = r.compare_against_baseline(
386    ///     Some(Duration::from_nanos(1)),
387    ///     Threshold::regression_pct(10.0),
388    /// );
389    /// ```
390    pub fn compare_against_baseline(
391        &self,
392        baseline_mean: Option<Duration>,
393        threshold: Threshold,
394    ) -> CheckResult {
395        self.compare_with_options(&CompareOptions {
396            baseline_mean,
397            threshold,
398            ..CompareOptions::default()
399        })
400    }
401
402    /// Compare this result against a baseline using full options.
403    ///
404    /// Behavior:
405    /// - No baseline -> `Skip`.
406    /// - Sample count below `min_samples` -> `Skip` with detail.
407    /// - Within threshold -> `Pass` with numeric evidence.
408    /// - Over threshold but within CV noise band -> `Warn`.
409    /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
410    ///
411    /// In every non-`Skip` case, the returned [`CheckResult`] carries
412    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
413    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
414    ///
415    /// # Example
416    ///
417    /// ```
418    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
419    /// use std::time::Duration;
420    ///
421    /// let mut b = Benchmark::new("x");
422    /// b.iter(|| std::hint::black_box(1 + 1));
423    /// let r = b.finish();
424    /// let opts = CompareOptions {
425    ///     baseline_mean: Some(Duration::from_nanos(1)),
426    ///     threshold: Threshold::regression_pct(20.0),
427    ///     min_samples: 1,
428    ///     allow_cv_noise_band: true,
429    /// };
430    /// let _check = r.compare_with_options(&opts);
431    /// ```
432    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
433        let name = format!("bench::{}", self.name);
434        let mut evidence = self.numeric_evidence();
435        let tags = vec!["bench".to_string()];
436
437        let Some(baseline) = opts.baseline_mean else {
438            let mut c = CheckResult::skip(name).with_detail("no baseline available");
439            c.tags = tags;
440            c.evidence = evidence;
441            return c;
442        };
443
444        if (self.samples.len() as u64) < opts.min_samples {
445            let mut c = CheckResult::skip(name).with_detail(format!(
446                "fewer samples than min_samples ({} < {})",
447                self.samples.len(),
448                opts.min_samples
449            ));
450            c.tags = tags;
451            c.evidence = evidence;
452            return c;
453        }
454
455        let current_ns = self.mean.as_nanos();
456        let baseline_ns = baseline.as_nanos();
457        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));
458
459        let regressed = match opts.threshold {
460            Threshold::RegressionPct(pct) => {
461                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
462                current_ns as f64 > allowed
463            }
464            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
465            Threshold::ThroughputDropPct(pct) => {
466                // Throughput-based; convert via mean.
467                let baseline_ops = if baseline.is_zero() {
468                    0.0
469                } else {
470                    1.0 / baseline.as_secs_f64()
471                };
472                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
473                self.ops_per_sec() < drop_floor
474            }
475        };
476
477        let detail = format!(
478            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
479            current_ns,
480            baseline_ns,
481            self.cv,
482            self.ops_per_sec()
483        );
484
485        if !regressed {
486            let mut c = CheckResult::pass(name).with_detail(detail);
487            c.tags = tags;
488            c.evidence = evidence;
489            return c;
490        }
491
492        // Regression detected. Decide Fail vs Warn based on CV noise band.
493        let in_noise_band = opts.allow_cv_noise_band && {
494            let allowed_noise_ns = baseline_ns as f64 * self.cv;
495            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
496            delta_ns <= allowed_noise_ns
497        };
498        let mut tags = tags;
499        tags.push("regression".to_string());
500        if in_noise_band {
501            let mut c = CheckResult::warn(name, Severity::Warning)
502                .with_detail(format!("{} (within CV noise band)", detail));
503            c.tags = tags;
504            c.evidence = evidence;
505            c
506        } else {
507            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
508            c.tags = tags;
509            c.evidence = evidence;
510            c
511        }
512    }
513
514    /// Build a one-check `Report` containing the comparison result.
515    ///
516    /// Convenience for producers that want a complete `Report` rather
517    /// than a single `CheckResult`. Sets `subject = self.name`,
518    /// `producer = "dev-bench"`.
519    ///
520    /// # Example
521    ///
522    /// ```
523    /// use dev_bench::{Benchmark, Threshold};
524    ///
525    /// let mut b = Benchmark::new("x");
526    /// b.iter(|| std::hint::black_box(1 + 1));
527    /// let r = b.finish();
528    /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
529    /// assert_eq!(report.checks.len(), 1);
530    /// ```
531    pub fn into_report(
532        self,
533        subject_version: impl Into<String>,
534        baseline_mean: Option<Duration>,
535        threshold: Threshold,
536    ) -> Report {
537        let name = self.name.clone();
538        let check = self.compare_against_baseline(baseline_mean, threshold);
539        let mut r = Report::new(name, subject_version).with_producer("dev-bench");
540        r.push(check);
541        r.finish();
542        r
543    }
544
545    fn numeric_evidence(&self) -> Vec<Evidence> {
546        vec![
547            Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
548            // baseline_ns inserted at index 1 by callers when available.
549            Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
550            Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
551            Evidence::numeric("cv", self.cv),
552            Evidence::numeric("ops_per_sec", self.ops_per_sec()),
553            Evidence::numeric("samples", self.samples.len() as f64),
554            Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
555        ]
556    }
557}
558
/// A threshold defining how much slower-than-baseline is acceptable.
///
/// Construct via the variant directly or the helper constructors
/// ([`Threshold::regression_pct`] and friends).
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    /// (The subtraction saturates, so a faster-than-baseline run never fails.)
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}
576
577impl Threshold {
578    /// Build a percent-based duration regression threshold.
579    ///
580    /// # Example
581    ///
582    /// ```
583    /// use dev_bench::Threshold;
584    /// let t = Threshold::regression_pct(20.0);
585    /// assert!(matches!(t, Threshold::RegressionPct(_)));
586    /// ```
587    pub fn regression_pct(pct: f64) -> Self {
588        Threshold::RegressionPct(pct)
589    }
590
591    /// Build an absolute duration regression threshold in nanoseconds.
592    ///
593    /// # Example
594    ///
595    /// ```
596    /// use dev_bench::Threshold;
597    /// let t = Threshold::regression_abs_ns(500);
598    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
599    /// ```
600    pub fn regression_abs_ns(nanos: u128) -> Self {
601        Threshold::RegressionAbsoluteNs(nanos)
602    }
603
604    /// Build a percent-based throughput drop threshold.
605    ///
606    /// # Example
607    ///
608    /// ```
609    /// use dev_bench::Threshold;
610    /// let t = Threshold::throughput_drop_pct(10.0);
611    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
612    /// ```
613    pub fn throughput_drop_pct(pct: f64) -> Self {
614        Threshold::ThroughputDropPct(pct)
615    }
616}
617
/// Options for [`BenchmarkResult::compare_with_options`].
///
/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
/// `allow_cv_noise_band = true`.
///
/// # Example
///
/// ```
/// use dev_bench::{CompareOptions, Threshold};
/// use std::time::Duration;
///
/// let opts = CompareOptions {
///     baseline_mean: Some(Duration::from_nanos(1000)),
///     threshold: Threshold::regression_pct(20.0),
///     min_samples: 30,
///     allow_cv_noise_band: true,
/// };
/// assert_eq!(opts.min_samples, 30);
/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn`.
    pub allow_cv_noise_band: bool,
}
650
651impl Default for CompareOptions {
652    fn default() -> Self {
653        Self {
654            baseline_mean: None,
655            threshold: Threshold::regression_pct(10.0),
656            min_samples: 1,
657            allow_cv_noise_band: true,
658        }
659    }
660}
661
/// A trait for any object that can run a benchmark and produce a result.
///
/// `run` takes `&mut self` so implementors may mutate internal state
/// while measuring.
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}
667
/// Producer wrapper that runs a benchmark and emits a single-check
/// [`Report`] via [`Producer::produce`].
///
/// # Example
///
/// ```no_run
/// use dev_bench::{Benchmark, BenchProducer, Threshold};
/// use dev_report::Producer;
///
/// fn run_bench() -> dev_bench::BenchmarkResult {
///     let mut b = Benchmark::new("hot_path");
///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
///     b.finish()
/// }
///
/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
/// let report = producer.produce();
/// assert_eq!(report.checks.len(), 1);
/// ```
pub struct BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    /// Closure executed on every `produce` call.
    run: F,
    /// Version string forwarded to the emitted `Report`.
    subject_version: String,
    /// Baseline mean handed to `compare_against_baseline`; `None` yields `Skip`.
    baseline_mean: Option<Duration>,
    /// Regression threshold applied on each comparison.
    threshold: Threshold,
}
696
697impl<F> BenchProducer<F>
698where
699    F: Fn() -> BenchmarkResult,
700{
701    /// Build a new producer.
702    pub fn new(
703        run: F,
704        subject_version: impl Into<String>,
705        baseline_mean: Option<Duration>,
706        threshold: Threshold,
707    ) -> Self {
708        Self {
709            run,
710            subject_version: subject_version.into(),
711            baseline_mean,
712            threshold,
713        }
714    }
715}
716
717impl<F> Producer for BenchProducer<F>
718where
719    F: Fn() -> BenchmarkResult,
720{
721    fn produce(&self) -> Report {
722        let result = (self.run)();
723        result.into_report(
724            self.subject_version.clone(),
725            self.baseline_mean,
726            self.threshold,
727        )
728    }
729}
730
#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    // Per-iter sampling: one sample and one counted iteration per call.
    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    // Batched sampling: one sample for the whole batch, n iterations.
    #[test]
    fn iter_with_count_records_one_sample() {
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    // No baseline -> Skip, but the `bench` tag is still attached.
    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    // Too few samples -> Skip with an explanatory detail string.
    #[test]
    fn min_samples_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    // Comparing against its own mean with a generous threshold passes
    // and carries the full numeric evidence set.
    #[test]
    fn small_regression_under_threshold_passes() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
        let mut b = Benchmark::new("x");
        // Inject controlled samples by running noop iterations.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Force a known mean and cv for deterministic comparison.
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    // Ordering invariants between min/mean/max and the percentiles.
    #[test]
    fn extra_stats_are_consistent() {
        let mut b = Benchmark::new("uniform");
        for _ in 0..20 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Bounds.
        assert!(r.min() <= r.mean);
        assert!(r.mean <= r.max());
        assert!(r.p50 <= r.p90());
        assert!(r.p90() <= r.p99);
        assert!(r.p99 <= r.p999());
        // Numbers are non-negative finite.
        assert!(r.stddev() >= 0.0);
        assert!(r.mad() >= 0.0);
    }

    // `percentile` clamps q into 0.0..=1.0 rather than panicking.
    #[test]
    fn percentile_clamps_to_bounds() {
        let mut b = Benchmark::new("p");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1));
        }
        let r = b.finish();
        // q < 0.0 -> first sample; q > 1.0 -> last sample.
        let lo = r.percentile(-0.5);
        let hi = r.percentile(1.5);
        assert!(lo <= hi);
    }

    // A benchmark finished with zero samples yields all-zero statistics.
    #[test]
    fn empty_result_stats_are_zero() {
        let r = Benchmark::new("empty").finish();
        assert_eq!(r.min(), Duration::ZERO);
        assert_eq!(r.max(), Duration::ZERO);
        assert_eq!(r.p90(), Duration::ZERO);
        assert_eq!(r.p999(), Duration::ZERO);
        assert_eq!(r.stddev(), 0.0);
        assert_eq!(r.mad(), 0.0);
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    // `into_report` wraps the single comparison check in a Report with
    // the expected producer name.
    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    // BenchProducer satisfies the dev_report::Producer contract.
    #[test]
    fn bench_producer_implements_producer_trait() {
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}
945}