// dev_bench/lib.rs

//! # dev-bench
//!
//! Performance measurement and regression detection for Rust. Part of
//! the `dev-*` verification suite.
//!
//! `dev-bench` answers the question: did this change make the code
//! faster, slower, or leave it unchanged? It compares current
//! measurements against a stored baseline and emits verdicts via
//! `dev-report`.
//!
//! ## Quick example
//!
//! ```no_run
//! use dev_bench::{Benchmark, Threshold};
//!
//! let mut b = Benchmark::new("parse_query");
//! for _ in 0..1000 {
//!     b.iter(|| {
//!         std::hint::black_box(40 + 2);
//!     });
//! }
//!
//! let result = b.finish();
//! let threshold = Threshold::regression_pct(10.0);   // fail on +10%
//! let _check = result.compare_against_baseline(None, threshold);
//! ```
//!
//! ## What's measured
//!
//! Each sample is a wall-clock duration captured via `Instant::now()`.
//! From the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`,
//! and a derived `ops_per_sec` throughput. See [`BenchmarkResult`].
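//!
//! For instance, a finished run exposes those statistics directly (a
//! minimal sketch; the workload here is a stand-in):
//!
//! ```
//! use dev_bench::Benchmark;
//!
//! let mut b = Benchmark::new("stats");
//! for _ in 0..100 {
//!     b.iter(|| std::hint::black_box(7 * 6));
//! }
//! let r = b.finish();
//! println!(
//!     "mean={:?} p50={:?} p99={:?} cv={:.3} ops/s={:.0}",
//!     r.mean, r.p50, r.p99, r.cv, r.ops_per_sec(),
//! );
//! ```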
//!
//! ## Features
//!
//! - `alloc-tracking` (opt-in): measures allocation count and bytes
//!   alongside time, using `dhat`. See the [`alloc`] module.

#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_docs)]
#![warn(rust_2018_idioms)]

use std::time::{Duration, Instant};

use dev_report::{CheckResult, Evidence, Producer, Report, Severity};

#[cfg(feature = "alloc-tracking")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
pub mod alloc;

pub mod baseline;

pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};

/// A single benchmark run.
///
/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
/// to produce a [`BenchmarkResult`].
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
pub struct Benchmark {
    name: String,
    samples: Vec<Duration>,
    iterations_recorded: u64,
}

impl Benchmark {
    /// Begin a new benchmark with a stable name.
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            samples: Vec::new(),
            iterations_recorded: 0,
        }
    }

    /// Run one iteration of the benchmark, capturing the duration.
    ///
    /// Each call records exactly one sample.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("noop");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// assert_eq!(r.samples.len(), 1);
    /// ```
    pub fn iter<F, R>(&mut self, f: F) -> R
    where
        F: FnOnce() -> R,
    {
        let start = Instant::now();
        let r = f();
        let elapsed = start.elapsed();
        self.samples.push(elapsed);
        self.iterations_recorded += 1;
        r
    }

    /// Run a closure `n` times and record ONE sample for the entire batch.
    ///
    /// Use for sub-microsecond operations where per-iteration timing
    /// would be dominated by `Instant::now()` overhead. The reported
    /// per-iteration mean is `batch_duration / n`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("hot");
    /// b.iter_with_count(1000, || {
    ///     std::hint::black_box(40 + 2);
    /// });
    /// let r = b.finish();
    /// assert_eq!(r.samples.len(), 1);
    /// assert_eq!(r.iterations_recorded, 1000);
    /// ```
    pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
    where
        F: FnMut(),
    {
        let start = Instant::now();
        for _ in 0..n {
            f();
        }
        let elapsed = start.elapsed();
        self.samples.push(elapsed);
        self.iterations_recorded += n;
    }

    /// Finalize the benchmark and produce a [`BenchmarkResult`].
    pub fn finish(self) -> BenchmarkResult {
        let n = self.samples.len();
        let total_elapsed: Duration = self.samples.iter().copied().sum();
        let mean = if n == 0 {
            Duration::ZERO
        } else {
            total_elapsed / n as u32
        };
        let mut sorted = self.samples.clone();
        sorted.sort();
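        // p50 is the element at index n/2 of the sorted samples; p99 is
        // the element at floor(0.99 * n). Both fall back to
        // `Duration::ZERO` when there are no samples.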
        let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
        let p99 = sorted
            .get((n as f64 * 0.99).floor() as usize)
            .copied()
            .unwrap_or(Duration::ZERO);
        let cv = compute_cv(&self.samples, mean);
        BenchmarkResult {
            name: self.name,
            samples: self.samples,
            iterations_recorded: self.iterations_recorded,
            total_elapsed,
            mean,
            p50,
            p99,
            cv,
        }
    }
}

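/// Population coefficient of variation of the samples (stddev / mean).
/// Returns `0.0` for an empty sample set or a zero mean.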
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    if samples.is_empty() {
        return 0.0;
    }
    let mean_s = mean.as_secs_f64();
    if mean_s == 0.0 {
        return 0.0;
    }
    let n = samples.len() as f64;
    let var = samples
        .iter()
        .map(|d| (d.as_secs_f64() - mean_s).powi(2))
        .sum::<f64>()
        / n;
    var.sqrt() / mean_s
}

/// The result of a finished benchmark.
///
/// Statistics are computed directly from the raw `samples`, which are
/// retained in full.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration.
    pub mean: Duration,
    /// 50th percentile sample duration.
    pub p50: Duration,
    /// 99th percentile sample duration.
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}

impl BenchmarkResult {
    /// Effective throughput in operations per second.
    ///
    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
    /// `0.0` for an empty result or zero elapsed time.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("hot");
    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
    /// let r = b.finish();
    /// assert!(r.ops_per_sec() > 0.0);
    /// ```
    pub fn ops_per_sec(&self) -> f64 {
        if self.total_elapsed.is_zero() {
            return 0.0;
        }
        self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
    }

    /// Compare this result against a baseline using a default-tuned
    /// [`CompareOptions`].
    ///
    /// `baseline_mean` is the previous mean duration. If `None`, the
    /// verdict is `Skip` and no comparison is made.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let _ = r.compare_against_baseline(
    ///     Some(Duration::from_nanos(1)),
    ///     Threshold::regression_pct(10.0),
    /// );
    /// ```
    pub fn compare_against_baseline(
        &self,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> CheckResult {
        self.compare_with_options(&CompareOptions {
            baseline_mean,
            threshold,
            ..CompareOptions::default()
        })
    }

    /// Compare this result against a baseline using full options.
    ///
    /// Behavior:
    /// - No baseline -> `Skip`.
    /// - Sample count below `min_samples` -> `Skip` with detail.
    /// - Within threshold -> `Pass` with numeric evidence.
    /// - Over threshold but within CV noise band -> `Warn`.
    /// - Over threshold and outside CV noise band -> `Fail` at
    ///   `Severity::Warning`.
    ///
    /// In every non-`Skip` case, the returned [`CheckResult`] carries
    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let opts = CompareOptions {
    ///     baseline_mean: Some(Duration::from_nanos(1)),
    ///     threshold: Threshold::regression_pct(20.0),
    ///     min_samples: 1,
    ///     allow_cv_noise_band: true,
    /// };
    /// let _check = r.compare_with_options(&opts);
    /// ```
    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
        let name = format!("bench::{}", self.name);
        let mut evidence = self.numeric_evidence();
        let tags = vec!["bench".to_string()];

        let Some(baseline) = opts.baseline_mean else {
            let mut c = CheckResult::skip(name).with_detail("no baseline available");
            c.tags = tags;
            c.evidence = evidence;
            return c;
        };

        if (self.samples.len() as u64) < opts.min_samples {
            let mut c = CheckResult::skip(name).with_detail(format!(
                "fewer samples than min_samples ({} < {})",
                self.samples.len(),
                opts.min_samples
            ));
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        let current_ns = self.mean.as_nanos();
        let baseline_ns = baseline.as_nanos();
        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));

        let regressed = match opts.threshold {
            Threshold::RegressionPct(pct) => {
                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
                current_ns as f64 > allowed
            }
            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
            Threshold::ThroughputDropPct(pct) => {
                // Throughput-based; convert via mean.
                let baseline_ops = if baseline.is_zero() {
                    0.0
                } else {
                    1.0 / baseline.as_secs_f64()
                };
                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
                self.ops_per_sec() < drop_floor
            }
        };

        let detail = format!(
            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
            current_ns,
            baseline_ns,
            self.cv,
            self.ops_per_sec()
        );

        if !regressed {
            let mut c = CheckResult::pass(name).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        // Regression detected. Decide Fail vs Warn based on CV noise band.
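        // Example: a 100ns baseline with cv = 0.30 tolerates up to 30ns
        // of delta as noise, so a 112ns mean warns rather than fails even
        // though it is past a 10% threshold.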
        let in_noise_band = opts.allow_cv_noise_band && {
            let allowed_noise_ns = baseline_ns as f64 * self.cv;
            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
            delta_ns <= allowed_noise_ns
        };
        let mut tags = tags;
        tags.push("regression".to_string());
        if in_noise_band {
            let mut c = CheckResult::warn(name, Severity::Warning)
                .with_detail(format!("{} (within CV noise band)", detail));
            c.tags = tags;
            c.evidence = evidence;
            c
        } else {
            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            c
        }
    }

    /// Build a one-check `Report` containing the comparison result.
    ///
    /// Convenience for producers that want a complete `Report` rather
    /// than a single `CheckResult`. Sets `subject = self.name`,
    /// `producer = "dev-bench"`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
    /// assert_eq!(report.checks.len(), 1);
    /// ```
    pub fn into_report(
        self,
        subject_version: impl Into<String>,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> Report {
        let name = self.name.clone();
        let check = self.compare_against_baseline(baseline_mean, threshold);
        let mut r = Report::new(name, subject_version).with_producer("dev-bench");
        r.push(check);
        r.finish();
        r
    }

    fn numeric_evidence(&self) -> Vec<Evidence> {
        vec![
            Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
            // baseline_ns inserted at index 1 by callers when available.
            Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
            Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
            Evidence::numeric("cv", self.cv),
            Evidence::numeric("ops_per_sec", self.ops_per_sec()),
            Evidence::numeric("samples", self.samples.len() as f64),
            Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
        ]
    }
}

/// A threshold defining how much slower-than-baseline is acceptable.
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
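    ///
    /// For example, a 100ns baseline with `pct = 10.0` allows means up
    /// to 110ns; anything above that regresses.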
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
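    ///
    /// For example, with `nanos = 500`, a 1200ns mean against a 1000ns
    /// baseline (a 200ns delta) still passes.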
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`), where `mean` equals
    /// the per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
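    ///
    /// For example, a 2µs baseline mean implies 500,000 ops/sec; with
    /// `pct = 10.0`, current throughput below 450,000 ops/sec fails.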
    ThroughputDropPct(f64),
}

impl Threshold {
    /// Build a percent-based duration regression threshold.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_pct(20.0);
    /// assert!(matches!(t, Threshold::RegressionPct(_)));
    /// ```
    pub fn regression_pct(pct: f64) -> Self {
        Threshold::RegressionPct(pct)
    }

    /// Build an absolute duration regression threshold in nanoseconds.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_abs_ns(500);
    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
    /// ```
    pub fn regression_abs_ns(nanos: u128) -> Self {
        Threshold::RegressionAbsoluteNs(nanos)
    }

    /// Build a percent-based throughput drop threshold.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::throughput_drop_pct(10.0);
    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
    /// ```
    pub fn throughput_drop_pct(pct: f64) -> Self {
        Threshold::ThroughputDropPct(pct)
    }
}

/// Options for [`BenchmarkResult::compare_with_options`].
///
/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
/// `allow_cv_noise_band = true`.
///
/// # Example
///
/// ```
/// use dev_bench::{CompareOptions, Threshold};
/// use std::time::Duration;
///
/// let opts = CompareOptions {
///     baseline_mean: Some(Duration::from_nanos(1000)),
///     threshold: Threshold::regression_pct(20.0),
///     min_samples: 30,
///     allow_cv_noise_band: true,
/// };
/// assert_eq!(opts.min_samples, 30);
/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn`.
    pub allow_cv_noise_band: bool,
}

impl Default for CompareOptions {
    fn default() -> Self {
        Self {
            baseline_mean: None,
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        }
    }
}

/// A trait for any object that can run a benchmark and produce a result.
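///
/// # Example
///
/// A minimal implementer (the `Noop` type here is illustrative):
///
/// ```
/// use dev_bench::{Bench, Benchmark, BenchmarkResult};
///
/// struct Noop;
///
/// impl Bench for Noop {
///     fn run(&mut self) -> BenchmarkResult {
///         let mut b = Benchmark::new("noop");
///         b.iter(|| std::hint::black_box(1 + 1));
///         b.finish()
///     }
/// }
///
/// let mut bench = Noop;
/// let r = bench.run();
/// assert_eq!(r.samples.len(), 1);
/// ```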
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}

/// Producer wrapper that runs a benchmark and emits a single-check
/// [`Report`] via [`Producer::produce`].
///
/// # Example
///
/// ```no_run
/// use dev_bench::{Benchmark, BenchProducer, Threshold};
/// use dev_report::Producer;
///
/// fn run_bench() -> dev_bench::BenchmarkResult {
///     let mut b = Benchmark::new("hot_path");
///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
///     b.finish()
/// }
///
/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
/// let report = producer.produce();
/// assert_eq!(report.checks.len(), 1);
/// ```
pub struct BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    run: F,
    subject_version: String,
    baseline_mean: Option<Duration>,
    threshold: Threshold,
}

impl<F> BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    /// Build a new producer.
    pub fn new(
        run: F,
        subject_version: impl Into<String>,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> Self {
        Self {
            run,
            subject_version: subject_version.into(),
            baseline_mean,
            threshold,
        }
    }
}

impl<F> Producer for BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    fn produce(&self) -> Report {
        let result = (self.run)();
        result.into_report(
            self.subject_version.clone(),
            self.baseline_mean,
            self.threshold,
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    #[test]
    fn iter_with_count_records_one_sample() {
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    #[test]
    fn small_regression_under_threshold_passes() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv 0.
        let mut b = Benchmark::new("x");
        // Run noop iterations just to populate samples; the mean and cv
        // are forced below for a deterministic comparison.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn cv_is_nonnegative_for_near_uniform_samples() {
        // Samples are nearly identical -> cv near 0, but real machines
        // add jitter, so only the lower bound is asserted.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        assert!(r.cv >= 0.0);
    }
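
    #[test]
    fn compute_cv_matches_hand_calculation() {
        // Hand-worked case: samples of 1ms and 3ms have mean 2ms and
        // population stddev 1ms, so cv = 1ms / 2ms = 0.5.
        let samples = [Duration::from_millis(1), Duration::from_millis(3)];
        let cv = compute_cv(&samples, Duration::from_millis(2));
        assert!((cv - 0.5).abs() < 1e-9);
    }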

    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}
792}