// dev_bench/lib.rs
1//! # dev-bench
2//!
3//! Performance measurement and regression detection for Rust. Part of
4//! the `dev-*` verification suite.
5//!
6//! `dev-bench` answers the question: did this change make the code
7//! faster, slower, or stay the same? It compares current measurements
8//! against a stored baseline and emits verdicts via `dev-report`.
9//!
10//! ## Quick example
11//!
12//! ```no_run
13//! use dev_bench::{Benchmark, Threshold};
14//!
15//! let mut b = Benchmark::new("parse_query");
16//! for _ in 0..1000 {
//!     b.iter(|| {
//!         std::hint::black_box(40 + 2);
//!     });
20//! }
21//!
22//! let result = b.finish();
23//! let threshold = Threshold::regression_pct(10.0); // fail on +10%
24//! let _check = result.compare_against_baseline(None, threshold);
25//! ```
26//!
27//! ## What's measured
28//!
29//! Per-sample wall-clock duration captured via `Instant::now()`. From
30//! the samples, `dev-bench` reports `mean`, `p50`, `p99`, `cv`, and a
31//! derived `ops_per_sec` throughput. See [`BenchmarkResult`].
32//!
33//! ## Features
34//!
35//! - `alloc-tracking` (opt-in): measures allocation count and bytes
36//! alongside time, using `dhat`. See the [`alloc`] module.
37
38#![cfg_attr(docsrs, feature(doc_cfg))]
39#![warn(missing_docs)]
40#![warn(rust_2018_idioms)]
41
42use std::time::{Duration, Instant};
43
44use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
45
46#[cfg(feature = "alloc-tracking")]
47#[cfg_attr(docsrs, doc(cfg(feature = "alloc-tracking")))]
48pub mod alloc;
49
50pub mod baseline;
51
52pub use baseline::{Baseline, BaselineStore, JsonFileBaselineStore};
53
/// A single benchmark run.
///
/// Collects per-iteration duration samples. Call [`Benchmark::finish`]
/// to produce a [`BenchmarkResult`].
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert_eq!(r.samples.len(), 10);
/// ```
pub struct Benchmark {
    // Stable benchmark name; surfaced as the `bench::<name>` check name
    // when results are compared against a baseline.
    name: String,
    // One wall-clock duration per recorded sample: `iter` pushes one
    // sample per call, `iter_with_count` one per batch.
    samples: Vec<Duration>,
    // Total iterations across all samples; exceeds `samples.len()` when
    // batched sampling (`iter_with_count`) is used.
    iterations_recorded: u64,
}
76
77impl Benchmark {
78 /// Begin a new benchmark with a stable name.
79 pub fn new(name: impl Into<String>) -> Self {
80 Self {
81 name: name.into(),
82 samples: Vec::new(),
83 iterations_recorded: 0,
84 }
85 }
86
87 /// Run one iteration of the benchmark, capturing the duration.
88 ///
89 /// Each call records exactly one sample.
90 ///
91 /// # Example
92 ///
93 /// ```
94 /// use dev_bench::Benchmark;
95 ///
96 /// let mut b = Benchmark::new("noop");
97 /// b.iter(|| std::hint::black_box(1 + 1));
98 /// let r = b.finish();
99 /// assert_eq!(r.samples.len(), 1);
100 /// ```
101 pub fn iter<F, R>(&mut self, f: F) -> R
102 where
103 F: FnOnce() -> R,
104 {
105 let start = Instant::now();
106 let r = f();
107 let elapsed = start.elapsed();
108 self.samples.push(elapsed);
109 self.iterations_recorded += 1;
110 r
111 }
112
113 /// Run a closure `n` times and record ONE sample for the entire batch.
114 ///
115 /// Use for sub-microsecond operations where per-iteration timing
116 /// would be dominated by `Instant::now()` overhead. The reported
117 /// per-iteration mean is `batch_duration / n`.
118 ///
119 /// # Example
120 ///
121 /// ```
122 /// use dev_bench::Benchmark;
123 ///
124 /// let mut b = Benchmark::new("hot");
125 /// b.iter_with_count(1000, || {
126 /// std::hint::black_box(40 + 2);
127 /// });
128 /// let r = b.finish();
129 /// assert_eq!(r.samples.len(), 1);
130 /// assert_eq!(r.iterations_recorded, 1000);
131 /// ```
132 pub fn iter_with_count<F>(&mut self, n: u64, mut f: F)
133 where
134 F: FnMut(),
135 {
136 let start = Instant::now();
137 for _ in 0..n {
138 f();
139 }
140 let elapsed = start.elapsed();
141 self.samples.push(elapsed);
142 self.iterations_recorded += n;
143 }
144
145 /// Finalize the benchmark and produce a [`BenchmarkResult`].
146 pub fn finish(self) -> BenchmarkResult {
147 let n = self.samples.len();
148 let mean = if n == 0 {
149 Duration::ZERO
150 } else {
151 let total: Duration = self.samples.iter().copied().sum();
152 total / n as u32
153 };
154 let mut sorted = self.samples.clone();
155 sorted.sort();
156 let p50 = sorted.get(n / 2).copied().unwrap_or(Duration::ZERO);
157 let p99 = sorted
158 .get((n as f64 * 0.99).floor() as usize)
159 .copied()
160 .unwrap_or(Duration::ZERO);
161 let cv = compute_cv(&self.samples, mean);
162 let total_elapsed: Duration = self.samples.iter().copied().sum();
163 BenchmarkResult {
164 name: self.name,
165 samples: self.samples,
166 iterations_recorded: self.iterations_recorded,
167 total_elapsed,
168 mean,
169 p50,
170 p99,
171 cv,
172 }
173 }
174}
175
/// Coefficient of variation (population stddev / mean) of `samples`.
///
/// Returns `0.0` when the sample set is empty or the mean is zero,
/// where the ratio would be undefined.
fn compute_cv(samples: &[Duration], mean: Duration) -> f64 {
    if samples.is_empty() {
        return 0.0;
    }
    let mean_s = mean.as_secs_f64();
    if mean_s == 0.0 {
        return 0.0;
    }
    // Population variance: mean of squared deviations from the mean,
    // accumulated in sample order.
    let mut sum_sq = 0.0;
    for sample in samples {
        let dev = sample.as_secs_f64() - mean_s;
        sum_sq += dev * dev;
    }
    let variance = sum_sq / samples.len() as f64;
    variance.sqrt() / mean_s
}
192
/// The result of a finished benchmark.
///
/// Statistics are computed losslessly from the raw `samples`.
///
/// # Example
///
/// ```
/// use dev_bench::Benchmark;
///
/// let mut b = Benchmark::new("noop");
/// for _ in 0..10 {
///     b.iter(|| std::hint::black_box(42));
/// }
/// let r = b.finish();
/// assert!(r.mean.as_nanos() > 0);
/// ```
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Stable name of the benchmark.
    pub name: String,
    /// All raw sample durations.
    pub samples: Vec<Duration>,
    /// Total iterations across all samples. With per-iter sampling this
    /// equals `samples.len()`. With batched sampling, it is the sum of
    /// `n` across all `iter_with_count` calls.
    pub iterations_recorded: u64,
    /// Sum of all sample durations.
    pub total_elapsed: Duration,
    /// Mean sample duration.
    pub mean: Duration,
    /// 50th percentile sample duration.
    ///
    /// Computed as the sorted sample at index `n / 2`, i.e. the upper
    /// median for an even sample count.
    pub p50: Duration,
    /// 99th percentile sample duration.
    ///
    /// Computed as the sorted sample at index `floor(n * 0.99)`.
    pub p99: Duration,
    /// Coefficient of variation across samples (stddev / mean).
    ///
    /// Higher numbers indicate noisier measurements. A CV of `0.05`
    /// means the standard deviation is 5% of the mean. Reported
    /// regressions within the CV are downgraded from `Fail` to `Warn`
    /// by [`compare_with_options`](BenchmarkResult::compare_with_options).
    pub cv: f64,
}
235
impl BenchmarkResult {
    /// Effective throughput in operations per second.
    ///
    /// Defined as `iterations_recorded / total_elapsed_seconds`. Returns
    /// `0.0` for an empty result or zero elapsed time.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Benchmark;
    ///
    /// let mut b = Benchmark::new("hot");
    /// b.iter_with_count(1000, || { std::hint::black_box(1 + 1); });
    /// let r = b.finish();
    /// assert!(r.ops_per_sec() > 0.0);
    /// ```
    pub fn ops_per_sec(&self) -> f64 {
        // Guard against division by zero (empty result or sub-resolution
        // total); report "no throughput" rather than infinity/NaN.
        if self.total_elapsed.is_zero() {
            return 0.0;
        }
        self.iterations_recorded as f64 / self.total_elapsed.as_secs_f64()
    }

    /// Compare this result against a baseline using a default-tuned
    /// [`CompareOptions`].
    ///
    /// `baseline_mean` is the previous mean duration. If `None`, the
    /// verdict is `Skip` and no comparison is made.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let _ = r.compare_against_baseline(
    ///     Some(Duration::from_nanos(1)),
    ///     Threshold::regression_pct(10.0),
    /// );
    /// ```
    pub fn compare_against_baseline(
        &self,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> CheckResult {
        // Thin wrapper: only baseline and threshold vary; the rest of
        // the options come from `CompareOptions::default()`.
        self.compare_with_options(&CompareOptions {
            baseline_mean,
            threshold,
            ..CompareOptions::default()
        })
    }

    /// Compare this result against a baseline using full options.
    ///
    /// Behavior:
    /// - No baseline -> `Skip`.
    /// - Sample count below `min_samples` -> `Skip` with detail.
    /// - Within threshold -> `Pass` with numeric evidence.
    /// - Over threshold but within CV noise band -> `Warn`.
    /// - Over threshold and outside CV noise band -> `Fail (Warning)`.
    ///
    /// In every non-`Skip` case, the returned [`CheckResult`] carries
    /// a `bench` tag and numeric `Evidence` for `mean_ns`,
    /// `baseline_ns`, `p50_ns`, `p99_ns`, `cv`, and `ops_per_sec`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, CompareOptions, Threshold};
    /// use std::time::Duration;
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let opts = CompareOptions {
    ///     baseline_mean: Some(Duration::from_nanos(1)),
    ///     threshold: Threshold::regression_pct(20.0),
    ///     min_samples: 1,
    ///     allow_cv_noise_band: true,
    /// };
    /// let _check = r.compare_with_options(&opts);
    /// ```
    pub fn compare_with_options(&self, opts: &CompareOptions) -> CheckResult {
        let name = format!("bench::{}", self.name);
        let mut evidence = self.numeric_evidence();
        let tags = vec!["bench".to_string()];

        // Skip path 1: nothing to compare against. Evidence is still
        // attached so a consumer can bootstrap a baseline from it.
        let Some(baseline) = opts.baseline_mean else {
            let mut c = CheckResult::skip(name).with_detail("no baseline available");
            c.tags = tags;
            c.evidence = evidence;
            return c;
        };

        // Skip path 2: not enough samples for a meaningful comparison.
        if (self.samples.len() as u64) < opts.min_samples {
            let mut c = CheckResult::skip(name).with_detail(format!(
                "fewer samples than min_samples ({} < {})",
                self.samples.len(),
                opts.min_samples
            ));
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        let current_ns = self.mean.as_nanos();
        let baseline_ns = baseline.as_nanos();
        // Index 1 places baseline_ns directly after mean_ns;
        // `numeric_evidence` deliberately leaves that slot free.
        evidence.insert(1, Evidence::numeric("baseline_ns", baseline_ns as f64));

        let regressed = match opts.threshold {
            Threshold::RegressionPct(pct) => {
                let allowed = baseline_ns as f64 * (1.0 + pct / 100.0);
                current_ns as f64 > allowed
            }
            Threshold::RegressionAbsoluteNs(abs) => current_ns.saturating_sub(baseline_ns) > abs,
            Threshold::ThroughputDropPct(pct) => {
                // Throughput-based; convert via mean. A zero baseline
                // yields baseline_ops = 0.0, so this arm never regresses.
                let baseline_ops = if baseline.is_zero() {
                    0.0
                } else {
                    1.0 / baseline.as_secs_f64()
                };
                let drop_floor = baseline_ops * (1.0 - pct / 100.0);
                self.ops_per_sec() < drop_floor
            }
        };

        let detail = format!(
            "current_ns={} baseline_ns={} cv={:.4} ops/sec={:.0}",
            current_ns,
            baseline_ns,
            self.cv,
            self.ops_per_sec()
        );

        if !regressed {
            let mut c = CheckResult::pass(name).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            return c;
        }

        // Regression detected. Decide Fail vs Warn based on CV noise band:
        // a delta no larger than `baseline_ns * cv` (one stddev worth of
        // measurement noise) is treated as inconclusive.
        let in_noise_band = opts.allow_cv_noise_band && {
            let allowed_noise_ns = baseline_ns as f64 * self.cv;
            let delta_ns = (current_ns as f64) - (baseline_ns as f64);
            delta_ns <= allowed_noise_ns
        };
        let mut tags = tags;
        tags.push("regression".to_string());
        if in_noise_band {
            let mut c = CheckResult::warn(name, Severity::Warning)
                .with_detail(format!("{} (within CV noise band)", detail));
            c.tags = tags;
            c.evidence = evidence;
            c
        } else {
            let mut c = CheckResult::fail(name, Severity::Warning).with_detail(detail);
            c.tags = tags;
            c.evidence = evidence;
            c
        }
    }

    /// Build a one-check `Report` containing the comparison result.
    ///
    /// Convenience for producers that want a complete `Report` rather
    /// than a single `CheckResult`. Sets `subject = self.name`,
    /// `producer = "dev-bench"`.
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::{Benchmark, Threshold};
    ///
    /// let mut b = Benchmark::new("x");
    /// b.iter(|| std::hint::black_box(1 + 1));
    /// let r = b.finish();
    /// let report = r.into_report("0.1.0", None, Threshold::regression_pct(10.0));
    /// assert_eq!(report.checks.len(), 1);
    /// ```
    pub fn into_report(
        self,
        subject_version: impl Into<String>,
        baseline_mean: Option<Duration>,
        threshold: Threshold,
    ) -> Report {
        // Clone the name before `self` is consumed by the comparison.
        let name = self.name.clone();
        let check = self.compare_against_baseline(baseline_mean, threshold);
        let mut r = Report::new(name, subject_version).with_producer("dev-bench");
        r.push(check);
        // NOTE(review): `finish()` is defined in dev-report; presumably it
        // seals the report / stamps completion metadata — confirm there.
        r.finish();
        r
    }

    /// Numeric evidence common to every comparison outcome.
    fn numeric_evidence(&self) -> Vec<Evidence> {
        vec![
            Evidence::numeric("mean_ns", self.mean.as_nanos() as f64),
            // baseline_ns inserted at index 1 by callers when available.
            Evidence::numeric("p50_ns", self.p50.as_nanos() as f64),
            Evidence::numeric("p99_ns", self.p99.as_nanos() as f64),
            Evidence::numeric("cv", self.cv),
            Evidence::numeric("ops_per_sec", self.ops_per_sec()),
            Evidence::numeric("samples", self.samples.len() as f64),
            Evidence::numeric("iterations_recorded", self.iterations_recorded as f64),
        ]
    }
}
447
/// A threshold defining how much slower-than-baseline is acceptable.
#[derive(Debug, Clone, Copy)]
pub enum Threshold {
    /// Fail if the new mean is more than `pct` percent slower than baseline.
    RegressionPct(f64),
    /// Fail if `current_mean - baseline_mean` exceeds `nanos`.
    RegressionAbsoluteNs(u128),
    /// Fail if throughput dropped more than `pct` percent below baseline.
    ///
    /// Baseline ops/sec is derived as `1.0 / baseline_mean_secs`, which
    /// assumes the baseline duration is a per-operation duration. Use
    /// with per-iter sampling (`Benchmark::iter`) where `mean` equals
    /// per-op duration. For batched sampling
    /// (`Benchmark::iter_with_count`), prefer a duration-based threshold
    /// or pre-compute the baseline manually.
    ThroughputDropPct(f64),
}

impl Threshold {
    /// Shorthand for [`Threshold::RegressionPct`].
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_pct(20.0);
    /// assert!(matches!(t, Threshold::RegressionPct(_)));
    /// ```
    pub fn regression_pct(pct: f64) -> Self {
        Self::RegressionPct(pct)
    }

    /// Shorthand for [`Threshold::RegressionAbsoluteNs`].
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::regression_abs_ns(500);
    /// assert!(matches!(t, Threshold::RegressionAbsoluteNs(_)));
    /// ```
    pub fn regression_abs_ns(nanos: u128) -> Self {
        Self::RegressionAbsoluteNs(nanos)
    }

    /// Shorthand for [`Threshold::ThroughputDropPct`].
    ///
    /// # Example
    ///
    /// ```
    /// use dev_bench::Threshold;
    /// let t = Threshold::throughput_drop_pct(10.0);
    /// assert!(matches!(t, Threshold::ThroughputDropPct(_)));
    /// ```
    pub fn throughput_drop_pct(pct: f64) -> Self {
        Self::ThroughputDropPct(pct)
    }
}
506
/// Options for [`BenchmarkResult::compare_with_options`].
///
/// Defaults: no baseline, percent threshold of 10%, `min_samples = 1`,
/// `allow_cv_noise_band = true`.
///
/// # Example
///
/// ```
/// use dev_bench::{CompareOptions, Threshold};
/// use std::time::Duration;
///
/// let opts = CompareOptions {
///     baseline_mean: Some(Duration::from_nanos(1000)),
///     threshold: Threshold::regression_pct(20.0),
///     min_samples: 30,
///     allow_cv_noise_band: true,
/// };
/// assert_eq!(opts.min_samples, 30);
/// ```
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Baseline mean to compare against. `None` -> verdict is `Skip`.
    pub baseline_mean: Option<Duration>,
    /// Regression threshold to apply.
    pub threshold: Threshold,
    /// Minimum sample count required before a comparison can be made.
    /// Below this, the verdict is `Skip` with a `min_samples` detail.
    pub min_samples: u64,
    /// If `true`, regressions within `baseline_ns * cv` are downgraded
    /// from `Fail` to `Warn`.
    pub allow_cv_noise_band: bool,
}
539
540impl Default for CompareOptions {
541 fn default() -> Self {
542 Self {
543 baseline_mean: None,
544 threshold: Threshold::regression_pct(10.0),
545 min_samples: 1,
546 allow_cv_noise_band: true,
547 }
548 }
549}
550
/// A trait for any object that can run a benchmark and produce a result.
///
/// Abstraction point for callers that want to treat heterogeneous
/// benchmarks uniformly; takes `&mut self` so implementors may keep
/// per-run state.
pub trait Bench {
    /// Run the benchmark and return its result.
    fn run(&mut self) -> BenchmarkResult;
}
556
/// Producer wrapper that runs a benchmark and emits a single-check
/// [`Report`] via [`Producer::produce`].
///
/// # Example
///
/// ```no_run
/// use dev_bench::{Benchmark, BenchProducer, Threshold};
/// use dev_report::Producer;
///
/// fn run_bench() -> dev_bench::BenchmarkResult {
///     let mut b = Benchmark::new("hot_path");
///     for _ in 0..10 { b.iter(|| std::hint::black_box(1 + 1)); }
///     b.finish()
/// }
///
/// let producer = BenchProducer::new(run_bench, "0.1.0", None, Threshold::regression_pct(10.0));
/// let report = producer.produce();
/// assert_eq!(report.checks.len(), 1);
/// ```
pub struct BenchProducer<F>
where
    F: Fn() -> BenchmarkResult,
{
    // Closure that performs the measurement; invoked once per `produce`.
    run: F,
    // Version string forwarded to the emitted `Report`.
    subject_version: String,
    // Previous mean duration; `None` yields a `Skip` verdict downstream.
    baseline_mean: Option<Duration>,
    // Regression threshold applied during comparison.
    threshold: Threshold,
}
585
586impl<F> BenchProducer<F>
587where
588 F: Fn() -> BenchmarkResult,
589{
590 /// Build a new producer.
591 pub fn new(
592 run: F,
593 subject_version: impl Into<String>,
594 baseline_mean: Option<Duration>,
595 threshold: Threshold,
596 ) -> Self {
597 Self {
598 run,
599 subject_version: subject_version.into(),
600 baseline_mean,
601 threshold,
602 }
603 }
604}
605
606impl<F> Producer for BenchProducer<F>
607where
608 F: Fn() -> BenchmarkResult,
609{
610 fn produce(&self) -> Report {
611 let result = (self.run)();
612 result.into_report(
613 self.subject_version.clone(),
614 self.baseline_mean,
615 self.threshold,
616 )
617 }
618}
619
#[cfg(test)]
mod tests {
    // Unit tests. Timing assertions are deliberately loose (`> ZERO`,
    // `>= 0.0`) so they hold on any machine; deterministic comparisons
    // are built by overwriting `mean`/`cv` on a finished result.
    use super::*;
    use dev_report::Verdict;

    #[test]
    fn benchmark_runs_and_finishes() {
        let mut b = Benchmark::new("noop");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(42));
        }
        let r = b.finish();
        assert_eq!(r.samples.len(), 10);
        assert_eq!(r.iterations_recorded, 10);
        assert!(r.mean > Duration::ZERO);
    }

    #[test]
    fn iter_with_count_records_one_sample() {
        // Batched sampling: one sample, but all iterations counted.
        let mut b = Benchmark::new("hot");
        b.iter_with_count(1000, || {
            std::hint::black_box(1 + 1);
        });
        let r = b.finish();
        assert_eq!(r.samples.len(), 1);
        assert_eq!(r.iterations_recorded, 1000);
        assert!(r.ops_per_sec() > 0.0);
    }

    #[test]
    fn comparison_without_baseline_is_skip() {
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let v = r.compare_against_baseline(None, Threshold::regression_pct(5.0));
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.has_tag("bench"));
    }

    #[test]
    fn min_samples_skip() {
        // One sample against min_samples = 100 -> Skip with detail.
        let mut b = Benchmark::new("x");
        b.iter(|| ());
        let r = b.finish();
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(5.0),
            min_samples: 100,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Skip);
        assert!(v.detail.unwrap().contains("min_samples"));
    }

    #[test]
    fn small_regression_under_threshold_passes() {
        // Baseline equals the current mean, so a 50% threshold passes.
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        let baseline = r.mean;
        let v = r.compare_against_baseline(Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(v.verdict, Verdict::Pass);
        assert!(v.has_tag("bench"));
        // Numeric evidence is attached.
        assert!(v.evidence.iter().any(|e| e.label == "mean_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "baseline_ns"));
        assert!(v.evidence.iter().any(|e| e.label == "ops_per_sec"));
    }

    #[test]
    fn regression_outside_cv_band_fails() {
        // Baseline 100ns, current 200ns, threshold 10%, cv ~0.
        let mut b = Benchmark::new("x");
        // Inject controlled samples by running noop iterations.
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Force a known mean and cv for deterministic comparison.
        r.mean = Duration::from_nanos(200);
        r.cv = 0.0;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Fail);
        assert!(v.has_tag("regression"));
    }

    #[test]
    fn regression_inside_cv_band_warns() {
        let mut b = Benchmark::new("x");
        for _ in 0..50 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let mut r = b.finish();
        // Current is 12% over baseline but cv is 30% -> within noise band.
        r.mean = Duration::from_nanos(112);
        r.cv = 0.30;
        let opts = CompareOptions {
            baseline_mean: Some(Duration::from_nanos(100)),
            threshold: Threshold::regression_pct(10.0),
            min_samples: 1,
            allow_cv_noise_band: true,
        };
        let v = r.compare_with_options(&opts);
        assert_eq!(v.verdict, Verdict::Warn);
        assert!(v.has_tag("regression"));
        assert!(v.detail.unwrap().contains("CV noise band"));
    }

    #[test]
    fn throughput_threshold_detects_drop() {
        // ThroughputDropPct expects a per-op baseline duration. Use
        // per-iter sampling so mean == per-op duration.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::thread::sleep(Duration::from_micros(1)));
        }
        let r = b.finish();
        // Baseline 10x faster (per-op duration is 1/10 of current);
        // current throughput is 90% lower than baseline -> regression.
        let baseline = r.mean / 10;
        let v = r.compare_against_baseline(Some(baseline), Threshold::throughput_drop_pct(50.0));
        assert_eq!(v.verdict, Verdict::Fail);
    }

    #[test]
    fn cv_is_zero_for_uniform_samples() {
        // Samples are nearly identical -> cv near 0.
        let mut b = Benchmark::new("x");
        for _ in 0..10 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        // Not strictly zero on real machines, just bounded.
        assert!(r.cv >= 0.0);
    }

    #[test]
    fn into_report_emits_one_check() {
        let mut b = Benchmark::new("x");
        for _ in 0..5 {
            b.iter(|| std::hint::black_box(1 + 1));
        }
        let r = b.finish();
        let baseline = r.mean;
        let report = r.into_report("0.1.0", Some(baseline), Threshold::regression_pct(50.0));
        assert_eq!(report.checks.len(), 1);
        assert_eq!(report.producer.as_deref(), Some("dev-bench"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }

    #[test]
    fn bench_producer_implements_producer_trait() {
        // Plain fn pointers implement Fn() -> BenchmarkResult.
        fn run() -> BenchmarkResult {
            let mut b = Benchmark::new("noop");
            for _ in 0..5 {
                b.iter(|| std::hint::black_box(1 + 1));
            }
            b.finish()
        }
        let p = BenchProducer::new(run, "0.1.0", None, Threshold::regression_pct(10.0));
        let report = p.produce();
        assert_eq!(report.checks.len(), 1);
    }
}