Skip to main content

pgrx_bench/
lib.rs

1//LICENSE Portions Copyright 2019-2021 ZomboDB, LLC.
2//LICENSE
3//LICENSE Portions Copyright 2021-2023 Technology Concepts & Design, Inc.
4//LICENSE
5//LICENSE Portions Copyright 2023-2023 PgCentral Foundation, Inc. <contact@pgcentral.org>
6//LICENSE
7//LICENSE All rights reserved.
8//LICENSE
9//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file.
10
11pub mod pgrx;
12
13use crate::pgrx::{
14    BenchArtifact, BenchComparison, BenchComparisonEstimate, BenchConfig, BenchDefinition,
15    BenchEstimate, BenchResult, BenchSample, BenchStatus, BenchThroughput, CriterionBenchmark,
16    Runtime, TransactionMode,
17};
18use criterion::{Criterion, measurement::WallTime};
19use oorandom::Rand64;
20use serde::Deserialize;
21use serde_json::Value;
22use std::any::Any;
23use std::cell::RefCell;
24use std::fs;
25use std::path::{Path, PathBuf};
26use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
27
28/// Re-export of `std::hint::black_box`, which helps keep the optimizer from removing the work
29/// you intend to measure.
30///
31/// ```ignore
32/// use pgrx::prelude::*;
33/// use pgrx_bench::{Bencher, black_box};
34///
35/// #[pg_bench]
36/// fn bench_parse_uuid(b: &mut Bencher) {
37///     let input = "550e8400-e29b-41d4-a716-446655440000";
38///     b.iter(|| crate::parse_uuid(black_box(input)));
39/// }
40/// ```
41pub use std::hint::black_box;
42
43/// Re-export of Criterion's batching strategy enum for `Bencher::iter_batched`.
44///
45/// ```ignore
46/// use pgrx_bench::{BatchSize, Bencher};
47///
48/// #[pg_bench]
49/// fn bench_transform_rows(b: &mut Bencher) {
50///     b.iter_batched(
51///         || (0..100).collect::<Vec<i32>>(),
52///         |rows| rows.into_iter().map(|value| value * 2).collect::<Vec<_>>(),
53///         BatchSize::SmallInput,
54///     );
55/// }
56/// ```
57pub use criterion::BatchSize;
58
// NOTE(review): the `DEFAULT_*` values are not read by any code visible in this part of the
// file; presumably they seed `BenchConfig` when a benchmark does not override a setting —
// confirm against the code that builds `BenchConfig`.

/// Default for the value passed to `Criterion::sample_size`.
const DEFAULT_SAMPLE_SIZE: usize = 100;
/// Default measurement time, in milliseconds.
const DEFAULT_MEASUREMENT_TIME_MS: u64 = 5_000;
/// Default warm-up time, in milliseconds.
const DEFAULT_WARM_UP_TIME_MS: u64 = 3_000;
/// Default number of bootstrap resamples.
const DEFAULT_NRESAMPLES: usize = 100_000;
/// Default noise threshold used when labelling a change estimate.
const DEFAULT_NOISE_THRESHOLD: f64 = 0.01;
/// Default significance level the comparison p-value is tested against.
const DEFAULT_SIGNIFICANCE_LEVEL: f64 = 0.05;

/// `artifact_kind` tag for the payload read from Criterion's `benchmark.json`.
const ARTIFACT_KIND_BENCHMARK_JSON: &str = "criterion_benchmark_json";
/// `artifact_kind` tag for the payload read from Criterion's `estimates.json`.
const ARTIFACT_KIND_ESTIMATES_JSON: &str = "criterion_estimates_json";
/// `artifact_kind` tag for the payload read from Criterion's `sample.json`.
const ARTIFACT_KIND_SAMPLE_JSON: &str = "criterion_sample_json";
/// `artifact_kind` tag for the payload read from Criterion's `new/tukey.json`.
const ARTIFACT_KIND_TUKEY_JSON: &str = "criterion_tukey_json";
/// `artifact_kind` tag for the payload read from Criterion's `change/estimates.json`.
const ARTIFACT_KIND_CHANGE_ESTIMATES_JSON: &str = "criterion_change_estimates_json";
71
72/// Timing harness passed to a `#[pg_bench]` benchmark function.
73///
74/// Bench functions register exactly one timing loop with either `Bencher::iter` or
75/// `Bencher::iter_batched`.
76///
77/// ```ignore
78/// use pgrx_bench::{Bencher, black_box};
79///
80/// #[pg_bench]
81/// fn bench_normalize_phrase(b: &mut Bencher) {
82///     let phrase = "the quick brown fox";
83///     b.iter(|| crate::normalize_phrase(black_box(phrase)));
84/// }
85/// ```
86pub struct Bencher<'a> {
87    routine: Option<Routine<'a>>,
88}
89
90enum Routine<'a> {
91    Iter(Box<dyn FnMut() + 'a>),
92    IterBatched {
93        setup: Box<dyn FnMut() -> Box<dyn Any> + 'a>,
94        routine: Box<dyn FnMut(Box<dyn Any>) + 'a>,
95        batch_size: BatchSize,
96    },
97}
98
99impl<'a> Bencher<'a> {
100    #[doc(hidden)]
101    /// Internal constructor used by `#[pg_bench]` wrappers.
102    pub fn new(transaction_mode: TransactionMode) -> Self {
103        let _ = transaction_mode;
104        Self { routine: None }
105    }
106
107    /// Registers a simple timing loop for a benchmark.
108    ///
109    /// Use this when the benchmark body can reuse the same captured inputs for each iteration.
110    ///
111    /// ```ignore
112    /// use pgrx_bench::{Bencher, black_box};
113    ///
114    /// #[pg_bench]
115    /// fn bench_parse_uuid(b: &mut Bencher) {
116    ///     let input = "550e8400-e29b-41d4-a716-446655440000";
117    ///     b.iter(|| crate::parse_uuid(black_box(input)));
118    /// }
119    /// ```
120    pub fn iter<R, F>(&mut self, mut routine: F)
121    where
122        F: FnMut() -> R + 'a,
123    {
124        self.set_routine(Routine::Iter(Box::new(move || {
125            let _ = routine();
126        })));
127    }
128
129    /// Registers a timing loop that performs per-batch setup outside the measured routine body.
130    ///
131    /// Use this when each timing sample needs fresh inputs or temporary state.
132    ///
133    /// ```ignore
134    /// use pgrx_bench::{BatchSize, Bencher};
135    ///
136    /// #[pg_bench]
137    /// fn bench_transform_rows(b: &mut Bencher) {
138    ///     b.iter_batched(
139    ///         || (0..100).collect::<Vec<i32>>(),
140    ///         |rows| rows.into_iter().map(|value| value * 2).collect::<Vec<_>>(),
141    ///         BatchSize::SmallInput,
142    ///     );
143    /// }
144    /// ```
145    pub fn iter_batched<I, R, S, F>(&mut self, mut setup: S, mut routine: F, batch_size: BatchSize)
146    where
147        I: 'static,
148        S: FnMut() -> I + 'a,
149        F: FnMut(I) -> R + 'a,
150    {
151        self.set_routine(Routine::IterBatched {
152            setup: Box::new(move || Box::new(setup()) as Box<dyn Any>),
153            routine: Box::new(move |input| {
154                let input = *input
155                    .downcast::<I>()
156                    .expect("pgrx_bench internal type mismatch for iter_batched input");
157                let _ = routine(input);
158            }),
159            batch_size,
160        });
161    }
162
163    fn set_routine(&mut self, routine: Routine<'a>) {
164        if self.routine.is_some() {
165            panic!("only one bencher timing loop may be declared per #[pg_bench] function");
166        }
167        self.routine = Some(routine);
168    }
169
170    fn into_routine(self) -> Result<Routine<'a>, String> {
171        self.routine.ok_or_else(|| {
172            "benchmark function did not register a timing loop; call b.iter(...) or b.iter_batched(...)"
173                .to_string()
174        })
175    }
176}
177
178fn build_criterion(
179    config: &BenchConfig,
180    output_directory: &Path,
181    has_baseline: bool,
182) -> Criterion<WallTime> {
183    let criterion = Criterion::default()
184        .without_plots()
185        .output_directory(output_directory)
186        .sample_size(config.sample_size)
187        .measurement_time(Duration::from_millis(config.measurement_time_ms))
188        .warm_up_time(Duration::from_millis(config.warm_up_time_ms))
189        .nresamples(config.nresamples)
190        .noise_threshold(config.noise_threshold)
191        .significance_level(config.significance_level);
192
193    if has_baseline {
194        criterion.retain_baseline("base".to_string(), false)
195    } else {
196        criterion.save_baseline("base".to_string())
197    }
198}
199
200fn run_routine<R: Runtime>(
201    criterion_bencher: &mut criterion::Bencher<'_, WallTime>,
202    routine: &mut Routine<'_>,
203    transaction_mode: TransactionMode,
204    runtime: &R,
205) {
206    match routine {
207        Routine::Iter(routine) => match transaction_mode {
208            TransactionMode::Shared => criterion_bencher.iter(routine),
209            TransactionMode::SubtransactionPerBatch
210            | TransactionMode::SubtransactionPerIteration => {
211                criterion_bencher.iter_custom(|iters| {
212                    let started = Instant::now();
213                    for _ in 0..iters {
214                        runtime
215                            .with_subtransaction(|| routine())
216                            .unwrap_or_else(|error| panic!("{error}"));
217                    }
218                    started.elapsed()
219                });
220            }
221        },
222        Routine::IterBatched { setup, routine, batch_size } => {
223            criterion_bencher.iter_custom(|iters| {
224                let started = Instant::now();
225                let mut remaining = iters;
226                let per_batch = iterations_per_batch(*batch_size, iters).max(1);
227
228                while remaining > 0 {
229                    let current_batch = remaining.min(per_batch);
230                    match transaction_mode {
231                        TransactionMode::Shared => {
232                            for _ in 0..current_batch {
233                                let input = setup();
234                                routine(input);
235                            }
236                        }
237                        TransactionMode::SubtransactionPerBatch => {
238                            runtime
239                                .with_subtransaction(|| {
240                                    for _ in 0..current_batch {
241                                        let input = setup();
242                                        routine(input);
243                                    }
244                                })
245                                .unwrap_or_else(|error| panic!("{error}"));
246                        }
247                        TransactionMode::SubtransactionPerIteration => {
248                            for _ in 0..current_batch {
249                                runtime
250                                    .with_subtransaction(|| {
251                                        let input = setup();
252                                        routine(input);
253                                    })
254                                    .unwrap_or_else(|error| panic!("{error}"));
255                            }
256                        }
257                    }
258                    remaining -= current_batch;
259                }
260
261                started.elapsed()
262            });
263        }
264    }
265}
266
267fn iterations_per_batch(batch_size: BatchSize, iters: u64) -> u64 {
268    match batch_size {
269        BatchSize::SmallInput => (iters + 10 - 1) / 10,
270        BatchSize::LargeInput => (iters + 1000 - 1) / 1000,
271        BatchSize::PerIteration => 1,
272        BatchSize::NumBatches(batches) => (iters + batches - 1) / batches,
273        BatchSize::NumIterations(size) => size,
274        BatchSize::__NonExhaustive => panic!("invalid BatchSize"),
275    }
276}
277
278fn parse_benchmark_output(
279    definition: BenchDefinition,
280    root: &Path,
281    baseline_artifacts: Option<&[BenchArtifact]>,
282) -> Result<BenchResult, String> {
283    let report_dir = find_new_report_dir(root)
284        .ok_or_else(|| "criterion did not emit benchmark output".to_string())?;
285    let benchmark_path = report_dir.join("benchmark.json");
286    let estimates_path = report_dir.join("estimates.json");
287    let sample_path = report_dir.join("sample.json");
288
289    let benchmark_json = read_json_value(&benchmark_path)?;
290    let estimates_json = read_json_value(&estimates_path)?;
291    let sample_json = read_json_value(&sample_path)?;
292
293    let benchmark = serde_json::from_value::<CriterionBenchmarkJson>(benchmark_json.clone())
294        .map_err(|e| format!("failed to parse {}: {e}", benchmark_path.display()))?;
295    let estimates = serde_json::from_value::<CriterionEstimatesJson>(estimates_json.clone())
296        .map_err(|e| format!("failed to parse {}: {e}", estimates_path.display()))?;
297    let samples = serde_json::from_value::<CriterionSampleJson>(sample_json.clone())
298        .map_err(|e| format!("failed to parse {}: {e}", sample_path.display()))?;
299    let comparison = parse_comparison(
300        report_dir.parent().expect("criterion report dir should always have a parent"),
301        baseline_artifacts,
302        &samples,
303        &definition.config,
304    )?;
305    let artifacts = collect_artifacts(
306        report_dir.parent().expect("criterion report dir should always have a parent"),
307        &benchmark_json,
308        &estimates_json,
309        &sample_json,
310    )?;
311
312    Ok(BenchResult {
313        schema_name: definition.schema_name.to_string(),
314        bench_name: definition.bench_name.to_string(),
315        function_name: definition.function_name.to_string(),
316        setup_function: definition.setup_function.map(str::to_string),
317        transaction_mode: definition.transaction_mode,
318        source_file: definition.source_file.to_string(),
319        source_line: definition.source_line,
320        criterion_config: definition.config,
321        status: BenchStatus::Ok,
322        error_text: None,
323        benchmark: Some(CriterionBenchmark {
324            group_id: benchmark.group_id,
325            function_id: benchmark.function_id,
326            value_str: benchmark.value_str,
327            full_id: benchmark.full_id,
328            directory_name: benchmark.directory_name,
329            title: benchmark.title,
330        }),
331        estimates: estimates.into_estimates(),
332        samples: samples.into_samples()?,
333        throughput: benchmark.throughput.and_then(parse_throughput),
334        comparison,
335        artifacts,
336    })
337}
338
339fn read_json_file<T>(path: &Path) -> Result<T, String>
340where
341    T: for<'de> Deserialize<'de>,
342{
343    let raw =
344        fs::read_to_string(path).map_err(|e| format!("failed to read {}: {e}", path.display()))?;
345    serde_json::from_str(&raw).map_err(|e| format!("failed to parse {}: {e}", path.display()))
346}
347
348fn read_json_value(path: &Path) -> Result<Value, String> {
349    read_json_file(path)
350}
351
352fn write_json_value(path: &Path, value: &Value) -> Result<(), String> {
353    let raw = serde_json::to_vec_pretty(value)
354        .map_err(|error| format!("failed to serialize {}: {error}", path.display()))?;
355    fs::write(path, raw).map_err(|error| format!("failed to write {}: {error}", path.display()))
356}
357
358fn collect_artifacts(
359    benchmark_root: &Path,
360    benchmark_json: &Value,
361    estimates_json: &Value,
362    sample_json: &Value,
363) -> Result<Vec<BenchArtifact>, String> {
364    let mut artifacts = Vec::new();
365    push_json_artifact(&mut artifacts, ARTIFACT_KIND_BENCHMARK_JSON, benchmark_json.clone());
366    push_json_artifact(&mut artifacts, ARTIFACT_KIND_ESTIMATES_JSON, estimates_json.clone());
367    push_json_artifact(&mut artifacts, ARTIFACT_KIND_SAMPLE_JSON, sample_json.clone());
368
369    let tukey_path = benchmark_root.join("new").join("tukey.json");
370    if tukey_path.exists() {
371        push_json_artifact(&mut artifacts, ARTIFACT_KIND_TUKEY_JSON, read_json_value(&tukey_path)?);
372    }
373
374    let change_estimates_path = benchmark_root.join("change").join("estimates.json");
375    if change_estimates_path.exists() {
376        push_json_artifact(
377            &mut artifacts,
378            ARTIFACT_KIND_CHANGE_ESTIMATES_JSON,
379            read_json_value(&change_estimates_path)?,
380        );
381    }
382
383    Ok(artifacts)
384}
385
386fn push_json_artifact(
387    artifacts: &mut Vec<BenchArtifact>,
388    artifact_kind: &str,
389    payload_json: Value,
390) {
391    artifacts.push(BenchArtifact {
392        artifact_kind: artifact_kind.to_string(),
393        media_type: "application/json".to_string(),
394        payload_json,
395    });
396}
397
398fn materialize_baseline_artifacts(
399    output_directory: &Path,
400    baseline_artifacts: &[BenchArtifact],
401) -> Result<(), String> {
402    let benchmark_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_BENCHMARK_JSON)
403        .ok_or_else(|| "persisted Criterion baseline is missing benchmark.json".to_string())?;
404    let estimates_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_ESTIMATES_JSON)
405        .ok_or_else(|| "persisted Criterion baseline is missing estimates.json".to_string())?;
406    let sample_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_SAMPLE_JSON)
407        .ok_or_else(|| "persisted Criterion baseline is missing sample.json".to_string())?;
408
409    let directory_name = baseline_directory_name(benchmark_json)?;
410    let baseline_dir = output_directory.join(directory_name).join("base");
411    fs::create_dir_all(&baseline_dir)
412        .map_err(|error| format!("failed to create {}: {error}", baseline_dir.display()))?;
413
414    write_json_value(&baseline_dir.join("benchmark.json"), benchmark_json)?;
415    write_json_value(&baseline_dir.join("estimates.json"), estimates_json)?;
416    write_json_value(&baseline_dir.join("sample.json"), sample_json)?;
417
418    if let Some(tukey_json) = find_artifact(baseline_artifacts, ARTIFACT_KIND_TUKEY_JSON) {
419        write_json_value(&baseline_dir.join("tukey.json"), tukey_json)?;
420    }
421
422    Ok(())
423}
424
425fn find_artifact<'a>(artifacts: &'a [BenchArtifact], artifact_kind: &str) -> Option<&'a Value> {
426    artifacts
427        .iter()
428        .find(|artifact| artifact.artifact_kind == artifact_kind)
429        .map(|artifact| &artifact.payload_json)
430}
431
432fn baseline_directory_name(benchmark_json: &Value) -> Result<String, String> {
433    let benchmark = serde_json::from_value::<CriterionBenchmarkJson>(benchmark_json.clone())
434        .map_err(|error| format!("failed to parse persisted benchmark.json: {error}"))?;
435    Ok(benchmark.directory_name)
436}
437
438fn parse_comparison(
439    benchmark_root: &Path,
440    baseline_artifacts: Option<&[BenchArtifact]>,
441    current_samples: &CriterionSampleJson,
442    config: &BenchConfig,
443) -> Result<Option<BenchComparison>, String> {
444    let Some(baseline_artifacts) = baseline_artifacts else {
445        return Ok(None);
446    };
447
448    let change_estimates_path = benchmark_root.join("change").join("estimates.json");
449    if !change_estimates_path.exists() {
450        return Ok(None);
451    }
452
453    let change_estimates_json = read_json_value(&change_estimates_path)?;
454    let change_estimates = serde_json::from_value::<CriterionChangeEstimatesJson>(
455        change_estimates_json,
456    )
457    .map_err(|error| format!("failed to parse {}: {error}", change_estimates_path.display()))?;
458    let baseline_sample_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_SAMPLE_JSON)
459        .ok_or_else(|| "persisted Criterion baseline is missing sample.json".to_string())?;
460    let baseline_samples =
461        serde_json::from_value::<CriterionSampleJson>(baseline_sample_json.clone())
462            .map_err(|error| format!("failed to parse persisted sample.json: {error}"))?;
463
464    let p_value = criterion_p_value(
465        &current_samples.avg_times()?,
466        &baseline_samples.avg_times()?,
467        config.nresamples,
468    )?;
469    let summary = criterion_summary_from_change_estimate(
470        &change_estimates.mean,
471        p_value,
472        config.significance_level,
473        config.noise_threshold,
474    );
475
476    Ok(Some(BenchComparison {
477        mean: change_estimates.mean.into_relative_estimate("mean"),
478        median: change_estimates.median.into_relative_estimate("median"),
479        p_value,
480        significance_level: config.significance_level,
481        noise_threshold: config.noise_threshold,
482        summary,
483    }))
484}
485
486fn criterion_summary_from_change_estimate(
487    estimate: &CriterionEstimateJson,
488    p_value: f64,
489    significance_level: f64,
490    noise_threshold: f64,
491) -> String {
492    // Match Criterion's reporting rule: significance is based on the bootstrap T distribution,
493    // then the final label is chosen from the relative mean estimate's confidence interval.
494    if p_value >= significance_level {
495        return "No change in performance detected.".to_string();
496    }
497
498    let lower_bound = estimate.confidence_interval.lower_bound;
499    let upper_bound = estimate.confidence_interval.upper_bound;
500
501    if lower_bound < -noise_threshold && upper_bound < -noise_threshold {
502        "Performance has improved.".to_string()
503    } else if lower_bound > noise_threshold && upper_bound > noise_threshold {
504        "Performance has regressed.".to_string()
505    } else {
506        "Change within noise threshold.".to_string()
507    }
508}
509
/// Searches `root` recursively for Criterion's `new/` report directory.
///
/// A directory qualifies when it is named `new` and directly contains `benchmark.json`,
/// `estimates.json` and `sample.json`. Directories that cannot be read are skipped. Returns
/// `None` when nothing qualifies.
fn find_new_report_dir(root: &Path) -> Option<PathBuf> {
    let mut pending = vec![root.to_path_buf()];

    while let Some(directory) = pending.pop() {
        let entries = match fs::read_dir(&directory) {
            Ok(entries) => entries,
            Err(_) => continue,
        };

        let mut saw_benchmark = false;
        let mut saw_estimates = false;
        let mut saw_samples = false;
        for entry in entries.flatten() {
            let child = entry.path();
            if child.is_dir() {
                pending.push(child);
                continue;
            }

            let file_name = entry.file_name();
            saw_benchmark |= file_name == "benchmark.json";
            saw_estimates |= file_name == "estimates.json";
            saw_samples |= file_name == "sample.json";
        }

        // Criterion writes the same core files into both `base/` and `new/`. Only `new/` holds
        // the current run; reading `base/` would mix the baseline's absolute estimates with the
        // current run's relative change output.
        let named_new = directory.file_name().and_then(|name| name.to_str()) == Some("new");
        if named_new && saw_benchmark && saw_estimates && saw_samples {
            return Some(directory);
        }
    }

    None
}
541
542fn parse_throughput(value: Value) -> Option<BenchThroughput> {
543    let object = value.as_object()?;
544    let (kind, value) = object.iter().next()?;
545    value.as_f64().map(|value| BenchThroughput { kind: kind.to_lowercase(), value })
546}
547
/// Const-evaluable check that `value` ends with the bytes of `suffix`.
///
/// An empty `suffix` always matches.
const fn ends_with(value: &[u8], suffix: &[u8]) -> bool {
    let value_len = value.len();
    let suffix_len = suffix.len();
    if value_len < suffix_len {
        return false;
    }

    // Walk the suffix from its last byte towards its first, comparing against the tail.
    let tail_start = value_len - suffix_len;
    let mut remaining = suffix_len;
    while remaining > 0 {
        remaining -= 1;
        if value[tail_start + remaining] != suffix[remaining] {
            return false;
        }
    }
    true
}
563
/// Const-evaluable byte-slice equality.
const fn equals(left: &[u8], right: &[u8]) -> bool {
    let len = left.len();
    if len != right.len() {
        return false;
    }

    // Compare from the last byte down to the first.
    let mut cursor = len;
    while cursor > 0 {
        cursor -= 1;
        if left[cursor] != right[cursor] {
            return false;
        }
    }
    true
}
578
579#[derive(Debug, Deserialize)]
580struct CriterionBenchmarkJson {
581    group_id: String,
582    function_id: Option<String>,
583    value_str: Option<String>,
584    throughput: Option<Value>,
585    full_id: String,
586    directory_name: String,
587    title: String,
588}
589
590#[derive(Debug, Deserialize)]
591struct CriterionEstimatesJson {
592    mean: Option<CriterionEstimateJson>,
593    median: Option<CriterionEstimateJson>,
594    median_abs_dev: Option<CriterionEstimateJson>,
595    slope: Option<CriterionEstimateJson>,
596    std_dev: Option<CriterionEstimateJson>,
597}
598
599impl CriterionEstimatesJson {
600    fn into_estimates(self) -> Vec<BenchEstimate> {
601        let mut estimates = Vec::new();
602        push_estimate(&mut estimates, "mean", self.mean);
603        push_estimate(&mut estimates, "median", self.median);
604        push_estimate(&mut estimates, "median_abs_dev", self.median_abs_dev);
605        push_estimate(&mut estimates, "slope", self.slope);
606        push_estimate(&mut estimates, "std_dev", self.std_dev);
607        estimates
608    }
609}
610
611fn push_estimate(
612    estimates: &mut Vec<BenchEstimate>,
613    estimate_kind: &str,
614    estimate: Option<CriterionEstimateJson>,
615) {
616    if let Some(estimate) = estimate {
617        estimates.push(BenchEstimate {
618            estimate_kind: estimate_kind.to_string(),
619            point_estimate_ns: estimate.point_estimate,
620            standard_error_ns: Some(estimate.standard_error),
621            confidence_level: Some(estimate.confidence_interval.confidence_level),
622            ci_lower_bound_ns: Some(estimate.confidence_interval.lower_bound),
623            ci_upper_bound_ns: Some(estimate.confidence_interval.upper_bound),
624        });
625    }
626}
627
628#[derive(Debug, Deserialize)]
629struct CriterionChangeEstimatesJson {
630    mean: CriterionEstimateJson,
631    median: CriterionEstimateJson,
632}
633
634#[derive(Debug, Deserialize)]
635struct CriterionEstimateJson {
636    confidence_interval: CriterionConfidenceIntervalJson,
637    point_estimate: f64,
638    standard_error: f64,
639}
640
641impl CriterionEstimateJson {
642    fn into_relative_estimate(self, estimate_kind: &str) -> BenchComparisonEstimate {
643        BenchComparisonEstimate {
644            estimate_kind: estimate_kind.to_string(),
645            point_estimate: self.point_estimate,
646            standard_error: self.standard_error,
647            confidence_level: self.confidence_interval.confidence_level,
648            ci_lower_bound: self.confidence_interval.lower_bound,
649            ci_upper_bound: self.confidence_interval.upper_bound,
650        }
651    }
652}
653
654#[derive(Debug, Deserialize)]
655struct CriterionConfidenceIntervalJson {
656    confidence_level: f64,
657    lower_bound: f64,
658    upper_bound: f64,
659}
660
661#[derive(Debug, Clone, Deserialize)]
662struct CriterionSampleJson {
663    iters: Vec<CriterionIterationCount>,
664    times: Vec<f64>,
665}
666
667impl CriterionSampleJson {
668    fn into_samples(self) -> Result<Vec<BenchSample>, String> {
669        self.iters
670            .into_iter()
671            .zip(self.times)
672            .enumerate()
673            .map(|(sample_index, (iteration_count, elapsed_ns))| {
674                Ok(BenchSample {
675                    sample_index,
676                    iteration_count: iteration_count.into_u64()?,
677                    elapsed_ns,
678                })
679            })
680            .collect()
681    }
682
683    fn avg_times(&self) -> Result<Vec<f64>, String> {
684        self.iters
685            .iter()
686            .zip(&self.times)
687            .map(|(iteration_count, elapsed_ns)| {
688                let iteration_count = iteration_count.as_f64()?;
689                if iteration_count == 0.0 {
690                    return Err("criterion sample iteration count was zero".to_string());
691                }
692                Ok(*elapsed_ns / iteration_count)
693            })
694            .collect()
695    }
696}
697
698#[derive(Debug, Clone, Deserialize)]
699#[serde(untagged)]
700enum CriterionIterationCount {
701    Integer(u64),
702    Float(f64),
703}
704
705impl CriterionIterationCount {
706    fn into_u64(self) -> Result<u64, String> {
707        match self {
708            Self::Integer(value) => Ok(value),
709            Self::Float(value)
710                if value.is_finite()
711                    && value >= 0.0
712                    && value.fract() == 0.0
713                    && value <= u64::MAX as f64 =>
714            {
715                Ok(value as u64)
716            }
717            Self::Float(value) => Err(format!(
718                "criterion sample iteration count `{value}` is not a non-negative whole number"
719            )),
720        }
721    }
722
723    fn as_f64(&self) -> Result<f64, String> {
724        match self {
725            Self::Integer(value) => Ok(*value as f64),
726            Self::Float(value)
727                if value.is_finite()
728                    && *value >= 0.0
729                    && value.fract() == 0.0
730                    && *value <= u64::MAX as f64 =>
731            {
732                Ok(*value)
733            }
734            Self::Float(value) => Err(format!(
735                "criterion sample iteration count `{value}` is not a non-negative whole number"
736            )),
737        }
738    }
739}
740
741thread_local! {
742    static SEED_RNG: RefCell<Rand64> = RefCell::new(Rand64::new(
743        SystemTime::now()
744            .duration_since(UNIX_EPOCH)
745            .unwrap_or_else(|_| panic!("time went backwards"))
746            .as_millis(),
747    ));
748}
749
750fn criterion_p_value(
751    current_samples: &[f64],
752    baseline_samples: &[f64],
753    nresamples: usize,
754) -> Result<f64, String> {
755    criterion_p_value_with_rng(current_samples, baseline_samples, nresamples, criterion_new_rng())
756}
757
758fn criterion_p_value_with_rng(
759    current_samples: &[f64],
760    baseline_samples: &[f64],
761    nresamples: usize,
762    rng: Rand64,
763) -> Result<f64, String> {
764    if current_samples.len() < 2 || baseline_samples.len() < 2 {
765        return Err("criterion comparison requires at least two samples in each run".to_string());
766    }
767
768    let t_statistic = sample_t(current_samples, baseline_samples);
769    if !t_statistic.is_finite() {
770        return Err("criterion comparison could not compute a finite T statistic".to_string());
771    }
772    let mut combined = Vec::with_capacity(current_samples.len() + baseline_samples.len());
773    combined.extend_from_slice(current_samples);
774    combined.extend_from_slice(baseline_samples);
775
776    let mut resampler = CriterionResamples::with_rng(combined, rng);
777    let mut t_distribution = Vec::with_capacity(nresamples);
778    // Criterion derives `p_value` from a mixed-bootstrap T distribution rather than from a
779    // closed-form Welch test. We mirror that private 0.5.1 implementation here so the CLI can
780    // report the same style of comparison data while still persisting exact Criterion JSON files.
781    for _ in 0..nresamples {
782        let resample = resampler.next();
783        let split = current_samples.len();
784        let t_value = sample_t(&resample[..split], &resample[split..]);
785        if t_value.is_finite() {
786            t_distribution.push(t_value);
787        }
788    }
789
790    if t_distribution.is_empty() {
791        return Err("criterion comparison produced an empty T distribution".to_string());
792    }
793
794    let hits = t_distribution.iter().filter(|value| **value < t_statistic).count();
795    let tails = 2.0;
796    Ok((usize::min(hits, t_distribution.len() - hits) as f64 / t_distribution.len() as f64) * tails)
797}
798
799fn sample_t(current_samples: &[f64], baseline_samples: &[f64]) -> f64 {
800    let current_mean = sample_mean(current_samples);
801    let baseline_mean = sample_mean(baseline_samples);
802    let current_variance = sample_variance(current_samples, current_mean);
803    let baseline_variance = sample_variance(baseline_samples, baseline_mean);
804    let denominator = (current_variance / current_samples.len() as f64
805        + baseline_variance / baseline_samples.len() as f64)
806        .sqrt();
807
808    (current_mean - baseline_mean) / denominator
809}
810
/// Arithmetic mean of `values`; NaN for an empty slice.
fn sample_mean(values: &[f64]) -> f64 {
    let total: f64 = values.iter().sum();
    total / values.len() as f64
}
814
/// Unbiased sample variance of `values` around the supplied `mean` (divides by `n - 1`).
///
/// Callers must pass at least one value; with exactly one value the result is NaN.
fn sample_variance(values: &[f64], mean: f64) -> f64 {
    let degrees_of_freedom = (values.len() - 1) as f64;
    let sum_of_squares: f64 = values.iter().map(|&value| (value - mean).powi(2)).sum();
    sum_of_squares / degrees_of_freedom
}
819
820struct CriterionResamples {
821    rng: Rand64,
822    sample: Vec<f64>,
823    stage: Vec<f64>,
824}
825
826impl CriterionResamples {
827    fn with_rng(sample: Vec<f64>, rng: Rand64) -> Self {
828        let sample_len = sample.len();
829        Self { rng, sample, stage: Vec::with_capacity(sample_len) }
830    }
831
832    fn next(&mut self) -> &[f64] {
833        if self.stage.is_empty() {
834            self.stage.resize(self.sample.len(), 0.0);
835        }
836
837        for slot in &mut self.stage {
838            let index = self.rng.rand_range(0..self.sample.len() as u64) as usize;
839            *slot = self.sample[index];
840        }
841
842        &self.stage
843    }
844}
845
846fn criterion_new_rng() -> Rand64 {
847    SEED_RNG.with(|rng| {
848        let mut rng = rng.borrow_mut();
849        let seed = ((rng.rand_u64() as u128) << 64) | (rng.rand_u64() as u128);
850        Rand64::new(seed)
851    })
852}
853
854#[cfg(test)]
855mod tests {
856    use super::*;
857    use oorandom::Rand64;
858    use serde_json::json;
859    use std::fs;
860    use tempfile::TempDir;
861
862    #[test]
863    fn find_new_report_dir_prefers_criterion_new_directory() {
864        let tempdir = TempDir::new().expect("tempdir");
865        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
866        let base_dir = benchmark_root.join("base");
867        let new_dir = benchmark_root.join("new");
868
869        fs::create_dir_all(&base_dir).expect("base dir");
870        fs::create_dir_all(&new_dir).expect("new dir");
871
872        for directory in [&base_dir, &new_dir] {
873            fs::write(directory.join("benchmark.json"), "{}").expect("benchmark.json");
874            fs::write(directory.join("estimates.json"), "{}").expect("estimates.json");
875            fs::write(directory.join("sample.json"), "{}").expect("sample.json");
876        }
877
878        let discovered = find_new_report_dir(tempdir.path()).expect("criterion new dir");
879        assert_eq!(discovered, new_dir);
880    }
881
882    #[test]
883    fn criterion_summary_matches_expected_labels() {
884        let improved = estimate_json(-1.5, -1.2, -1.0);
885        assert_eq!(
886            criterion_summary_from_change_estimate(&improved, 0.01, 0.05, 0.01),
887            "Performance has improved."
888        );
889
890        let regressed = estimate_json(1.5, 1.2, 1.8);
891        assert_eq!(
892            criterion_summary_from_change_estimate(&regressed, 0.01, 0.05, 0.01),
893            "Performance has regressed."
894        );
895
896        let within_noise = estimate_json(0.004, -0.009, 0.008);
897        assert_eq!(
898            criterion_summary_from_change_estimate(&within_noise, 0.01, 0.05, 0.01),
899            "Change within noise threshold."
900        );
901
902        let not_significant = estimate_json(1.5, 1.2, 1.8);
903        assert_eq!(
904            criterion_summary_from_change_estimate(&not_significant, 0.75, 0.05, 0.01),
905            "No change in performance detected."
906        );
907    }
908
909    #[test]
910    fn materialize_baseline_artifacts_writes_criterion_base_layout() {
911        let tempdir = TempDir::new().expect("tempdir");
912        let artifacts = baseline_artifacts("bench_normalize_phrase");
913
914        materialize_baseline_artifacts(tempdir.path(), &artifacts).expect("materialize baseline");
915
916        let base_dir = tempdir.path().join("bench_normalize_phrase").join("base");
917        assert!(base_dir.join("benchmark.json").exists());
918        assert!(base_dir.join("estimates.json").exists());
919        assert!(base_dir.join("sample.json").exists());
920        assert!(base_dir.join("tukey.json").exists());
921    }
922
923    #[test]
924    fn collect_artifacts_includes_change_estimates_when_present() {
925        let tempdir = TempDir::new().expect("tempdir");
926        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
927        let new_dir = benchmark_root.join("new");
928        let change_dir = benchmark_root.join("change");
929        fs::create_dir_all(&new_dir).expect("new dir");
930        fs::create_dir_all(&change_dir).expect("change dir");
931
932        let benchmark_json = benchmark_json("bench_normalize_phrase");
933        let estimates_json = absolute_estimates_json();
934        let sample_json = sample_json(&[1, 2, 3], &[10.0, 20.0, 30.0]);
935
936        write_json_value(&new_dir.join("benchmark.json"), &benchmark_json).expect("benchmark");
937        write_json_value(&new_dir.join("estimates.json"), &estimates_json).expect("estimates");
938        write_json_value(&new_dir.join("sample.json"), &sample_json).expect("sample");
939        write_json_value(&new_dir.join("tukey.json"), &json!({"a": 1})).expect("tukey");
940        write_json_value(&change_dir.join("estimates.json"), &change_estimates_json(1.5, 1.2, 1.8))
941            .expect("change estimates");
942
943        let artifacts =
944            collect_artifacts(&benchmark_root, &benchmark_json, &estimates_json, &sample_json)
945                .expect("collect artifacts");
946        let artifact_kinds =
947            artifacts.iter().map(|artifact| artifact.artifact_kind.as_str()).collect::<Vec<_>>();
948
949        assert!(artifact_kinds.contains(&ARTIFACT_KIND_BENCHMARK_JSON));
950        assert!(artifact_kinds.contains(&ARTIFACT_KIND_ESTIMATES_JSON));
951        assert!(artifact_kinds.contains(&ARTIFACT_KIND_SAMPLE_JSON));
952        assert!(artifact_kinds.contains(&ARTIFACT_KIND_TUKEY_JSON));
953        assert!(artifact_kinds.contains(&ARTIFACT_KIND_CHANGE_ESTIMATES_JSON));
954    }
955
956    #[test]
957    fn criterion_p_value_with_rng_detects_large_regression() {
958        let current = [1000.0, 1001.0, 1002.5, 998.0, 1003.0, 999.5];
959        let baseline = [1.0, 2.0, 2.5, 1.5, 3.0, 2.2];
960
961        let p_value =
962            criterion_p_value_with_rng(&current, &baseline, 10_000, Rand64::new(42)).unwrap();
963        assert!(p_value < 0.05, "expected a significant difference, got p={p_value}");
964    }
965
966    #[test]
967    fn criterion_p_value_with_rng_is_high_for_identical_samples() {
968        let sample = [10.0, 12.0, 13.5, 11.5, 9.5, 14.0];
969
970        let p_value =
971            criterion_p_value_with_rng(&sample, &sample, 10_000, Rand64::new(42)).expect("p value");
972        assert!(p_value >= 0.5, "expected no significant difference, got p={p_value}");
973    }
974
975    #[test]
976    fn parse_comparison_uses_persisted_baseline_artifacts() {
977        let tempdir = TempDir::new().expect("tempdir");
978        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
979        let change_dir = benchmark_root.join("change");
980        fs::create_dir_all(&change_dir).expect("change dir");
981
982        write_json_value(&change_dir.join("estimates.json"), &change_estimates_json(1.5, 1.2, 1.8))
983            .expect("change estimates");
984
985        let current_samples = serde_json::from_value::<CriterionSampleJson>(sample_json(
986            &[1, 1, 1, 1, 1, 1],
987            &[1000.0, 1001.0, 1002.0, 998.0, 1003.0, 999.0],
988        ))
989        .expect("current samples");
990        let config = BenchConfig {
991            sample_size: 100,
992            measurement_time_ms: 5_000,
993            warm_up_time_ms: 3_000,
994            nresamples: 10_000,
995            noise_threshold: 0.01,
996            significance_level: 0.05,
997        };
998
999        let comparison = parse_comparison(
1000            &benchmark_root,
1001            Some(&baseline_artifacts("bench_normalize_phrase")),
1002            &current_samples,
1003            &config,
1004        )
1005        .expect("comparison")
1006        .expect("comparison payload");
1007
1008        assert_eq!(comparison.summary, "Performance has regressed.");
1009        assert!(comparison.p_value < 0.05, "expected a significant difference");
1010        assert!(comparison.mean.point_estimate > 1.0);
1011    }
1012
1013    fn baseline_artifacts(directory_name: &str) -> Vec<BenchArtifact> {
1014        vec![
1015            BenchArtifact {
1016                artifact_kind: ARTIFACT_KIND_BENCHMARK_JSON.to_string(),
1017                media_type: "application/json".to_string(),
1018                payload_json: benchmark_json(directory_name),
1019            },
1020            BenchArtifact {
1021                artifact_kind: ARTIFACT_KIND_ESTIMATES_JSON.to_string(),
1022                media_type: "application/json".to_string(),
1023                payload_json: absolute_estimates_json(),
1024            },
1025            BenchArtifact {
1026                artifact_kind: ARTIFACT_KIND_SAMPLE_JSON.to_string(),
1027                media_type: "application/json".to_string(),
1028                payload_json: sample_json(&[1, 1, 1, 1, 1, 1], &[1.0, 2.0, 2.5, 1.5, 3.0, 2.2]),
1029            },
1030            BenchArtifact {
1031                artifact_kind: ARTIFACT_KIND_TUKEY_JSON.to_string(),
1032                media_type: "application/json".to_string(),
1033                payload_json: json!({"fences": [0.0, 1.0, 2.0, 3.0]}),
1034            },
1035        ]
1036    }
1037
1038    fn benchmark_json(directory_name: &str) -> Value {
1039        json!({
1040            "group_id": "bench_normalize_phrase",
1041            "function_id": null,
1042            "value_str": null,
1043            "throughput": null,
1044            "full_id": "bench_normalize_phrase",
1045            "directory_name": directory_name,
1046            "title": "bench_normalize_phrase",
1047        })
1048    }
1049
1050    fn absolute_estimates_json() -> Value {
1051        json!({
1052            "mean": estimate_value(300.0, 290.0, 310.0),
1053            "median": estimate_value(295.0, 285.0, 305.0),
1054            "median_abs_dev": estimate_value(5.0, 4.0, 6.0),
1055            "slope": estimate_value(280.0, 270.0, 290.0),
1056            "std_dev": estimate_value(8.0, 7.0, 9.0),
1057        })
1058    }
1059
1060    fn change_estimates_json(point: f64, lower: f64, upper: f64) -> Value {
1061        json!({
1062            "mean": estimate_value(point, lower, upper),
1063            "median": estimate_value(point, lower, upper),
1064        })
1065    }
1066
1067    fn estimate_value(point: f64, lower: f64, upper: f64) -> Value {
1068        json!({
1069            "confidence_interval": {
1070                "confidence_level": 0.95,
1071                "lower_bound": lower,
1072                "upper_bound": upper,
1073            },
1074            "point_estimate": point,
1075            "standard_error": 0.01,
1076        })
1077    }
1078
1079    fn estimate_json(point: f64, lower: f64, upper: f64) -> CriterionEstimateJson {
1080        serde_json::from_value(estimate_value(point, lower, upper)).expect("estimate")
1081    }
1082
1083    fn sample_json(iters: &[u64], times: &[f64]) -> Value {
1084        json!({
1085            "iters": iters,
1086            "times": times,
1087        })
1088    }
1089}