1pub mod pgrx;
12
13use crate::pgrx::{
14 BenchArtifact, BenchComparison, BenchComparisonEstimate, BenchConfig, BenchDefinition,
15 BenchEstimate, BenchResult, BenchSample, BenchStatus, BenchThroughput, CriterionBenchmark,
16 Runtime, TransactionMode,
17};
18use criterion::{Criterion, measurement::WallTime};
19use oorandom::Rand64;
20use serde::Deserialize;
21use serde_json::Value;
22use std::any::Any;
23use std::cell::RefCell;
24use std::fs;
25use std::path::{Path, PathBuf};
26use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
27
28pub use std::hint::black_box;
42
43pub use criterion::BatchSize;
58
// Fallback Criterion settings. The values match the defaults documented for
// `criterion::Criterion`. Nothing in this part of the file reads them, so they
// are presumably consumed by the `pgrx` submodule when a benchmark does not
// override a setting (TODO confirm).
const DEFAULT_SAMPLE_SIZE: usize = 100;
const DEFAULT_MEASUREMENT_TIME_MS: u64 = 5_000;
const DEFAULT_WARM_UP_TIME_MS: u64 = 3_000;
const DEFAULT_NRESAMPLES: usize = 100_000;
const DEFAULT_NOISE_THRESHOLD: f64 = 0.01;
const DEFAULT_SIGNIFICANCE_LEVEL: f64 = 0.05;

// Values stored in `BenchArtifact::artifact_kind`. Each one identifies which
// Criterion JSON file a persisted payload was read from, so a baseline can be
// written back to disk under the matching file name later.
const ARTIFACT_KIND_BENCHMARK_JSON: &str = "criterion_benchmark_json";
const ARTIFACT_KIND_ESTIMATES_JSON: &str = "criterion_estimates_json";
const ARTIFACT_KIND_SAMPLE_JSON: &str = "criterion_sample_json";
const ARTIFACT_KIND_TUKEY_JSON: &str = "criterion_tukey_json";
const ARTIFACT_KIND_CHANGE_ESTIMATES_JSON: &str = "criterion_change_estimates_json";
71
/// Records the single timing loop declared by a benchmark function.
///
/// Calling `iter` or `iter_batched` only stores the routine; this type never
/// executes it itself.
pub struct Bencher<'a> {
    /// The registered timing loop, or `None` until `iter`/`iter_batched` is called.
    routine: Option<Routine<'a>>,
}
89
/// Type-erased form of the timing loop registered on a `Bencher`.
enum Routine<'a> {
    /// A plain routine with no per-iteration input; its return value has
    /// already been discarded by the wrapping closure.
    Iter(Box<dyn FnMut() + 'a>),
    /// A routine that consumes a fresh input produced by `setup` on every call.
    IterBatched {
        /// Produces one input, boxed as `dyn Any` so the enum needs no type parameter.
        setup: Box<dyn FnMut() -> Box<dyn Any> + 'a>,
        /// Downcasts the boxed input back to its concrete type and runs the routine.
        routine: Box<dyn FnMut(Box<dyn Any>) + 'a>,
        /// Criterion batching hint; converted to a concrete batch length at run time.
        batch_size: BatchSize,
    },
}
98
99impl<'a> Bencher<'a> {
100 #[doc(hidden)]
101 pub fn new(transaction_mode: TransactionMode) -> Self {
103 let _ = transaction_mode;
104 Self { routine: None }
105 }
106
107 pub fn iter<R, F>(&mut self, mut routine: F)
121 where
122 F: FnMut() -> R + 'a,
123 {
124 self.set_routine(Routine::Iter(Box::new(move || {
125 let _ = routine();
126 })));
127 }
128
129 pub fn iter_batched<I, R, S, F>(&mut self, mut setup: S, mut routine: F, batch_size: BatchSize)
146 where
147 I: 'static,
148 S: FnMut() -> I + 'a,
149 F: FnMut(I) -> R + 'a,
150 {
151 self.set_routine(Routine::IterBatched {
152 setup: Box::new(move || Box::new(setup()) as Box<dyn Any>),
153 routine: Box::new(move |input| {
154 let input = *input
155 .downcast::<I>()
156 .expect("pgrx_bench internal type mismatch for iter_batched input");
157 let _ = routine(input);
158 }),
159 batch_size,
160 });
161 }
162
163 fn set_routine(&mut self, routine: Routine<'a>) {
164 if self.routine.is_some() {
165 panic!("only one bencher timing loop may be declared per #[pg_bench] function");
166 }
167 self.routine = Some(routine);
168 }
169
170 fn into_routine(self) -> Result<Routine<'a>, String> {
171 self.routine.ok_or_else(|| {
172 "benchmark function did not register a timing loop; call b.iter(...) or b.iter_batched(...)"
173 .to_string()
174 })
175 }
176}
177
178fn build_criterion(
179 config: &BenchConfig,
180 output_directory: &Path,
181 has_baseline: bool,
182) -> Criterion<WallTime> {
183 let criterion = Criterion::default()
184 .without_plots()
185 .output_directory(output_directory)
186 .sample_size(config.sample_size)
187 .measurement_time(Duration::from_millis(config.measurement_time_ms))
188 .warm_up_time(Duration::from_millis(config.warm_up_time_ms))
189 .nresamples(config.nresamples)
190 .noise_threshold(config.noise_threshold)
191 .significance_level(config.significance_level);
192
193 if has_baseline {
194 criterion.retain_baseline("base".to_string(), false)
195 } else {
196 criterion.save_baseline("base".to_string())
197 }
198}
199
200fn run_routine<R: Runtime>(
201 criterion_bencher: &mut criterion::Bencher<'_, WallTime>,
202 routine: &mut Routine<'_>,
203 transaction_mode: TransactionMode,
204 runtime: &R,
205) {
206 match routine {
207 Routine::Iter(routine) => match transaction_mode {
208 TransactionMode::Shared => criterion_bencher.iter(routine),
209 TransactionMode::SubtransactionPerBatch
210 | TransactionMode::SubtransactionPerIteration => {
211 criterion_bencher.iter_custom(|iters| {
212 let started = Instant::now();
213 for _ in 0..iters {
214 runtime
215 .with_subtransaction(|| routine())
216 .unwrap_or_else(|error| panic!("{error}"));
217 }
218 started.elapsed()
219 });
220 }
221 },
222 Routine::IterBatched { setup, routine, batch_size } => {
223 criterion_bencher.iter_custom(|iters| {
224 let started = Instant::now();
225 let mut remaining = iters;
226 let per_batch = iterations_per_batch(*batch_size, iters).max(1);
227
228 while remaining > 0 {
229 let current_batch = remaining.min(per_batch);
230 match transaction_mode {
231 TransactionMode::Shared => {
232 for _ in 0..current_batch {
233 let input = setup();
234 routine(input);
235 }
236 }
237 TransactionMode::SubtransactionPerBatch => {
238 runtime
239 .with_subtransaction(|| {
240 for _ in 0..current_batch {
241 let input = setup();
242 routine(input);
243 }
244 })
245 .unwrap_or_else(|error| panic!("{error}"));
246 }
247 TransactionMode::SubtransactionPerIteration => {
248 for _ in 0..current_batch {
249 runtime
250 .with_subtransaction(|| {
251 let input = setup();
252 routine(input);
253 })
254 .unwrap_or_else(|error| panic!("{error}"));
255 }
256 }
257 }
258 remaining -= current_batch;
259 }
260
261 started.elapsed()
262 });
263 }
264 }
265}
266
267fn iterations_per_batch(batch_size: BatchSize, iters: u64) -> u64 {
268 match batch_size {
269 BatchSize::SmallInput => (iters + 10 - 1) / 10,
270 BatchSize::LargeInput => (iters + 1000 - 1) / 1000,
271 BatchSize::PerIteration => 1,
272 BatchSize::NumBatches(batches) => (iters + batches - 1) / batches,
273 BatchSize::NumIterations(size) => size,
274 BatchSize::__NonExhaustive => panic!("invalid BatchSize"),
275 }
276}
277
278fn parse_benchmark_output(
279 definition: BenchDefinition,
280 root: &Path,
281 baseline_artifacts: Option<&[BenchArtifact]>,
282) -> Result<BenchResult, String> {
283 let report_dir = find_new_report_dir(root)
284 .ok_or_else(|| "criterion did not emit benchmark output".to_string())?;
285 let benchmark_path = report_dir.join("benchmark.json");
286 let estimates_path = report_dir.join("estimates.json");
287 let sample_path = report_dir.join("sample.json");
288
289 let benchmark_json = read_json_value(&benchmark_path)?;
290 let estimates_json = read_json_value(&estimates_path)?;
291 let sample_json = read_json_value(&sample_path)?;
292
293 let benchmark = serde_json::from_value::<CriterionBenchmarkJson>(benchmark_json.clone())
294 .map_err(|e| format!("failed to parse {}: {e}", benchmark_path.display()))?;
295 let estimates = serde_json::from_value::<CriterionEstimatesJson>(estimates_json.clone())
296 .map_err(|e| format!("failed to parse {}: {e}", estimates_path.display()))?;
297 let samples = serde_json::from_value::<CriterionSampleJson>(sample_json.clone())
298 .map_err(|e| format!("failed to parse {}: {e}", sample_path.display()))?;
299 let comparison = parse_comparison(
300 report_dir.parent().expect("criterion report dir should always have a parent"),
301 baseline_artifacts,
302 &samples,
303 &definition.config,
304 )?;
305 let artifacts = collect_artifacts(
306 report_dir.parent().expect("criterion report dir should always have a parent"),
307 &benchmark_json,
308 &estimates_json,
309 &sample_json,
310 )?;
311
312 Ok(BenchResult {
313 schema_name: definition.schema_name.to_string(),
314 bench_name: definition.bench_name.to_string(),
315 function_name: definition.function_name.to_string(),
316 setup_function: definition.setup_function.map(str::to_string),
317 transaction_mode: definition.transaction_mode,
318 source_file: definition.source_file.to_string(),
319 source_line: definition.source_line,
320 criterion_config: definition.config,
321 status: BenchStatus::Ok,
322 error_text: None,
323 benchmark: Some(CriterionBenchmark {
324 group_id: benchmark.group_id,
325 function_id: benchmark.function_id,
326 value_str: benchmark.value_str,
327 full_id: benchmark.full_id,
328 directory_name: benchmark.directory_name,
329 title: benchmark.title,
330 }),
331 estimates: estimates.into_estimates(),
332 samples: samples.into_samples()?,
333 throughput: benchmark.throughput.and_then(parse_throughput),
334 comparison,
335 artifacts,
336 })
337}
338
339fn read_json_file<T>(path: &Path) -> Result<T, String>
340where
341 T: for<'de> Deserialize<'de>,
342{
343 let raw =
344 fs::read_to_string(path).map_err(|e| format!("failed to read {}: {e}", path.display()))?;
345 serde_json::from_str(&raw).map_err(|e| format!("failed to parse {}: {e}", path.display()))
346}
347
348fn read_json_value(path: &Path) -> Result<Value, String> {
349 read_json_file(path)
350}
351
352fn write_json_value(path: &Path, value: &Value) -> Result<(), String> {
353 let raw = serde_json::to_vec_pretty(value)
354 .map_err(|error| format!("failed to serialize {}: {error}", path.display()))?;
355 fs::write(path, raw).map_err(|error| format!("failed to write {}: {error}", path.display()))
356}
357
358fn collect_artifacts(
359 benchmark_root: &Path,
360 benchmark_json: &Value,
361 estimates_json: &Value,
362 sample_json: &Value,
363) -> Result<Vec<BenchArtifact>, String> {
364 let mut artifacts = Vec::new();
365 push_json_artifact(&mut artifacts, ARTIFACT_KIND_BENCHMARK_JSON, benchmark_json.clone());
366 push_json_artifact(&mut artifacts, ARTIFACT_KIND_ESTIMATES_JSON, estimates_json.clone());
367 push_json_artifact(&mut artifacts, ARTIFACT_KIND_SAMPLE_JSON, sample_json.clone());
368
369 let tukey_path = benchmark_root.join("new").join("tukey.json");
370 if tukey_path.exists() {
371 push_json_artifact(&mut artifacts, ARTIFACT_KIND_TUKEY_JSON, read_json_value(&tukey_path)?);
372 }
373
374 let change_estimates_path = benchmark_root.join("change").join("estimates.json");
375 if change_estimates_path.exists() {
376 push_json_artifact(
377 &mut artifacts,
378 ARTIFACT_KIND_CHANGE_ESTIMATES_JSON,
379 read_json_value(&change_estimates_path)?,
380 );
381 }
382
383 Ok(artifacts)
384}
385
386fn push_json_artifact(
387 artifacts: &mut Vec<BenchArtifact>,
388 artifact_kind: &str,
389 payload_json: Value,
390) {
391 artifacts.push(BenchArtifact {
392 artifact_kind: artifact_kind.to_string(),
393 media_type: "application/json".to_string(),
394 payload_json,
395 });
396}
397
398fn materialize_baseline_artifacts(
399 output_directory: &Path,
400 baseline_artifacts: &[BenchArtifact],
401) -> Result<(), String> {
402 let benchmark_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_BENCHMARK_JSON)
403 .ok_or_else(|| "persisted Criterion baseline is missing benchmark.json".to_string())?;
404 let estimates_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_ESTIMATES_JSON)
405 .ok_or_else(|| "persisted Criterion baseline is missing estimates.json".to_string())?;
406 let sample_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_SAMPLE_JSON)
407 .ok_or_else(|| "persisted Criterion baseline is missing sample.json".to_string())?;
408
409 let directory_name = baseline_directory_name(benchmark_json)?;
410 let baseline_dir = output_directory.join(directory_name).join("base");
411 fs::create_dir_all(&baseline_dir)
412 .map_err(|error| format!("failed to create {}: {error}", baseline_dir.display()))?;
413
414 write_json_value(&baseline_dir.join("benchmark.json"), benchmark_json)?;
415 write_json_value(&baseline_dir.join("estimates.json"), estimates_json)?;
416 write_json_value(&baseline_dir.join("sample.json"), sample_json)?;
417
418 if let Some(tukey_json) = find_artifact(baseline_artifacts, ARTIFACT_KIND_TUKEY_JSON) {
419 write_json_value(&baseline_dir.join("tukey.json"), tukey_json)?;
420 }
421
422 Ok(())
423}
424
425fn find_artifact<'a>(artifacts: &'a [BenchArtifact], artifact_kind: &str) -> Option<&'a Value> {
426 artifacts
427 .iter()
428 .find(|artifact| artifact.artifact_kind == artifact_kind)
429 .map(|artifact| &artifact.payload_json)
430}
431
432fn baseline_directory_name(benchmark_json: &Value) -> Result<String, String> {
433 let benchmark = serde_json::from_value::<CriterionBenchmarkJson>(benchmark_json.clone())
434 .map_err(|error| format!("failed to parse persisted benchmark.json: {error}"))?;
435 Ok(benchmark.directory_name)
436}
437
438fn parse_comparison(
439 benchmark_root: &Path,
440 baseline_artifacts: Option<&[BenchArtifact]>,
441 current_samples: &CriterionSampleJson,
442 config: &BenchConfig,
443) -> Result<Option<BenchComparison>, String> {
444 let Some(baseline_artifacts) = baseline_artifacts else {
445 return Ok(None);
446 };
447
448 let change_estimates_path = benchmark_root.join("change").join("estimates.json");
449 if !change_estimates_path.exists() {
450 return Ok(None);
451 }
452
453 let change_estimates_json = read_json_value(&change_estimates_path)?;
454 let change_estimates = serde_json::from_value::<CriterionChangeEstimatesJson>(
455 change_estimates_json,
456 )
457 .map_err(|error| format!("failed to parse {}: {error}", change_estimates_path.display()))?;
458 let baseline_sample_json = find_artifact(baseline_artifacts, ARTIFACT_KIND_SAMPLE_JSON)
459 .ok_or_else(|| "persisted Criterion baseline is missing sample.json".to_string())?;
460 let baseline_samples =
461 serde_json::from_value::<CriterionSampleJson>(baseline_sample_json.clone())
462 .map_err(|error| format!("failed to parse persisted sample.json: {error}"))?;
463
464 let p_value = criterion_p_value(
465 ¤t_samples.avg_times()?,
466 &baseline_samples.avg_times()?,
467 config.nresamples,
468 )?;
469 let summary = criterion_summary_from_change_estimate(
470 &change_estimates.mean,
471 p_value,
472 config.significance_level,
473 config.noise_threshold,
474 );
475
476 Ok(Some(BenchComparison {
477 mean: change_estimates.mean.into_relative_estimate("mean"),
478 median: change_estimates.median.into_relative_estimate("median"),
479 p_value,
480 significance_level: config.significance_level,
481 noise_threshold: config.noise_threshold,
482 summary,
483 }))
484}
485
486fn criterion_summary_from_change_estimate(
487 estimate: &CriterionEstimateJson,
488 p_value: f64,
489 significance_level: f64,
490 noise_threshold: f64,
491) -> String {
492 if p_value >= significance_level {
495 return "No change in performance detected.".to_string();
496 }
497
498 let lower_bound = estimate.confidence_interval.lower_bound;
499 let upper_bound = estimate.confidence_interval.upper_bound;
500
501 if lower_bound < -noise_threshold && upper_bound < -noise_threshold {
502 "Performance has improved.".to_string()
503 } else if lower_bound > noise_threshold && upper_bound > noise_threshold {
504 "Performance has regressed.".to_string()
505 } else {
506 "Change within noise threshold.".to_string()
507 }
508}
509
/// Searches the directory tree below `root` for a directory named `new` that
/// directly contains `benchmark.json`, `estimates.json` and `sample.json`.
///
/// Directories that cannot be read are skipped. The first match encountered is
/// returned; `None` means no such directory exists.
fn find_new_report_dir(root: &Path) -> Option<PathBuf> {
    let mut pending = vec![root.to_path_buf()];

    while let Some(directory) = pending.pop() {
        let entries = match fs::read_dir(&directory) {
            Ok(entries) => entries,
            Err(_) => continue,
        };

        let mut has_benchmark = false;
        let mut has_estimates = false;
        let mut has_samples = false;
        for entry in entries.flatten() {
            let child = entry.path();
            if child.is_dir() {
                // Queue every subdirectory, whether or not this one matches.
                pending.push(child);
                continue;
            }
            let name = entry.file_name();
            has_benchmark |= name == "benchmark.json";
            has_estimates |= name == "estimates.json";
            has_samples |= name == "sample.json";
        }

        let named_new = directory.file_name().and_then(|name| name.to_str()) == Some("new");
        if named_new && has_benchmark && has_estimates && has_samples {
            return Some(directory);
        }
    }

    None
}
541
542fn parse_throughput(value: Value) -> Option<BenchThroughput> {
543 let object = value.as_object()?;
544 let (kind, value) = object.iter().next()?;
545 value.as_f64().map(|value| BenchThroughput { kind: kind.to_lowercase(), value })
546}
547
/// Compile-time check for whether `value` ends with `suffix`.
///
/// An empty `suffix` always matches.
const fn ends_with(value: &[u8], suffix: &[u8]) -> bool {
    let value_len = value.len();
    let suffix_len = suffix.len();
    if value_len < suffix_len {
        return false;
    }

    // Compare from the last byte backwards; `start` is where the suffix would begin.
    let start = value_len - suffix_len;
    let mut remaining = suffix_len;
    while remaining > 0 {
        remaining -= 1;
        if value[start + remaining] != suffix[remaining] {
            return false;
        }
    }
    true
}
563
/// Compile-time byte-slice equality.
const fn equals(left: &[u8], right: &[u8]) -> bool {
    let length = left.len();
    if length != right.len() {
        return false;
    }

    // Walk from the back; any mismatch ends the comparison immediately.
    let mut remaining = length;
    while remaining > 0 {
        remaining -= 1;
        if left[remaining] != right[remaining] {
            return false;
        }
    }
    true
}
578
/// The fields of Criterion's `benchmark.json` that this crate reads.
///
/// Everything except `throughput` is copied verbatim into the reported
/// `CriterionBenchmark`.
#[derive(Debug, Deserialize)]
struct CriterionBenchmarkJson {
    group_id: String,
    function_id: Option<String>,
    value_str: Option<String>,
    /// Kept as raw JSON and interpreted later by `parse_throughput`.
    throughput: Option<Value>,
    full_id: String,
    /// Also used to decide where a persisted baseline is written back on disk.
    directory_name: String,
    title: String,
}
589
590#[derive(Debug, Deserialize)]
591struct CriterionEstimatesJson {
592 mean: Option<CriterionEstimateJson>,
593 median: Option<CriterionEstimateJson>,
594 median_abs_dev: Option<CriterionEstimateJson>,
595 slope: Option<CriterionEstimateJson>,
596 std_dev: Option<CriterionEstimateJson>,
597}
598
599impl CriterionEstimatesJson {
600 fn into_estimates(self) -> Vec<BenchEstimate> {
601 let mut estimates = Vec::new();
602 push_estimate(&mut estimates, "mean", self.mean);
603 push_estimate(&mut estimates, "median", self.median);
604 push_estimate(&mut estimates, "median_abs_dev", self.median_abs_dev);
605 push_estimate(&mut estimates, "slope", self.slope);
606 push_estimate(&mut estimates, "std_dev", self.std_dev);
607 estimates
608 }
609}
610
611fn push_estimate(
612 estimates: &mut Vec<BenchEstimate>,
613 estimate_kind: &str,
614 estimate: Option<CriterionEstimateJson>,
615) {
616 if let Some(estimate) = estimate {
617 estimates.push(BenchEstimate {
618 estimate_kind: estimate_kind.to_string(),
619 point_estimate_ns: estimate.point_estimate,
620 standard_error_ns: Some(estimate.standard_error),
621 confidence_level: Some(estimate.confidence_interval.confidence_level),
622 ci_lower_bound_ns: Some(estimate.confidence_interval.lower_bound),
623 ci_upper_bound_ns: Some(estimate.confidence_interval.upper_bound),
624 });
625 }
626}
627
/// The fields of Criterion's `change/estimates.json` that this crate reads.
/// Both estimates are required when the file is present.
#[derive(Debug, Deserialize)]
struct CriterionChangeEstimatesJson {
    mean: CriterionEstimateJson,
    median: CriterionEstimateJson,
}
633
634#[derive(Debug, Deserialize)]
635struct CriterionEstimateJson {
636 confidence_interval: CriterionConfidenceIntervalJson,
637 point_estimate: f64,
638 standard_error: f64,
639}
640
641impl CriterionEstimateJson {
642 fn into_relative_estimate(self, estimate_kind: &str) -> BenchComparisonEstimate {
643 BenchComparisonEstimate {
644 estimate_kind: estimate_kind.to_string(),
645 point_estimate: self.point_estimate,
646 standard_error: self.standard_error,
647 confidence_level: self.confidence_interval.confidence_level,
648 ci_lower_bound: self.confidence_interval.lower_bound,
649 ci_upper_bound: self.confidence_interval.upper_bound,
650 }
651 }
652}
653
/// Confidence interval attached to a Criterion estimate.
#[derive(Debug, Deserialize)]
struct CriterionConfidenceIntervalJson {
    confidence_level: f64,
    lower_bound: f64,
    upper_bound: f64,
}
660
/// The fields of Criterion's `sample.json` that this crate reads.
///
/// `iters[i]` and `times[i]` are consumed as a pair: the iteration count of
/// sample `i` and the elapsed time recorded for it.
#[derive(Debug, Clone, Deserialize)]
struct CriterionSampleJson {
    /// Iteration count per sample; accepted as either a JSON integer or float.
    iters: Vec<CriterionIterationCount>,
    /// Elapsed time per sample, reported downstream as nanoseconds.
    times: Vec<f64>,
}
666
667impl CriterionSampleJson {
668 fn into_samples(self) -> Result<Vec<BenchSample>, String> {
669 self.iters
670 .into_iter()
671 .zip(self.times)
672 .enumerate()
673 .map(|(sample_index, (iteration_count, elapsed_ns))| {
674 Ok(BenchSample {
675 sample_index,
676 iteration_count: iteration_count.into_u64()?,
677 elapsed_ns,
678 })
679 })
680 .collect()
681 }
682
683 fn avg_times(&self) -> Result<Vec<f64>, String> {
684 self.iters
685 .iter()
686 .zip(&self.times)
687 .map(|(iteration_count, elapsed_ns)| {
688 let iteration_count = iteration_count.as_f64()?;
689 if iteration_count == 0.0 {
690 return Err("criterion sample iteration count was zero".to_string());
691 }
692 Ok(*elapsed_ns / iteration_count)
693 })
694 .collect()
695 }
696}
697
/// An iteration count as found in `sample.json`.
///
/// The enum is untagged, so serde tries the variants in declaration order:
/// values that fit `u64` become `Integer`, any other number becomes `Float`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum CriterionIterationCount {
    Integer(u64),
    Float(f64),
}
704
705impl CriterionIterationCount {
706 fn into_u64(self) -> Result<u64, String> {
707 match self {
708 Self::Integer(value) => Ok(value),
709 Self::Float(value)
710 if value.is_finite()
711 && value >= 0.0
712 && value.fract() == 0.0
713 && value <= u64::MAX as f64 =>
714 {
715 Ok(value as u64)
716 }
717 Self::Float(value) => Err(format!(
718 "criterion sample iteration count `{value}` is not a non-negative whole number"
719 )),
720 }
721 }
722
723 fn as_f64(&self) -> Result<f64, String> {
724 match self {
725 Self::Integer(value) => Ok(*value as f64),
726 Self::Float(value)
727 if value.is_finite()
728 && *value >= 0.0
729 && value.fract() == 0.0
730 && *value <= u64::MAX as f64 =>
731 {
732 Ok(*value)
733 }
734 Self::Float(value) => Err(format!(
735 "criterion sample iteration count `{value}` is not a non-negative whole number"
736 )),
737 }
738 }
739}
740
thread_local! {
    // Per-thread generator used only to derive seeds for the RNGs handed out
    // by `criterion_new_rng`. It is seeded from the wall clock (milliseconds
    // since the Unix epoch) when a thread first uses it, so resampling is not
    // reproducible from one run to the next.
    static SEED_RNG: RefCell<Rand64> = RefCell::new(Rand64::new(
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            // Only fails when the system clock reads earlier than the epoch.
            .unwrap_or_else(|_| panic!("time went backwards"))
            .as_millis(),
    ));
}
749
750fn criterion_p_value(
751 current_samples: &[f64],
752 baseline_samples: &[f64],
753 nresamples: usize,
754) -> Result<f64, String> {
755 criterion_p_value_with_rng(current_samples, baseline_samples, nresamples, criterion_new_rng())
756}
757
758fn criterion_p_value_with_rng(
759 current_samples: &[f64],
760 baseline_samples: &[f64],
761 nresamples: usize,
762 rng: Rand64,
763) -> Result<f64, String> {
764 if current_samples.len() < 2 || baseline_samples.len() < 2 {
765 return Err("criterion comparison requires at least two samples in each run".to_string());
766 }
767
768 let t_statistic = sample_t(current_samples, baseline_samples);
769 if !t_statistic.is_finite() {
770 return Err("criterion comparison could not compute a finite T statistic".to_string());
771 }
772 let mut combined = Vec::with_capacity(current_samples.len() + baseline_samples.len());
773 combined.extend_from_slice(current_samples);
774 combined.extend_from_slice(baseline_samples);
775
776 let mut resampler = CriterionResamples::with_rng(combined, rng);
777 let mut t_distribution = Vec::with_capacity(nresamples);
778 for _ in 0..nresamples {
782 let resample = resampler.next();
783 let split = current_samples.len();
784 let t_value = sample_t(&resample[..split], &resample[split..]);
785 if t_value.is_finite() {
786 t_distribution.push(t_value);
787 }
788 }
789
790 if t_distribution.is_empty() {
791 return Err("criterion comparison produced an empty T distribution".to_string());
792 }
793
794 let hits = t_distribution.iter().filter(|value| **value < t_statistic).count();
795 let tails = 2.0;
796 Ok((usize::min(hits, t_distribution.len() - hits) as f64 / t_distribution.len() as f64) * tails)
797}
798
799fn sample_t(current_samples: &[f64], baseline_samples: &[f64]) -> f64 {
800 let current_mean = sample_mean(current_samples);
801 let baseline_mean = sample_mean(baseline_samples);
802 let current_variance = sample_variance(current_samples, current_mean);
803 let baseline_variance = sample_variance(baseline_samples, baseline_mean);
804 let denominator = (current_variance / current_samples.len() as f64
805 + baseline_variance / baseline_samples.len() as f64)
806 .sqrt();
807
808 (current_mean - baseline_mean) / denominator
809}
810
/// Arithmetic mean of `values`; NaN for an empty slice.
fn sample_mean(values: &[f64]) -> f64 {
    let total: f64 = values.iter().sum();
    total / values.len() as f64
}
814
/// Sample variance of `values` around `mean`, using the `n - 1` denominator.
///
/// A single value yields NaN (`0 / 0`); `values` must not be empty.
fn sample_variance(values: &[f64], mean: f64) -> f64 {
    let degrees_of_freedom = (values.len() - 1) as f64;
    let sum_of_squares: f64 = values.iter().map(|&value| (value - mean).powi(2)).sum();
    sum_of_squares / degrees_of_freedom
}
819
/// Draws bootstrap resamples (sampling with replacement) from a fixed sample.
struct CriterionResamples {
    /// Source of the random indices.
    rng: Rand64,
    /// The population every resample is drawn from.
    sample: Vec<f64>,
    /// Reusable buffer that holds the most recent resample.
    stage: Vec<f64>,
}
825
826impl CriterionResamples {
827 fn with_rng(sample: Vec<f64>, rng: Rand64) -> Self {
828 let sample_len = sample.len();
829 Self { rng, sample, stage: Vec::with_capacity(sample_len) }
830 }
831
832 fn next(&mut self) -> &[f64] {
833 if self.stage.is_empty() {
834 self.stage.resize(self.sample.len(), 0.0);
835 }
836
837 for slot in &mut self.stage {
838 let index = self.rng.rand_range(0..self.sample.len() as u64) as usize;
839 *slot = self.sample[index];
840 }
841
842 &self.stage
843 }
844}
845
846fn criterion_new_rng() -> Rand64 {
847 SEED_RNG.with(|rng| {
848 let mut rng = rng.borrow_mut();
849 let seed = ((rng.rand_u64() as u128) << 64) | (rng.rand_u64() as u128);
850 Rand64::new(seed)
851 })
852}
853
// Unit tests for report discovery, baseline materialization, artifact
// collection and the comparison statistics. Filesystem tests work inside a
// fresh temporary directory; statistical tests use a fixed RNG seed.
#[cfg(test)]
mod tests {
    use super::*;
    use oorandom::Rand64;
    use serde_json::json;
    use std::fs;
    use tempfile::TempDir;

    // `base/` and `new/` both hold a complete report; only `new/` may be returned.
    #[test]
    fn find_new_report_dir_prefers_criterion_new_directory() {
        let tempdir = TempDir::new().expect("tempdir");
        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
        let base_dir = benchmark_root.join("base");
        let new_dir = benchmark_root.join("new");

        fs::create_dir_all(&base_dir).expect("base dir");
        fs::create_dir_all(&new_dir).expect("new dir");

        for directory in [&base_dir, &new_dir] {
            fs::write(directory.join("benchmark.json"), "{}").expect("benchmark.json");
            fs::write(directory.join("estimates.json"), "{}").expect("estimates.json");
            fs::write(directory.join("sample.json"), "{}").expect("sample.json");
        }

        let discovered = find_new_report_dir(tempdir.path()).expect("criterion new dir");
        assert_eq!(discovered, new_dir);
    }

    // One case per verdict: improved, regressed, within noise, not significant.
    #[test]
    fn criterion_summary_matches_expected_labels() {
        let improved = estimate_json(-1.5, -1.2, -1.0);
        assert_eq!(
            criterion_summary_from_change_estimate(&improved, 0.01, 0.05, 0.01),
            "Performance has improved."
        );

        let regressed = estimate_json(1.5, 1.2, 1.8);
        assert_eq!(
            criterion_summary_from_change_estimate(&regressed, 0.01, 0.05, 0.01),
            "Performance has regressed."
        );

        let within_noise = estimate_json(0.004, -0.009, 0.008);
        assert_eq!(
            criterion_summary_from_change_estimate(&within_noise, 0.01, 0.05, 0.01),
            "Change within noise threshold."
        );

        // Same interval as `regressed`, but the p-value is above the significance level.
        let not_significant = estimate_json(1.5, 1.2, 1.8);
        assert_eq!(
            criterion_summary_from_change_estimate(&not_significant, 0.75, 0.05, 0.01),
            "No change in performance detected."
        );
    }

    // A persisted baseline must land in `<output>/<directory_name>/base/`.
    #[test]
    fn materialize_baseline_artifacts_writes_criterion_base_layout() {
        let tempdir = TempDir::new().expect("tempdir");
        let artifacts = baseline_artifacts("bench_normalize_phrase");

        materialize_baseline_artifacts(tempdir.path(), &artifacts).expect("materialize baseline");

        let base_dir = tempdir.path().join("bench_normalize_phrase").join("base");
        assert!(base_dir.join("benchmark.json").exists());
        assert!(base_dir.join("estimates.json").exists());
        assert!(base_dir.join("sample.json").exists());
        assert!(base_dir.join("tukey.json").exists());
    }

    // With both optional files on disk, all five artifact kinds are collected.
    #[test]
    fn collect_artifacts_includes_change_estimates_when_present() {
        let tempdir = TempDir::new().expect("tempdir");
        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
        let new_dir = benchmark_root.join("new");
        let change_dir = benchmark_root.join("change");
        fs::create_dir_all(&new_dir).expect("new dir");
        fs::create_dir_all(&change_dir).expect("change dir");

        let benchmark_json = benchmark_json("bench_normalize_phrase");
        let estimates_json = absolute_estimates_json();
        let sample_json = sample_json(&[1, 2, 3], &[10.0, 20.0, 30.0]);

        write_json_value(&new_dir.join("benchmark.json"), &benchmark_json).expect("benchmark");
        write_json_value(&new_dir.join("estimates.json"), &estimates_json).expect("estimates");
        write_json_value(&new_dir.join("sample.json"), &sample_json).expect("sample");
        write_json_value(&new_dir.join("tukey.json"), &json!({"a": 1})).expect("tukey");
        write_json_value(&change_dir.join("estimates.json"), &change_estimates_json(1.5, 1.2, 1.8))
            .expect("change estimates");

        let artifacts =
            collect_artifacts(&benchmark_root, &benchmark_json, &estimates_json, &sample_json)
                .expect("collect artifacts");
        let artifact_kinds =
            artifacts.iter().map(|artifact| artifact.artifact_kind.as_str()).collect::<Vec<_>>();

        assert!(artifact_kinds.contains(&ARTIFACT_KIND_BENCHMARK_JSON));
        assert!(artifact_kinds.contains(&ARTIFACT_KIND_ESTIMATES_JSON));
        assert!(artifact_kinds.contains(&ARTIFACT_KIND_SAMPLE_JSON));
        assert!(artifact_kinds.contains(&ARTIFACT_KIND_TUKEY_JSON));
        assert!(artifact_kinds.contains(&ARTIFACT_KIND_CHANGE_ESTIMATES_JSON));
    }

    // Samples around 1000 against samples around 2 must be clearly significant.
    #[test]
    fn criterion_p_value_with_rng_detects_large_regression() {
        let current = [1000.0, 1001.0, 1002.5, 998.0, 1003.0, 999.5];
        let baseline = [1.0, 2.0, 2.5, 1.5, 3.0, 2.2];

        let p_value =
            criterion_p_value_with_rng(&current, &baseline, 10_000, Rand64::new(42)).unwrap();
        assert!(p_value < 0.05, "expected a significant difference, got p={p_value}");
    }

    // Comparing a sample with itself must not look significant.
    #[test]
    fn criterion_p_value_with_rng_is_high_for_identical_samples() {
        let sample = [10.0, 12.0, 13.5, 11.5, 9.5, 14.0];

        let p_value =
            criterion_p_value_with_rng(&sample, &sample, 10_000, Rand64::new(42)).expect("p value");
        assert!(p_value >= 0.5, "expected no significant difference, got p={p_value}");
    }

    // The baseline samples come from the persisted artifacts, not from disk;
    // only `change/estimates.json` is read from the benchmark directory.
    #[test]
    fn parse_comparison_uses_persisted_baseline_artifacts() {
        let tempdir = TempDir::new().expect("tempdir");
        let benchmark_root = tempdir.path().join("bench_normalize_phrase");
        let change_dir = benchmark_root.join("change");
        fs::create_dir_all(&change_dir).expect("change dir");

        write_json_value(&change_dir.join("estimates.json"), &change_estimates_json(1.5, 1.2, 1.8))
            .expect("change estimates");

        let current_samples = serde_json::from_value::<CriterionSampleJson>(sample_json(
            &[1, 1, 1, 1, 1, 1],
            &[1000.0, 1001.0, 1002.0, 998.0, 1003.0, 999.0],
        ))
        .expect("current samples");
        let config = BenchConfig {
            sample_size: 100,
            measurement_time_ms: 5_000,
            warm_up_time_ms: 3_000,
            nresamples: 10_000,
            noise_threshold: 0.01,
            significance_level: 0.05,
        };

        let comparison = parse_comparison(
            &benchmark_root,
            Some(&baseline_artifacts("bench_normalize_phrase")),
            &current_samples,
            &config,
        )
        .expect("comparison")
        .expect("comparison payload");

        assert_eq!(comparison.summary, "Performance has regressed.");
        assert!(comparison.p_value < 0.05, "expected a significant difference");
        assert!(comparison.mean.point_estimate > 1.0);
    }

    // Fixture: a complete persisted baseline (benchmark, estimates, sample, tukey).
    fn baseline_artifacts(directory_name: &str) -> Vec<BenchArtifact> {
        vec![
            BenchArtifact {
                artifact_kind: ARTIFACT_KIND_BENCHMARK_JSON.to_string(),
                media_type: "application/json".to_string(),
                payload_json: benchmark_json(directory_name),
            },
            BenchArtifact {
                artifact_kind: ARTIFACT_KIND_ESTIMATES_JSON.to_string(),
                media_type: "application/json".to_string(),
                payload_json: absolute_estimates_json(),
            },
            BenchArtifact {
                artifact_kind: ARTIFACT_KIND_SAMPLE_JSON.to_string(),
                media_type: "application/json".to_string(),
                payload_json: sample_json(&[1, 1, 1, 1, 1, 1], &[1.0, 2.0, 2.5, 1.5, 3.0, 2.2]),
            },
            BenchArtifact {
                artifact_kind: ARTIFACT_KIND_TUKEY_JSON.to_string(),
                media_type: "application/json".to_string(),
                payload_json: json!({"fences": [0.0, 1.0, 2.0, 3.0]}),
            },
        ]
    }

    // Fixture: a minimal `benchmark.json`; only `directory_name` varies.
    fn benchmark_json(directory_name: &str) -> Value {
        json!({
            "group_id": "bench_normalize_phrase",
            "function_id": null,
            "value_str": null,
            "throughput": null,
            "full_id": "bench_normalize_phrase",
            "directory_name": directory_name,
            "title": "bench_normalize_phrase",
        })
    }

    // Fixture: an `estimates.json` with all five statistics present.
    fn absolute_estimates_json() -> Value {
        json!({
            "mean": estimate_value(300.0, 290.0, 310.0),
            "median": estimate_value(295.0, 285.0, 305.0),
            "median_abs_dev": estimate_value(5.0, 4.0, 6.0),
            "slope": estimate_value(280.0, 270.0, 290.0),
            "std_dev": estimate_value(8.0, 7.0, 9.0),
        })
    }

    // Fixture: a `change/estimates.json` whose mean and median are identical.
    fn change_estimates_json(point: f64, lower: f64, upper: f64) -> Value {
        json!({
            "mean": estimate_value(point, lower, upper),
            "median": estimate_value(point, lower, upper),
        })
    }

    // Fixture: one estimate with a 95% interval and a fixed standard error.
    fn estimate_value(point: f64, lower: f64, upper: f64) -> Value {
        json!({
            "confidence_interval": {
                "confidence_level": 0.95,
                "lower_bound": lower,
                "upper_bound": upper,
            },
            "point_estimate": point,
            "standard_error": 0.01,
        })
    }

    // Same as `estimate_value`, already deserialized into the typed struct.
    fn estimate_json(point: f64, lower: f64, upper: f64) -> CriterionEstimateJson {
        serde_json::from_value(estimate_value(point, lower, upper)).expect("estimate")
    }

    // Fixture: a `sample.json` built from parallel iteration counts and timings.
    fn sample_json(iters: &[u64], times: &[f64]) -> Value {
        json!({
            "iters": iters,
            "times": times,
        })
    }
}