1use crate::{
3 dylib::{FunctionIdx, Spi, SpiModeKind},
4 CacheFirewall, Error, FlatSampleLength, LinearSampleLength, MeasurementSettings,
5 RandomSampleLength, SampleLength, SampleLengthKind,
6};
7use anyhow::{bail, Context};
8use clap::{ArgAction, Parser};
9use colorz::mode::{self, Mode};
10use core::fmt;
11use glob_match::glob_match;
12use std::{
13 env::{self, args, temp_dir},
14 fmt::Display,
15 fs,
16 io::{stderr, Write},
17 num::NonZeroUsize,
18 path::{Path, PathBuf},
19 process::{Command, ExitCode, Stdio},
20 str::FromStr,
21 time::Duration,
22};
23
/// Result type used throughout the CLI; an alias for [`anyhow::Result`].
pub type Result<T> = anyhow::Result<T>;
/// Shorthand for the standard library's two-parameter `Result`, which the
/// single-parameter [`Result`] alias of this module would otherwise shadow.
pub(crate) type StdResult<T, E> = std::result::Result<T, E>;
26
27#[derive(Parser, Debug)]
28enum BenchmarkMode {
29 List {
30 #[command(flatten)]
31 bench_flags: CargoBenchFlags,
32 },
33 Compare(PairedOpts),
34 Solo(SoloOpts),
35}
36
37#[derive(Parser, Debug)]
38struct PairedOpts {
39 #[command(flatten)]
40 bench_flags: CargoBenchFlags,
41
42 path: Option<PathBuf>,
44
45 #[arg(short = 'd', long = "dump")]
47 path_to_dump: Option<PathBuf>,
48
49 #[arg(long = "gnuplot")]
51 gnuplot: bool,
52
53 #[arg(long = "seed")]
55 seed: Option<u64>,
56
57 #[arg(short = 's', long = "samples")]
59 samples: Option<NonZeroUsize>,
60
61 #[arg(long = "sampler")]
63 sampler: Option<SampleLengthKind>,
64
65 #[arg(short = 't', long = "time")]
67 time: Option<f64>,
68
69 #[arg(long = "fail-threshold")]
71 fail_threshold: Option<f64>,
72
73 #[arg(long = "fail-fast")]
75 fail_fast: bool,
76
77 #[arg(long = "cache-firewall")]
80 cache_firewall: Option<usize>,
81
82 #[arg(long = "randomize-stack")]
85 randomize_stack: Option<usize>,
86
87 #[arg(long = "yield-before-sample")]
89 yield_before_sample: Option<bool>,
90
91 #[arg(short = 'f', long = "filter")]
93 filter: Option<String>,
94
95 #[arg(short = 'g', long = "significant-only", default_value_t = false)]
97 significant_only: bool,
98
99 #[arg(short = 'o', long = "filter-outliers")]
101 filter_outliers: bool,
102
103 #[arg(long = "warmup")]
105 warmup_enabled: Option<bool>,
106
107 #[arg(short = 'p', long = "parallel")]
108 parallel: bool,
109
110 #[arg(short = 'q')]
112 quiet: bool,
113
114 #[arg(short = 'v', long = "verbose", default_value_t = false)]
115 verbose: bool,
116
117 #[arg(long = "no-system-time-check", default_value_t = true, action = ArgAction::SetFalse)]
119 system_time_check: bool,
120}
121
122#[derive(Parser, Debug)]
123struct SoloOpts {
124 #[command(flatten)]
125 bench_flags: CargoBenchFlags,
126
127 #[arg(long = "seed")]
129 seed: Option<u64>,
130
131 #[arg(short = 's', long = "samples")]
133 samples: Option<NonZeroUsize>,
134
135 #[arg(long = "sampler")]
137 sampler: Option<SampleLengthKind>,
138
139 #[arg(short = 't', long = "time")]
141 time: Option<f64>,
142
143 #[arg(long = "cache-firewall")]
146 cache_firewall: Option<usize>,
147
148 #[arg(long = "randomize-stack")]
151 randomize_stack: Option<usize>,
152
153 #[arg(long = "yield-before-sample")]
155 yield_before_sample: Option<bool>,
156
157 #[arg(short = 'f', long = "filter")]
159 filter: Option<String>,
160
161 #[arg(long = "warmup")]
163 warmup_enabled: Option<bool>,
164}
165
166#[derive(Parser, Debug)]
167#[command(author, version, about, long_about = None)]
168struct Opts {
169 #[command(subcommand)]
170 subcommand: Option<BenchmarkMode>,
171
172 #[command(flatten)]
173 bench_flags: CargoBenchFlags,
174
175 #[arg(long = "color", default_value = "detect")]
176 coloring_mode: String,
177}
178
179impl FromStr for SampleLengthKind {
180 type Err = Error;
181
182 fn from_str(s: &str) -> StdResult<Self, Self::Err> {
183 match s {
184 "flat" => Ok(SampleLengthKind::Flat),
185 "linear" => Ok(SampleLengthKind::Linear),
186 "random" => Ok(SampleLengthKind::Random),
187 _ => Err(Error::UnknownSamplerType),
188 }
189 }
190}
191
// Flag set flattened into the top-level command and into every subcommand.
// NOTE(review): judging by the name, `--bench` exists so that invocations made
// by `cargo bench` parse successfully - confirm against cargo's behavior.
#[derive(Parser, Debug, Clone)]
struct CargoBenchFlags {
    // The default is already `true`; nothing in this file reads the value.
    #[arg(long = "bench", default_value_t = true)]
    bench: bool,
}
198
199pub fn run(settings: MeasurementSettings) -> Result<ExitCode> {
200 let opts = Opts::parse();
201
202 match Mode::from_str(&opts.coloring_mode) {
203 Ok(coloring_mode) => mode::set_coloring_mode(coloring_mode),
204 Err(_) => eprintln!("[WARN] Invalid coloring mode: {}", opts.coloring_mode),
205 }
206
207 let subcommand = opts.subcommand.unwrap_or(BenchmarkMode::List {
208 bench_flags: opts.bench_flags,
209 });
210
211 match subcommand {
212 BenchmarkMode::List { bench_flags: _ } => {
213 let spi = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
214 for func in spi.tests() {
215 println!("{}", func.name);
216 }
217 Ok(ExitCode::SUCCESS)
218 }
219 BenchmarkMode::Compare(opts) => paired_test::run_test(opts, settings),
220 BenchmarkMode::Solo(opts) => solo_test::run_test(opts, settings),
221 }
222}
223
/// Guard that deletes the file at the wrapped path when it goes out of scope.
struct AutoDelete(PathBuf);

impl std::ops::Deref for AutoDelete {
    type Target = PathBuf;

    /// Gives read access to the guarded path.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl Drop for AutoDelete {
    /// Removes the file on a best-effort basis.
    ///
    /// A file that does not exist (for example because it was never created)
    /// already is in the desired state, so that case is silent. Any other
    /// failure is reported on stderr; it cannot be propagated from `drop`.
    fn drop(&mut self) {
        match fs::remove_file(&self.0) {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => eprintln!("Failed to delete file {}: {}", self.0.display(), e),
        }
    }
}
242
243fn create_loop_mode(samples: Option<NonZeroUsize>, time: Option<f64>) -> Result<LoopMode> {
244 let loop_mode = match (samples, time) {
245 (Some(samples), None) => LoopMode::Samples(samples.into()),
246 (None, Some(time)) => LoopMode::Time(Duration::from_millis((time * 1000.) as u64)),
247 (None, None) => LoopMode::Time(Duration::from_millis(100)),
248 (Some(_), Some(_)) => bail!("-t and -s are mutually exclusive"),
249 };
250 Ok(loop_mode)
251}
252
/// Termination rule of a measurement loop.
#[derive(Clone, Copy)]
enum LoopMode {
    /// Stop after the given number of samples.
    Samples(usize),
    /// Stop once the accumulated measurement time reaches the given duration.
    Time(Duration),
}

impl LoopMode {
    /// Tells whether another sample should be taken, given the number of
    /// samples taken so far (`iter_no`) and the time accumulated so far
    /// (`loop_time`).
    fn should_continue(&self, iter_no: usize, loop_time: Duration) -> bool {
        match *self {
            Self::Samples(limit) => limit > iter_no,
            Self::Time(budget) => budget > loop_time,
        }
    }
}
267
268mod solo_test {
269 use super::*;
270 use crate::{dylib::Spi, CacheFirewall, Summary};
271 use alloca::with_alloca;
272 use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
273 use std::thread;
274
275 pub(super) fn run_test(opts: SoloOpts, mut settings: MeasurementSettings) -> Result<ExitCode> {
276 let SoloOpts {
277 bench_flags: _,
278 filter,
279 samples,
280 time,
281 seed,
282 sampler,
283 cache_firewall,
284 yield_before_sample,
285 warmup_enabled,
286 randomize_stack,
287 } = opts;
288
289 let mut spi_self = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
290
291 settings.cache_firewall = cache_firewall;
292 settings.randomize_stack = randomize_stack;
293
294 if let Some(warmup_enabled) = warmup_enabled {
295 settings.warmup_enabled = warmup_enabled;
296 }
297 if let Some(yield_before_sample) = yield_before_sample {
298 settings.yield_before_sample = yield_before_sample;
299 }
300 if let Some(sampler) = sampler {
301 settings.sampler_type = sampler;
302 }
303
304 let filter = filter.as_deref().unwrap_or("");
305 let loop_mode = create_loop_mode(samples, time)?;
306
307 let test_names = spi_self
308 .tests()
309 .iter()
310 .map(|t| &t.name)
311 .cloned()
312 .collect::<Vec<_>>();
313 for func_name in test_names {
314 if !filter.is_empty() && !glob_match(filter, &func_name) {
315 continue;
316 }
317
318 let result = run_solo_test(&mut spi_self, &func_name, settings, seed, loop_mode)?;
319
320 reporting::default_reporter_solo(&func_name, &result);
321 }
322
323 Ok(ExitCode::SUCCESS)
324 }
325
326 fn run_solo_test(
327 spi: &mut Spi,
328 test_name: &str,
329 settings: MeasurementSettings,
330 seed: Option<u64>,
331 loop_mode: LoopMode,
332 ) -> Result<Summary<f64>> {
333 const TIME_SLICE_MS: u32 = 10;
334
335 let firewall = settings
336 .cache_firewall
337 .map(|s| s * 1024)
338 .map(CacheFirewall::new);
339 let baseline_func = spi.lookup(test_name).ok_or(Error::InvalidTestName)?;
340
341 let mut spi_func = TestedFunction::new(spi, baseline_func.idx);
342
343 let seed = seed.unwrap_or_else(rand::random);
344
345 spi_func.spi.prepare_state(seed)?;
346 let iters = spi_func.spi.estimate_iterations(TIME_SLICE_MS)?;
347 let mut iterations_per_sample = (iters / 2).max(1);
348 let mut sampler = create_sampler(&settings, seed);
349
350 let mut rng = SmallRng::seed_from_u64(seed);
351 let stack_offset_distr = settings
352 .randomize_stack
353 .map(|offset| distributions::Uniform::new(0, offset));
354
355 let mut i = 0;
356
357 let mut sample_iterations = vec![];
358
359 if let LoopMode::Samples(samples) = loop_mode {
360 sample_iterations.reserve(samples);
361 spi_func.samples.reserve(samples);
362 }
363
364 let mut loop_time = Duration::from_secs(0);
365 let mut loop_iterations = 0;
366 while loop_mode.should_continue(i, loop_time) {
367 if loop_time > Duration::from_millis(100) {
368 iterations_per_sample =
370 loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
371 }
372 let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
373 loop_iterations += iterations;
374 let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
375
376 if settings.yield_before_sample {
377 thread::yield_now();
378 }
379
380 let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
381
382 prepare_func(
383 prepare_state_seed,
384 &mut spi_func,
385 warmup_iterations,
386 firewall.as_ref(),
387 )?;
388
389 if let Some(distr) = stack_offset_distr {
391 with_alloca(rng.sample(distr), |_| {
392 spi_func.spi.measure(iterations).unwrap();
393 });
394 } else {
395 spi_func.spi.measure(iterations)?;
396 }
397
398 loop_time += Duration::from_nanos(spi_func.read_sample()?);
399 sample_iterations.push(iterations);
400 i += 1;
401 }
402
403 let samples = spi_func
404 .samples
405 .iter()
406 .zip(sample_iterations.iter())
407 .map(|(sample, iterations)| *sample as f64 / *iterations as f64)
408 .collect::<Vec<_>>();
409 Ok(Summary::from(&samples).unwrap())
410 }
411}
412
413mod paired_test {
414 use super::*;
415 use crate::{
416 calculate_run_result,
417 platform::{self, RUsage},
418 CacheFirewall, RunResult,
419 };
420 use alloca::with_alloca;
421 use fs::File;
422 use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
423 use std::{
424 io::{self, BufWriter},
425 mem, thread,
426 };
427
428 pub(super) fn run_test(
429 opts: PairedOpts,
430 mut settings: MeasurementSettings,
431 ) -> Result<ExitCode> {
432 let PairedOpts {
433 bench_flags: _,
434 path,
435 verbose,
436 filter,
437 samples,
438 time,
439 filter_outliers,
440 path_to_dump,
441 gnuplot,
442 fail_threshold,
443 fail_fast,
444 significant_only,
445 seed,
446 sampler,
447 cache_firewall,
448 yield_before_sample,
449 warmup_enabled,
450 parallel,
451 quiet,
452 randomize_stack,
453 system_time_check,
454 } = opts;
455 let mut path = path
456 .or_else(|| args().next().map(PathBuf::from))
457 .expect("No path given");
458 if path.is_relative() {
459 if let Ok(pwd) = env::var("PWD") {
461 path = PathBuf::from(pwd).join(path)
462 }
463 };
464
465 #[cfg(target_os = "linux")]
466 let path = crate::linux::patch_pie_binary_if_needed(&path)?.unwrap_or(path);
467
468 let mode = if parallel {
469 SpiModeKind::Asynchronous
470 } else {
471 SpiModeKind::Synchronous
472 };
473
474 let mut spi_self = Spi::for_self(mode).ok_or(Error::SpiSelfWasMoved)?;
475 let mut spi_lib = Spi::for_library(&path, mode).with_context(|| {
476 format!(
477 "Unable to load benchmark: {}. Make sure it exists and it is valid tango benchmark.",
478 path.display()
479 )
480 })?;
481
482 settings.filter_outliers = filter_outliers;
483 settings.cache_firewall = cache_firewall;
484 settings.randomize_stack = randomize_stack;
485
486 if let Some(warmup_enabled) = warmup_enabled {
487 settings.warmup_enabled = warmup_enabled;
488 }
489 if let Some(yield_before_sample) = yield_before_sample {
490 settings.yield_before_sample = yield_before_sample;
491 }
492 if let Some(sampler) = sampler {
493 settings.sampler_type = sampler;
494 }
495
496 let filter = filter.as_deref().unwrap_or("");
497 let loop_mode = create_loop_mode(samples, time)?;
498
499 let mut exit_code = ExitCode::SUCCESS;
500
501 if let Some(path) = &path_to_dump {
502 if !path.exists() {
503 fs::create_dir_all(path)?;
504 }
505 }
506 if gnuplot && path_to_dump.is_none() {
507 eprintln!("warn: --gnuplot requires -d to be specified. No plots will be generated")
508 }
509
510 let mut sample_dumps = vec![];
511
512 let test_names = spi_self
513 .tests()
514 .iter()
515 .map(|t| &t.name)
516 .cloned()
517 .collect::<Vec<_>>();
518 for func_name in test_names {
519 if !filter.is_empty() && !glob_match(filter, &func_name) {
520 continue;
521 }
522
523 if spi_lib.lookup(&func_name).is_none() {
524 if !quiet {
525 writeln!(stderr(), "{} skipped...", &func_name)?;
526 }
527 continue;
528 }
529
530 let rusage_before = system_time_check.then(platform::rusage);
531 let (result, sample_dump) = run_paired_test(
532 &mut spi_lib,
533 &mut spi_self,
534 &func_name,
535 settings,
536 seed,
537 loop_mode,
538 path_to_dump.as_ref(),
539 )?;
540 if let Some(usage_before) = rusage_before {
541 let rusage = platform::rusage() - usage_before;
542 if detect_system_time_bias(&rusage) {
543 reporting::report_system_time_bias(&result, &rusage);
544 }
545 }
546
547 if let Some(dump) = sample_dump {
548 sample_dumps.push(dump);
549 }
550
551 if result.diff_estimate.significant || !significant_only {
552 if verbose {
553 reporting::verbose_reporter(&result);
554 } else {
555 reporting::default_reporter(&result);
556 }
557 }
558
559 if result.diff_estimate.significant {
560 if let Some(threshold) = fail_threshold {
561 if result.diff_estimate.pct >= threshold {
562 eprintln!(
563 "[ERROR] Performance regressed {:+.1}% >= {:.1}% - test: {}",
564 result.diff_estimate.pct, threshold, func_name
565 );
566 if fail_fast {
567 return Ok(ExitCode::FAILURE);
568 } else {
569 exit_code = ExitCode::FAILURE;
570 }
571 }
572 }
573 }
574 }
575
576 if gnuplot && !sample_dumps.is_empty() {
577 generate_plots(sample_dumps.as_slice())?;
578 }
579
580 Ok(exit_code)
581 }
582
583 fn detect_system_time_bias(rusage: &RUsage) -> bool {
587 let system = rusage.system_time.as_secs_f64();
589 let overall = (rusage.user_time + rusage.system_time).as_secs_f64();
590 system / overall > 0.05
591 }
592
593 fn run_paired_test(
609 baseline: &mut Spi,
610 candidate: &mut Spi,
611 test_name: &str,
612 settings: MeasurementSettings,
613 seed: Option<u64>,
614 loop_mode: LoopMode,
615 samples_dump_path: Option<&PathBuf>,
616 ) -> Result<(RunResult, Option<PathBuf>)> {
617 const TIME_SLICE_MS: u32 = 10;
618
619 let firewall = settings
620 .cache_firewall
621 .map(|s| s * 1024)
622 .map(CacheFirewall::new);
623 let baseline_func = baseline.lookup(test_name).ok_or(Error::InvalidTestName)?;
624 let candidate_func = candidate.lookup(test_name).ok_or(Error::InvalidTestName)?;
625
626 let mut baseline = TestedFunction::new(baseline, baseline_func.idx);
627 let mut candidate = TestedFunction::new(candidate, candidate_func.idx);
628
629 let mut a_func = &mut baseline;
630 let mut b_func = &mut candidate;
631
632 let seed = seed.unwrap_or_else(rand::random);
633
634 a_func
635 .spi
636 .prepare_state(seed)
637 .context("Unable to prepare benchmark state")?;
638 let a_iters = a_func
639 .spi
640 .estimate_iterations(TIME_SLICE_MS)
641 .context("Failed to estimate required iterations number")?;
642 let a_estimate = (a_iters / 2).max(1);
643
644 b_func
645 .spi
646 .prepare_state(seed)
647 .context("Unable to prepare benchmark state")?;
648 let b_iters = b_func
649 .spi
650 .estimate_iterations(TIME_SLICE_MS)
651 .context("Failed to estimate required iterations number")?;
652 let b_estimate = (b_iters / 2).max(1);
653
654 let mut iterations_per_sample = a_estimate.min(b_estimate);
655 let mut sampler = create_sampler(&settings, seed);
656
657 let mut rng = SmallRng::seed_from_u64(seed);
658 let stack_offset_distr = settings
659 .randomize_stack
660 .map(|offset| distributions::Uniform::new(0, offset));
661
662 let mut i = 0;
663 let mut switch_counter = 0;
664
665 let mut sample_iterations = vec![];
666
667 if let LoopMode::Samples(samples) = loop_mode {
668 sample_iterations.reserve(samples);
669 a_func.samples.reserve(samples);
670 b_func.samples.reserve(samples);
671 }
672
673 let mut loop_time = Duration::from_secs(0);
674 let mut loop_iterations = 0;
675 while loop_mode.should_continue(i, loop_time) {
676 if loop_time > Duration::from_millis(100) {
677 iterations_per_sample =
679 loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
680 }
681 let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
682 loop_iterations += iterations;
683 let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
684
685 {
694 mem::swap(&mut a_func, &mut b_func);
695 switch_counter += 1;
696 }
697
698 if settings.yield_before_sample {
699 thread::yield_now();
700 }
701
702 let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
703 let mut sample_time = 0;
704
705 prepare_func(
706 prepare_state_seed,
707 a_func,
708 warmup_iterations,
709 firewall.as_ref(),
710 )?;
711 prepare_func(
712 prepare_state_seed,
713 b_func,
714 warmup_iterations,
715 firewall.as_ref(),
716 )?;
717
718 if let Some(distr) = stack_offset_distr {
720 with_alloca(rng.sample(distr), |_| {
721 a_func.spi.measure(iterations).unwrap();
722 b_func.spi.measure(iterations).unwrap();
723 });
724 } else {
725 a_func.spi.measure(iterations)?;
726 b_func.spi.measure(iterations)?;
727 }
728
729 let a_sample_time = a_func.read_sample()?;
730 let b_sample_time = b_func.read_sample()?;
731 sample_time += a_sample_time.max(b_sample_time);
732
733 loop_time += Duration::from_nanos(sample_time);
734 sample_iterations.push(iterations);
735 i += 1;
736 }
737
738 if switch_counter % 2 != 0 {
741 mem::swap(&mut a_func, &mut b_func);
742 }
743
744 let run_result = calculate_run_result(
745 test_name,
746 &a_func.samples,
747 &b_func.samples,
748 &sample_iterations,
749 settings.filter_outliers,
750 )
751 .ok_or(Error::NoMeasurements)?;
752
753 let samples_path = if let Some(path) = samples_dump_path {
754 let file_path = write_samples(path, test_name, a_func, b_func, sample_iterations)?;
755 Some(file_path)
756 } else {
757 None
758 };
759
760 Ok((run_result, samples_path))
761 }
762
763 fn write_samples(
764 path: &Path,
765 test_name: &str,
766 a_func: &TestedFunction,
767 b_func: &TestedFunction,
768 iterations: Vec<usize>,
769 ) -> Result<PathBuf> {
770 let file_name = format!("{}.csv", test_name.replace('/', "-"));
771 let file_path = path.join(file_name);
772 let s_samples = a_func.samples.iter().copied();
773 let b_samples = b_func.samples.iter().copied();
774 let values = s_samples
775 .zip(b_samples)
776 .zip(iterations.iter().copied())
777 .map(|((a, b), c)| (a, b, c));
778 write_csv(&file_path, values).context("Unable to write raw measurements")?;
779 Ok(file_path)
780 }
781
/// Writes `values` to the file at `path`, one `a,b,c` row per item.
///
/// The file is created, or truncated when it already exists.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the file. The buffer
/// is flushed explicitly because an error raised while flushing on drop would
/// go unnoticed.
fn write_csv<A: Display, B: Display, C: Display>(
    path: impl AsRef<Path>,
    values: impl IntoIterator<Item = (A, B, C)>,
) -> io::Result<()> {
    let mut file = BufWriter::new(File::create(path)?);
    for (a, b, c) in values {
        writeln!(&mut file, "{},{},{}", a, b, c)?;
    }
    file.flush()
}
792
793 fn generate_plots(sample_dumps: &[PathBuf]) -> Result<()> {
794 let gnuplot_file = AutoDelete(temp_dir().join("tango-plot.gnuplot"));
795 fs::write(&*gnuplot_file, include_bytes!("plot.gnuplot"))?;
796 let gnuplot_file_str = gnuplot_file.to_str().unwrap();
797
798 for input in sample_dumps {
799 let csv_input = input.to_str().unwrap();
800 let svg_path = input.with_extension("svg");
801 let cmd = Command::new("gnuplot")
802 .args([
803 "-c",
804 gnuplot_file_str,
805 csv_input,
806 svg_path.to_str().unwrap(),
807 ])
808 .stdin(Stdio::null())
809 .stdout(Stdio::inherit())
810 .stderr(Stdio::inherit())
811 .status()
812 .context("Failed to execute gnuplot")?;
813
814 if !cmd.success() {
815 bail!("gnuplot command failed");
816 }
817 }
818 Ok(())
819 }
820}
821
822mod reporting {
823 use crate::{
824 cli::{colorize, HumanTime},
825 platform::RUsage,
826 RunResult, Summary,
827 };
828 use colorz::{ansi, mode::Stream, Colorize, Style};
829
830 pub(super) fn verbose_reporter(results: &RunResult) {
831 let base = results.baseline;
832 let candidate = results.candidate;
833
834 let significant = results.diff_estimate.significant;
835
836 println!(
837 "{} (n: {}, outliers: {})",
838 results.name.bold().stream(Stream::Stdout),
839 results.diff.n,
840 results.outliers
841 );
842
843 println!(
844 " {:12} {:>15} {:>15} {:>15}",
845 "",
846 "baseline".bold().stream(Stream::Stdout),
847 "candidate".bold().stream(Stream::Stdout),
848 "∆".bold().stream(Stream::Stdout),
849 );
850 println!(
851 " {:12} ╭────────────────────────────────────────────────",
852 ""
853 );
854 println!(
855 " {:12} │ {:>15} {:>15} {:>15} {:+4.2}{}{}",
856 "mean",
857 HumanTime(base.mean),
858 HumanTime(candidate.mean),
859 colorize(
860 HumanTime(results.diff.mean),
861 significant,
862 results.diff.mean < 0.
863 ),
864 colorize(
865 results.diff_estimate.pct,
866 significant,
867 results.diff.mean < 0.
868 ),
869 colorize("%", significant, results.diff.mean < 0.),
870 if significant { "*" } else { "" },
871 );
872 println!(
873 " {:12} │ {:>15} {:>15} {:>15}",
874 "min",
875 HumanTime(base.min),
876 HumanTime(candidate.min),
877 HumanTime(candidate.min - base.min)
878 );
879 println!(
880 " {:12} │ {:>15} {:>15} {:>15}",
881 "max",
882 HumanTime(base.max),
883 HumanTime(candidate.max),
884 HumanTime(candidate.max - base.max),
885 );
886 println!(
887 " {:12} │ {:>15} {:>15} {:>15}",
888 "std. dev.",
889 HumanTime(base.variance.sqrt()),
890 HumanTime(candidate.variance.sqrt()),
891 HumanTime(results.diff.variance.sqrt()),
892 );
893 println!();
894 }
895
896 pub(super) fn default_reporter(results: &RunResult) {
897 let base = results.baseline;
898 let candidate = results.candidate;
899 let diff = results.diff;
900
901 let significant = results.diff_estimate.significant;
902
903 let speedup = results.diff_estimate.pct;
904 let candidate_faster = diff.mean < 0.;
905 println!(
906 "{:50} [ {:>8} ... {:>8} ] {:>+7.2}{}{}",
907 colorize(&results.name, significant, candidate_faster),
908 HumanTime(base.mean),
909 colorize(HumanTime(candidate.mean), significant, candidate_faster),
910 colorize(speedup, significant, candidate_faster),
911 colorize("%", significant, candidate_faster),
912 if significant { "*" } else { "" },
913 )
914 }
915
916 pub(super) fn default_reporter_solo(name: &str, results: &Summary<f64>) {
917 println!(
918 "{:50} [ {:>8} ... {:>8} ... {:>8} ] stddev: {:>8}",
919 name,
920 HumanTime(results.min),
921 HumanTime(results.mean),
922 HumanTime(results.max),
923 HumanTime(results.variance.sqrt()),
924 )
925 }
926
927 pub(super) fn report_system_time_bias(result: &RunResult, rusage: &RUsage) {
928 const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
929
930 eprintln!(
931 "{}: {} benchmark spent too much time in system mode (sys: {:?}, usr: {:?}). Results may be inaccurate",
932 "WARN".into_style_with(RED).stream(Stream::Stderr),
933 &result.name,
934 rusage.system_time,
935 rusage.user_time
936 );
937 }
938}
939
940struct TestedFunction<'a> {
941 pub(crate) spi: &'a mut Spi,
942 pub(crate) samples: Vec<u64>,
943}
944
945impl<'a> TestedFunction<'a> {
946 pub(crate) fn new(spi: &'a mut Spi, func: FunctionIdx) -> Self {
947 spi.select(func);
948 TestedFunction {
949 spi,
950 samples: Vec::new(),
951 }
952 }
953
954 pub(crate) fn read_sample(&mut self) -> Result<u64> {
955 let sample = self.spi.read_sample().context("Unable to read sample")?;
956 self.samples.push(sample);
957 Ok(sample)
958 }
959
960 pub(crate) fn run(&mut self, iterations: usize) -> Result<u64> {
961 self.spi
962 .run(iterations)
963 .context("Unable to run measurement")
964 }
965}
966
967fn prepare_func(
968 prepare_state_seed: Option<u64>,
969 f: &mut TestedFunction,
970 warmup_iterations: Option<usize>,
971 firewall: Option<&CacheFirewall>,
972) -> Result<()> {
973 if let Some(seed) = prepare_state_seed {
974 f.spi.prepare_state(seed)?;
975 if let Some(firewall) = firewall {
976 firewall.issue_read();
977 }
978 }
979 if let Some(warmup_iterations) = warmup_iterations {
980 f.run(warmup_iterations)?;
981 }
982 Ok(())
983}
984
985fn create_sampler(settings: &MeasurementSettings, seed: u64) -> Box<dyn SampleLength> {
986 match settings.sampler_type {
987 SampleLengthKind::Flat => Box::new(FlatSampleLength::new(settings)),
988 SampleLengthKind::Linear => Box::new(LinearSampleLength::new(settings)),
989 SampleLengthKind::Random => Box::new(RandomSampleLength::new(settings, seed)),
990 }
991}
992
993fn colorize<T: Display>(value: T, do_paint: bool, is_improved: bool) -> impl Display {
994 use colorz::{ansi, mode::Stream::Stdout, Colorize, Style};
995
996 const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
997 const GREEN: Style = Style::new().fg(ansi::Green).const_into_runtime_style();
998 const DEFAULT: Style = Style::new().const_into_runtime_style();
999
1000 if do_paint {
1001 if is_improved {
1002 value.into_style_with(GREEN).stream(Stdout)
1003 } else {
1004 value.into_style_with(RED).stream(Stdout)
1005 }
1006 } else {
1007 value.into_style_with(DEFAULT).stream(Stdout)
1008 }
1009}
1010
/// A duration in nanoseconds that is displayed with a human-friendly unit.
struct HumanTime(f64);

impl fmt::Display for HumanTime {
    /// Formats the value with one decimal place in the largest unit (s, ms, us
    /// or ns) whose threshold the magnitude reaches; zero is printed as
    /// `0 ns`. Width and alignment of the formatter are honored.
    ///
    /// Thresholds are inclusive, so exactly 1000 ns is shown as `1.0 us`
    /// rather than `1000.0 ns`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const USEC: f64 = 1_000.;
        const MSEC: f64 = USEC * 1_000.;
        const SEC: f64 = MSEC * 1_000.;

        let nanos = self.0;
        // The unit is chosen by magnitude so that negative values keep their sign.
        let magnitude = nanos.abs();

        if magnitude >= SEC {
            f.pad(&format!("{:.1} s", nanos / SEC))
        } else if magnitude >= MSEC {
            f.pad(&format!("{:.1} ms", nanos / MSEC))
        } else if magnitude >= USEC {
            f.pad(&format!("{:.1} us", nanos / USEC))
        } else if nanos == 0. {
            f.pad("0 ns")
        } else {
            f.pad(&format!("{:.1} ns", nanos))
        }
    }
}
1032
#[cfg(test)]
mod tests {
    use super::*;

    /// `HumanTime` picks the expected unit and honors padding.
    #[test]
    fn check_human_time() {
        let cases = [
            (0.1, "0.1 ns"),
            (120., "120.0 ns"),
            (1200., "1.2 us"),
            (1200000., "1.2 ms"),
            (1200000000., "1.2 s"),
            (-1200000., "-1.2 ms"),
        ];
        for &(nanos, expected) in cases.iter() {
            assert_eq!(HumanTime(nanos).to_string(), expected);
        }

        // Zero has its own representation and is right-aligned on request.
        assert_eq!(format!("{:>5}", HumanTime(0.)), " 0 ns");
    }

    /// All glob forms expected to work in `--filter` match a nested name.
    #[test]
    fn check_glob() {
        let input = "a/32/b";
        let patterns = ["a/*/*", "a/**", "*/32/*", "**/b", "a/{32,64}/*"];
        for pattern in patterns.iter() {
            let matched = glob_match(pattern, input);
            assert!(matched, "failed to match {} against {}", pattern, input);
        }
    }
}