tango_bench/
cli.rs

1//! Contains functionality of a `cargo bench` harness
2use crate::{
3    dylib::{FunctionIdx, Spi, SpiModeKind},
4    CacheFirewall, Error, FlatSampleLength, LinearSampleLength, MeasurementSettings,
5    RandomSampleLength, SampleLength, SampleLengthKind,
6};
7use anyhow::{bail, Context};
8use clap::Parser;
9use colorz::mode::{self, Mode};
10use core::fmt;
11use glob_match::glob_match;
12use std::{
13    env::{self, args, temp_dir},
14    fmt::Display,
15    fs,
16    io::{stderr, Write},
17    num::NonZeroUsize,
18    path::{Path, PathBuf},
19    process::{Command, ExitCode, Stdio},
20    str::FromStr,
21    time::Duration,
22};
23
24pub type Result<T> = anyhow::Result<T>;
25pub(crate) type StdResult<T, E> = std::result::Result<T, E>;
26
// Subcommands understood by the benchmark harness.
//
// NOTE: plain `//` comments are used on purpose. The clap derive macro turns `///` doc
// comments into user-visible help text, which this note must not alter.
#[derive(Parser, Debug)]
enum BenchmarkMode {
    // Prints the names of the registered tests (handled inline in `run`). This is also the
    // mode `run` falls back to when no subcommand is given.
    List {
        #[command(flatten)]
        bench_flags: CargoBenchFlags,
    },
    // Dispatched to `paired_test::run_test`.
    Compare(PairedOpts),
    // Dispatched to `solo_test::run_test`.
    Solo(SoloOpts),
}
36
37#[derive(Parser, Debug)]
38struct PairedOpts {
39    #[command(flatten)]
40    bench_flags: CargoBenchFlags,
41
42    /// Path to the executable to test against. Tango will test against itself if no executable given
43    path: Option<PathBuf>,
44
45    /// write CSV dumps of all the measurements in a given location
46    #[arg(short = 'd', long = "dump")]
47    path_to_dump: Option<PathBuf>,
48
49    /// generate gnuplot graphs for each test (requires --dump [path] to be specified)
50    #[arg(short = 'g', long = "gnuplot")]
51    gnuplot: bool,
52
53    /// seed for the random number generator or omit to use a random seed
54    #[arg(long = "seed")]
55    seed: Option<u64>,
56
57    /// Number of samples to take for each test
58    #[arg(short = 's', long = "samples")]
59    samples: Option<NonZeroUsize>,
60
61    /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random)
62    #[arg(long = "sampler")]
63    sampler: Option<SampleLengthKind>,
64
65    /// Duration of each sample in seconds
66    #[arg(short = 't', long = "time")]
67    time: Option<f64>,
68
69    /// Fail if the difference between the two measurements is greater than the given threshold in percent
70    #[arg(long = "fail-threshold")]
71    fail_threshold: Option<f64>,
72
73    /// Should we terminate early if --fail-threshold is exceed
74    #[arg(long = "fail-fast")]
75    fail_fast: bool,
76
77    /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance
78    /// (size in Kbytes)
79    #[arg(long = "cache-firewall")]
80    cache_firewall: Option<usize>,
81
82    /// Perform a randomized offset to the stack frame for each sample.
83    /// (size in bytes)
84    #[arg(long = "randomize-stack")]
85    randomize_stack: Option<usize>,
86
87    /// Delegate control back to the OS before each sample
88    #[arg(long = "yield-before-sample")]
89    yield_before_sample: Option<bool>,
90
91    /// Filter tests by name (eg. '*/{sorted,unsorted}/[0-9]*')
92    #[arg(short = 'f', long = "filter")]
93    filter: Option<String>,
94
95    /// Report only statistically significant results
96    #[arg(short = 'g', long = "significant-only", default_value_t = false)]
97    significant_only: bool,
98
99    /// Enable outlier detection
100    #[arg(short = 'o', long = "filter-outliers")]
101    filter_outliers: bool,
102
103    /// Perform warmup iterations before taking measurements (1/10 of sample iterations)
104    #[arg(long = "warmup")]
105    warmup_enabled: Option<bool>,
106
107    #[arg(short = 'p', long = "parallel")]
108    parallel: bool,
109
110    /// Quiet mode
111    #[arg(short = 'q')]
112    quiet: bool,
113
114    #[arg(short = 'v', long = "verbose", default_value_t = false)]
115    verbose: bool,
116}
117
118#[derive(Parser, Debug)]
119struct SoloOpts {
120    #[command(flatten)]
121    bench_flags: CargoBenchFlags,
122
123    /// seed for the random number generator or omit to use a random seed
124    #[arg(long = "seed")]
125    seed: Option<u64>,
126
127    /// Number of samples to take for each test
128    #[arg(short = 's', long = "samples")]
129    samples: Option<NonZeroUsize>,
130
131    /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random)
132    #[arg(long = "sampler")]
133    sampler: Option<SampleLengthKind>,
134
135    /// Duration of each sample in seconds
136    #[arg(short = 't', long = "time")]
137    time: Option<f64>,
138
139    /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance
140    /// (size in Kbytes)
141    #[arg(long = "cache-firewall")]
142    cache_firewall: Option<usize>,
143
144    /// Perform a randomized offset to the stack frame for each sample.
145    /// (size in bytes)
146    #[arg(long = "randomize-stack")]
147    randomize_stack: Option<usize>,
148
149    /// Delegate control back to the OS before each sample
150    #[arg(long = "yield-before-sample")]
151    yield_before_sample: Option<bool>,
152
153    /// Filter tests by name (eg. '*/{sorted,unsorted}/[0-9]*')
154    #[arg(short = 'f', long = "filter")]
155    filter: Option<String>,
156
157    /// Perform warmup iterations before taking measurements (1/10 of sample iterations)
158    #[arg(long = "warmup")]
159    warmup_enabled: Option<bool>,
160
161    /// Quiet mode
162    #[arg(short = 'q')]
163    quiet: bool,
164
165    #[arg(short = 'v', long = "verbose", default_value_t = false)]
166    verbose: bool,
167}
168
// Top-level command line of the harness: an optional subcommand plus global flags.
// (Plain `//` comments: `///` doc comments would be consumed by the clap derive macro.)
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Opts {
    // When no subcommand is given, `run` falls back to `BenchmarkMode::List`.
    #[command(subcommand)]
    subcommand: Option<BenchmarkMode>,

    // Handed over to the fallback `BenchmarkMode::List` when no subcommand is given.
    #[command(flatten)]
    bench_flags: CargoBenchFlags,

    // Kept as a raw string: `run` parses it with `Mode::from_str` and only prints a warning
    // when the value is not recognised.
    #[arg(long = "color", default_value = "detect")]
    coloring_mode: String,
}
181
182impl FromStr for SampleLengthKind {
183    type Err = Error;
184
185    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
186        match s {
187            "flat" => Ok(SampleLengthKind::Flat),
188            "linear" => Ok(SampleLengthKind::Linear),
189            "random" => Ok(SampleLengthKind::Random),
190            _ => Err(Error::UnknownSamplerType),
191        }
192    }
193}
194
/// Definition of the flags required to comply with `cargo bench` calling conventions.
#[derive(Parser, Debug, Clone)]
struct CargoBenchFlags {
    // Accepts `--bench` on the command line. The value is not inspected anywhere in the code
    // visible in this file (every consumer binds it to `_`).
    #[arg(long = "bench", default_value_t = true)]
    bench: bool,
}
201
202pub fn run(settings: MeasurementSettings) -> Result<ExitCode> {
203    let opts = Opts::parse();
204
205    match Mode::from_str(&opts.coloring_mode) {
206        Ok(coloring_mode) => mode::set_coloring_mode(coloring_mode),
207        Err(_) => eprintln!("[WARN] Invalid coloring mode: {}", opts.coloring_mode),
208    }
209
210    let subcommand = opts.subcommand.unwrap_or(BenchmarkMode::List {
211        bench_flags: opts.bench_flags,
212    });
213
214    match subcommand {
215        BenchmarkMode::List { bench_flags: _ } => {
216            let spi = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
217            for func in spi.tests() {
218                println!("{}", func.name);
219            }
220            Ok(ExitCode::SUCCESS)
221        }
222        BenchmarkMode::Compare(opts) => paired_test::run_test(opts, settings),
223        BenchmarkMode::Solo(opts) => solo_test::run_test(opts, settings),
224    }
225}
226
/// Path wrapper that removes the file it points to as soon as the value goes out of scope.
struct AutoDelete(PathBuf);

impl std::ops::Deref for AutoDelete {
    type Target = PathBuf;

    /// Gives read access to the wrapped path.
    fn deref(&self) -> &PathBuf {
        let AutoDelete(inner) = self;
        inner
    }
}

impl Drop for AutoDelete {
    /// Deletes the file. A failure is only reported on stderr, since `drop` cannot return it.
    fn drop(&mut self) {
        let target = &self.0;
        match fs::remove_file(target) {
            Ok(()) => {}
            Err(e) => eprintln!("Failed to delete file {}: {}", target.display(), e),
        }
    }
}
245
246fn create_loop_mode(samples: Option<NonZeroUsize>, time: Option<f64>) -> Result<LoopMode> {
247    let loop_mode = match (samples, time) {
248        (Some(samples), None) => LoopMode::Samples(samples.into()),
249        (None, Some(time)) => LoopMode::Time(Duration::from_millis((time * 1000.) as u64)),
250        (None, None) => LoopMode::Time(Duration::from_millis(100)),
251        (Some(_), Some(_)) => bail!("-t and -s are mutually exclusive"),
252    };
253    Ok(loop_mode)
254}
255
/// Stop condition of a measurement loop.
#[derive(Clone, Copy)]
enum LoopMode {
    /// Take a fixed number of samples.
    Samples(usize),
    /// Keep sampling until the accumulated loop time reaches the given duration.
    Time(Duration),
}

impl LoopMode {
    /// Tells whether another sample should be taken.
    ///
    /// `iter_no` is the number of samples taken so far and `loop_time` the time accumulated
    /// by them; each variant looks only at the argument relevant to it.
    fn should_continue(&self, iter_no: usize, loop_time: Duration) -> bool {
        match *self {
            LoopMode::Samples(limit) => limit > iter_no,
            LoopMode::Time(budget) => budget > loop_time,
        }
    }
}
270
271mod solo_test {
272    use super::*;
273    use crate::{dylib::Spi, CacheFirewall, Summary};
274    use alloca::with_alloca;
275    use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
276    use std::thread;
277
278    pub(super) fn run_test(opts: SoloOpts, mut settings: MeasurementSettings) -> Result<ExitCode> {
279        let SoloOpts {
280            bench_flags: _,
281            quiet: _,
282            verbose: _,
283            filter,
284            samples,
285            time,
286            seed,
287            sampler,
288            cache_firewall,
289            yield_before_sample,
290            warmup_enabled,
291            randomize_stack,
292        } = opts;
293
294        let mut spi_self = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
295
296        settings.cache_firewall = cache_firewall;
297        settings.randomize_stack = randomize_stack;
298
299        if let Some(warmup_enabled) = warmup_enabled {
300            settings.warmup_enabled = warmup_enabled;
301        }
302        if let Some(yield_before_sample) = yield_before_sample {
303            settings.yield_before_sample = yield_before_sample;
304        }
305        if let Some(sampler) = sampler {
306            settings.sampler_type = sampler;
307        }
308
309        let filter = filter.as_deref().unwrap_or("");
310        let loop_mode = create_loop_mode(samples, time)?;
311
312        let test_names = spi_self
313            .tests()
314            .iter()
315            .map(|t| &t.name)
316            .cloned()
317            .collect::<Vec<_>>();
318        for func_name in test_names {
319            if !filter.is_empty() && !glob_match(filter, &func_name) {
320                continue;
321            }
322
323            let result = run_solo_test(&mut spi_self, &func_name, settings, seed, loop_mode)?;
324
325            reporting::default_reporter_solo(&func_name, &result);
326        }
327
328        Ok(ExitCode::SUCCESS)
329    }
330
331    fn run_solo_test(
332        spi: &mut Spi,
333        test_name: &str,
334        settings: MeasurementSettings,
335        seed: Option<u64>,
336        loop_mode: LoopMode,
337    ) -> Result<Summary<f64>> {
338        const TIME_SLICE_MS: u32 = 10;
339
340        let firewall = settings
341            .cache_firewall
342            .map(|s| s * 1024)
343            .map(CacheFirewall::new);
344        let baseline_func = spi.lookup(test_name).ok_or(Error::InvalidTestName)?;
345
346        let mut spi_func = TestedFunction::new(spi, baseline_func.idx);
347
348        let seed = seed.unwrap_or_else(rand::random);
349
350        spi_func.prepare_state(seed);
351        let mut iterations_per_sample = (spi_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1);
352        let mut sampler = create_sampler(&settings, seed);
353
354        let mut rng = SmallRng::seed_from_u64(seed);
355        let stack_offset_distr = settings
356            .randomize_stack
357            .map(|offset| distributions::Uniform::new(0, offset));
358
359        let mut i = 0;
360
361        let mut sample_iterations = vec![];
362
363        if let LoopMode::Samples(samples) = loop_mode {
364            sample_iterations.reserve(samples);
365            spi_func.samples.reserve(samples);
366        }
367
368        let mut loop_time = Duration::from_secs(0);
369        let mut loop_iterations = 0;
370        while loop_mode.should_continue(i, loop_time) {
371            if loop_time > Duration::from_millis(100) {
372                // correcting time slice estimates
373                iterations_per_sample =
374                    loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
375            }
376            let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
377            loop_iterations += iterations;
378            let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
379
380            if settings.yield_before_sample {
381                thread::yield_now();
382            }
383
384            let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
385
386            prepare_func(
387                prepare_state_seed,
388                &mut spi_func,
389                warmup_iterations,
390                firewall.as_ref(),
391            );
392
393            // Allocate a custom stack frame during runtime, to try to offset alignment of the stack.
394            if let Some(distr) = stack_offset_distr {
395                with_alloca(rng.sample(distr), |_| {
396                    spi_func.measure(iterations);
397                });
398            } else {
399                spi_func.measure(iterations);
400            }
401
402            loop_time += Duration::from_nanos(spi_func.read_sample());
403            sample_iterations.push(iterations);
404            i += 1;
405        }
406
407        let samples = spi_func
408            .samples
409            .iter()
410            .zip(sample_iterations.iter())
411            .map(|(sample, iterations)| *sample as f64 / *iterations as f64)
412            .collect::<Vec<_>>();
413        Ok(Summary::from(&samples).unwrap())
414    }
415}
416
417mod paired_test {
418    use super::*;
419    use crate::{calculate_run_result, CacheFirewall, RunResult};
420    use alloca::with_alloca;
421    use fs::File;
422    use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
423    use std::{
424        io::{self, BufWriter},
425        mem, thread,
426    };
427
428    pub(super) fn run_test(
429        opts: PairedOpts,
430        mut settings: MeasurementSettings,
431    ) -> Result<ExitCode> {
432        let PairedOpts {
433            bench_flags: _,
434            path,
435            verbose,
436            filter,
437            samples,
438            time,
439            filter_outliers,
440            path_to_dump,
441            gnuplot,
442            fail_threshold,
443            fail_fast,
444            significant_only,
445            seed,
446            sampler,
447            cache_firewall,
448            yield_before_sample,
449            warmup_enabled,
450            parallel,
451            quiet,
452            randomize_stack,
453        } = opts;
454        let mut path = path
455            .or_else(|| args().next().map(PathBuf::from))
456            .expect("No path given");
457        if path.is_relative() {
458            // Resolving paths relative to PWD if given
459            if let Ok(pwd) = env::var("PWD") {
460                path = PathBuf::from(pwd).join(path)
461            }
462        };
463
464        #[cfg(target_os = "linux")]
465        let path = crate::linux::patch_pie_binary_if_needed(&path)?.unwrap_or(path);
466
467        let mode = if parallel {
468            SpiModeKind::Asynchronous
469        } else {
470            SpiModeKind::Synchronous
471        };
472
473        let mut spi_self = Spi::for_self(mode).ok_or(Error::SpiSelfWasMoved)?;
474        let mut spi_lib = Spi::for_library(path, mode);
475
476        settings.filter_outliers = filter_outliers;
477        settings.cache_firewall = cache_firewall;
478        settings.randomize_stack = randomize_stack;
479
480        if let Some(warmup_enabled) = warmup_enabled {
481            settings.warmup_enabled = warmup_enabled;
482        }
483        if let Some(yield_before_sample) = yield_before_sample {
484            settings.yield_before_sample = yield_before_sample;
485        }
486        if let Some(sampler) = sampler {
487            settings.sampler_type = sampler;
488        }
489
490        let filter = filter.as_deref().unwrap_or("");
491        let loop_mode = create_loop_mode(samples, time)?;
492
493        let mut exit_code = ExitCode::SUCCESS;
494
495        if let Some(path) = &path_to_dump {
496            if !path.exists() {
497                fs::create_dir_all(path)?;
498            }
499        }
500        if gnuplot && path_to_dump.is_none() {
501            eprintln!("warn: --gnuplot requires -d to be specified. No plots will be generated")
502        }
503
504        let mut sample_dumps = vec![];
505
506        let test_names = spi_self
507            .tests()
508            .iter()
509            .map(|t| &t.name)
510            .cloned()
511            .collect::<Vec<_>>();
512        for func_name in test_names {
513            if !filter.is_empty() && !glob_match(filter, &func_name) {
514                continue;
515            }
516
517            if spi_lib.lookup(&func_name).is_none() {
518                if !quiet {
519                    writeln!(stderr(), "{} skipped...", &func_name)?;
520                }
521                continue;
522            }
523
524            let (result, sample_dump) = run_paired_test(
525                &mut spi_lib,
526                &mut spi_self,
527                &func_name,
528                settings,
529                seed,
530                loop_mode,
531                path_to_dump.as_ref(),
532            )?;
533
534            if let Some(dump) = sample_dump {
535                sample_dumps.push(dump);
536            }
537
538            if result.diff_estimate.significant || !significant_only {
539                if verbose {
540                    reporting::verbose_reporter(&result);
541                } else {
542                    reporting::default_reporter(&result);
543                }
544            }
545
546            if result.diff_estimate.significant {
547                if let Some(threshold) = fail_threshold {
548                    if result.diff_estimate.pct >= threshold {
549                        eprintln!(
550                            "[ERROR] Performance regressed {:+.1}% >= {:.1}%  -  test: {}",
551                            result.diff_estimate.pct, threshold, func_name
552                        );
553                        if fail_fast {
554                            return Ok(ExitCode::FAILURE);
555                        } else {
556                            exit_code = ExitCode::FAILURE;
557                        }
558                    }
559                }
560            }
561        }
562
563        if let Some(path_to_dump) = path_to_dump {
564            if gnuplot && !sample_dumps.is_empty() {
565                generate_plots(&path_to_dump, sample_dumps.as_slice())?;
566            }
567        }
568
569        Ok(exit_code)
570    }
571
572    /// Measure the difference in performance of two functions
573    ///
574    /// Provides a way to save a raw dump of measurements into directory
575    ///
576    /// The format is as follows
577    /// ```txt
578    /// b_1,c_1
579    /// b_2,c_2
580    /// ...
581    /// b_n,c_n
582    /// ```
583    /// where `b_1..b_n` are baseline absolute time (in nanoseconds) measurements
584    /// and `c_1..c_n` are candidate time measurements
585    ///
586    /// Returns a statistical results of a test run and path to raw samples of sample dump was requested
587    fn run_paired_test(
588        baseline: &mut Spi,
589        candidate: &mut Spi,
590        test_name: &str,
591        settings: MeasurementSettings,
592        seed: Option<u64>,
593        loop_mode: LoopMode,
594        samples_dump_path: Option<&PathBuf>,
595    ) -> Result<(RunResult, Option<PathBuf>)> {
596        const TIME_SLICE_MS: u32 = 10;
597
598        let firewall = settings
599            .cache_firewall
600            .map(|s| s * 1024)
601            .map(CacheFirewall::new);
602        let baseline_func = baseline.lookup(test_name).ok_or(Error::InvalidTestName)?;
603        let candidate_func = candidate.lookup(test_name).ok_or(Error::InvalidTestName)?;
604
605        let mut baseline = TestedFunction::new(baseline, baseline_func.idx);
606        let mut candidate = TestedFunction::new(candidate, candidate_func.idx);
607
608        let mut a_func = &mut baseline;
609        let mut b_func = &mut candidate;
610
611        let seed = seed.unwrap_or_else(rand::random);
612
613        a_func.prepare_state(seed);
614        let a_estimate = (a_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1);
615
616        b_func.prepare_state(seed);
617        let b_estimate = (b_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1);
618
619        let mut iterations_per_sample = a_estimate.min(b_estimate);
620        let mut sampler = create_sampler(&settings, seed);
621
622        let mut rng = SmallRng::seed_from_u64(seed);
623        let stack_offset_distr = settings
624            .randomize_stack
625            .map(|offset| distributions::Uniform::new(0, offset));
626
627        let mut i = 0;
628        let mut switch_counter = 0;
629
630        let mut sample_iterations = vec![];
631
632        if let LoopMode::Samples(samples) = loop_mode {
633            sample_iterations.reserve(samples);
634            a_func.samples.reserve(samples);
635            b_func.samples.reserve(samples);
636        }
637
638        let mut loop_time = Duration::from_secs(0);
639        let mut loop_iterations = 0;
640        while loop_mode.should_continue(i, loop_time) {
641            if loop_time > Duration::from_millis(100) {
642                // correcting time slice estimates
643                iterations_per_sample =
644                    loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
645            }
646            let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
647            loop_iterations += iterations;
648            let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
649
650            // !!! IMPORTANT !!!
651            // Algorithms should be called in different order on each new iteration.
652            // This equalize the probability of facing unfortunate circumstances like cache misses or page faults
653            // for both functions. Although both algorithms are from distinct shared objects and therefore
654            // must be fully self-contained in terms of virtual address space (each shared object has its own
655            // generator instances, static variables, memory mappings, etc.) it might be the case that
656            // on the level of physical memory both of them rely on the same memory-mapped test data, for example.
657            // In that case first function will experience the larger amount of major page faults.
658            {
659                mem::swap(&mut a_func, &mut b_func);
660                switch_counter += 1;
661            }
662
663            if settings.yield_before_sample {
664                thread::yield_now();
665            }
666
667            let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
668            let mut sample_time = 0;
669
670            prepare_func(
671                prepare_state_seed,
672                a_func,
673                warmup_iterations,
674                firewall.as_ref(),
675            );
676            prepare_func(
677                prepare_state_seed,
678                b_func,
679                warmup_iterations,
680                firewall.as_ref(),
681            );
682
683            // Allocate a custom stack frame during runtime, to try to offset alignment of the stack.
684            if let Some(distr) = stack_offset_distr {
685                with_alloca(rng.sample(distr), |_| {
686                    a_func.measure(iterations);
687                    b_func.measure(iterations);
688                });
689            } else {
690                a_func.measure(iterations);
691                b_func.measure(iterations);
692            }
693
694            let a_sample_time = a_func.read_sample();
695            let b_sample_time = b_func.read_sample();
696            sample_time += a_sample_time.max(b_sample_time);
697
698            loop_time += Duration::from_nanos(sample_time);
699            sample_iterations.push(iterations);
700            i += 1;
701        }
702
703        // If we switched functions odd number of times then we need to swap them back so that
704        // the first function is always the baseline.
705        if switch_counter % 2 != 0 {
706            mem::swap(&mut a_func, &mut b_func);
707        }
708
709        let run_result = calculate_run_result(
710            test_name,
711            &a_func.samples,
712            &b_func.samples,
713            &sample_iterations,
714            settings.filter_outliers,
715        )
716        .ok_or(Error::NoMeasurements)?;
717
718        let samples_path = if let Some(path) = samples_dump_path {
719            let file_path = write_samples(path, test_name, a_func, b_func, sample_iterations)?;
720            Some(file_path)
721        } else {
722            None
723        };
724
725        Ok((run_result, samples_path))
726    }
727
728    fn write_samples(
729        path: &Path,
730        test_name: &str,
731        a_func: &TestedFunction,
732        b_func: &TestedFunction,
733        iterations: Vec<usize>,
734    ) -> Result<PathBuf> {
735        let file_name = format!("{}.csv", test_name.replace('/', "-"));
736        let file_path = path.join(file_name);
737        let s_samples = a_func.samples.iter().copied();
738        let b_samples = b_func.samples.iter().copied();
739        let values = s_samples
740            .zip(b_samples)
741            .zip(iterations.iter().copied())
742            .map(|((a, b), c)| (a, b, c));
743        write_csv(&file_path, values).context("Unable to write raw measurements")?;
744        Ok(file_path)
745    }
746
747    fn write_csv<A: Display, B: Display, C: Display>(
748        path: impl AsRef<Path>,
749        values: impl IntoIterator<Item = (A, B, C)>,
750    ) -> io::Result<()> {
751        let mut file = BufWriter::new(File::create(path)?);
752        for (a, b, c) in values {
753            writeln!(&mut file, "{},{},{}", a, b, c)?;
754        }
755        Ok(())
756    }
757
758    fn generate_plots(path: &Path, sample_dumps: &[PathBuf]) -> Result<()> {
759        let gnuplot_file = AutoDelete(temp_dir().join("tango-plot.gnuplot"));
760        fs::write(&*gnuplot_file, include_bytes!("plot.gnuplot"))?;
761        let gnuplot_file_str = gnuplot_file.to_str().unwrap();
762
763        for input in sample_dumps {
764            let csv_input = input.to_str().unwrap();
765            let svg_path = input.with_extension("svg");
766            let cmd = Command::new("gnuplot")
767                .args([
768                    "-c",
769                    gnuplot_file_str,
770                    csv_input,
771                    svg_path.to_str().unwrap(),
772                ])
773                .stdin(Stdio::null())
774                .stdout(Stdio::inherit())
775                .stderr(Stdio::inherit())
776                .status()
777                .context("Failed to execute gnuplot")?;
778
779            if !cmd.success() {
780                bail!("gnuplot command failed");
781            }
782        }
783        Ok(())
784    }
785}
786
787mod reporting {
788    use crate::cli::{colorize, HumanTime};
789    use crate::{RunResult, Summary};
790    use colorz::{mode::Stream, Colorize};
791
792    pub(super) fn verbose_reporter(results: &RunResult) {
793        let base = results.baseline;
794        let candidate = results.candidate;
795
796        let significant = results.diff_estimate.significant;
797
798        println!(
799            "{}  (n: {}, outliers: {})",
800            results.name.bold().stream(Stream::Stdout),
801            results.diff.n,
802            results.outliers
803        );
804
805        println!(
806            "    {:12}   {:>15} {:>15} {:>15}",
807            "",
808            "baseline".bold().stream(Stream::Stdout),
809            "candidate".bold().stream(Stream::Stdout),
810            "∆".bold().stream(Stream::Stdout),
811        );
812        println!(
813            "    {:12} ╭────────────────────────────────────────────────",
814            ""
815        );
816        println!(
817            "    {:12} │ {:>15} {:>15} {:>15}  {:+4.2}{}{}",
818            "mean",
819            HumanTime(base.mean),
820            HumanTime(candidate.mean),
821            colorize(
822                HumanTime(results.diff.mean),
823                significant,
824                results.diff.mean < 0.
825            ),
826            colorize(
827                results.diff_estimate.pct,
828                significant,
829                results.diff.mean < 0.
830            ),
831            colorize("%", significant, results.diff.mean < 0.),
832            if significant { "*" } else { "" },
833        );
834        println!(
835            "    {:12} │ {:>15} {:>15} {:>15}",
836            "min",
837            HumanTime(base.min),
838            HumanTime(candidate.min),
839            HumanTime(candidate.min - base.min)
840        );
841        println!(
842            "    {:12} │ {:>15} {:>15} {:>15}",
843            "max",
844            HumanTime(base.max),
845            HumanTime(candidate.max),
846            HumanTime(candidate.max - base.max),
847        );
848        println!(
849            "    {:12} │ {:>15} {:>15} {:>15}",
850            "std. dev.",
851            HumanTime(base.variance.sqrt()),
852            HumanTime(candidate.variance.sqrt()),
853            HumanTime(results.diff.variance.sqrt()),
854        );
855        println!();
856    }
857
858    pub(super) fn default_reporter(results: &RunResult) {
859        let base = results.baseline;
860        let candidate = results.candidate;
861        let diff = results.diff;
862
863        let significant = results.diff_estimate.significant;
864
865        let speedup = results.diff_estimate.pct;
866        let candidate_faster = diff.mean < 0.;
867        println!(
868            "{:50} [ {:>8} ... {:>8} ]    {:>+7.2}{}{}",
869            colorize(&results.name, significant, candidate_faster),
870            HumanTime(base.mean),
871            colorize(HumanTime(candidate.mean), significant, candidate_faster),
872            colorize(speedup, significant, candidate_faster),
873            colorize("%", significant, candidate_faster),
874            if significant { "*" } else { "" },
875        )
876    }
877
878    pub(super) fn default_reporter_solo(name: &str, results: &Summary<f64>) {
879        println!(
880            "{:50}  [ {:>8} ... {:>8} ... {:>8} ]  stddev: {:>8}",
881            name,
882            HumanTime(results.min),
883            HumanTime(results.mean),
884            HumanTime(results.max),
885            HumanTime(results.variance.sqrt()),
886        )
887    }
888}
889
890struct TestedFunction<'a> {
891    pub(crate) spi: &'a mut Spi,
892    pub(crate) samples: Vec<u64>,
893}
894
895impl<'a> TestedFunction<'a> {
896    pub(crate) fn new(spi: &'a mut Spi, func: FunctionIdx) -> Self {
897        spi.select(func);
898        TestedFunction {
899            spi,
900            samples: Vec::new(),
901        }
902    }
903
904    pub(crate) fn measure(&mut self, iterations: usize) {
905        self.spi.measure(iterations);
906    }
907
908    pub(crate) fn read_sample(&mut self) -> u64 {
909        let sample = self.spi.read_sample();
910        self.samples.push(sample);
911        sample
912    }
913
914    pub(crate) fn run(&mut self, iterations: usize) -> u64 {
915        self.spi.run(iterations)
916    }
917
918    pub(crate) fn prepare_state(&mut self, seed: u64) {
919        self.spi.prepare_state(seed);
920    }
921
922    pub(crate) fn estimate_iterations(&mut self, time_ms: u32) -> usize {
923        self.spi.estimate_iterations(time_ms)
924    }
925}
926
927fn prepare_func(
928    prepare_state_seed: Option<u64>,
929    f: &mut TestedFunction,
930    warmup_iterations: Option<usize>,
931    firewall: Option<&CacheFirewall>,
932) {
933    if let Some(seed) = prepare_state_seed {
934        f.prepare_state(seed);
935        if let Some(firewall) = firewall {
936            firewall.issue_read();
937        }
938    }
939    if let Some(warmup_iterations) = warmup_iterations {
940        f.run(warmup_iterations);
941    }
942}
943
944fn create_sampler(settings: &MeasurementSettings, seed: u64) -> Box<dyn SampleLength> {
945    match settings.sampler_type {
946        SampleLengthKind::Flat => Box::new(FlatSampleLength::new(settings)),
947        SampleLengthKind::Linear => Box::new(LinearSampleLength::new(settings)),
948        SampleLengthKind::Random => Box::new(RandomSampleLength::new(settings, seed)),
949    }
950}
951
952fn colorize<T: Display>(value: T, do_paint: bool, is_improved: bool) -> impl Display {
953    use colorz::{ansi, mode::Stream::Stdout, Colorize, Style};
954
955    const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
956    const GREEN: Style = Style::new().fg(ansi::Green).const_into_runtime_style();
957    const DEFAULT: Style = Style::new().const_into_runtime_style();
958
959    if do_paint {
960        if is_improved {
961            value.into_style_with(GREEN).stream(Stdout)
962        } else {
963            value.into_style_with(RED).stream(Stdout)
964        }
965    } else {
966        value.into_style_with(DEFAULT).stream(Stdout)
967    }
968}
969
/// A time span in nanoseconds that is displayed using the largest fitting unit
/// (`ns`, `us`, `ms` or `s`) with one decimal digit.
struct HumanTime(f64);

impl fmt::Display for HumanTime {
    /// Formats the value and applies the width/alignment of the formatter to the whole
    /// text, unit included. The sign is preserved and an exact zero is printed as `0 ns`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const USEC: f64 = 1_000.;
        const MSEC: f64 = USEC * 1_000.;
        const SEC: f64 = MSEC * 1_000.;

        let nanos = self.0;
        let magnitude = nanos.abs();

        // Boundaries are inclusive so that an exact unit value is shown in that unit
        // (1000 ns is "1.0 us", not "1000.0 ns").
        let text = if magnitude >= SEC {
            format!("{:.1} s", nanos / SEC)
        } else if magnitude >= MSEC {
            format!("{:.1} ms", nanos / MSEC)
        } else if magnitude >= USEC {
            format!("{:.1} us", nanos / USEC)
        } else if nanos == 0. {
            return f.pad("0 ns");
        } else {
            format!("{:.1} ns", nanos)
        };
        f.pad(&text)
    }
}
991
#[cfg(test)]
mod tests {
    use super::*;

    /// `HumanTime` picks the unit by magnitude, keeps the sign and honours padding.
    #[test]
    fn check_human_time() {
        let cases = [
            (0.1, "0.1 ns"),
            (120., "120.0 ns"),
            (1200., "1.2 us"),
            (1200000., "1.2 ms"),
            (1200000000., "1.2 s"),
            (-1200000., "-1.2 ms"),
        ];
        for &(nanos, expected) in &cases {
            assert_eq!(format!("{}", HumanTime(nanos)), expected);
        }

        assert_eq!(format!("{:>5}", HumanTime(0.)), " 0 ns");
    }

    /// Sanity check of a few simple glob patterns against a typical test name.
    #[test]
    fn check_glob() {
        let input = "a/32/b";
        for &pattern in &["a/*/*", "a/**", "*/32/*", "**/b", "a/{32,64}/*"] {
            let matched = glob_match(pattern, input);
            assert!(matched, "failed to match {} against {}", pattern, input);
        }
    }
}
1026}