tango_bench/
cli.rs

1//! Contains functionality of a `cargo bench` harness
2use crate::{
3    dylib::{FunctionIdx, Spi, SpiModeKind},
4    CacheFirewall, Error, FlatSampleLength, LinearSampleLength, MeasurementSettings,
5    RandomSampleLength, SampleLength, SampleLengthKind,
6};
7use anyhow::{bail, Context};
8use clap::{ArgAction, Parser};
9use colorz::mode::{self, Mode};
10use core::fmt;
11use glob_match::glob_match;
12use std::{
13    env::{self, args, temp_dir},
14    fmt::Display,
15    fs,
16    io::{stderr, Write},
17    num::NonZeroUsize,
18    path::{Path, PathBuf},
19    process::{Command, ExitCode, Stdio},
20    str::FromStr,
21    time::Duration,
22};
23
/// Result type returned by the CLI entry points; an alias for [`anyhow::Result`].
pub type Result<T> = anyhow::Result<T>;
/// Alias for the standard library's two-parameter `Result`, which the [`Result`] alias above
/// would otherwise shadow inside this module.
pub(crate) type StdResult<T, E> = std::result::Result<T, E>;
26
// Subcommands understood by the benchmark harness.
//
// NOTE: plain `//` comments are used on purpose: clap's derive macros turn `///` doc comments
// into user-visible help text.
#[derive(Parser, Debug)]
enum BenchmarkMode {
    // Print the name of every test registered in the current executable (see `run`).
    List {
        #[command(flatten)]
        bench_flags: CargoBenchFlags,
    },
    // Paired comparison; dispatched to `paired_test::run_test`.
    Compare(PairedOpts),
    // Measure the tests of the current executable on their own; dispatched to
    // `solo_test::run_test`.
    Solo(SoloOpts),
}
36
37#[derive(Parser, Debug)]
38struct PairedOpts {
39    #[command(flatten)]
40    bench_flags: CargoBenchFlags,
41
42    /// Path to the executable to test against. Tango will test against itself if no executable given
43    path: Option<PathBuf>,
44
45    /// write CSV dumps of all the measurements in a given location
46    #[arg(short = 'd', long = "dump")]
47    path_to_dump: Option<PathBuf>,
48
49    /// generate gnuplot graphs for each test (requires --dump [path] to be specified)
50    #[arg(long = "gnuplot")]
51    gnuplot: bool,
52
53    /// seed for the random number generator or omit to use a random seed
54    #[arg(long = "seed")]
55    seed: Option<u64>,
56
57    /// Number of samples to take for each test
58    #[arg(short = 's', long = "samples")]
59    samples: Option<NonZeroUsize>,
60
61    /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random)
62    #[arg(long = "sampler")]
63    sampler: Option<SampleLengthKind>,
64
65    /// Duration of each sample in seconds
66    #[arg(short = 't', long = "time")]
67    time: Option<f64>,
68
69    /// Fail if the difference between the two measurements is greater than the given threshold in percent
70    #[arg(long = "fail-threshold")]
71    fail_threshold: Option<f64>,
72
73    /// Should we terminate early if --fail-threshold is exceed
74    #[arg(long = "fail-fast")]
75    fail_fast: bool,
76
77    /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance
78    /// (size in Kbytes)
79    #[arg(long = "cache-firewall")]
80    cache_firewall: Option<usize>,
81
82    /// Perform a randomized offset to the stack frame for each sample.
83    /// (size in bytes)
84    #[arg(long = "randomize-stack")]
85    randomize_stack: Option<usize>,
86
87    /// Delegate control back to the OS before each sample
88    #[arg(long = "yield-before-sample")]
89    yield_before_sample: Option<bool>,
90
91    /// Filter tests by name (eg. '*/{sorted,unsorted}/[0-9]*')
92    #[arg(short = 'f', long = "filter")]
93    filter: Option<String>,
94
95    /// Report only statistically significant results
96    #[arg(short = 'g', long = "significant-only", default_value_t = false)]
97    significant_only: bool,
98
99    /// Enable outlier detection
100    #[arg(short = 'o', long = "filter-outliers")]
101    filter_outliers: bool,
102
103    /// Perform warmup iterations before taking measurements (1/10 of sample iterations)
104    #[arg(long = "warmup")]
105    warmup_enabled: Option<bool>,
106
107    #[arg(short = 'p', long = "parallel")]
108    parallel: bool,
109
110    /// Quiet mode
111    #[arg(short = 'q')]
112    quiet: bool,
113
114    #[arg(short = 'v', long = "verbose", default_value_t = false)]
115    verbose: bool,
116
117    /// Disables checking proportion of the time spent in a system/kernel mode
118    #[arg(long = "no-system-time-check", default_value_t = true, action = ArgAction::SetFalse)]
119    system_time_check: bool,
120}
121
122#[derive(Parser, Debug)]
123struct SoloOpts {
124    #[command(flatten)]
125    bench_flags: CargoBenchFlags,
126
127    /// seed for the random number generator or omit to use a random seed
128    #[arg(long = "seed")]
129    seed: Option<u64>,
130
131    /// Number of samples to take for each test
132    #[arg(short = 's', long = "samples")]
133    samples: Option<NonZeroUsize>,
134
135    /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random)
136    #[arg(long = "sampler")]
137    sampler: Option<SampleLengthKind>,
138
139    /// Duration of each sample in seconds
140    #[arg(short = 't', long = "time")]
141    time: Option<f64>,
142
143    /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance
144    /// (size in Kbytes)
145    #[arg(long = "cache-firewall")]
146    cache_firewall: Option<usize>,
147
148    /// Perform a randomized offset to the stack frame for each sample.
149    /// (size in bytes)
150    #[arg(long = "randomize-stack")]
151    randomize_stack: Option<usize>,
152
153    /// Delegate control back to the OS before each sample
154    #[arg(long = "yield-before-sample")]
155    yield_before_sample: Option<bool>,
156
157    /// Filter tests by name (eg. '*/{sorted,unsorted}/[0-9]*')
158    #[arg(short = 'f', long = "filter")]
159    filter: Option<String>,
160
161    /// Perform warmup iterations before taking measurements (1/10 of sample iterations)
162    #[arg(long = "warmup")]
163    warmup_enabled: Option<bool>,
164}
165
// Top-level command line definition parsed by `run`.
//
// NOTE: plain `//` comments are used on purpose: a `///` doc comment here would be picked up
// by clap and change the generated help output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Opts {
    // Optional subcommand; `run` falls back to `BenchmarkMode::List` when it is absent.
    #[command(subcommand)]
    subcommand: Option<BenchmarkMode>,

    // Flags accepted at the top level so that a bare `cargo bench` style invocation parses.
    #[command(flatten)]
    bench_flags: CargoBenchFlags,

    // Raw value of `--color`; parsed with `Mode::from_str` in `run`, which only warns when
    // the value is not recognised.
    #[arg(long = "color", default_value = "detect")]
    coloring_mode: String,
}
178
179impl FromStr for SampleLengthKind {
180    type Err = Error;
181
182    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
183        match s {
184            "flat" => Ok(SampleLengthKind::Flat),
185            "linear" => Ok(SampleLengthKind::Linear),
186            "random" => Ok(SampleLengthKind::Random),
187            _ => Err(Error::UnknownSamplerType),
188        }
189    }
190}
191
/// Definition of the flags required to comply with `cargo bench` calling conventions.
#[derive(Parser, Debug, Clone)]
struct CargoBenchFlags {
    // Accepts a `--bench` flag (defaulting to `true`). The value is not read by any code
    // visible in this file; the flag only has to be parseable.
    #[arg(long = "bench", default_value_t = true)]
    bench: bool,
}
198
199pub fn run(settings: MeasurementSettings) -> Result<ExitCode> {
200    let opts = Opts::parse();
201
202    match Mode::from_str(&opts.coloring_mode) {
203        Ok(coloring_mode) => mode::set_coloring_mode(coloring_mode),
204        Err(_) => eprintln!("[WARN] Invalid coloring mode: {}", opts.coloring_mode),
205    }
206
207    let subcommand = opts.subcommand.unwrap_or(BenchmarkMode::List {
208        bench_flags: opts.bench_flags,
209    });
210
211    match subcommand {
212        BenchmarkMode::List { bench_flags: _ } => {
213            let spi = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
214            for func in spi.tests() {
215                println!("{}", func.name);
216            }
217            Ok(ExitCode::SUCCESS)
218        }
219        BenchmarkMode::Compare(opts) => paired_test::run_test(opts, settings),
220        BenchmarkMode::Solo(opts) => solo_test::run_test(opts, settings),
221    }
222}
223
/// Guard around a file path: the file is removed when the guard is dropped.
struct AutoDelete(PathBuf);

/// Gives read access to the wrapped path so the guard can be used wherever a path is expected.
impl std::ops::Deref for AutoDelete {
    type Target = PathBuf;

    fn deref(&self) -> &Self::Target {
        let AutoDelete(path) = self;
        path
    }
}

/// Removes the file; a failure is reported on stderr and otherwise ignored.
impl Drop for AutoDelete {
    fn drop(&mut self) {
        let path = &self.0;
        match fs::remove_file(path) {
            Ok(()) => {}
            Err(e) => eprintln!("Failed to delete file {}: {}", path.display(), e),
        }
    }
}
242
243fn create_loop_mode(samples: Option<NonZeroUsize>, time: Option<f64>) -> Result<LoopMode> {
244    let loop_mode = match (samples, time) {
245        (Some(samples), None) => LoopMode::Samples(samples.into()),
246        (None, Some(time)) => LoopMode::Time(Duration::from_millis((time * 1000.) as u64)),
247        (None, None) => LoopMode::Time(Duration::from_millis(100)),
248        (Some(_), Some(_)) => bail!("-t and -s are mutually exclusive"),
249    };
250    Ok(loop_mode)
251}
252
/// Termination criterion of a measurement loop.
#[derive(Clone, Copy)]
enum LoopMode {
    /// Stop after the given number of samples.
    Samples(usize),
    /// Stop once the accumulated loop time reaches the given duration.
    Time(Duration),
}

impl LoopMode {
    /// Tells whether another sample should be taken.
    ///
    /// `iter_no` is the number of samples taken so far and `loop_time` is the time accumulated
    /// by the loop so far. Both limits are exclusive.
    fn should_continue(&self, iter_no: usize, loop_time: Duration) -> bool {
        match *self {
            LoopMode::Time(limit) => loop_time < limit,
            LoopMode::Samples(limit) => iter_no < limit,
        }
    }
}
267
268mod solo_test {
269    use super::*;
270    use crate::{dylib::Spi, CacheFirewall, Summary};
271    use alloca::with_alloca;
272    use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
273    use std::thread;
274
275    pub(super) fn run_test(opts: SoloOpts, mut settings: MeasurementSettings) -> Result<ExitCode> {
276        let SoloOpts {
277            bench_flags: _,
278            filter,
279            samples,
280            time,
281            seed,
282            sampler,
283            cache_firewall,
284            yield_before_sample,
285            warmup_enabled,
286            randomize_stack,
287        } = opts;
288
289        let mut spi_self = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?;
290
291        settings.cache_firewall = cache_firewall;
292        settings.randomize_stack = randomize_stack;
293
294        if let Some(warmup_enabled) = warmup_enabled {
295            settings.warmup_enabled = warmup_enabled;
296        }
297        if let Some(yield_before_sample) = yield_before_sample {
298            settings.yield_before_sample = yield_before_sample;
299        }
300        if let Some(sampler) = sampler {
301            settings.sampler_type = sampler;
302        }
303
304        let filter = filter.as_deref().unwrap_or("");
305        let loop_mode = create_loop_mode(samples, time)?;
306
307        let test_names = spi_self
308            .tests()
309            .iter()
310            .map(|t| &t.name)
311            .cloned()
312            .collect::<Vec<_>>();
313        for func_name in test_names {
314            if !filter.is_empty() && !glob_match(filter, &func_name) {
315                continue;
316            }
317
318            let result = run_solo_test(&mut spi_self, &func_name, settings, seed, loop_mode)?;
319
320            reporting::default_reporter_solo(&func_name, &result);
321        }
322
323        Ok(ExitCode::SUCCESS)
324    }
325
326    fn run_solo_test(
327        spi: &mut Spi,
328        test_name: &str,
329        settings: MeasurementSettings,
330        seed: Option<u64>,
331        loop_mode: LoopMode,
332    ) -> Result<Summary<f64>> {
333        const TIME_SLICE_MS: u32 = 10;
334
335        let firewall = settings
336            .cache_firewall
337            .map(|s| s * 1024)
338            .map(CacheFirewall::new);
339        let baseline_func = spi.lookup(test_name).ok_or(Error::InvalidTestName)?;
340
341        let mut spi_func = TestedFunction::new(spi, baseline_func.idx);
342
343        let seed = seed.unwrap_or_else(rand::random);
344
345        spi_func.spi.prepare_state(seed)?;
346        let iters = spi_func.spi.estimate_iterations(TIME_SLICE_MS)?;
347        let mut iterations_per_sample = (iters / 2).max(1);
348        let mut sampler = create_sampler(&settings, seed);
349
350        let mut rng = SmallRng::seed_from_u64(seed);
351        let stack_offset_distr = settings
352            .randomize_stack
353            .map(|offset| distributions::Uniform::new(0, offset));
354
355        let mut i = 0;
356
357        let mut sample_iterations = vec![];
358
359        if let LoopMode::Samples(samples) = loop_mode {
360            sample_iterations.reserve(samples);
361            spi_func.samples.reserve(samples);
362        }
363
364        let mut loop_time = Duration::from_secs(0);
365        let mut loop_iterations = 0;
366        while loop_mode.should_continue(i, loop_time) {
367            if loop_time > Duration::from_millis(100) {
368                // correcting time slice estimates
369                iterations_per_sample =
370                    loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
371            }
372            let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
373            loop_iterations += iterations;
374            let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
375
376            if settings.yield_before_sample {
377                thread::yield_now();
378            }
379
380            let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
381
382            prepare_func(
383                prepare_state_seed,
384                &mut spi_func,
385                warmup_iterations,
386                firewall.as_ref(),
387            )?;
388
389            // Allocate a custom stack frame during runtime, to try to offset alignment of the stack.
390            if let Some(distr) = stack_offset_distr {
391                with_alloca(rng.sample(distr), |_| {
392                    spi_func.spi.measure(iterations).unwrap();
393                });
394            } else {
395                spi_func.spi.measure(iterations)?;
396            }
397
398            loop_time += Duration::from_nanos(spi_func.read_sample()?);
399            sample_iterations.push(iterations);
400            i += 1;
401        }
402
403        let samples = spi_func
404            .samples
405            .iter()
406            .zip(sample_iterations.iter())
407            .map(|(sample, iterations)| *sample as f64 / *iterations as f64)
408            .collect::<Vec<_>>();
409        Ok(Summary::from(&samples).unwrap())
410    }
411}
412
413mod paired_test {
414    use super::*;
415    use crate::{
416        calculate_run_result,
417        platform::{self, RUsage},
418        CacheFirewall, RunResult,
419    };
420    use alloca::with_alloca;
421    use fs::File;
422    use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
423    use std::{
424        io::{self, BufWriter},
425        mem, thread,
426    };
427
428    pub(super) fn run_test(
429        opts: PairedOpts,
430        mut settings: MeasurementSettings,
431    ) -> Result<ExitCode> {
432        let PairedOpts {
433            bench_flags: _,
434            path,
435            verbose,
436            filter,
437            samples,
438            time,
439            filter_outliers,
440            path_to_dump,
441            gnuplot,
442            fail_threshold,
443            fail_fast,
444            significant_only,
445            seed,
446            sampler,
447            cache_firewall,
448            yield_before_sample,
449            warmup_enabled,
450            parallel,
451            quiet,
452            randomize_stack,
453            system_time_check,
454        } = opts;
455        let mut path = path
456            .or_else(|| args().next().map(PathBuf::from))
457            .expect("No path given");
458        if path.is_relative() {
459            // Resolving paths relative to PWD if given
460            if let Ok(pwd) = env::var("PWD") {
461                path = PathBuf::from(pwd).join(path)
462            }
463        };
464
465        #[cfg(target_os = "linux")]
466        let path = crate::linux::patch_pie_binary_if_needed(&path)?.unwrap_or(path);
467
468        let mode = if parallel {
469            SpiModeKind::Asynchronous
470        } else {
471            SpiModeKind::Synchronous
472        };
473
474        let mut spi_self = Spi::for_self(mode).ok_or(Error::SpiSelfWasMoved)?;
475        let mut spi_lib = Spi::for_library(&path, mode).with_context(|| {
476            format!(
477                "Unable to load benchmark: {}. Make sure it exists and it is valid tango benchmark.",
478                path.display()
479            )
480        })?;
481
482        settings.filter_outliers = filter_outliers;
483        settings.cache_firewall = cache_firewall;
484        settings.randomize_stack = randomize_stack;
485
486        if let Some(warmup_enabled) = warmup_enabled {
487            settings.warmup_enabled = warmup_enabled;
488        }
489        if let Some(yield_before_sample) = yield_before_sample {
490            settings.yield_before_sample = yield_before_sample;
491        }
492        if let Some(sampler) = sampler {
493            settings.sampler_type = sampler;
494        }
495
496        let filter = filter.as_deref().unwrap_or("");
497        let loop_mode = create_loop_mode(samples, time)?;
498
499        let mut exit_code = ExitCode::SUCCESS;
500
501        if let Some(path) = &path_to_dump {
502            if !path.exists() {
503                fs::create_dir_all(path)?;
504            }
505        }
506        if gnuplot && path_to_dump.is_none() {
507            eprintln!("warn: --gnuplot requires -d to be specified. No plots will be generated")
508        }
509
510        let mut sample_dumps = vec![];
511
512        let test_names = spi_self
513            .tests()
514            .iter()
515            .map(|t| &t.name)
516            .cloned()
517            .collect::<Vec<_>>();
518        for func_name in test_names {
519            if !filter.is_empty() && !glob_match(filter, &func_name) {
520                continue;
521            }
522
523            if spi_lib.lookup(&func_name).is_none() {
524                if !quiet {
525                    writeln!(stderr(), "{} skipped...", &func_name)?;
526                }
527                continue;
528            }
529
530            let rusage_before = system_time_check.then(platform::rusage);
531            let (result, sample_dump) = run_paired_test(
532                &mut spi_lib,
533                &mut spi_self,
534                &func_name,
535                settings,
536                seed,
537                loop_mode,
538                path_to_dump.as_ref(),
539            )?;
540            if let Some(usage_before) = rusage_before {
541                let rusage = platform::rusage() - usage_before;
542                if detect_system_time_bias(&rusage) {
543                    reporting::report_system_time_bias(&result, &rusage);
544                }
545            }
546
547            if let Some(dump) = sample_dump {
548                sample_dumps.push(dump);
549            }
550
551            if result.diff_estimate.significant || !significant_only {
552                if verbose {
553                    reporting::verbose_reporter(&result);
554                } else {
555                    reporting::default_reporter(&result);
556                }
557            }
558
559            if result.diff_estimate.significant {
560                if let Some(threshold) = fail_threshold {
561                    if result.diff_estimate.pct >= threshold {
562                        eprintln!(
563                            "[ERROR] Performance regressed {:+.1}% >= {:.1}%  -  test: {}",
564                            result.diff_estimate.pct, threshold, func_name
565                        );
566                        if fail_fast {
567                            return Ok(ExitCode::FAILURE);
568                        } else {
569                            exit_code = ExitCode::FAILURE;
570                        }
571                    }
572                }
573            }
574        }
575
576        if gnuplot && !sample_dumps.is_empty() {
577            generate_plots(sample_dumps.as_slice())?;
578        }
579
580        Ok(exit_code)
581    }
582
583    /// Checking if test spent too much time in a system/kernel mode.
584    ///
585    /// OS doesn't provide fairness guarantees, this can influence result
586    fn detect_system_time_bias(rusage: &RUsage) -> bool {
587        // system time is at least 5% of CPU time overall
588        let system = rusage.system_time.as_secs_f64();
589        let overall = (rusage.user_time + rusage.system_time).as_secs_f64();
590        system / overall > 0.05
591    }
592
593    /// Measure the difference in performance of two functions
594    ///
595    /// Provides a way to save a raw dump of measurements into directory
596    ///
597    /// The format is as follows
598    /// ```txt
599    /// b_1,c_1
600    /// b_2,c_2
601    /// ...
602    /// b_n,c_n
603    /// ```
604    /// where `b_1..b_n` are baseline absolute time (in nanoseconds) measurements
605    /// and `c_1..c_n` are candidate time measurements
606    ///
607    /// Returns a statistical results of a test run and path to raw samples of sample dump was requested
608    fn run_paired_test(
609        baseline: &mut Spi,
610        candidate: &mut Spi,
611        test_name: &str,
612        settings: MeasurementSettings,
613        seed: Option<u64>,
614        loop_mode: LoopMode,
615        samples_dump_path: Option<&PathBuf>,
616    ) -> Result<(RunResult, Option<PathBuf>)> {
617        const TIME_SLICE_MS: u32 = 10;
618
619        let firewall = settings
620            .cache_firewall
621            .map(|s| s * 1024)
622            .map(CacheFirewall::new);
623        let baseline_func = baseline.lookup(test_name).ok_or(Error::InvalidTestName)?;
624        let candidate_func = candidate.lookup(test_name).ok_or(Error::InvalidTestName)?;
625
626        let mut baseline = TestedFunction::new(baseline, baseline_func.idx);
627        let mut candidate = TestedFunction::new(candidate, candidate_func.idx);
628
629        let mut a_func = &mut baseline;
630        let mut b_func = &mut candidate;
631
632        let seed = seed.unwrap_or_else(rand::random);
633
634        a_func
635            .spi
636            .prepare_state(seed)
637            .context("Unable to prepare benchmark state")?;
638        let a_iters = a_func
639            .spi
640            .estimate_iterations(TIME_SLICE_MS)
641            .context("Failed to estimate required iterations number")?;
642        let a_estimate = (a_iters / 2).max(1);
643
644        b_func
645            .spi
646            .prepare_state(seed)
647            .context("Unable to prepare benchmark state")?;
648        let b_iters = b_func
649            .spi
650            .estimate_iterations(TIME_SLICE_MS)
651            .context("Failed to estimate required iterations number")?;
652        let b_estimate = (b_iters / 2).max(1);
653
654        let mut iterations_per_sample = a_estimate.min(b_estimate);
655        let mut sampler = create_sampler(&settings, seed);
656
657        let mut rng = SmallRng::seed_from_u64(seed);
658        let stack_offset_distr = settings
659            .randomize_stack
660            .map(|offset| distributions::Uniform::new(0, offset));
661
662        let mut i = 0;
663        let mut switch_counter = 0;
664
665        let mut sample_iterations = vec![];
666
667        if let LoopMode::Samples(samples) = loop_mode {
668            sample_iterations.reserve(samples);
669            a_func.samples.reserve(samples);
670            b_func.samples.reserve(samples);
671        }
672
673        let mut loop_time = Duration::from_secs(0);
674        let mut loop_iterations = 0;
675        while loop_mode.should_continue(i, loop_time) {
676            if loop_time > Duration::from_millis(100) {
677                // correcting time slice estimates
678                iterations_per_sample =
679                    loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize;
680            }
681            let iterations = sampler.next_sample_iterations(i, iterations_per_sample);
682            loop_iterations += iterations;
683            let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1));
684
685            // !!! IMPORTANT !!!
686            // Algorithms should be called in different order on each new iteration.
687            // This equalize the probability of facing unfortunate circumstances like cache misses or page faults
688            // for both functions. Although both algorithms are from distinct shared objects and therefore
689            // must be fully self-contained in terms of virtual address space (each shared object has its own
690            // generator instances, static variables, memory mappings, etc.) it might be the case that
691            // on the level of physical memory both of them rely on the same memory-mapped test data, for example.
692            // In that case first function will experience the larger amount of major page faults.
693            {
694                mem::swap(&mut a_func, &mut b_func);
695                switch_counter += 1;
696            }
697
698            if settings.yield_before_sample {
699                thread::yield_now();
700            }
701
702            let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);
703            let mut sample_time = 0;
704
705            prepare_func(
706                prepare_state_seed,
707                a_func,
708                warmup_iterations,
709                firewall.as_ref(),
710            )?;
711            prepare_func(
712                prepare_state_seed,
713                b_func,
714                warmup_iterations,
715                firewall.as_ref(),
716            )?;
717
718            // Allocate a custom stack frame during runtime, to try to offset alignment of the stack.
719            if let Some(distr) = stack_offset_distr {
720                with_alloca(rng.sample(distr), |_| {
721                    a_func.spi.measure(iterations).unwrap();
722                    b_func.spi.measure(iterations).unwrap();
723                });
724            } else {
725                a_func.spi.measure(iterations)?;
726                b_func.spi.measure(iterations)?;
727            }
728
729            let a_sample_time = a_func.read_sample()?;
730            let b_sample_time = b_func.read_sample()?;
731            sample_time += a_sample_time.max(b_sample_time);
732
733            loop_time += Duration::from_nanos(sample_time);
734            sample_iterations.push(iterations);
735            i += 1;
736        }
737
738        // If we switched functions odd number of times then we need to swap them back so that
739        // the first function is always the baseline.
740        if switch_counter % 2 != 0 {
741            mem::swap(&mut a_func, &mut b_func);
742        }
743
744        let run_result = calculate_run_result(
745            test_name,
746            &a_func.samples,
747            &b_func.samples,
748            &sample_iterations,
749            settings.filter_outliers,
750        )
751        .ok_or(Error::NoMeasurements)?;
752
753        let samples_path = if let Some(path) = samples_dump_path {
754            let file_path = write_samples(path, test_name, a_func, b_func, sample_iterations)?;
755            Some(file_path)
756        } else {
757            None
758        };
759
760        Ok((run_result, samples_path))
761    }
762
763    fn write_samples(
764        path: &Path,
765        test_name: &str,
766        a_func: &TestedFunction,
767        b_func: &TestedFunction,
768        iterations: Vec<usize>,
769    ) -> Result<PathBuf> {
770        let file_name = format!("{}.csv", test_name.replace('/', "-"));
771        let file_path = path.join(file_name);
772        let s_samples = a_func.samples.iter().copied();
773        let b_samples = b_func.samples.iter().copied();
774        let values = s_samples
775            .zip(b_samples)
776            .zip(iterations.iter().copied())
777            .map(|((a, b), c)| (a, b, c));
778        write_csv(&file_path, values).context("Unable to write raw measurements")?;
779        Ok(file_path)
780    }
781
782    fn write_csv<A: Display, B: Display, C: Display>(
783        path: impl AsRef<Path>,
784        values: impl IntoIterator<Item = (A, B, C)>,
785    ) -> io::Result<()> {
786        let mut file = BufWriter::new(File::create(path)?);
787        for (a, b, c) in values {
788            writeln!(&mut file, "{},{},{}", a, b, c)?;
789        }
790        Ok(())
791    }
792
793    fn generate_plots(sample_dumps: &[PathBuf]) -> Result<()> {
794        let gnuplot_file = AutoDelete(temp_dir().join("tango-plot.gnuplot"));
795        fs::write(&*gnuplot_file, include_bytes!("plot.gnuplot"))?;
796        let gnuplot_file_str = gnuplot_file.to_str().unwrap();
797
798        for input in sample_dumps {
799            let csv_input = input.to_str().unwrap();
800            let svg_path = input.with_extension("svg");
801            let cmd = Command::new("gnuplot")
802                .args([
803                    "-c",
804                    gnuplot_file_str,
805                    csv_input,
806                    svg_path.to_str().unwrap(),
807                ])
808                .stdin(Stdio::null())
809                .stdout(Stdio::inherit())
810                .stderr(Stdio::inherit())
811                .status()
812                .context("Failed to execute gnuplot")?;
813
814            if !cmd.success() {
815                bail!("gnuplot command failed");
816            }
817        }
818        Ok(())
819    }
820}
821
822mod reporting {
823    use crate::{
824        cli::{colorize, HumanTime},
825        platform::RUsage,
826        RunResult, Summary,
827    };
828    use colorz::{ansi, mode::Stream, Colorize, Style};
829
830    pub(super) fn verbose_reporter(results: &RunResult) {
831        let base = results.baseline;
832        let candidate = results.candidate;
833
834        let significant = results.diff_estimate.significant;
835
836        println!(
837            "{}  (n: {}, outliers: {})",
838            results.name.bold().stream(Stream::Stdout),
839            results.diff.n,
840            results.outliers
841        );
842
843        println!(
844            "    {:12}   {:>15} {:>15} {:>15}",
845            "",
846            "baseline".bold().stream(Stream::Stdout),
847            "candidate".bold().stream(Stream::Stdout),
848            "∆".bold().stream(Stream::Stdout),
849        );
850        println!(
851            "    {:12} ╭────────────────────────────────────────────────",
852            ""
853        );
854        println!(
855            "    {:12} │ {:>15} {:>15} {:>15}  {:+4.2}{}{}",
856            "mean",
857            HumanTime(base.mean),
858            HumanTime(candidate.mean),
859            colorize(
860                HumanTime(results.diff.mean),
861                significant,
862                results.diff.mean < 0.
863            ),
864            colorize(
865                results.diff_estimate.pct,
866                significant,
867                results.diff.mean < 0.
868            ),
869            colorize("%", significant, results.diff.mean < 0.),
870            if significant { "*" } else { "" },
871        );
872        println!(
873            "    {:12} │ {:>15} {:>15} {:>15}",
874            "min",
875            HumanTime(base.min),
876            HumanTime(candidate.min),
877            HumanTime(candidate.min - base.min)
878        );
879        println!(
880            "    {:12} │ {:>15} {:>15} {:>15}",
881            "max",
882            HumanTime(base.max),
883            HumanTime(candidate.max),
884            HumanTime(candidate.max - base.max),
885        );
886        println!(
887            "    {:12} │ {:>15} {:>15} {:>15}",
888            "std. dev.",
889            HumanTime(base.variance.sqrt()),
890            HumanTime(candidate.variance.sqrt()),
891            HumanTime(results.diff.variance.sqrt()),
892        );
893        println!();
894    }
895
896    pub(super) fn default_reporter(results: &RunResult) {
897        let base = results.baseline;
898        let candidate = results.candidate;
899        let diff = results.diff;
900
901        let significant = results.diff_estimate.significant;
902
903        let speedup = results.diff_estimate.pct;
904        let candidate_faster = diff.mean < 0.;
905        println!(
906            "{:50} [ {:>8} ... {:>8} ]    {:>+7.2}{}{}",
907            colorize(&results.name, significant, candidate_faster),
908            HumanTime(base.mean),
909            colorize(HumanTime(candidate.mean), significant, candidate_faster),
910            colorize(speedup, significant, candidate_faster),
911            colorize("%", significant, candidate_faster),
912            if significant { "*" } else { "" },
913        )
914    }
915
916    pub(super) fn default_reporter_solo(name: &str, results: &Summary<f64>) {
917        println!(
918            "{:50}  [ {:>8} ... {:>8} ... {:>8} ]  stddev: {:>8}",
919            name,
920            HumanTime(results.min),
921            HumanTime(results.mean),
922            HumanTime(results.max),
923            HumanTime(results.variance.sqrt()),
924        )
925    }
926
927    pub(super) fn report_system_time_bias(result: &RunResult, rusage: &RUsage) {
928        const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
929
930        eprintln!(
931            "{}: {} benchmark spent too much time in system mode (sys: {:?}, usr: {:?}). Results may be inaccurate",
932            "WARN".into_style_with(RED).stream(Stream::Stderr),
933            &result.name,
934            rusage.system_time,
935            rusage.user_time
936        );
937    }
938}
939
940struct TestedFunction<'a> {
941    pub(crate) spi: &'a mut Spi,
942    pub(crate) samples: Vec<u64>,
943}
944
945impl<'a> TestedFunction<'a> {
946    pub(crate) fn new(spi: &'a mut Spi, func: FunctionIdx) -> Self {
947        spi.select(func);
948        TestedFunction {
949            spi,
950            samples: Vec::new(),
951        }
952    }
953
954    pub(crate) fn read_sample(&mut self) -> Result<u64> {
955        let sample = self.spi.read_sample().context("Unable to read sample")?;
956        self.samples.push(sample);
957        Ok(sample)
958    }
959
960    pub(crate) fn run(&mut self, iterations: usize) -> Result<u64> {
961        self.spi
962            .run(iterations)
963            .context("Unable to run measurement")
964    }
965}
966
967fn prepare_func(
968    prepare_state_seed: Option<u64>,
969    f: &mut TestedFunction,
970    warmup_iterations: Option<usize>,
971    firewall: Option<&CacheFirewall>,
972) -> Result<()> {
973    if let Some(seed) = prepare_state_seed {
974        f.spi.prepare_state(seed)?;
975        if let Some(firewall) = firewall {
976            firewall.issue_read();
977        }
978    }
979    if let Some(warmup_iterations) = warmup_iterations {
980        f.run(warmup_iterations)?;
981    }
982    Ok(())
983}
984
985fn create_sampler(settings: &MeasurementSettings, seed: u64) -> Box<dyn SampleLength> {
986    match settings.sampler_type {
987        SampleLengthKind::Flat => Box::new(FlatSampleLength::new(settings)),
988        SampleLengthKind::Linear => Box::new(LinearSampleLength::new(settings)),
989        SampleLengthKind::Random => Box::new(RandomSampleLength::new(settings, seed)),
990    }
991}
992
993fn colorize<T: Display>(value: T, do_paint: bool, is_improved: bool) -> impl Display {
994    use colorz::{ansi, mode::Stream::Stdout, Colorize, Style};
995
996    const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
997    const GREEN: Style = Style::new().fg(ansi::Green).const_into_runtime_style();
998    const DEFAULT: Style = Style::new().const_into_runtime_style();
999
1000    if do_paint {
1001        if is_improved {
1002            value.into_style_with(GREEN).stream(Stdout)
1003        } else {
1004            value.into_style_with(RED).stream(Stdout)
1005        }
1006    } else {
1007        value.into_style_with(DEFAULT).stream(Stdout)
1008    }
1009}
1010
/// A duration in nanoseconds that displays itself in the largest fitting unit
/// (`ns`, `us`, `ms` or `s`) with one decimal digit.
struct HumanTime(f64);

impl fmt::Display for HumanTime {
    /// Formats the value using the unit chosen by its magnitude.
    ///
    /// The sign is preserved, an exact zero is rendered as `0 ns`, and the formatter's
    /// width/alignment settings are honored through [`fmt::Formatter::pad`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const USEC: f64 = 1_000.;
        const MSEC: f64 = USEC * 1_000.;
        const SEC: f64 = MSEC * 1_000.;

        let magnitude = self.0.abs();

        // Thresholds are inclusive so that an exact boundary value is shown in the larger
        // unit (`1.0 us`) instead of overflowing the smaller one (`1000.0 ns`).
        if magnitude >= SEC {
            f.pad(&format!("{:.1} s", self.0 / SEC))
        } else if magnitude >= MSEC {
            f.pad(&format!("{:.1} ms", self.0 / MSEC))
        } else if magnitude >= USEC {
            f.pad(&format!("{:.1} us", self.0 / USEC))
        } else if self.0 == 0. {
            f.pad("0 ns")
        } else {
            f.pad(&format!("{:.1} ns", self.0))
        }
    }
}
1032
#[cfg(test)]
mod tests {
    use super::*;

    /// `HumanTime` picks its unit by magnitude and honors formatter padding.
    #[test]
    fn check_human_time() {
        let cases = [
            (0.1, "0.1 ns"),
            (120., "120.0 ns"),
            (1200., "1.2 us"),
            (1200000., "1.2 ms"),
            (1200000000., "1.2 s"),
            (-1200000., "-1.2 ms"),
        ];
        for (nanos, expected) in cases {
            assert_eq!(HumanTime(nanos).to_string(), expected);
        }

        // Zero has its own representation and must still respect width/alignment
        assert_eq!(format!("{:>5}", HumanTime(0.)), " 0 ns");
    }

    // Sanity checking some simple patterns
    #[test]
    fn check_glob() {
        const INPUT: &str = "a/32/b";

        for pattern in ["a/*/*", "a/**", "*/32/*", "**/b", "a/{32,64}/*"] {
            let matched = glob_match(pattern, INPUT);
            assert!(matched, "failed to match {} against {}", pattern, INPUT);
        }
    }
}
tango_bench/cli.rs

tango_bench/
cli.rs