diskann_benchmark_runner/app.rs

/*
 * Copyright (c) Microsoft Corporation.
 * Licensed under the MIT license.
 */

//! The CLI frontend for benchmark applications built with this crate.
//!
//! [`App`] provides a [`clap`]-based command line interface that handles input parsing,
//! benchmark dispatch, and regression checking. Consumers build a binary by registering
//! [`Input`](crate::Input)s and [`Benchmark`](crate::Benchmark)s, then forwarding to
//! [`App::parse`] and [`App::run`].
//!
//! # Subcommands
//!
//! ## Standard Workflow
//!
//! * `inputs [NAME]`: List available input kinds, or describe one by name.
//! * `benchmarks`: List registered benchmarks and their descriptions.
//! * `skeleton`: Print a skeleton input JSON file.
//! * `run --input-file <FILE> --output-file <FILE> [--dry-run] [--allow-debug]`: Run benchmarks.
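//!
//! A typical standard-workflow session might look like the following sketch
//! (binary and file names are illustrative):
//!
//! ```text
//! my-bench inputs              # list input kinds
//! my-bench inputs <NAME>       # describe one input kind
//! my-bench skeleton            # print a skeleton to adapt into an input file
//! my-bench run --input-file input.json --output-file results.json --dry-run
//! my-bench run --input-file input.json --output-file results.json
//! ```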
//!
//! ## Regression Checks
//!
//! These are accessed via `check <SUBCOMMAND>`:
//!
//! * `check skeleton`: Print a skeleton tolerance JSON file.
//! * `check tolerances [NAME]`: List tolerance kinds, or describe one by name.
//! * `check verify --tolerances <FILE> --input-file <FILE>`: Validate a tolerance file
//!   against an input file.
//! * `check run --tolerances <FILE> --input-file <FILE> --before <FILE> --after <FILE> [--output-file <FILE>]`:
//!   Run regression checks.
//!
//! # Example
//!
//! A typical binary using this crate:
//!
//! ```rust,no_run
//! use diskann_benchmark_runner::{App, registry};
//!
//! fn main() -> anyhow::Result<()> {
//!     let mut inputs = registry::Inputs::new();
//!     // inputs.register::<MyInput>()?;
//!
//!     let mut benchmarks = registry::Benchmarks::new();
//!     // benchmarks.register::<MyBenchmark>("my-bench");
//!     // benchmarks.register_regression::<MyRegressionBenchmark>("my-regression");
//!
//!     let app = App::parse();
//!     let mut output = diskann_benchmark_runner::output::default();
//!     app.run(&inputs, &benchmarks, &mut output)
//! }
//! ```
//!
//! # Regression Workflow
//!
//! 1. Run benchmarks twice (e.g. before and after a code change) with `run`, producing
//!    two output files.
//! 2. Author a tolerance file describing acceptable variation (use `check skeleton` and
//!    `check tolerances` for guidance).
//! 3. Validate the tolerance file with `check verify`.
//! 4. Compare the two output files with `check run`.
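//!
//! A sketch of that loop from a shell (binary and file names are illustrative):
//!
//! ```text
//! my-bench run --input-file input.json --output-file before.json
//! # ...apply the code change and rebuild...
//! my-bench run --input-file input.json --output-file after.json
//! my-bench check verify --tolerances tolerances.json --input-file input.json
//! my-bench check run --tolerances tolerances.json --input-file input.json \
//!     --before before.json --after after.json
//! ```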

use std::{io::Write, path::PathBuf};

use clap::{Parser, Subcommand};

use crate::{
    internal,
    jobs::{self, Jobs},
    output::Output,
    registry,
    result::Checkpoint,
    utils::fmt::Banner,
};

/// Error when running in debug mode, unless `allow_debug` is set.
fn check_debug_mode(allow_debug: bool) -> anyhow::Result<()> {
    // Unit tests are treated as debug mode to ensure consistent behavior across builds.
    if cfg!(any(test, debug_assertions)) && !allow_debug {
        anyhow::bail!(
            "Benchmarking in debug mode produces misleading performance results.\n\
             Please compile in release mode or use the --allow-debug flag to bypass this check."
        );
    }
    Ok(())
}

/// Parsed command line options.
#[derive(Debug, Subcommand)]
pub enum Commands {
    /// List the kinds of input formats available for ingestion.
    Inputs {
        /// Describe the layout of the named input kind.
        describe: Option<String>,
    },
    /// List the available benchmarks.
    Benchmarks {},
    /// Provide a skeleton JSON file for running a set of benchmarks.
    Skeleton,
    /// Run a list of benchmarks.
    Run {
        /// The input file to run.
        #[arg(long = "input-file")]
        input_file: PathBuf,
        /// The path where the output file should reside.
        #[arg(long = "output-file")]
        output_file: PathBuf,
        /// Parse an input file and perform all validation checks, but don't actually run any
        /// benchmarks.
        #[arg(long, action)]
        dry_run: bool,
        /// Allow running benchmarks in debug mode (not recommended).
        #[arg(long, action)]
        allow_debug: bool,
    },
    #[command(subcommand)]
    Check(Check),
}

/// Subcommands for regression check operations.
#[derive(Debug, Subcommand)]
pub enum Check {
    /// Provide a skeleton of the overall tolerance files.
    Skeleton,
    /// List all the tolerance inputs accepted by the benchmark executable.
    Tolerances {
        /// Describe the layout for the named tolerance kind.
        describe: Option<String>,
    },
    /// Verify the tolerance file against the accompanying input file.
    Verify {
        /// The tolerance file to check.
        #[arg(long = "tolerances")]
        tolerances: PathBuf,
        /// The benchmark input file used to generate the data that will be compared.
        #[arg(long = "input-file")]
        input_file: PathBuf,
    },
    /// Run regression checks against before/after output files.
    Run {
        /// The tolerance file to check.
        #[arg(long = "tolerances")]
        tolerances: PathBuf,
        /// The benchmark input file used to generate the data that will be compared.
        #[arg(long = "input-file")]
        input_file: PathBuf,
        /// The `--output-file` from a benchmark run to use as a baseline.
        #[arg(long = "before")]
        before: PathBuf,
        /// The `--output-file` that will be checked for regressions against `before`.
        #[arg(long = "after")]
        after: PathBuf,
        /// Optional path to write the JSON check results.
        #[arg(long = "output-file")]
        output_file: Option<PathBuf>,
    },
}

/// The CLI used to drive a benchmark application.
#[derive(Debug, Parser)]
pub struct App {
    #[command(subcommand)]
    command: Commands,
}

impl App {
    /// Construct [`Self`] by parsing command line arguments from [`std::env::args`].
    ///
    /// This simply redirects to [`clap::Parser::parse`] and is provided to allow parsing
    /// without the [`clap::Parser`] trait in scope.
    pub fn parse() -> Self {
        <Self as clap::Parser>::parse()
    }

    /// Construct [`Self`] by parsing command line arguments from the given iterator.
    ///
    /// This simply redirects to [`clap::Parser::try_parse_from`] and is provided to allow
    /// parsing without the [`clap::Parser`] trait in scope.
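    ///
    /// A minimal sketch (the leading binary name is illustrative; `clap` skips it):
    ///
    /// ```rust,no_run
    /// use diskann_benchmark_runner::App;
    ///
    /// let app = App::try_parse_from(["my-bench", "benchmarks"]).unwrap();
    /// ```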
    pub fn try_parse_from<I, T>(itr: I) -> anyhow::Result<Self>
    where
        I: IntoIterator<Item = T>,
        T: Into<std::ffi::OsString> + Clone,
    {
        Ok(<Self as clap::Parser>::try_parse_from(itr)?)
    }

    /// Construct [`Self`] directly from a [`Commands`] enum.
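    ///
    /// A minimal sketch of programmatic construction, bypassing argument parsing
    /// (assumes `Commands` is in scope):
    ///
    /// ```rust,ignore
    /// let app = App::from_commands(Commands::Skeleton);
    /// ```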
    pub fn from_commands(command: Commands) -> Self {
        Self { command }
    }

    /// Run the application using the registered `inputs` and `benchmarks`.
    pub fn run(
        &self,
        inputs: &registry::Inputs,
        benchmarks: &registry::Benchmarks,
        mut output: &mut dyn Output,
    ) -> anyhow::Result<()> {
        match &self.command {
            // If a named input isn't given, list the available input kinds.
            Commands::Inputs { describe } => {
                if let Some(describe) = describe {
                    if let Some(input) = inputs.get(describe) {
                        let repr = jobs::Unprocessed::format_input(input)?;
                        writeln!(
                            output,
                            "The example JSON representation for \"{}\" is:",
                            describe
                        )?;
                        writeln!(output, "{}", serde_json::to_string_pretty(&repr)?)?;
                    } else {
                        writeln!(output, "No input found for \"{}\"", describe)?;
                    }

                    return Ok(());
                }

                writeln!(output, "Available input kinds are listed below:")?;
                let mut tags: Vec<_> = inputs.tags().collect();
                tags.sort();
                for i in tags.iter() {
                    writeln!(output, "    {}", i)?;
                }
            }
            // List the available benchmarks.
            Commands::Benchmarks {} => {
                writeln!(output, "Registered Benchmarks:")?;
                for (name, description) in benchmarks.names() {
                    let mut lines = description.lines();
                    if let Some(first) = lines.next() {
                        writeln!(output, "    {}: {}", name, first)?;
                        for line in lines {
                            writeln!(output, "        {}", line)?;
                        }
                    } else {
                        writeln!(output, "    {}: <no description>", name)?;
                    }
                }
            }
            Commands::Skeleton => {
                writeln!(output, "Skeleton input file:")?;
                writeln!(output, "{}", Jobs::example()?)?;
            }
            // Run the benchmarks.
            Commands::Run {
                input_file,
                output_file,
                dry_run,
                allow_debug,
            } => {
                // Parse and validate the input.
                let run = Jobs::load(input_file, inputs)?;
                // Check that each job has a matching benchmark.
                for job in run.jobs().iter() {
                    const MAX_METHODS: usize = 3;
                    if let Err(mismatches) = benchmarks.debug(job, MAX_METHODS) {
                        let repr = serde_json::to_string_pretty(&job.serialize()?)?;

                        writeln!(
                            output,
                            "Could not find a match for the following input:\n\n{}\n",
                            repr
                        )?;
                        writeln!(output, "Closest matches:\n")?;
                        for (i, mismatch) in mismatches.into_iter().enumerate() {
                            writeln!(
                                output,
                                "    {}. \"{}\": {}",
                                i + 1,
                                mismatch.method(),
                                mismatch.reason(),
                            )?;
                        }
                        writeln!(output)?;

                        return Err(anyhow::Error::msg(
                            "could not find a benchmark for all inputs",
                        ));
                    }
                }

                if *dry_run {
                    writeln!(
                        output,
                        "Success - skipping running benchmarks because \"--dry-run\" was used."
                    )?;
                    return Ok(());
                }

                // Check for debug mode before running benchmarks.
                // This check is placed after the dry-run early return since a dry run doesn't
                // actually execute benchmarks and thus won't produce misleading performance results.
                check_debug_mode(*allow_debug)?;

                // The collection of output results for each run.
                let mut results = Vec::<serde_json::Value>::new();

                // Now - we've verified the integrity of all the jobs we want to run and that
                // each job can match an associated benchmark.
                //
                // All that's left is to actually run the benchmarks.
                let jobs = run.jobs();
                let serialized = jobs
                    .iter()
                    .map(|job| {
                        serde_json::to_value(jobs::Unprocessed::new(
                            job.tag().into(),
                            job.serialize()?,
                        ))
                    })
                    .collect::<Result<Vec<_>, serde_json::Error>>()?;
                for (i, job) in jobs.iter().enumerate() {
                    let prefix: &str = if i != 0 { "\n\n" } else { "" };
                    writeln!(
                        output,
                        "{}{}",
                        prefix,
                        Banner::new(&format!("Running Job {} of {}", i + 1, jobs.len()))
                    )?;

                    // Run the specified job.
                    let checkpoint = Checkpoint::new(&serialized, &results, output_file)?;
                    let r = benchmarks.call(job, checkpoint, output)?;

                    // Collect the results.
                    results.push(r);

                    // Save everything.
                    Checkpoint::new(&serialized, &results, output_file)?.save()?;
                }
            }
            // Extensions
            Commands::Check(check) => return self.check(check, inputs, benchmarks, output),
        };
        Ok(())
    }

    // Extensions
    fn check(
        &self,
        check: &Check,
        inputs: &registry::Inputs,
        benchmarks: &registry::Benchmarks,
        mut output: &mut dyn Output,
    ) -> anyhow::Result<()> {
        match check {
            Check::Skeleton => {
                let message = "Skeleton tolerance file.\n\n\
                               Each tolerance is paired with an input that is structurally\n\
                               matched with an entry in the corresponding `--input-file`.\n\n\
                               This allows a single tolerance entry to be applied to multiple\n\
                               benchmark runs as long as this structural mapping is unambiguous.\n";

                writeln!(output, "{}", message)?;
                writeln!(output, "{}", internal::regression::Raw::example())?;
                Ok(())
            }
            Check::Tolerances { describe } => {
                let tolerances = benchmarks.tolerances();

                match describe {
                    Some(name) => match tolerances.get(&**name) {
                        Some(registered) => {
                            let repr = internal::regression::RawInner::new(
                                jobs::Unprocessed::new(
                                    "".to_string(),
                                    serde_json::Value::Object(Default::default()),
                                ),
                                jobs::Unprocessed::format_input(registered.tolerance)?,
                            );

                            write!(
                                output,
                                "The example JSON representation for \"{}\" is shown below.\n\
                                 Populate the \"input\" field with a compatible benchmark input.\n\
                                 Matching will be performed by a partial structural match on the input.\n\n",
                                name
                            )?;
                            writeln!(output, "{}", serde_json::to_string_pretty(&repr)?)?;
                            Ok(())
                        }
                        None => {
                            writeln!(output, "No tolerance input found for \"{}\"", name)?;
                            Ok(())
                        }
                    },
                    None => {
                        writeln!(output, "Available tolerance kinds are listed below.")?;

                        // Print the registered tolerance kinds in alphabetical order.
                        let mut keys: Vec<_> = tolerances.keys().collect();
                        keys.sort();
                        for k in keys {
                            // This access should not panic - we just obtained all the keys.
                            let registered = &tolerances[k];
                            writeln!(output, "    {}", registered.tolerance.tag())?;
                            for pair in registered.regressions.iter() {
                                writeln!(
                                    output,
                                    "    - \"{}\" => \"{}\"",
                                    pair.input_tag(),
                                    pair.name(),
                                )?;
                            }
                        }
                        Ok(())
                    }
                }
            }
            Check::Verify {
                tolerances,
                input_file,
            } => {
                // For verification - we merely check that we can successfully construct
                // the regression `Checks` struct. It performs all the necessary preflight
                // checks.
                let benchmarks = benchmarks.tolerances();
                let _ =
                    internal::regression::Checks::new(tolerances, input_file, inputs, &benchmarks)?;
                Ok(())
            }
            Check::Run {
                tolerances,
                input_file,
                before,
                after,
                output_file,
            } => {
                let registered = benchmarks.tolerances();
                let checks =
                    internal::regression::Checks::new(tolerances, input_file, inputs, &registered)?;
                let jobs = checks.jobs(before, after)?;
                jobs.run(output, output_file.as_deref())?;
                Ok(())
            }
        }
    }
}

///////////
// Tests //
///////////

/// The integration tests below look inside the `tests` directory for scenario folders.
///
/// ## Input Files
///
/// Each folder should have at least a `stdin.txt` file specifying the command line to give
/// to the `App` parser.
///
/// Within the `stdin.txt` command line, there are several special symbols:
///
/// * $INPUT - Resolves to `input.json` in the same directory as the `stdin.txt` file.
/// * $OUTPUT - Resolves to `output.json` in a temporary directory.
/// * $TOLERANCES - Resolves to `tolerances.json` in the test directory.
/// * $REGRESSION_INPUT - Resolves to `regression_input.json` in the test directory.
/// * $CHECK_OUTPUT - Resolves to `checks.json` in a temporary directory.
///
/// As mentioned - an input JSON file can be included and must be named "input.json" to be
/// discoverable.
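///
/// A minimal `stdin.txt` might look like the following (contents illustrative). Lines
/// starting with `#` and blank lines are skipped. Note that `--allow-debug` is needed
/// for a real run because unit tests are treated as debug mode:
///
/// ```text
/// # Validate the input first, then do the real run.
/// run --input-file $INPUT --output-file $OUTPUT --dry-run
/// run --input-file $INPUT --output-file $OUTPUT --allow-debug
/// ```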
///
/// ## Output Files
///
/// Tests should have at least a `stdout.txt` file with the expected output for running the
/// commands in `stdin.txt`. If an output JSON file is expected, it should be named `output.json`.
///
/// ## Test Discovery and Running
///
/// The unit test will visit each folder in `tests` and run the outlined scenario. The
/// `stdout.txt` expected output is compared to the actual output, and if they do not match,
/// the test fails.
///
/// Additionally, if `output.json` is present, the unit test will verify that (1) the command
/// did in fact produce an output JSON file and (2) the generated file matches the expected file.
///
/// ## Regenerating Expected Results
///
/// The benchmark output will naturally change over time. Running the unit tests with the
/// environment variable
/// ```text
/// POCKETBENCH_TEST=overwrite
/// ```
/// will replace the `stdout.txt` (and `output.json`, if one was generated) for each test
/// scenario. Developers should then consult `git diff` to ensure that no major regressions
/// to the output occurred.
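///
/// For example, from a shell (the test-runner invocation is illustrative):
///
/// ```text
/// POCKETBENCH_TEST=overwrite cargo test
/// git diff tests/
/// ```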
#[cfg(test)]
mod tests {
    use super::*;

    use std::{
        ffi::OsString,
        path::{Path, PathBuf},
    };

    use crate::{registry, ux};

    const ENV: &str = "POCKETBENCH_TEST";

    // Expected I/O files.
    const STDIN: &str = "stdin.txt";
    const STDOUT: &str = "stdout.txt";
    const INPUT_FILE: &str = "input.json";
    const OUTPUT_FILE: &str = "output.json";

    // Regression Extension
    const TOLERANCES_FILE: &str = "tolerances.json";
    const REGRESSION_INPUT_FILE: &str = "regression_input.json";
    const CHECK_OUTPUT_FILE: &str = "checks.json";

    const ALL_GENERATED_OUTPUTS: [&str; 2] = [OUTPUT_FILE, CHECK_OUTPUT_FILE];

    // Read the entire contents of a file to a string.
    fn read_to_string<P: AsRef<Path>>(path: P, ctx: &str) -> String {
        match std::fs::read_to_string(path.as_ref()) {
            Ok(s) => ux::normalize(s),
            Err(err) => panic!(
                "failed to read {} {:?} with error: {}",
                ctx,
                path.as_ref(),
                err
            ),
        }
    }

    // Check if `POCKETBENCH_TEST=overwrite` is configured. Return `true` if so - otherwise
    // return `false`.
    //
    // If `POCKETBENCH_TEST` is set but its value is not `overwrite` - panic.
    fn overwrite() -> bool {
        match std::env::var(ENV) {
            Ok(v) => {
                if v == "overwrite" {
                    true
                } else {
                    panic!(
                        "Unknown value for {}: \"{}\". Expected \"overwrite\"",
                        ENV, v
                    );
                }
            }
            Err(std::env::VarError::NotPresent) => false,
            Err(std::env::VarError::NotUnicode(_)) => {
                panic!("Value for {} is not unicode", ENV);
            }
        }
    }

    // Test Runner
    struct Test {
        dir: PathBuf,
        overwrite: bool,
    }

    impl Test {
        fn new(dir: &Path) -> Self {
            Self {
                dir: dir.into(),
                overwrite: overwrite(),
            }
        }

        fn parse_stdin(&self, tempdir: &Path) -> Vec<App> {
            let path = self.dir.join(STDIN);

            // Read the standard input file to a string.
            let stdin = read_to_string(&path, "standard input");

            let output: Vec<App> = stdin
                .lines()
                .filter_map(|line| {
                    if line.starts_with('#') || line.is_empty() {
                        None
                    } else {
                        Some(self.parse_line(line, tempdir))
                    }
                })
                .collect();

            if output.is_empty() {
                panic!("File \"{}/stdin.txt\" has no commands!", self.dir.display());
            }

            output
        }

        fn parse_line(&self, line: &str, tempdir: &Path) -> App {
            // Split and resolve special symbols.
            let args: Vec<OsString> = line
                .split_whitespace()
                .map(|v| -> OsString { self.resolve(v, tempdir).into() })
                .collect();

            App::try_parse_from(std::iter::once(OsString::from("test-app")).chain(args)).unwrap()
        }

        fn resolve(&self, s: &str, tempdir: &Path) -> PathBuf {
            match s {
                // Standard workflow
                "$INPUT" => self.dir.join(INPUT_FILE),
                "$OUTPUT" => tempdir.join(OUTPUT_FILE),
                // Regression extension
                "$TOLERANCES" => self.dir.join(TOLERANCES_FILE),
                "$REGRESSION_INPUT" => self.dir.join(REGRESSION_INPUT_FILE),
                "$CHECK_OUTPUT" => tempdir.join(CHECK_OUTPUT_FILE),

                // Catch-all: no interpolation
                _ => s.into(),
            }
        }

        fn run(&self, tempdir: &Path) {
            let apps = self.parse_stdin(tempdir);

            // Register inputs
            let mut inputs = registry::Inputs::new();
            crate::test::register_inputs(&mut inputs).unwrap();

            // Register benchmarks
            let mut benchmarks = registry::Benchmarks::new();
            crate::test::register_benchmarks(&mut benchmarks);

            // Run each app invocation - collecting the last output into a buffer.
            //
            // Only the last run is allowed to return an error - if it does, format the
            // error to the output buffer as well using the debug formatting option.
            let mut buffer = crate::output::Memory::new();
            for (i, app) in apps.iter().enumerate() {
                let is_last = i + 1 == apps.len();

                // Select where to route the test output.
                //
                // Only the last run gets saved. Setup output is discarded; if a setup
                // command fails, the panic message includes the error.
                //
                // The sink is bound to a local so the borrow outlives the `if` expression.
                let mut sink = crate::output::Sink::new();
                let b: &mut dyn crate::Output = if is_last { &mut buffer } else { &mut sink };

                if let Err(err) = app.run(&inputs, &benchmarks, b) {
                    if is_last {
                        write!(b, "{:?}", err).unwrap();
                    } else {
                        panic!(
                            "App {} of {} failed with error: {:?}",
                            i + 1,
                            apps.len(),
                            err
                        );
                    }
                }
            }

            // Check that `stdout` matches.
            let stdout: String =
                ux::normalize(ux::strip_backtrace(buffer.into_inner().try_into().unwrap()));
            let stdout = ux::scrub_path(stdout, tempdir, "$TEMPDIR");
            let output = self.dir.join(STDOUT);
            if self.overwrite {
                std::fs::write(output, stdout).unwrap();
            } else {
                let expected = read_to_string(&output, "expected standard output");
                if stdout != expected {
                    panic!("Got:\n--\n{}\n--\nExpected:\n--\n{}\n--", stdout, expected);
                }
            }

            // Check that the output files match.
            for file in ALL_GENERATED_OUTPUTS {
                self.check_output_file(tempdir, file);
            }
        }

        fn check_output_file(&self, tempdir: &Path, filename: &str) {
            let generated_path = tempdir.join(filename);
            let was_generated = generated_path.is_file();

            let expected_path = self.dir.join(filename);
            let is_expected = expected_path.is_file();

            if self.overwrite {
                // Move the generated file to the expected location.
                if was_generated {
                    println!(
                        "Moving generated file {:?} to {:?}",
                        generated_path, expected_path
                    );

                    if let Err(err) = std::fs::rename(&generated_path, &expected_path) {
                        panic!(
                            "Moving generated file {:?} to expected location {:?} failed: {}",
                            generated_path, expected_path, err
                        );
                    }
                } else if is_expected {
                    println!("Removing outdated file {:?}", expected_path);
                    if let Err(err) = std::fs::remove_file(&expected_path) {
                        panic!("Failed removing outdated file {:?}: {}", expected_path, err);
                    }
                }
            } else {
                match (was_generated, is_expected) {
                    (true, true) => {
                        let output_contents = read_to_string(generated_path, "generated");
                        let expected_contents = read_to_string(expected_path, "expected");

                        if output_contents != expected_contents {
                            panic!(
                                "{}: Got:\n\n{}\n\nExpected:\n\n{}\n",
                                filename, output_contents, expected_contents
                            );
                        }
                    }
                    (true, false) => {
                        let output_contents = read_to_string(generated_path, "generated");

                        panic!(
                            "{} was generated when none was expected. Contents:\n\n{}",
                            filename, output_contents
                        );
                    }
                    (false, true) => {
                        panic!("{} was not generated when it was expected", filename);
                    }
                    (false, false) => { /* this is okay */ }
                }
            }
        }
    }

    fn run_specific_test(test_dir: &Path) {
        println!("running test in {:?}", test_dir);
        let temp_dir = tempfile::tempdir().unwrap();
        Test::new(test_dir).run(temp_dir.path());
    }

    fn run_all_tests_in(dir: &str) {
        let dir: PathBuf = format!("{}/tests/{}", env!("CARGO_MANIFEST_DIR"), dir).into();
        for entry in std::fs::read_dir(dir).unwrap() {
            let entry = entry.unwrap();
            if let Ok(file_type) = entry.file_type() {
                if file_type.is_dir() {
                    run_specific_test(&entry.path());
                }
            } else {
                panic!("couldn't get file type for {:?}", entry.path());
            }
        }
    }

    #[test]
    fn benchmark_tests() {
        run_all_tests_in("benchmark");
    }

    #[test]
    fn regression_tests() {
        run_all_tests_in("regression");
    }
}