// zeph_bench/cli.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Clap subcommand definitions for `zeph bench`.
//!
//! The top-level entry point is [`BenchCommand`], which is nested under the root
//! `zeph` binary as `zeph bench <subcommand>`.

9/// Top-level subcommands available under `zeph bench`.
10///
11/// Each variant maps to one logical operation:
12/// - [`List`][BenchCommand::List] — inspect what datasets are available locally.
13/// - [`Download`][BenchCommand::Download] — fetch a dataset from its canonical URL.
14/// - [`Run`][BenchCommand::Run] — execute a full benchmark run and write results.
15/// - [`Show`][BenchCommand::Show] — print a summary of a previously saved run.
16///
17/// # Examples
18///
19/// ```
20/// use zeph_bench::BenchCommand;
21///
22/// // The enum is parsed by Clap; construct directly in tests.
23/// let cmd = BenchCommand::List;
24/// assert!(matches!(cmd, BenchCommand::List));
25/// ```
26#[derive(clap::Subcommand, Debug)]
27#[non_exhaustive]
28pub enum BenchCommand {
29    /// List available benchmark datasets and their cache status
30    List,
31
32    /// Download a dataset to the local cache
33    Download {
34        /// Dataset name (e.g. gaia, tau-bench)
35        #[arg(long)]
36        dataset: String,
37    },
38
39    /// Run a benchmark against the agent
40    Run {
41        /// Dataset name (e.g. `locomo`, `gaia`, `frames`)
42        #[arg(long)]
43        dataset: String,
44
45        /// Directory where `results.json` and `summary.md` are written
46        #[arg(long)]
47        output: std::path::PathBuf,
48
49        /// Path to the local dataset file (JSON or JSONL).
50        ///
51        /// Required until automatic download is implemented. Obtain the file manually
52        /// from the URL shown by `zeph bench list`.
53        #[arg(long)]
54        data_file: Option<std::path::PathBuf>,
55
56        /// Run only the scenario with this ID (runs all scenarios if omitted)
57        #[arg(long)]
58        scenario: Option<String>,
59
60        /// LLM provider name as declared in `[[llm.providers]]` (uses default if omitted)
61        #[arg(long)]
62        provider: Option<String>,
63
64        /// Run with a baseline (non-agentic) configuration that disables tools and memory
65        #[arg(long)]
66        baseline: bool,
67
68        /// Resume a previously interrupted run, skipping already-completed scenarios
69        #[arg(long)]
70        resume: bool,
71
72        /// Disable deterministic mode — by default temperature is forced to 0.0 for
73        /// reproducibility; pass this flag to use the provider's configured temperature
74        #[arg(long)]
75        no_deterministic: bool,
76    },
77
78    /// Print a human-readable summary of results from a previous benchmark run
79    Show {
80        /// Path to the `results.json` file produced by `bench run`
81        #[arg(long)]
82        results: std::path::PathBuf,
83    },
84}