// adk_bench/config.rs

//! Benchmark configuration types mapped from CLI flags.
//!
//! This module defines [`BenchConfig`], the top-level configuration struct
//! that maps CLI parameters to structured settings for the [`BenchRunner`].
//! It also defines supporting types like [`OutputFormat`], [`TaskSuite`],
//! and [`ExternalFrameworkConfig`].

use std::path::PathBuf;

use serde::{Deserialize, Serialize};

12/// Top-level benchmark configuration mapped from CLI flags.
13///
14/// Controls all aspects of benchmark execution including iteration count,
15/// concurrency, output format, regression detection, and cost guards.
16///
17/// # Example
18///
19/// ```rust
20/// use adk_bench::BenchConfig;
21///
22/// let config = BenchConfig {
23///     model: "gemini-2.5-flash".to_string(),
24///     runs: 10,
25///     concurrency: 4,
26///     ..Default::default()
27/// };
28/// assert_eq!(config.runs, 10);
29/// assert_eq!(config.warmup, 3);
30/// ```
31#[derive(Debug, Clone, Serialize, Deserialize)]
32#[serde(rename_all = "camelCase")]
33pub struct BenchConfig {
34    /// Model identifier (e.g., "gemini-2.5-flash").
35    pub model: String,
36
37    /// Number of measurement iterations per workload.
38    pub runs: usize,
39
40    /// Agent concurrency level (1 = sequential).
41    pub concurrency: usize,
42
43    /// Specific workload to run (None = all built-in).
44    pub workload: Option<String>,
45
46    /// Output format for results.
47    pub output_format: OutputFormat,
48
49    /// Output file path (None = stdout).
50    pub output_path: Option<PathBuf>,
51
52    /// Warm-up iterations before measurement begins (discarded).
53    pub warmup: usize,
54
55    /// Whether to save results as baseline after the run.
56    pub save_baseline: bool,
57
58    /// Whether to check regression against a saved baseline.
59    pub check_regression: bool,
60
61    /// Maximum allowed relative degradation (default 0.10 = 10%).
62    pub tolerance: f64,
63
64    /// External framework configurations for comparison.
65    pub external_frameworks: Vec<ExternalFrameworkConfig>,
66
67    /// Timeout for external framework runs in seconds.
68    pub external_timeout_secs: u64,
69
70    /// Concurrency sweep levels (if sweep mode enabled).
71    /// When set, the runner tests each level sequentially: e.g., [1, 2, 4, 8, 16, 32, 64].
72    pub concurrency_sweep: Option<Vec<usize>>,
73
74    /// Memory sampling interval in milliseconds.
75    pub memory_sample_interval_ms: u64,
76
77    /// Task quality suite to run (tau2, bfcl).
78    pub suite: Option<TaskSuite>,
79
80    /// Baseline file path for regression detection.
81    pub baseline_path: PathBuf,
82
83    /// Dry-run mode: compute and display estimated cost without executing API calls.
84    pub dry_run: bool,
85
86    /// Maximum allowed API cost in USD; abort if estimated cost exceeds this.
87    pub max_cost_usd: Option<f64>,
88
89    /// Skip interactive cost confirmation (auto-confirm when estimated cost > $1.00).
90    pub confirm_cost: bool,
91
92    /// Enable experimental workloads (e.g., multi-agent delegation).
93    pub experimental: bool,
94}
95
96impl Default for BenchConfig {
97    /// Creates a `BenchConfig` with documented defaults:
98    ///
99    /// - `model`: `"gemini-2.5-flash"`
100    /// - `runs`: 5
101    /// - `concurrency`: 1 (sequential)
102    /// - `warmup`: 3
103    /// - `tolerance`: 0.10 (10%)
104    /// - `external_timeout_secs`: 300
105    /// - `memory_sample_interval_ms`: 100
106    /// - `output_format`: Table
107    /// - `baseline_path`: `.bench-baseline.json`
108    /// - `dry_run`: false
109    /// - `max_cost_usd`: None
110    /// - `confirm_cost`: false
111    /// - `experimental`: false
112    fn default() -> Self {
113        Self {
114            model: "gemini-2.5-flash".to_string(),
115            runs: 5,
116            concurrency: 1,
117            workload: None,
118            output_format: OutputFormat::Table,
119            output_path: None,
120            warmup: 3,
121            save_baseline: false,
122            check_regression: false,
123            tolerance: 0.10,
124            external_frameworks: Vec::new(),
125            external_timeout_secs: 300,
126            concurrency_sweep: None,
127            memory_sample_interval_ms: 100,
128            suite: None,
129            baseline_path: PathBuf::from(".bench-baseline.json"),
130            dry_run: false,
131            max_cost_usd: None,
132            confirm_cost: false,
133            experimental: false,
134        }
135    }
136}
137
138/// Output format for benchmark results.
139#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
140#[serde(rename_all = "lowercase")]
141pub enum OutputFormat {
142    /// Machine-readable JSON with all raw metrics.
143    Json,
144    /// Human-readable aligned table for terminal display.
145    Table,
146    /// Markdown table suitable for README inclusion.
147    Markdown,
148}
149
150/// Task quality benchmark suite selection.
151#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
152#[serde(rename_all = "lowercase")]
153pub enum TaskSuite {
154    /// τ²-bench task quality scenarios.
155    Tau2,
156    /// Berkeley Function Calling Leaderboard dataset.
157    Bfcl,
158}
159
160/// Configuration for an external framework comparison target.
161///
162/// Describes how to invoke a competitor framework benchmark subprocess
163/// that emits metrics in the External Benchmark Protocol (EBP) JSON format.
164///
165/// # Example
166///
167/// ```rust
168/// use adk_bench::ExternalFrameworkConfig;
169///
170/// let config = ExternalFrameworkConfig {
171///     name: "langgraph".to_string(),
172///     command: "python".to_string(),
173///     args: vec!["-m".to_string(), "bench_langgraph".to_string()],
174///     working_dir: None,
175///     env: vec![("PYTHONPATH".to_string(), "./src".to_string())],
176/// };
177/// assert_eq!(config.name, "langgraph");
178/// ```
179#[derive(Debug, Clone, Serialize, Deserialize)]
180#[serde(rename_all = "camelCase")]
181pub struct ExternalFrameworkConfig {
182    /// Framework name (e.g., "adk-python", "langgraph", "crewai").
183    pub name: String,
184
185    /// Command to execute the framework benchmark.
186    pub command: String,
187
188    /// Arguments passed to the command.
189    pub args: Vec<String>,
190
191    /// Working directory for execution.
192    pub working_dir: Option<PathBuf>,
193
194    /// Environment variables to set for the subprocess.
195    pub env: Vec<(String, String)>,
196}