adk_bench/config.rs
1//! Benchmark configuration types mapped from CLI flags.
2//!
3//! This module defines [`BenchConfig`], the top-level configuration struct
4//! that maps CLI parameters to structured settings for the [`BenchRunner`].
5//! It also defines supporting types like [`OutputFormat`], [`TaskSuite`],
6//! and [`ExternalFrameworkConfig`].
7
8use std::path::PathBuf;
9
10use serde::{Deserialize, Serialize};
11
12/// Top-level benchmark configuration mapped from CLI flags.
13///
14/// Controls all aspects of benchmark execution including iteration count,
15/// concurrency, output format, regression detection, and cost guards.
16///
17/// # Example
18///
19/// ```rust
20/// use adk_bench::BenchConfig;
21///
22/// let config = BenchConfig {
23/// model: "gemini-2.5-flash".to_string(),
24/// runs: 10,
25/// concurrency: 4,
26/// ..Default::default()
27/// };
28/// assert_eq!(config.runs, 10);
29/// assert_eq!(config.warmup, 3);
30/// ```
31#[derive(Debug, Clone, Serialize, Deserialize)]
32#[serde(rename_all = "camelCase")]
33pub struct BenchConfig {
34 /// Model identifier (e.g., "gemini-2.5-flash").
35 pub model: String,
36
37 /// Number of measurement iterations per workload.
38 pub runs: usize,
39
40 /// Agent concurrency level (1 = sequential).
41 pub concurrency: usize,
42
43 /// Specific workload to run (None = all built-in).
44 pub workload: Option<String>,
45
46 /// Output format for results.
47 pub output_format: OutputFormat,
48
49 /// Output file path (None = stdout).
50 pub output_path: Option<PathBuf>,
51
52 /// Warm-up iterations before measurement begins (discarded).
53 pub warmup: usize,
54
55 /// Whether to save results as baseline after the run.
56 pub save_baseline: bool,
57
58 /// Whether to check regression against a saved baseline.
59 pub check_regression: bool,
60
61 /// Maximum allowed relative degradation (default 0.10 = 10%).
62 pub tolerance: f64,
63
64 /// External framework configurations for comparison.
65 pub external_frameworks: Vec<ExternalFrameworkConfig>,
66
67 /// Timeout for external framework runs in seconds.
68 pub external_timeout_secs: u64,
69
70 /// Concurrency sweep levels (if sweep mode enabled).
71 /// When set, the runner tests each level sequentially: e.g., [1, 2, 4, 8, 16, 32, 64].
72 pub concurrency_sweep: Option<Vec<usize>>,
73
74 /// Memory sampling interval in milliseconds.
75 pub memory_sample_interval_ms: u64,
76
77 /// Task quality suite to run (tau2, bfcl).
78 pub suite: Option<TaskSuite>,
79
80 /// Baseline file path for regression detection.
81 pub baseline_path: PathBuf,
82
83 /// Dry-run mode: compute and display estimated cost without executing API calls.
84 pub dry_run: bool,
85
86 /// Maximum allowed API cost in USD; abort if estimated cost exceeds this.
87 pub max_cost_usd: Option<f64>,
88
89 /// Skip interactive cost confirmation (auto-confirm when estimated cost > $1.00).
90 pub confirm_cost: bool,
91
92 /// Enable experimental workloads (e.g., multi-agent delegation).
93 pub experimental: bool,
94}
95
96impl Default for BenchConfig {
97 /// Creates a `BenchConfig` with documented defaults:
98 ///
99 /// - `model`: `"gemini-2.5-flash"`
100 /// - `runs`: 5
101 /// - `concurrency`: 1 (sequential)
102 /// - `warmup`: 3
103 /// - `tolerance`: 0.10 (10%)
104 /// - `external_timeout_secs`: 300
105 /// - `memory_sample_interval_ms`: 100
106 /// - `output_format`: Table
107 /// - `baseline_path`: `.bench-baseline.json`
108 /// - `dry_run`: false
109 /// - `max_cost_usd`: None
110 /// - `confirm_cost`: false
111 /// - `experimental`: false
112 fn default() -> Self {
113 Self {
114 model: "gemini-2.5-flash".to_string(),
115 runs: 5,
116 concurrency: 1,
117 workload: None,
118 output_format: OutputFormat::Table,
119 output_path: None,
120 warmup: 3,
121 save_baseline: false,
122 check_regression: false,
123 tolerance: 0.10,
124 external_frameworks: Vec::new(),
125 external_timeout_secs: 300,
126 concurrency_sweep: None,
127 memory_sample_interval_ms: 100,
128 suite: None,
129 baseline_path: PathBuf::from(".bench-baseline.json"),
130 dry_run: false,
131 max_cost_usd: None,
132 confirm_cost: false,
133 experimental: false,
134 }
135 }
136}
137
138/// Output format for benchmark results.
139#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
140#[serde(rename_all = "lowercase")]
141pub enum OutputFormat {
142 /// Machine-readable JSON with all raw metrics.
143 Json,
144 /// Human-readable aligned table for terminal display.
145 Table,
146 /// Markdown table suitable for README inclusion.
147 Markdown,
148}
149
150/// Task quality benchmark suite selection.
151#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
152#[serde(rename_all = "lowercase")]
153pub enum TaskSuite {
154 /// τ²-bench task quality scenarios.
155 Tau2,
156 /// Berkeley Function Calling Leaderboard dataset.
157 Bfcl,
158}
159
160/// Configuration for an external framework comparison target.
161///
162/// Describes how to invoke a competitor framework benchmark subprocess
163/// that emits metrics in the External Benchmark Protocol (EBP) JSON format.
164///
165/// # Example
166///
167/// ```rust
168/// use adk_bench::ExternalFrameworkConfig;
169///
170/// let config = ExternalFrameworkConfig {
171/// name: "langgraph".to_string(),
172/// command: "python".to_string(),
173/// args: vec!["-m".to_string(), "bench_langgraph".to_string()],
174/// working_dir: None,
175/// env: vec![("PYTHONPATH".to_string(), "./src".to_string())],
176/// };
177/// assert_eq!(config.name, "langgraph");
178/// ```
179#[derive(Debug, Clone, Serialize, Deserialize)]
180#[serde(rename_all = "camelCase")]
181pub struct ExternalFrameworkConfig {
182 /// Framework name (e.g., "adk-python", "langgraph", "crewai").
183 pub name: String,
184
185 /// Command to execute the framework benchmark.
186 pub command: String,
187
188 /// Arguments passed to the command.
189 pub args: Vec<String>,
190
191 /// Working directory for execution.
192 pub working_dir: Option<PathBuf>,
193
194 /// Environment variables to set for the subprocess.
195 pub env: Vec<(String, String)>,
196}