Skip to main content

zeph_config/
experiment.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
/// Serde fallback for `OrchestrationConfig::planner_max_tokens`: 4096 tokens.
fn default_planner_max_tokens() -> u32 {
    const PLANNER_MAX_TOKENS: u32 = 4_096;
    PLANNER_MAX_TOKENS
}
9
/// Serde fallback for `OrchestrationConfig::aggregator_max_tokens`: 4096 tokens.
fn default_aggregator_max_tokens() -> u32 {
    const AGGREGATOR_MAX_TOKENS: u32 = 4_096;
    AGGREGATOR_MAX_TOKENS
}
13
/// Serde fallback for `OrchestrationConfig::deferral_backoff_ms`: 100 milliseconds.
fn default_deferral_backoff_ms() -> u64 {
    const BASE_BACKOFF_MS: u64 = 100;
    BASE_BACKOFF_MS
}
17
/// Serde fallback for `ExperimentConfig::max_experiments`: 20.
fn default_experiment_max_experiments() -> u32 {
    const MAX_EXPERIMENTS: u32 = 20;
    MAX_EXPERIMENTS
}
21
/// Serde fallback for `ExperimentConfig::max_wall_time_secs`: 3600 seconds (one hour).
fn default_experiment_max_wall_time_secs() -> u64 {
    const MAX_WALL_TIME_SECS: u64 = 3_600;
    MAX_WALL_TIME_SECS
}
25
/// Serde fallback for `ExperimentConfig::min_improvement`: 0.5.
fn default_experiment_min_improvement() -> f64 {
    const MIN_IMPROVEMENT: f64 = 0.5;
    MIN_IMPROVEMENT
}
29
/// Serde fallback for `ExperimentConfig::eval_budget_tokens`: 100 000 tokens.
fn default_experiment_eval_budget_tokens() -> u64 {
    const EVAL_BUDGET_TOKENS: u64 = 100_000;
    EVAL_BUDGET_TOKENS
}
33
/// Serde fallback for `ExperimentSchedule::cron`.
///
/// In standard five-field cron syntax the expression reads as 03:00 every day.
fn default_experiment_schedule_cron() -> String {
    String::from("0 3 * * *")
}
37
/// Serde fallback for `ExperimentSchedule::max_experiments_per_run`: 20.
fn default_experiment_max_experiments_per_run() -> u32 {
    const MAX_EXPERIMENTS_PER_RUN: u32 = 20;
    MAX_EXPERIMENTS_PER_RUN
}
41
/// Serde fallback for `ExperimentSchedule::max_wall_time_secs`: 1800 seconds (30 minutes).
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    const SCHEDULED_MAX_WALL_TIME_SECS: u64 = 1_800;
    SCHEDULED_MAX_WALL_TIME_SECS
}
45
/// Serde fallback for `OrchestrationConfig::verify_max_tokens`: 1024 tokens.
fn default_verify_max_tokens() -> u32 {
    const VERIFY_MAX_TOKENS: u32 = 1_024;
    VERIFY_MAX_TOKENS
}
49
/// Serde fallback for `OrchestrationConfig::max_replans`: 2 replan cycles.
fn default_max_replans() -> u32 {
    const MAX_REPLANS: u32 = 2;
    MAX_REPLANS
}
53
/// Serde fallback for `PlanCacheConfig::similarity_threshold`: 0.90.
fn default_plan_cache_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.90;
    SIMILARITY_THRESHOLD
}
57
/// Serde fallback for `PlanCacheConfig::ttl_days`: 30 days.
fn default_plan_cache_ttl_days() -> u32 {
    const TTL_DAYS: u32 = 30;
    TTL_DAYS
}
61
/// Serde fallback for `PlanCacheConfig::max_templates`: 100 templates.
fn default_plan_cache_max_templates() -> u32 {
    const MAX_TEMPLATES: u32 = 100;
    MAX_TEMPLATES
}
65
66/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
67#[derive(Debug, Clone, Deserialize, Serialize)]
68#[serde(default)]
69pub struct PlanCacheConfig {
70    /// Enable plan template caching. Default: false.
71    pub enabled: bool,
72    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
73    #[serde(default = "default_plan_cache_similarity_threshold")]
74    pub similarity_threshold: f32,
75    /// Days since last access before a template is evicted. Default: 30.
76    #[serde(default = "default_plan_cache_ttl_days")]
77    pub ttl_days: u32,
78    /// Maximum number of cached templates. Default: 100.
79    #[serde(default = "default_plan_cache_max_templates")]
80    pub max_templates: u32,
81}
82
83impl Default for PlanCacheConfig {
84    fn default() -> Self {
85        Self {
86            enabled: false,
87            similarity_threshold: default_plan_cache_similarity_threshold(),
88            ttl_days: default_plan_cache_ttl_days(),
89            max_templates: default_plan_cache_max_templates(),
90        }
91    }
92}
93
94impl PlanCacheConfig {
95    /// Validate that all fields are within sane operating limits.
96    ///
97    /// # Errors
98    ///
99    /// Returns a description string if any field is outside the allowed range.
100    pub fn validate(&self) -> Result<(), String> {
101        if !(0.5..=1.0).contains(&self.similarity_threshold) {
102            return Err(format!(
103                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
104                self.similarity_threshold
105            ));
106        }
107        if self.max_templates == 0 || self.max_templates > 10_000 {
108            return Err(format!(
109                "plan_cache.max_templates must be in [1, 10000], got {}",
110                self.max_templates
111            ));
112        }
113        if self.ttl_days == 0 || self.ttl_days > 365 {
114            return Err(format!(
115                "plan_cache.ttl_days must be in [1, 365], got {}",
116                self.ttl_days
117            ));
118        }
119        Ok(())
120    }
121}
122
123/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
124#[derive(Debug, Clone, Deserialize, Serialize)]
125#[serde(default)]
126#[allow(clippy::struct_excessive_bools)]
127pub struct OrchestrationConfig {
128    /// Enable the orchestration subsystem.
129    pub enabled: bool,
130    /// Maximum number of tasks in a single graph.
131    pub max_tasks: u32,
132    /// Maximum number of tasks that can run in parallel.
133    pub max_parallel: u32,
134    /// Default failure strategy for all tasks unless overridden per-task.
135    pub default_failure_strategy: String,
136    /// Default number of retries for the `retry` failure strategy.
137    pub default_max_retries: u32,
138    /// Timeout in seconds for a single task. `0` means no timeout.
139    pub task_timeout_secs: u64,
140    /// Provider name from `[[llm.providers]]` for planning LLM calls.
141    /// Empty string = use the agent's primary provider.
142    #[serde(default)]
143    pub planner_provider: String,
144    /// Maximum tokens budget hint for planner responses. Reserved for future use when
145    /// per-call token limits are added to the `LlmProvider::chat` API.
146    #[serde(default = "default_planner_max_tokens")]
147    pub planner_max_tokens: u32,
148    /// Total character budget for cross-task dependency context injection.
149    pub dependency_context_budget: usize,
150    /// Whether to show a confirmation prompt before executing a plan.
151    pub confirm_before_execute: bool,
152    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
153    #[serde(default = "default_aggregator_max_tokens")]
154    pub aggregator_max_tokens: u32,
155    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
156    #[serde(default = "default_deferral_backoff_ms")]
157    pub deferral_backoff_ms: u64,
158    /// Plan template caching configuration.
159    #[serde(default)]
160    pub plan_cache: PlanCacheConfig,
161    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
162    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
163    #[serde(default)]
164    pub topology_selection: bool,
165    /// Provider name from `[[llm.providers]]` for verification LLM calls.
166    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
167    #[serde(default)]
168    pub verify_provider: String,
169    /// Maximum tokens budget for verification LLM calls. Default: 1024.
170    #[serde(default = "default_verify_max_tokens")]
171    pub verify_max_tokens: u32,
172    /// Maximum number of replan cycles per graph execution. Default: 2.
173    ///
174    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
175    /// runs, gaps are logged only).
176    #[serde(default = "default_max_replans")]
177    pub max_replans: u32,
178    /// Enable post-task completeness verification. Default: false (opt-in).
179    ///
180    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
181    /// `Completed` during verification; downstream tasks are unblocked immediately.
182    /// Verification is best-effort and does not gate dispatch.
183    #[serde(default)]
184    pub verify_completeness: bool,
185}
186
187impl Default for OrchestrationConfig {
188    fn default() -> Self {
189        Self {
190            enabled: false,
191            max_tasks: 20,
192            max_parallel: 4,
193            default_failure_strategy: "abort".to_string(),
194            default_max_retries: 3,
195            task_timeout_secs: 300,
196            planner_provider: String::new(),
197            planner_max_tokens: default_planner_max_tokens(),
198            dependency_context_budget: 16384,
199            confirm_before_execute: true,
200            aggregator_max_tokens: default_aggregator_max_tokens(),
201            deferral_backoff_ms: default_deferral_backoff_ms(),
202            plan_cache: PlanCacheConfig::default(),
203            topology_selection: false,
204            verify_provider: String::new(),
205            verify_max_tokens: default_verify_max_tokens(),
206            max_replans: default_max_replans(),
207            verify_completeness: false,
208        }
209    }
210}
211
212/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
213#[derive(Debug, Clone, Deserialize, Serialize)]
214#[serde(default)]
215pub struct ExperimentConfig {
216    pub enabled: bool,
217    pub eval_model: Option<String>,
218    pub benchmark_file: Option<std::path::PathBuf>,
219    #[serde(default = "default_experiment_max_experiments")]
220    pub max_experiments: u32,
221    #[serde(default = "default_experiment_max_wall_time_secs")]
222    pub max_wall_time_secs: u64,
223    #[serde(default = "default_experiment_min_improvement")]
224    pub min_improvement: f64,
225    #[serde(default = "default_experiment_eval_budget_tokens")]
226    pub eval_budget_tokens: u64,
227    pub auto_apply: bool,
228    #[serde(default)]
229    pub schedule: ExperimentSchedule,
230}
231
232impl Default for ExperimentConfig {
233    fn default() -> Self {
234        Self {
235            enabled: false,
236            eval_model: None,
237            benchmark_file: None,
238            max_experiments: default_experiment_max_experiments(),
239            max_wall_time_secs: default_experiment_max_wall_time_secs(),
240            min_improvement: default_experiment_min_improvement(),
241            eval_budget_tokens: default_experiment_eval_budget_tokens(),
242            auto_apply: false,
243            schedule: ExperimentSchedule::default(),
244        }
245    }
246}
247
248/// Cron scheduling configuration for automatic experiment runs.
249#[derive(Debug, Clone, Deserialize, Serialize)]
250#[serde(default)]
251pub struct ExperimentSchedule {
252    pub enabled: bool,
253    #[serde(default = "default_experiment_schedule_cron")]
254    pub cron: String,
255    #[serde(default = "default_experiment_max_experiments_per_run")]
256    pub max_experiments_per_run: u32,
257    /// Wall-time cap for a single scheduled experiment session (seconds).
258    ///
259    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
260    /// a background session cannot overlap the next cron trigger on typical schedules.
261    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
262    pub max_wall_time_secs: u64,
263}
264
265impl Default for ExperimentSchedule {
266    fn default() -> Self {
267        Self {
268            enabled: false,
269            cron: default_experiment_schedule_cron(),
270            max_experiments_per_run: default_experiment_max_experiments_per_run(),
271            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
272        }
273    }
274}
275
276impl ExperimentConfig {
277    /// Validate that numeric bounds are within sane operating limits.
278    ///
279    /// # Errors
280    ///
281    /// Returns a description string if any field is outside allowed range.
282    pub fn validate(&self) -> Result<(), String> {
283        if !(1..=1_000).contains(&self.max_experiments) {
284            return Err(format!(
285                "experiments.max_experiments must be in 1..=1000, got {}",
286                self.max_experiments
287            ));
288        }
289        if !(60..=86_400).contains(&self.max_wall_time_secs) {
290            return Err(format!(
291                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
292                self.max_wall_time_secs
293            ));
294        }
295        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
296            return Err(format!(
297                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
298                self.eval_budget_tokens
299            ));
300        }
301        if !(0.0..=100.0).contains(&self.min_improvement) {
302            return Err(format!(
303                "experiments.min_improvement must be in 0.0..=100.0, got {}",
304                self.min_improvement
305            ));
306        }
307        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
308            return Err(format!(
309                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
310                self.schedule.max_experiments_per_run
311            ));
312        }
313        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
314            return Err(format!(
315                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
316                self.schedule.max_wall_time_secs
317            ));
318        }
319        Ok(())
320    }
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// A similarity threshold above the 1.0 upper bound must fail validation.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let too_high = PlanCacheConfig {
            similarity_threshold: 1.1,
            ..Default::default()
        };

        assert!(
            too_high.validate().is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }
}