Skip to main content

zeph_config/
experiment.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Fallback for `OrchestrationConfig::planner_max_tokens` when the key is absent: 4096.
fn default_planner_max_tokens() -> u32 {
    const PLANNER_MAX_TOKENS: u32 = 4_096;
    PLANNER_MAX_TOKENS
}
10
/// Fallback for `OrchestrationConfig::aggregator_max_tokens` when the key is absent: 4096.
fn default_aggregator_max_tokens() -> u32 {
    const AGGREGATOR_MAX_TOKENS: u32 = 4_096;
    AGGREGATOR_MAX_TOKENS
}
14
/// Fallback for `OrchestrationConfig::deferral_backoff_ms` when the key is absent:
/// 100 milliseconds.
fn default_deferral_backoff_ms() -> u64 {
    const DEFERRAL_BACKOFF_MS: u64 = 100;
    DEFERRAL_BACKOFF_MS
}
18
/// Fallback for `ExperimentConfig::max_experiments` when the key is absent: 20.
fn default_experiment_max_experiments() -> u32 {
    const MAX_EXPERIMENTS: u32 = 20;
    MAX_EXPERIMENTS
}
22
/// Fallback for `ExperimentConfig::max_wall_time_secs` when the key is absent:
/// 3600 seconds (one hour).
fn default_experiment_max_wall_time_secs() -> u64 {
    const MAX_WALL_TIME_SECS: u64 = 3_600;
    MAX_WALL_TIME_SECS
}
26
/// Fallback for `ExperimentConfig::min_improvement` when the key is absent: 0.5.
fn default_experiment_min_improvement() -> f64 {
    const MIN_IMPROVEMENT: f64 = 0.5;
    MIN_IMPROVEMENT
}
30
/// Fallback for `ExperimentConfig::eval_budget_tokens` when the key is absent:
/// 100 000 tokens.
fn default_experiment_eval_budget_tokens() -> u64 {
    const EVAL_BUDGET_TOKENS: u64 = 100_000;
    EVAL_BUDGET_TOKENS
}
34
/// Fallback for `ExperimentSchedule::cron` when the key is absent: `0 3 * * *`.
///
/// NOTE(review): in conventional five-field cron syntax this reads as 03:00 every
/// day; confirm against the cron parser the scheduler actually uses.
fn default_experiment_schedule_cron() -> String {
    String::from("0 3 * * *")
}
38
/// Fallback for `ExperimentSchedule::max_experiments_per_run` when the key is absent: 20.
fn default_experiment_max_experiments_per_run() -> u32 {
    const MAX_EXPERIMENTS_PER_RUN: u32 = 20;
    MAX_EXPERIMENTS_PER_RUN
}
42
/// Fallback for `ExperimentSchedule::max_wall_time_secs` when the key is absent:
/// 1800 seconds (thirty minutes).
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    const SCHEDULED_MAX_WALL_TIME_SECS: u64 = 1_800;
    SCHEDULED_MAX_WALL_TIME_SECS
}
46
/// Fallback for `OrchestrationConfig::verify_max_tokens` when the key is absent: 1024.
fn default_verify_max_tokens() -> u32 {
    const VERIFY_MAX_TOKENS: u32 = 1_024;
    VERIFY_MAX_TOKENS
}
50
/// Fallback for `OrchestrationConfig::max_replans` when the key is absent: 2.
fn default_max_replans() -> u32 {
    const MAX_REPLANS: u32 = 2;
    MAX_REPLANS
}
54
/// Fallback for `OrchestrationConfig::completeness_threshold` when the key is absent: 0.7.
fn default_completeness_threshold() -> f32 {
    const COMPLETENESS_THRESHOLD: f32 = 0.7;
    COMPLETENESS_THRESHOLD
}
58
/// Fallback for `OrchestrationConfig::cascade_failure_threshold` when the key is
/// absent: 0.5.
fn default_cascade_failure_threshold() -> f32 {
    const CASCADE_FAILURE_THRESHOLD: f32 = 0.5;
    CASCADE_FAILURE_THRESHOLD
}
62
/// Fallback for `PlanCacheConfig::similarity_threshold` when the key is absent: 0.90.
fn default_plan_cache_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.90;
    SIMILARITY_THRESHOLD
}
66
/// Fallback for `PlanCacheConfig::ttl_days` when the key is absent: 30 days.
fn default_plan_cache_ttl_days() -> u32 {
    const TTL_DAYS: u32 = 30;
    TTL_DAYS
}
70
/// Fallback for `PlanCacheConfig::max_templates` when the key is absent: 100.
fn default_plan_cache_max_templates() -> u32 {
    const MAX_TEMPLATES: u32 = 100;
    MAX_TEMPLATES
}
74
75/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
76#[derive(Debug, Clone, Deserialize, Serialize)]
77#[serde(default)]
78pub struct PlanCacheConfig {
79    /// Enable plan template caching. Default: false.
80    pub enabled: bool,
81    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
82    #[serde(default = "default_plan_cache_similarity_threshold")]
83    pub similarity_threshold: f32,
84    /// Days since last access before a template is evicted. Default: 30.
85    #[serde(default = "default_plan_cache_ttl_days")]
86    pub ttl_days: u32,
87    /// Maximum number of cached templates. Default: 100.
88    #[serde(default = "default_plan_cache_max_templates")]
89    pub max_templates: u32,
90}
91
92impl Default for PlanCacheConfig {
93    fn default() -> Self {
94        Self {
95            enabled: false,
96            similarity_threshold: default_plan_cache_similarity_threshold(),
97            ttl_days: default_plan_cache_ttl_days(),
98            max_templates: default_plan_cache_max_templates(),
99        }
100    }
101}
102
103impl PlanCacheConfig {
104    /// Validate that all fields are within sane operating limits.
105    ///
106    /// # Errors
107    ///
108    /// Returns a description string if any field is outside the allowed range.
109    pub fn validate(&self) -> Result<(), String> {
110        if !(0.5..=1.0).contains(&self.similarity_threshold) {
111            return Err(format!(
112                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
113                self.similarity_threshold
114            ));
115        }
116        if self.max_templates == 0 || self.max_templates > 10_000 {
117            return Err(format!(
118                "plan_cache.max_templates must be in [1, 10000], got {}",
119                self.max_templates
120            ));
121        }
122        if self.ttl_days == 0 || self.ttl_days > 365 {
123            return Err(format!(
124                "plan_cache.ttl_days must be in [1, 365], got {}",
125                self.ttl_days
126            ));
127        }
128        Ok(())
129    }
130}
131
132/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
133#[derive(Debug, Clone, Deserialize, Serialize)]
134#[serde(default)]
135#[allow(clippy::struct_excessive_bools)]
136pub struct OrchestrationConfig {
137    /// Enable the orchestration subsystem.
138    pub enabled: bool,
139    /// Maximum number of tasks in a single graph.
140    pub max_tasks: u32,
141    /// Maximum number of tasks that can run in parallel.
142    pub max_parallel: u32,
143    /// Default failure strategy for all tasks unless overridden per-task.
144    pub default_failure_strategy: String,
145    /// Default number of retries for the `retry` failure strategy.
146    pub default_max_retries: u32,
147    /// Timeout in seconds for a single task. `0` means no timeout.
148    pub task_timeout_secs: u64,
149    /// Provider name from `[[llm.providers]]` for planning LLM calls.
150    /// Empty string = use the agent's primary provider.
151    #[serde(default)]
152    pub planner_provider: ProviderName,
153    /// Maximum tokens budget hint for planner responses. Reserved for future use when
154    /// per-call token limits are added to the `LlmProvider::chat` API.
155    #[serde(default = "default_planner_max_tokens")]
156    pub planner_max_tokens: u32,
157    /// Total character budget for cross-task dependency context injection.
158    pub dependency_context_budget: usize,
159    /// Whether to show a confirmation prompt before executing a plan.
160    pub confirm_before_execute: bool,
161    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
162    #[serde(default = "default_aggregator_max_tokens")]
163    pub aggregator_max_tokens: u32,
164    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
165    #[serde(default = "default_deferral_backoff_ms")]
166    pub deferral_backoff_ms: u64,
167    /// Plan template caching configuration.
168    #[serde(default)]
169    pub plan_cache: PlanCacheConfig,
170    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
171    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
172    #[serde(default)]
173    pub topology_selection: bool,
174    /// Provider name from `[[llm.providers]]` for verification LLM calls.
175    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
176    #[serde(default)]
177    pub verify_provider: ProviderName,
178    /// Maximum tokens budget for verification LLM calls. Default: 1024.
179    #[serde(default = "default_verify_max_tokens")]
180    pub verify_max_tokens: u32,
181    /// Maximum number of replan cycles per graph execution. Default: 2.
182    ///
183    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
184    /// runs, gaps are logged only).
185    #[serde(default = "default_max_replans")]
186    pub max_replans: u32,
187    /// Enable post-task completeness verification. Default: false (opt-in).
188    ///
189    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
190    /// `Completed` during verification; downstream tasks are unblocked immediately.
191    /// Verification is best-effort and does not gate dispatch.
192    #[serde(default)]
193    pub verify_completeness: bool,
194    /// Provider name from `[[llm.providers]]` for tool-dispatch routing.
195    /// When set, tool-heavy tasks prefer this provider over the primary.
196    /// Prefer mid-tier models (e.g., qwen2.5:14b) for reliability per arXiv:2601.16280.
197    /// Empty string = use the primary provider.
198    #[serde(default)]
199    pub tool_provider: ProviderName,
200    /// Minimum completeness score (0.0–1.0) for the plan to be accepted without
201    /// replanning. Default: 0.7. When the verifier reports `confidence <
202    /// completeness_threshold` AND gaps exist, a replan cycle is triggered.
203    /// Used by both per-task and whole-plan verification.
204    /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
205    #[serde(default = "default_completeness_threshold")]
206    pub completeness_threshold: f32,
207    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
208    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
209    /// Default: false (opt-in).
210    #[serde(default)]
211    pub cascade_routing: bool,
212    /// Failure rate threshold (0.0–1.0) above which a DAG region is considered "cascading".
213    /// Must be in (0.0, 1.0]. Default: 0.5.
214    #[serde(default = "default_cascade_failure_threshold")]
215    pub cascade_failure_threshold: f32,
216    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
217    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
218    /// end-to-end latency. Default: false (opt-in).
219    #[serde(default)]
220    pub tree_optimized_dispatch: bool,
221}
222
223impl Default for OrchestrationConfig {
224    fn default() -> Self {
225        Self {
226            enabled: false,
227            max_tasks: 20,
228            max_parallel: 4,
229            default_failure_strategy: "abort".to_string(),
230            default_max_retries: 3,
231            task_timeout_secs: 300,
232            planner_provider: ProviderName::default(),
233            planner_max_tokens: default_planner_max_tokens(),
234            dependency_context_budget: 16384,
235            confirm_before_execute: true,
236            aggregator_max_tokens: default_aggregator_max_tokens(),
237            deferral_backoff_ms: default_deferral_backoff_ms(),
238            plan_cache: PlanCacheConfig::default(),
239            topology_selection: false,
240            verify_provider: ProviderName::default(),
241            verify_max_tokens: default_verify_max_tokens(),
242            max_replans: default_max_replans(),
243            verify_completeness: false,
244            completeness_threshold: default_completeness_threshold(),
245            tool_provider: ProviderName::default(),
246            cascade_routing: false,
247            cascade_failure_threshold: default_cascade_failure_threshold(),
248            tree_optimized_dispatch: false,
249        }
250    }
251}
252
253/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
254#[derive(Debug, Clone, Deserialize, Serialize)]
255#[serde(default)]
256pub struct ExperimentConfig {
257    pub enabled: bool,
258    pub eval_model: Option<String>,
259    pub benchmark_file: Option<std::path::PathBuf>,
260    #[serde(default = "default_experiment_max_experiments")]
261    pub max_experiments: u32,
262    #[serde(default = "default_experiment_max_wall_time_secs")]
263    pub max_wall_time_secs: u64,
264    #[serde(default = "default_experiment_min_improvement")]
265    pub min_improvement: f64,
266    #[serde(default = "default_experiment_eval_budget_tokens")]
267    pub eval_budget_tokens: u64,
268    pub auto_apply: bool,
269    #[serde(default)]
270    pub schedule: ExperimentSchedule,
271}
272
273impl Default for ExperimentConfig {
274    fn default() -> Self {
275        Self {
276            enabled: false,
277            eval_model: None,
278            benchmark_file: None,
279            max_experiments: default_experiment_max_experiments(),
280            max_wall_time_secs: default_experiment_max_wall_time_secs(),
281            min_improvement: default_experiment_min_improvement(),
282            eval_budget_tokens: default_experiment_eval_budget_tokens(),
283            auto_apply: false,
284            schedule: ExperimentSchedule::default(),
285        }
286    }
287}
288
289/// Cron scheduling configuration for automatic experiment runs.
290#[derive(Debug, Clone, Deserialize, Serialize)]
291#[serde(default)]
292pub struct ExperimentSchedule {
293    pub enabled: bool,
294    #[serde(default = "default_experiment_schedule_cron")]
295    pub cron: String,
296    #[serde(default = "default_experiment_max_experiments_per_run")]
297    pub max_experiments_per_run: u32,
298    /// Wall-time cap for a single scheduled experiment session (seconds).
299    ///
300    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
301    /// a background session cannot overlap the next cron trigger on typical schedules.
302    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
303    pub max_wall_time_secs: u64,
304}
305
306impl Default for ExperimentSchedule {
307    fn default() -> Self {
308        Self {
309            enabled: false,
310            cron: default_experiment_schedule_cron(),
311            max_experiments_per_run: default_experiment_max_experiments_per_run(),
312            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
313        }
314    }
315}
316
317impl ExperimentConfig {
318    /// Validate that numeric bounds are within sane operating limits.
319    ///
320    /// # Errors
321    ///
322    /// Returns a description string if any field is outside allowed range.
323    pub fn validate(&self) -> Result<(), String> {
324        if !(1..=1_000).contains(&self.max_experiments) {
325            return Err(format!(
326                "experiments.max_experiments must be in 1..=1000, got {}",
327                self.max_experiments
328            ));
329        }
330        if !(60..=86_400).contains(&self.max_wall_time_secs) {
331            return Err(format!(
332                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
333                self.max_wall_time_secs
334            ));
335        }
336        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
337            return Err(format!(
338                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
339                self.eval_budget_tokens
340            ));
341        }
342        if !(0.0..=100.0).contains(&self.min_improvement) {
343            return Err(format!(
344                "experiments.min_improvement must be in 0.0..=100.0, got {}",
345                self.min_improvement
346            ));
347        }
348        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
349            return Err(format!(
350                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
351                self.schedule.max_experiments_per_run
352            ));
353        }
354        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
355            return Err(format!(
356                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
357                self.schedule.max_wall_time_secs
358            ));
359        }
360        Ok(())
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;

    /// True when two `f32` values differ by less than `f32::EPSILON`.
    fn approx_eq(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < f32::EPSILON
    }

    /// A similarity threshold above 1.0 must fail `PlanCacheConfig::validate`.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let out_of_range = PlanCacheConfig {
            similarity_threshold: 1.1,
            ..PlanCacheConfig::default()
        };
        assert!(
            out_of_range.validate().is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }

    /// `OrchestrationConfig::default()` carries a completeness threshold of 0.7.
    #[test]
    fn completeness_threshold_default_is_0_7() {
        let threshold = OrchestrationConfig::default().completeness_threshold;
        assert!(
            approx_eq(threshold, 0.7),
            "completeness_threshold default must be 0.7, got {}",
            threshold
        );
    }

    /// An explicit threshold survives TOML deserialize -> serialize -> deserialize.
    #[test]
    fn completeness_threshold_serde_round_trip() {
        let toml_in = r"
            enabled = true
            completeness_threshold = 0.85
        ";
        let parsed: OrchestrationConfig = toml::from_str(toml_in).expect("deserialize");
        assert!(approx_eq(parsed.completeness_threshold, 0.85));

        let rendered = toml::to_string(&parsed).expect("serialize");
        let reparsed: OrchestrationConfig = toml::from_str(&rendered).expect("re-deserialize");
        assert!(approx_eq(reparsed.completeness_threshold, 0.85));
    }

    /// Omitting the key from the TOML yields the 0.7 default.
    #[test]
    fn completeness_threshold_missing_uses_default() {
        let parsed: OrchestrationConfig = toml::from_str("enabled = true\n").expect("deserialize");
        assert!(
            approx_eq(parsed.completeness_threshold, 0.7),
            "missing field must use default 0.7, got {}",
            parsed.completeness_threshold
        );
    }
}
415}