// zeph_config/experiment.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
/// Serde default for `OrchestrationConfig::planner_max_tokens` (4096 tokens).
fn default_planner_max_tokens() -> u32 {
    const PLANNER_TOKEN_BUDGET: u32 = 4_096;
    PLANNER_TOKEN_BUDGET
}
9
/// Serde default for `OrchestrationConfig::aggregator_max_tokens` (4096 tokens).
fn default_aggregator_max_tokens() -> u32 {
    const AGGREGATOR_TOKEN_BUDGET: u32 = 4_096;
    AGGREGATOR_TOKEN_BUDGET
}
13
/// Serde default for `OrchestrationConfig::deferral_backoff_ms`: a 100 ms base backoff.
fn default_deferral_backoff_ms() -> u64 {
    const BASE_BACKOFF_MS: u64 = 100;
    BASE_BACKOFF_MS
}
17
/// Serde default for `ExperimentConfig::max_experiments` (20).
fn default_experiment_max_experiments() -> u32 {
    const MAX_EXPERIMENTS: u32 = 20;
    MAX_EXPERIMENTS
}
21
/// Serde default for `ExperimentConfig::max_wall_time_secs`: one hour, in seconds.
fn default_experiment_max_wall_time_secs() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
25
/// Serde default for `ExperimentConfig::min_improvement` (0.5).
///
/// NOTE(review): the unit of this value (percent vs. fraction) is not stated in this file.
fn default_experiment_min_improvement() -> f64 {
    const MIN_IMPROVEMENT: f64 = 0.5;
    MIN_IMPROVEMENT
}
29
/// Serde default for `ExperimentConfig::eval_budget_tokens` (100 000 tokens).
fn default_experiment_eval_budget_tokens() -> u64 {
    const EVAL_TOKEN_BUDGET: u64 = 100_000;
    EVAL_TOKEN_BUDGET
}
33
/// Serde default for `ExperimentSchedule::cron`.
///
/// In standard five-field cron syntax the expression reads as "03:00 every day".
/// NOTE(review): confirm the field layout expected by the cron parser that consumes it.
fn default_experiment_schedule_cron() -> String {
    const DEFAULT_CRON: &str = "0 3 * * *";
    String::from(DEFAULT_CRON)
}
37
/// Serde default for `ExperimentSchedule::max_experiments_per_run` (20).
fn default_experiment_max_experiments_per_run() -> u32 {
    const MAX_PER_RUN: u32 = 20;
    MAX_PER_RUN
}
41
/// Serde default for `ExperimentSchedule::max_wall_time_secs`: thirty minutes, in seconds.
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    const HALF_HOUR_SECS: u64 = 30 * 60;
    HALF_HOUR_SECS
}
45
/// Serde default for `OrchestrationConfig::verify_max_tokens` (1024 tokens).
fn default_verify_max_tokens() -> u32 {
    const VERIFY_TOKEN_BUDGET: u32 = 1_024;
    VERIFY_TOKEN_BUDGET
}
49
/// Serde default for `OrchestrationConfig::max_replans` (2 replan cycles).
fn default_max_replans() -> u32 {
    const MAX_REPLAN_CYCLES: u32 = 2;
    MAX_REPLAN_CYCLES
}
53
/// Serde default for `OrchestrationConfig::completeness_threshold` (0.7).
fn default_completeness_threshold() -> f32 {
    const COMPLETENESS_THRESHOLD: f32 = 0.7;
    COMPLETENESS_THRESHOLD
}
57
/// Serde default for `OrchestrationConfig::cascade_failure_threshold` (0.5).
fn default_cascade_failure_threshold() -> f32 {
    const CASCADE_FAILURE_THRESHOLD: f32 = 0.5;
    CASCADE_FAILURE_THRESHOLD
}
61
/// Serde default for `PlanCacheConfig::similarity_threshold` (0.90).
fn default_plan_cache_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.9;
    SIMILARITY_THRESHOLD
}
65
/// Serde default for `PlanCacheConfig::ttl_days` (30 days).
fn default_plan_cache_ttl_days() -> u32 {
    const TTL_DAYS: u32 = 30;
    TTL_DAYS
}
69
/// Serde default for `PlanCacheConfig::max_templates` (100 templates).
fn default_plan_cache_max_templates() -> u32 {
    const MAX_TEMPLATES: u32 = 100;
    MAX_TEMPLATES
}
73
/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
///
/// Every key is optional when deserializing: missing keys fall back to the values
/// produced by the `Default` impl / the per-field default functions. Range limits
/// are not enforced by deserialization itself; call [`PlanCacheConfig::validate`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PlanCacheConfig {
    /// Enable plan template caching. Default: false.
    pub enabled: bool,
    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
    ///
    /// `validate` accepts values in `[0.5, 1.0]`.
    #[serde(default = "default_plan_cache_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Days since last access before a template is evicted. Default: 30.
    ///
    /// `validate` accepts values in `[1, 365]`.
    #[serde(default = "default_plan_cache_ttl_days")]
    pub ttl_days: u32,
    /// Maximum number of cached templates. Default: 100.
    ///
    /// `validate` accepts values in `[1, 10000]`.
    #[serde(default = "default_plan_cache_max_templates")]
    pub max_templates: u32,
}
90
91impl Default for PlanCacheConfig {
92    fn default() -> Self {
93        Self {
94            enabled: false,
95            similarity_threshold: default_plan_cache_similarity_threshold(),
96            ttl_days: default_plan_cache_ttl_days(),
97            max_templates: default_plan_cache_max_templates(),
98        }
99    }
100}
101
102impl PlanCacheConfig {
103    /// Validate that all fields are within sane operating limits.
104    ///
105    /// # Errors
106    ///
107    /// Returns a description string if any field is outside the allowed range.
108    pub fn validate(&self) -> Result<(), String> {
109        if !(0.5..=1.0).contains(&self.similarity_threshold) {
110            return Err(format!(
111                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
112                self.similarity_threshold
113            ));
114        }
115        if self.max_templates == 0 || self.max_templates > 10_000 {
116            return Err(format!(
117                "plan_cache.max_templates must be in [1, 10000], got {}",
118                self.max_templates
119            ));
120        }
121        if self.ttl_days == 0 || self.ttl_days > 365 {
122            return Err(format!(
123                "plan_cache.ttl_days must be in [1, 365], got {}",
124                self.ttl_days
125            ));
126        }
127        Ok(())
128    }
129}
130
/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
///
/// Every key is optional when deserializing: missing keys fall back to the values
/// produced by the `Default` impl / the per-field default functions.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
// Each bool below is an independent on/off flag, so the lint is silenced on purpose.
#[allow(clippy::struct_excessive_bools)]
pub struct OrchestrationConfig {
    /// Enable the orchestration subsystem. Default: false.
    pub enabled: bool,
    /// Maximum number of tasks in a single graph. Default: 20.
    pub max_tasks: u32,
    /// Maximum number of tasks that can run in parallel. Default: 4.
    pub max_parallel: u32,
    /// Default failure strategy for all tasks unless overridden per-task.
    /// Default: `"abort"`.
    pub default_failure_strategy: String,
    /// Default number of retries for the `retry` failure strategy. Default: 3.
    pub default_max_retries: u32,
    /// Timeout in seconds for a single task. `0` means no timeout. Default: 300.
    pub task_timeout_secs: u64,
    /// Provider name from `[[llm.providers]]` for planning LLM calls.
    /// Empty string = use the agent's primary provider. Default: empty.
    #[serde(default)]
    pub planner_provider: String,
    /// Maximum tokens budget hint for planner responses. Reserved for future use when
    /// per-call token limits are added to the `LlmProvider::chat` API. Default: 4096.
    #[serde(default = "default_planner_max_tokens")]
    pub planner_max_tokens: u32,
    /// Total character budget for cross-task dependency context injection.
    /// Default: 16384.
    pub dependency_context_budget: usize,
    /// Whether to show a confirmation prompt before executing a plan. Default: true.
    pub confirm_before_execute: bool,
    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
    #[serde(default = "default_aggregator_max_tokens")]
    pub aggregator_max_tokens: u32,
    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
    /// Default: 100 ms.
    #[serde(default = "default_deferral_backoff_ms")]
    pub deferral_backoff_ms: u64,
    /// Plan template caching configuration (`[orchestration.plan_cache]`).
    #[serde(default)]
    pub plan_cache: PlanCacheConfig,
    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
    #[serde(default)]
    pub topology_selection: bool,
    /// Provider name from `[[llm.providers]]` for verification LLM calls.
    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
    #[serde(default)]
    pub verify_provider: String,
    /// Maximum tokens budget for verification LLM calls. Default: 1024.
    #[serde(default = "default_verify_max_tokens")]
    pub verify_max_tokens: u32,
    /// Maximum number of replan cycles per graph execution. Default: 2.
    ///
    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
    /// runs, gaps are logged only).
    #[serde(default = "default_max_replans")]
    pub max_replans: u32,
    /// Enable post-task completeness verification. Default: false (opt-in).
    ///
    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
    /// `Completed` during verification; downstream tasks are unblocked immediately.
    /// Verification is best-effort and does not gate dispatch.
    #[serde(default)]
    pub verify_completeness: bool,
    /// Provider name from `[[llm.providers]]` for tool-dispatch routing.
    /// When set, tool-heavy tasks prefer this provider over the primary.
    /// Prefer mid-tier models (e.g., qwen2.5:14b) for reliability per arXiv:2601.16280.
    /// Empty string = use the primary provider.
    #[serde(default)]
    pub tool_provider: String,
    /// Minimum completeness score (0.0–1.0) for the plan to be accepted without
    /// replanning. Default: 0.7. When the verifier reports `confidence <
    /// completeness_threshold` AND gaps exist, a replan cycle is triggered.
    /// Used by both per-task and whole-plan verification.
    /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
    #[serde(default = "default_completeness_threshold")]
    pub completeness_threshold: f32,
    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
    /// Default: false (opt-in).
    #[serde(default)]
    pub cascade_routing: bool,
    /// Failure rate threshold (0.0–1.0) above which a DAG region is considered "cascading".
    /// Must be in (0.0, 1.0]. Default: 0.5.
    ///
    /// NOTE(review): no range check for this field is visible in this file; confirm
    /// where the `(0.0, 1.0]` constraint is enforced.
    #[serde(default = "default_cascade_failure_threshold")]
    pub cascade_failure_threshold: f32,
    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
    /// end-to-end latency. Default: false (opt-in).
    #[serde(default)]
    pub tree_optimized_dispatch: bool,
}
221
222impl Default for OrchestrationConfig {
223    fn default() -> Self {
224        Self {
225            enabled: false,
226            max_tasks: 20,
227            max_parallel: 4,
228            default_failure_strategy: "abort".to_string(),
229            default_max_retries: 3,
230            task_timeout_secs: 300,
231            planner_provider: String::new(),
232            planner_max_tokens: default_planner_max_tokens(),
233            dependency_context_budget: 16384,
234            confirm_before_execute: true,
235            aggregator_max_tokens: default_aggregator_max_tokens(),
236            deferral_backoff_ms: default_deferral_backoff_ms(),
237            plan_cache: PlanCacheConfig::default(),
238            topology_selection: false,
239            verify_provider: String::new(),
240            verify_max_tokens: default_verify_max_tokens(),
241            max_replans: default_max_replans(),
242            verify_completeness: false,
243            completeness_threshold: default_completeness_threshold(),
244            tool_provider: String::new(),
245            cascade_routing: false,
246            cascade_failure_threshold: default_cascade_failure_threshold(),
247            tree_optimized_dispatch: false,
248        }
249    }
250}
251
/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
///
/// Every key is optional when deserializing: missing keys fall back to the values
/// produced by the `Default` impl / the per-field default functions. Numeric limits
/// are checked separately by [`ExperimentConfig::validate`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ExperimentConfig {
    /// Enable flag for the experimentation engine. Default: false.
    pub enabled: bool,
    /// Optional evaluation model name. Default: `None`.
    ///
    /// NOTE(review): how `None` is resolved is decided by the consumer, not in this file.
    pub eval_model: Option<String>,
    /// Optional path to a benchmark file. Default: `None`.
    pub benchmark_file: Option<std::path::PathBuf>,
    /// Experiment count limit. Default: 20; `validate` accepts `1..=1000`.
    #[serde(default = "default_experiment_max_experiments")]
    pub max_experiments: u32,
    /// Wall-time limit in seconds. Default: 3600; `validate` accepts `60..=86400`.
    #[serde(default = "default_experiment_max_wall_time_secs")]
    pub max_wall_time_secs: u64,
    /// Minimum improvement setting. Default: 0.5; `validate` accepts `0.0..=100.0`.
    ///
    /// NOTE(review): the unit (percent vs. fraction) is not stated in this file.
    #[serde(default = "default_experiment_min_improvement")]
    pub min_improvement: f64,
    /// Evaluation token budget. Default: 100000; `validate` accepts `1000..=10000000`.
    #[serde(default = "default_experiment_eval_budget_tokens")]
    pub eval_budget_tokens: u64,
    /// Auto-apply flag. Default: false.
    ///
    /// NOTE(review): presumably controls whether experiment results are applied
    /// without confirmation — confirm against the engine that reads it.
    pub auto_apply: bool,
    /// Cron scheduling settings for automatic runs (`[experiments.schedule]`).
    #[serde(default)]
    pub schedule: ExperimentSchedule,
}
271
272impl Default for ExperimentConfig {
273    fn default() -> Self {
274        Self {
275            enabled: false,
276            eval_model: None,
277            benchmark_file: None,
278            max_experiments: default_experiment_max_experiments(),
279            max_wall_time_secs: default_experiment_max_wall_time_secs(),
280            min_improvement: default_experiment_min_improvement(),
281            eval_budget_tokens: default_experiment_eval_budget_tokens(),
282            auto_apply: false,
283            schedule: ExperimentSchedule::default(),
284        }
285    }
286}
287
/// Cron scheduling configuration for automatic experiment runs.
///
/// Every key is optional when deserializing: missing keys fall back to the values
/// produced by the `Default` impl / the per-field default functions. The numeric
/// fields are range-checked by `ExperimentConfig::validate`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ExperimentSchedule {
    /// Enable flag for scheduled runs. Default: false.
    pub enabled: bool,
    /// Cron expression for the schedule. Default: `"0 3 * * *"`.
    ///
    /// The expression itself is not validated in this file.
    #[serde(default = "default_experiment_schedule_cron")]
    pub cron: String,
    /// Experiment count limit per scheduled run. Default: 20;
    /// `ExperimentConfig::validate` accepts `1..=100`.
    #[serde(default = "default_experiment_max_experiments_per_run")]
    pub max_experiments_per_run: u32,
    /// Wall-time cap for a single scheduled experiment session (seconds).
    ///
    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
    /// a background session cannot overlap the next cron trigger on typical schedules.
    /// `ExperimentConfig::validate` accepts `60..=86400`.
    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
    pub max_wall_time_secs: u64,
}
304
305impl Default for ExperimentSchedule {
306    fn default() -> Self {
307        Self {
308            enabled: false,
309            cron: default_experiment_schedule_cron(),
310            max_experiments_per_run: default_experiment_max_experiments_per_run(),
311            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
312        }
313    }
314}
315
316impl ExperimentConfig {
317    /// Validate that numeric bounds are within sane operating limits.
318    ///
319    /// # Errors
320    ///
321    /// Returns a description string if any field is outside allowed range.
322    pub fn validate(&self) -> Result<(), String> {
323        if !(1..=1_000).contains(&self.max_experiments) {
324            return Err(format!(
325                "experiments.max_experiments must be in 1..=1000, got {}",
326                self.max_experiments
327            ));
328        }
329        if !(60..=86_400).contains(&self.max_wall_time_secs) {
330            return Err(format!(
331                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
332                self.max_wall_time_secs
333            ));
334        }
335        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
336            return Err(format!(
337                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
338                self.eval_budget_tokens
339            ));
340        }
341        if !(0.0..=100.0).contains(&self.min_improvement) {
342            return Err(format!(
343                "experiments.min_improvement must be in 0.0..=100.0, got {}",
344                self.min_improvement
345            ));
346        }
347        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
348            return Err(format!(
349                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
350                self.schedule.max_experiments_per_run
351            ));
352        }
353        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
354            return Err(format!(
355                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
356                self.schedule.max_wall_time_secs
357            ));
358        }
359        Ok(())
360    }
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// True when the two values differ by less than `f32::EPSILON`.
    fn approx_eq(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < f32::EPSILON
    }

    /// A similarity threshold above the upper bound must fail validation.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let outcome = PlanCacheConfig {
            similarity_threshold: 1.1,
            ..PlanCacheConfig::default()
        }
        .validate();

        assert!(
            outcome.is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }

    /// `Default::default()` yields the documented 0.7 completeness threshold.
    #[test]
    fn completeness_threshold_default_is_0_7() {
        let threshold = OrchestrationConfig::default().completeness_threshold;
        assert!(
            approx_eq(threshold, 0.7),
            "completeness_threshold default must be 0.7, got {threshold}"
        );
    }

    /// An explicit threshold survives TOML deserialize -> serialize -> deserialize.
    #[test]
    fn completeness_threshold_serde_round_trip() {
        let toml_in = r"
            enabled = true
            completeness_threshold = 0.85
        ";
        let parsed: OrchestrationConfig = toml::from_str(toml_in).expect("deserialize");
        assert!(approx_eq(parsed.completeness_threshold, 0.85));

        let rendered = toml::to_string(&parsed).expect("serialize");
        let reparsed: OrchestrationConfig = toml::from_str(&rendered).expect("re-deserialize");
        assert!(approx_eq(reparsed.completeness_threshold, 0.85));
    }

    /// Omitting the key in TOML falls back to the 0.7 default.
    #[test]
    fn completeness_threshold_missing_uses_default() {
        let parsed: OrchestrationConfig = toml::from_str("enabled = true\n").expect("deserialize");
        let threshold = parsed.completeness_threshold;
        assert!(
            approx_eq(threshold, 0.7),
            "missing field must use default 0.7, got {threshold}"
        );
    }
}
414}