Skip to main content

zeph_config/
experiment.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Serde fallback for `OrchestrationConfig::planner_max_tokens`: 4096 tokens.
fn default_planner_max_tokens() -> u32 {
    const PLANNER_MAX_TOKENS: u32 = 4_096;
    PLANNER_MAX_TOKENS
}
10
/// Serde fallback for `OrchestrationConfig::aggregator_max_tokens`: 4096 tokens.
fn default_aggregator_max_tokens() -> u32 {
    const AGGREGATOR_MAX_TOKENS: u32 = 4_096;
    AGGREGATOR_MAX_TOKENS
}
14
/// Serde fallback for `OrchestrationConfig::deferral_backoff_ms`: 100 milliseconds.
fn default_deferral_backoff_ms() -> u64 {
    const DEFERRAL_BACKOFF_MS: u64 = 100;
    DEFERRAL_BACKOFF_MS
}
18
/// Serde fallback for `ExperimentConfig::max_experiments`: 20 experiments.
fn default_experiment_max_experiments() -> u32 {
    const MAX_EXPERIMENTS: u32 = 20;
    MAX_EXPERIMENTS
}
22
/// Serde fallback for `ExperimentConfig::max_wall_time_secs`: 3600 seconds (one hour).
fn default_experiment_max_wall_time_secs() -> u64 {
    const MAX_WALL_TIME_SECS: u64 = 3_600;
    MAX_WALL_TIME_SECS
}
26
/// Serde fallback for `ExperimentConfig::min_improvement`: 0.5.
fn default_experiment_min_improvement() -> f64 {
    const MIN_IMPROVEMENT: f64 = 0.5;
    MIN_IMPROVEMENT
}
30
/// Serde fallback for `ExperimentConfig::eval_budget_tokens`: 100 000 tokens.
fn default_experiment_eval_budget_tokens() -> u64 {
    const EVAL_BUDGET_TOKENS: u64 = 100_000;
    EVAL_BUDGET_TOKENS
}
34
/// Serde fallback for `ExperimentSchedule::cron`: the expression `0 3 * * *`.
fn default_experiment_schedule_cron() -> String {
    String::from("0 3 * * *")
}
38
/// Serde fallback for `ExperimentSchedule::max_experiments_per_run`: 20 experiments.
fn default_experiment_max_experiments_per_run() -> u32 {
    const MAX_EXPERIMENTS_PER_RUN: u32 = 20;
    MAX_EXPERIMENTS_PER_RUN
}
42
/// Serde fallback for `ExperimentSchedule::max_wall_time_secs`: 1800 seconds (30 minutes).
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    const SCHEDULED_MAX_WALL_TIME_SECS: u64 = 1_800;
    SCHEDULED_MAX_WALL_TIME_SECS
}
46
/// Serde fallback for `OrchestrationConfig::verify_max_tokens`: 1024 tokens.
fn default_verify_max_tokens() -> u32 {
    const VERIFY_MAX_TOKENS: u32 = 1_024;
    VERIFY_MAX_TOKENS
}
50
/// Serde fallback for `OrchestrationConfig::max_replans`: 2 replan cycles.
fn default_max_replans() -> u32 {
    const MAX_REPLANS: u32 = 2;
    MAX_REPLANS
}
54
/// Serde fallback for `OrchestrationConfig::completeness_threshold`: 0.7.
fn default_completeness_threshold() -> f32 {
    const COMPLETENESS_THRESHOLD: f32 = 0.7;
    COMPLETENESS_THRESHOLD
}
58
/// Serde fallback for `OrchestrationConfig::cascade_failure_threshold`: 0.5.
fn default_cascade_failure_threshold() -> f32 {
    const CASCADE_FAILURE_THRESHOLD: f32 = 0.5;
    CASCADE_FAILURE_THRESHOLD
}
62
/// Serde fallback for `PlanCacheConfig::similarity_threshold`: 0.90.
fn default_plan_cache_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.9;
    SIMILARITY_THRESHOLD
}
66
/// Serde fallback for `PlanCacheConfig::ttl_days`: 30 days.
fn default_plan_cache_ttl_days() -> u32 {
    const TTL_DAYS: u32 = 30;
    TTL_DAYS
}
70
/// Serde fallback for `PlanCacheConfig::max_templates`: 100 templates.
fn default_plan_cache_max_templates() -> u32 {
    const MAX_TEMPLATES: u32 = 100;
    MAX_TEMPLATES
}
74
/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl, and the numeric fields
/// additionally name their own default function. Range limits are checked by
/// [`PlanCacheConfig::validate`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PlanCacheConfig {
    /// Enable plan template caching. Default: false.
    pub enabled: bool,
    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
    ///
    /// `validate` accepts values in [0.5, 1.0].
    #[serde(default = "default_plan_cache_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Days since last access before a template is evicted. Default: 30.
    ///
    /// `validate` accepts values in [1, 365].
    #[serde(default = "default_plan_cache_ttl_days")]
    pub ttl_days: u32,
    /// Maximum number of cached templates. Default: 100.
    ///
    /// `validate` accepts values in [1, 10000].
    #[serde(default = "default_plan_cache_max_templates")]
    pub max_templates: u32,
}
91
92impl Default for PlanCacheConfig {
93    fn default() -> Self {
94        Self {
95            enabled: false,
96            similarity_threshold: default_plan_cache_similarity_threshold(),
97            ttl_days: default_plan_cache_ttl_days(),
98            max_templates: default_plan_cache_max_templates(),
99        }
100    }
101}
102
103impl PlanCacheConfig {
104    /// Validate that all fields are within sane operating limits.
105    ///
106    /// # Errors
107    ///
108    /// Returns a description string if any field is outside the allowed range.
109    pub fn validate(&self) -> Result<(), String> {
110        if !(0.5..=1.0).contains(&self.similarity_threshold) {
111            return Err(format!(
112                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
113                self.similarity_threshold
114            ));
115        }
116        if self.max_templates == 0 || self.max_templates > 10_000 {
117            return Err(format!(
118                "plan_cache.max_templates must be in [1, 10000], got {}",
119                self.max_templates
120            ));
121        }
122        if self.ttl_days == 0 || self.ttl_days > 365 {
123            return Err(format!(
124                "plan_cache.ttl_days must be in [1, 365], got {}",
125                self.ttl_days
126            ));
127        }
128        Ok(())
129    }
130}
131
/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
// NOTE(review): presumably silenced because the six `bool` fields below are separate
// on/off switches rather than one state machine — confirm.
#[allow(clippy::struct_excessive_bools)]
pub struct OrchestrationConfig {
    /// Enable the orchestration subsystem. Default: false.
    pub enabled: bool,
    /// Maximum number of tasks in a single graph. Default: 20.
    pub max_tasks: u32,
    /// Maximum number of tasks that can run in parallel. Default: 4.
    pub max_parallel: u32,
    /// Default failure strategy for all tasks unless overridden per-task. Default: `"abort"`.
    pub default_failure_strategy: String,
    /// Default number of retries for the `retry` failure strategy. Default: 3.
    pub default_max_retries: u32,
    /// Timeout in seconds for a single task. `0` means no timeout. Default: 300.
    pub task_timeout_secs: u64,
    /// Provider name from `[[llm.providers]]` for planning LLM calls.
    /// Empty string = use the agent's primary provider.
    #[serde(default)]
    pub planner_provider: ProviderName,
    /// Maximum tokens budget hint for planner responses. Reserved for future use when
    /// per-call token limits are added to the `LlmProvider::chat` API. Default: 4096.
    #[serde(default = "default_planner_max_tokens")]
    pub planner_max_tokens: u32,
    /// Total character budget for cross-task dependency context injection. Default: 16384.
    pub dependency_context_budget: usize,
    /// Whether to show a confirmation prompt before executing a plan. Default: true.
    pub confirm_before_execute: bool,
    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
    #[serde(default = "default_aggregator_max_tokens")]
    pub aggregator_max_tokens: u32,
    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
    /// Default: 100 ms.
    #[serde(default = "default_deferral_backoff_ms")]
    pub deferral_backoff_ms: u64,
    /// Plan template caching configuration.
    #[serde(default)]
    pub plan_cache: PlanCacheConfig,
    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
    #[serde(default)]
    pub topology_selection: bool,
    /// Provider name from `[[llm.providers]]` for verification LLM calls.
    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
    #[serde(default)]
    pub verify_provider: ProviderName,
    /// Maximum tokens budget for verification LLM calls. Default: 1024.
    #[serde(default = "default_verify_max_tokens")]
    pub verify_max_tokens: u32,
    /// Maximum number of replan cycles per graph execution. Default: 2.
    ///
    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
    /// runs, gaps are logged only).
    #[serde(default = "default_max_replans")]
    pub max_replans: u32,
    /// Enable post-task completeness verification. Default: false (opt-in).
    ///
    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
    /// `Completed` during verification; downstream tasks are unblocked immediately.
    /// Verification is best-effort and does not gate dispatch.
    #[serde(default)]
    pub verify_completeness: bool,
    /// Provider name from `[[llm.providers]]` for tool-dispatch routing.
    /// When set, tool-heavy tasks prefer this provider over the primary.
    /// Prefer mid-tier models (e.g., qwen2.5:14b) for reliability per arXiv:2601.16280.
    /// Empty string = use the primary provider.
    #[serde(default)]
    pub tool_provider: ProviderName,
    /// Minimum completeness score (0.0–1.0) for the plan to be accepted without
    /// replanning. Default: 0.7. When the verifier reports `confidence <
    /// completeness_threshold` AND gaps exist, a replan cycle is triggered.
    /// Used by both per-task and whole-plan verification.
    /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
    #[serde(default = "default_completeness_threshold")]
    pub completeness_threshold: f32,
    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
    /// Default: false (opt-in).
    #[serde(default)]
    pub cascade_routing: bool,
    /// Failure rate threshold (0.0–1.0) above which a DAG region is considered "cascading".
    /// Must be in (0.0, 1.0]. Default: 0.5.
    // NOTE(review): no range check for this field is visible in this file — confirm
    // where the (0.0, 1.0] constraint is enforced.
    #[serde(default = "default_cascade_failure_threshold")]
    pub cascade_failure_threshold: f32,
    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
    /// end-to-end latency. Default: false (opt-in).
    #[serde(default)]
    pub tree_optimized_dispatch: bool,
}
222
223impl Default for OrchestrationConfig {
224    fn default() -> Self {
225        Self {
226            enabled: false,
227            max_tasks: 20,
228            max_parallel: 4,
229            default_failure_strategy: "abort".to_string(),
230            default_max_retries: 3,
231            task_timeout_secs: 300,
232            planner_provider: ProviderName::default(),
233            planner_max_tokens: default_planner_max_tokens(),
234            dependency_context_budget: 16384,
235            confirm_before_execute: true,
236            aggregator_max_tokens: default_aggregator_max_tokens(),
237            deferral_backoff_ms: default_deferral_backoff_ms(),
238            plan_cache: PlanCacheConfig::default(),
239            topology_selection: false,
240            verify_provider: ProviderName::default(),
241            verify_max_tokens: default_verify_max_tokens(),
242            max_replans: default_max_replans(),
243            verify_completeness: false,
244            completeness_threshold: default_completeness_threshold(),
245            tool_provider: ProviderName::default(),
246            cascade_routing: false,
247            cascade_failure_threshold: default_cascade_failure_threshold(),
248            tree_optimized_dispatch: false,
249        }
250    }
251}
252
/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
///
/// When `enabled = true`, Zeph periodically runs A/B experiments on its own skill and
/// prompt configurations to find improvements automatically.
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl. Numeric bounds are checked
/// by [`ExperimentConfig::validate`].
///
/// # Example (TOML)
///
/// ```toml
/// [experiments]
/// enabled = false
/// max_experiments = 20
/// auto_apply = false
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ExperimentConfig {
    /// Enable autonomous self-experimentation. Default: `false`.
    pub enabled: bool,
    /// Model identifier used for evaluating experiment outcomes. Default: `None`.
    pub eval_model: Option<String>,
    /// Path to a benchmark JSONL file for evaluating experiments. Default: `None`.
    pub benchmark_file: Option<std::path::PathBuf>,
    /// Maximum number of experiments. Default: 20; `validate` accepts 1..=1000.
    ///
    /// NOTE(review): presumably a per-session cap — confirm against the engine.
    #[serde(default = "default_experiment_max_experiments")]
    pub max_experiments: u32,
    /// Wall-time limit in seconds. Default: 3600; `validate` accepts 60..=86400.
    ///
    /// Scheduled runs use `schedule.max_wall_time_secs` instead, per that field's docs.
    #[serde(default = "default_experiment_max_wall_time_secs")]
    pub max_wall_time_secs: u64,
    /// Minimum improvement threshold. Default: 0.5; `validate` accepts 0.0..=100.0.
    ///
    /// NOTE(review): the 0–100 range suggests percentage points — confirm the unit.
    #[serde(default = "default_experiment_min_improvement")]
    pub min_improvement: f64,
    /// Token budget for evaluation. Default: 100000; `validate` accepts 1000..=10000000.
    #[serde(default = "default_experiment_eval_budget_tokens")]
    pub eval_budget_tokens: u64,
    /// Default: `false`.
    ///
    /// NOTE(review): presumably controls whether a winning experiment is applied
    /// without confirmation — confirm against the engine.
    pub auto_apply: bool,
    /// Cron scheduling settings for automatic experiment runs.
    #[serde(default)]
    pub schedule: ExperimentSchedule,
}
287
288impl Default for ExperimentConfig {
289    fn default() -> Self {
290        Self {
291            enabled: false,
292            eval_model: None,
293            benchmark_file: None,
294            max_experiments: default_experiment_max_experiments(),
295            max_wall_time_secs: default_experiment_max_wall_time_secs(),
296            min_improvement: default_experiment_min_improvement(),
297            eval_budget_tokens: default_experiment_eval_budget_tokens(),
298            auto_apply: false,
299            schedule: ExperimentSchedule::default(),
300        }
301    }
302}
303
/// Cron scheduling configuration for automatic experiment runs.
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl. The numeric limits are
/// range-checked by `ExperimentConfig::validate`; the cron string is not checked there.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ExperimentSchedule {
    /// Enable scheduled experiment runs. Default: `false`.
    pub enabled: bool,
    /// Cron expression for scheduled runs. Default: `0 3 * * *`.
    ///
    /// NOTE(review): in standard five-field cron this is 03:00 every day; the cron
    /// dialect and time zone are not visible here — confirm against the scheduler.
    #[serde(default = "default_experiment_schedule_cron")]
    pub cron: String,
    /// Maximum number of experiments in one scheduled run. Default: 20;
    /// `ExperimentConfig::validate` accepts 1..=100.
    #[serde(default = "default_experiment_max_experiments_per_run")]
    pub max_experiments_per_run: u32,
    /// Wall-time cap for a single scheduled experiment session (seconds).
    ///
    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
    /// a background session cannot overlap the next cron trigger on typical schedules.
    /// `ExperimentConfig::validate` accepts 60..=86400.
    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
    pub max_wall_time_secs: u64,
}
320
321impl Default for ExperimentSchedule {
322    fn default() -> Self {
323        Self {
324            enabled: false,
325            cron: default_experiment_schedule_cron(),
326            max_experiments_per_run: default_experiment_max_experiments_per_run(),
327            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
328        }
329    }
330}
331
332impl ExperimentConfig {
333    /// Validate that numeric bounds are within sane operating limits.
334    ///
335    /// # Errors
336    ///
337    /// Returns a description string if any field is outside allowed range.
338    pub fn validate(&self) -> Result<(), String> {
339        if !(1..=1_000).contains(&self.max_experiments) {
340            return Err(format!(
341                "experiments.max_experiments must be in 1..=1000, got {}",
342                self.max_experiments
343            ));
344        }
345        if !(60..=86_400).contains(&self.max_wall_time_secs) {
346            return Err(format!(
347                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
348                self.max_wall_time_secs
349            ));
350        }
351        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
352            return Err(format!(
353                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
354                self.eval_budget_tokens
355            ));
356        }
357        if !(0.0..=100.0).contains(&self.min_improvement) {
358            return Err(format!(
359                "experiments.min_improvement must be in 0.0..=100.0, got {}",
360                self.min_improvement
361            ));
362        }
363        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
364            return Err(format!(
365                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
366                self.schedule.max_experiments_per_run
367            ));
368        }
369        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
370            return Err(format!(
371                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
372                self.schedule.max_wall_time_secs
373            ));
374        }
375        Ok(())
376    }
377}
378
#[cfg(test)]
mod tests {
    use super::*;

    /// `true` when the two values differ by less than `f32::EPSILON`.
    fn nearly_equal(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < f32::EPSILON
    }

    /// A similarity threshold above the 1.0 upper bound must fail validation.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let outcome = PlanCacheConfig {
            similarity_threshold: 1.1,
            ..PlanCacheConfig::default()
        }
        .validate();

        assert!(
            outcome.is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }

    /// The `Default` impl must yield a completeness threshold of 0.7.
    #[test]
    fn completeness_threshold_default_is_0_7() {
        let threshold = OrchestrationConfig::default().completeness_threshold;
        assert!(
            nearly_equal(threshold, 0.7),
            "completeness_threshold default must be 0.7, got {}",
            threshold
        );
    }

    /// An explicit threshold must survive deserialize → serialize → deserialize.
    #[test]
    fn completeness_threshold_serde_round_trip() {
        let toml_in = r"
            enabled = true
            completeness_threshold = 0.85
        ";
        let parsed = toml::from_str::<OrchestrationConfig>(toml_in).expect("deserialize");
        assert!(nearly_equal(parsed.completeness_threshold, 0.85));

        let rendered = toml::to_string(&parsed).expect("serialize");
        let reparsed = toml::from_str::<OrchestrationConfig>(&rendered).expect("re-deserialize");
        assert!(nearly_equal(reparsed.completeness_threshold, 0.85));
    }

    /// Omitting the key from TOML must fall back to the 0.7 default.
    #[test]
    fn completeness_threshold_missing_uses_default() {
        let parsed =
            toml::from_str::<OrchestrationConfig>("enabled = true\n").expect("deserialize");
        let threshold = parsed.completeness_threshold;
        assert!(
            nearly_equal(threshold, 0.7),
            "missing field must use default 0.7, got {}",
            threshold
        );
    }
}