Skip to main content

zeph_config/
experiment.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Fallback for `OrchestrationConfig::planner_max_tokens` when the key is missing
/// from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_planner_max_tokens() -> u32 {
    4_096
}
10
/// Fallback for `OrchestrationConfig::aggregator_max_tokens` when the key is missing
/// from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_aggregator_max_tokens() -> u32 {
    4_096
}
14
/// Fallback for `OrchestrationConfig::deferral_backoff_ms` (base retry backoff, in
/// milliseconds) when the key is missing from the TOML input.
fn default_deferral_backoff_ms() -> u64 {
    100_u64
}
18
/// Fallback for `ExperimentConfig::max_experiments` when the key is missing from the
/// TOML input; also used by `ExperimentConfig::default()`.
fn default_experiment_max_experiments() -> u32 {
    20_u32
}
22
/// Fallback for `ExperimentConfig::max_wall_time_secs`: 3600 seconds, i.e. one hour.
fn default_experiment_max_wall_time_secs() -> u64 {
    3_600
}
26
/// Fallback for `ExperimentConfig::min_improvement` when the key is missing from the
/// TOML input; also used by `ExperimentConfig::default()`.
fn default_experiment_min_improvement() -> f64 {
    0.5_f64
}
30
/// Fallback for `ExperimentConfig::eval_budget_tokens` when the key is missing from
/// the TOML input; also used by `ExperimentConfig::default()`.
fn default_experiment_eval_budget_tokens() -> u64 {
    100_000_u64
}
34
/// Fallback for `ExperimentSchedule::cron` when the key is missing from the TOML input.
///
/// NOTE(review): in conventional five-field cron syntax this expression reads as
/// "03:00 every day" — confirm against the cron parser the scheduler actually uses.
fn default_experiment_schedule_cron() -> String {
    String::from("0 3 * * *")
}
38
/// Fallback for `ExperimentSchedule::max_experiments_per_run` when the key is missing
/// from the TOML input; also used by `ExperimentSchedule::default()`.
fn default_experiment_max_experiments_per_run() -> u32 {
    20_u32
}
42
/// Fallback for `ExperimentSchedule::max_wall_time_secs`: 1800 seconds, i.e. thirty
/// minutes per scheduled session.
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    1_800
}
46
/// Fallback for `OrchestrationConfig::verify_max_tokens` when the key is missing from
/// the TOML input; also used by `OrchestrationConfig::default()`.
fn default_verify_max_tokens() -> u32 {
    1_024
}
50
/// Fallback for `OrchestrationConfig::max_replans` when the key is missing from the
/// TOML input; also used by `OrchestrationConfig::default()`.
fn default_max_replans() -> u32 {
    2_u32
}
54
/// Fallback for `OrchestrationConfig::completeness_threshold` when the key is missing
/// from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_completeness_threshold() -> f32 {
    0.7_f32
}
58
/// Fallback for `OrchestrationConfig::cascade_failure_threshold` when the key is
/// missing from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_cascade_failure_threshold() -> f32 {
    0.5_f32
}
62
/// Fallback for `OrchestrationConfig::cascade_chain_threshold` when the key is
/// missing from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_cascade_chain_threshold() -> usize {
    3_usize
}
66
/// Fallback for `OrchestrationConfig::lineage_ttl_secs`: 300 seconds, i.e. five minutes.
fn default_lineage_ttl_secs() -> u64 {
    300_u64
}
70
/// Fallback for `OrchestrationConfig::max_predicate_replans` when the key is missing
/// from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_max_predicate_replans() -> u32 {
    2_u32
}
74
/// Fallback for `OrchestrationConfig::predicate_timeout_secs` (seconds) when the key
/// is missing from the TOML input; also used by `OrchestrationConfig::default()`.
fn default_predicate_timeout_secs() -> u64 {
    30_u64
}
78
/// Fallback for `OrchestrationConfig::persistence_enabled`: persistence is on unless
/// the configuration explicitly turns it off.
fn default_persistence_enabled() -> bool {
    true
}
82
/// Fallback for `PlanCacheConfig::similarity_threshold` when the key is missing from
/// the TOML input; also used by `PlanCacheConfig::default()`.
fn default_plan_cache_similarity_threshold() -> f32 {
    0.90_f32
}
86
/// Fallback for `PlanCacheConfig::ttl_days` when the key is missing from the TOML
/// input; also used by `PlanCacheConfig::default()`.
fn default_plan_cache_ttl_days() -> u32 {
    30_u32
}
90
/// Fallback for `PlanCacheConfig::max_templates` when the key is missing from the
/// TOML input; also used by `PlanCacheConfig::default()`.
fn default_plan_cache_max_templates() -> u32 {
    100_u32
}
94
95/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
96#[derive(Debug, Clone, Deserialize, Serialize)]
97#[serde(default)]
98pub struct PlanCacheConfig {
99    /// Enable plan template caching. Default: false.
100    pub enabled: bool,
101    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
102    #[serde(default = "default_plan_cache_similarity_threshold")]
103    pub similarity_threshold: f32,
104    /// Days since last access before a template is evicted. Default: 30.
105    #[serde(default = "default_plan_cache_ttl_days")]
106    pub ttl_days: u32,
107    /// Maximum number of cached templates. Default: 100.
108    #[serde(default = "default_plan_cache_max_templates")]
109    pub max_templates: u32,
110}
111
112impl Default for PlanCacheConfig {
113    fn default() -> Self {
114        Self {
115            enabled: false,
116            similarity_threshold: default_plan_cache_similarity_threshold(),
117            ttl_days: default_plan_cache_ttl_days(),
118            max_templates: default_plan_cache_max_templates(),
119        }
120    }
121}
122
123impl PlanCacheConfig {
124    /// Validate that all fields are within sane operating limits.
125    ///
126    /// # Errors
127    ///
128    /// Returns a description string if any field is outside the allowed range.
129    pub fn validate(&self) -> Result<(), String> {
130        if !(0.5..=1.0).contains(&self.similarity_threshold) {
131            return Err(format!(
132                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
133                self.similarity_threshold
134            ));
135        }
136        if self.max_templates == 0 || self.max_templates > 10_000 {
137            return Err(format!(
138                "plan_cache.max_templates must be in [1, 10000], got {}",
139                self.max_templates
140            ));
141        }
142        if self.ttl_days == 0 || self.ttl_days > 365 {
143            return Err(format!(
144                "plan_cache.ttl_days must be in [1, 365], got {}",
145                self.ttl_days
146            ));
147        }
148        Ok(())
149    }
150}
151
152/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
153#[derive(Debug, Clone, Deserialize, Serialize)]
154#[serde(default)]
155#[allow(clippy::struct_excessive_bools)]
156pub struct OrchestrationConfig {
157    /// Enable the orchestration subsystem.
158    pub enabled: bool,
159    /// Maximum number of tasks in a single graph.
160    pub max_tasks: u32,
161    /// Maximum number of tasks that can run in parallel.
162    pub max_parallel: u32,
163    /// Default failure strategy for all tasks unless overridden per-task.
164    pub default_failure_strategy: String,
165    /// Default number of retries for the `retry` failure strategy.
166    pub default_max_retries: u32,
167    /// Timeout in seconds for a single task. `0` means no timeout.
168    pub task_timeout_secs: u64,
169    /// Provider name from `[[llm.providers]]` for planning LLM calls.
170    /// Empty string = use the agent's primary provider.
171    #[serde(default)]
172    pub planner_provider: ProviderName,
173    /// Maximum tokens budget hint for planner responses. Reserved for future use when
174    /// per-call token limits are added to the `LlmProvider::chat` API.
175    #[serde(default = "default_planner_max_tokens")]
176    pub planner_max_tokens: u32,
177    /// Total character budget for cross-task dependency context injection.
178    pub dependency_context_budget: usize,
179    /// Whether to show a confirmation prompt before executing a plan.
180    pub confirm_before_execute: bool,
181    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
182    #[serde(default = "default_aggregator_max_tokens")]
183    pub aggregator_max_tokens: u32,
184    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
185    #[serde(default = "default_deferral_backoff_ms")]
186    pub deferral_backoff_ms: u64,
187    /// Plan template caching configuration.
188    #[serde(default)]
189    pub plan_cache: PlanCacheConfig,
190    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
191    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
192    #[serde(default)]
193    pub topology_selection: bool,
194    /// Provider name from `[[llm.providers]]` for verification LLM calls.
195    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
196    #[serde(default)]
197    pub verify_provider: ProviderName,
198    /// Maximum tokens budget for verification LLM calls. Default: 1024.
199    #[serde(default = "default_verify_max_tokens")]
200    pub verify_max_tokens: u32,
201    /// Maximum number of replan cycles per graph execution. Default: 2.
202    ///
203    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
204    /// runs, gaps are logged only).
205    #[serde(default = "default_max_replans")]
206    pub max_replans: u32,
207    /// Enable post-task completeness verification. Default: false (opt-in).
208    ///
209    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
210    /// `Completed` during verification; downstream tasks are unblocked immediately.
211    /// Verification is best-effort and does not gate dispatch.
212    #[serde(default)]
213    pub verify_completeness: bool,
214    /// Provider name from `[[llm.providers]]` for tool-dispatch routing.
215    /// When set, tool-heavy tasks prefer this provider over the primary.
216    /// Prefer mid-tier models (e.g., qwen2.5:14b) for reliability per arXiv:2601.16280.
217    /// Empty string = use the primary provider.
218    #[serde(default)]
219    pub tool_provider: ProviderName,
220    /// Minimum completeness score (0.0–1.0) for the plan to be accepted without
221    /// replanning. Default: 0.7. When the verifier reports `confidence <
222    /// completeness_threshold` AND gaps exist, a replan cycle is triggered.
223    /// Used by both per-task and whole-plan verification.
224    /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
225    #[serde(default = "default_completeness_threshold")]
226    pub completeness_threshold: f32,
227    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
228    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
229    /// Default: false (opt-in).
230    #[serde(default)]
231    pub cascade_routing: bool,
232    /// Failure rate threshold (0.0–1.0) above which a DAG region is considered "cascading".
233    /// Must be in (0.0, 1.0]. Default: 0.5.
234    #[serde(default = "default_cascade_failure_threshold")]
235    pub cascade_failure_threshold: f32,
236    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
237    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
238    /// end-to-end latency. Default: false (opt-in).
239    #[serde(default)]
240    pub tree_optimized_dispatch: bool,
241
242    /// `AdaptOrch` bandit-driven topology advisor. Default: disabled.
243    #[serde(default)]
244    pub adaptorch: AdaptOrchConfig,
245    /// Consecutive-chain cascade abort threshold: number of consecutive `Failed` entries
246    /// in a `depends_on` chain that triggers a DAG abort.
247    ///
248    /// `0` disables linear-chain cascade abort. Default: 3.
249    /// Must not be `1` — a threshold of 1 would abort on every single failure.
250    #[serde(default = "default_cascade_chain_threshold")]
251    pub cascade_chain_threshold: usize,
252    /// Fan-out cascade abort failure-rate threshold (0.0–1.0).
253    ///
254    /// When a DAG region's failure rate reaches this value AND the region has ≥ 3 tasks,
255    /// the DAG is aborted immediately. `0.0` disables this signal (opt-in).
256    /// Recommended production value: `0.7`.
257    #[serde(default)]
258    pub cascade_failure_rate_abort_threshold: f32,
259    /// TTL for lineage entries in seconds. Entries older than this are pruned during
260    /// chain merge. Setting this too low can prevent detection of slow-build cascades.
261    ///
262    /// Default: 300 seconds (5 minutes).
263    #[serde(default = "default_lineage_ttl_secs")]
264    pub lineage_ttl_secs: u64,
265    /// Enable per-subtask predicate verification gate.
266    ///
267    /// Requires `predicate_provider` or a primary LLM provider to be configured.
268    /// Default: false (opt-in).
269    #[serde(default)]
270    pub verify_predicate_enabled: bool,
271    /// Provider name from `[[llm.providers]]` for predicate evaluation.
272    ///
273    /// Empty string = fall back to `verify_provider`, then primary.
274    #[serde(default)]
275    pub predicate_provider: ProviderName,
276    /// Maximum number of predicate-driven task re-runs across the entire DAG.
277    ///
278    /// Independent of `max_replans` (verifier completeness budget). Default: 2.
279    #[serde(default = "default_max_predicate_replans")]
280    pub max_predicate_replans: u32,
281    /// Timeout in seconds for each predicate LLM evaluation call.
282    ///
283    /// On timeout the evaluator returns a fail-open outcome (`passed = true`,
284    /// `confidence = 0.0`) and logs a warning. Default: 30.
285    #[serde(default = "default_predicate_timeout_secs")]
286    pub predicate_timeout_secs: u64,
287    /// Persist task graph state to `SQLite` across scheduler ticks.
288    ///
289    /// When `true` and a `SemanticMemory` store is available, the scheduler
290    /// snapshots the graph once per tick and on plan completion. Graphs can
291    /// then be rehydrated via `/plan resume <id>` after a restart.
292    /// Default: `true`.
293    #[serde(default = "default_persistence_enabled")]
294    pub persistence_enabled: bool,
295}
296
297impl Default for OrchestrationConfig {
298    fn default() -> Self {
299        Self {
300            enabled: false,
301            max_tasks: 20,
302            max_parallel: 4,
303            default_failure_strategy: "abort".to_string(),
304            default_max_retries: 3,
305            task_timeout_secs: 300,
306            planner_provider: ProviderName::default(),
307            planner_max_tokens: default_planner_max_tokens(),
308            dependency_context_budget: 16384,
309            confirm_before_execute: true,
310            aggregator_max_tokens: default_aggregator_max_tokens(),
311            deferral_backoff_ms: default_deferral_backoff_ms(),
312            plan_cache: PlanCacheConfig::default(),
313            topology_selection: false,
314            verify_provider: ProviderName::default(),
315            verify_max_tokens: default_verify_max_tokens(),
316            max_replans: default_max_replans(),
317            verify_completeness: false,
318            completeness_threshold: default_completeness_threshold(),
319            tool_provider: ProviderName::default(),
320            cascade_routing: false,
321            cascade_failure_threshold: default_cascade_failure_threshold(),
322            tree_optimized_dispatch: false,
323            adaptorch: AdaptOrchConfig::default(),
324            cascade_chain_threshold: default_cascade_chain_threshold(),
325            cascade_failure_rate_abort_threshold: 0.0,
326            lineage_ttl_secs: default_lineage_ttl_secs(),
327            verify_predicate_enabled: false,
328            predicate_provider: ProviderName::default(),
329            max_predicate_replans: default_max_predicate_replans(),
330            predicate_timeout_secs: default_predicate_timeout_secs(),
331            persistence_enabled: default_persistence_enabled(),
332        }
333    }
334}
335
336/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
337///
338/// When `enabled = true`, Zeph periodically runs A/B experiments on its own skill and
339/// prompt configurations to find improvements automatically.
340///
341/// # Example (TOML)
342///
343/// ```toml
344/// [experiments]
345/// enabled = false
346/// max_experiments = 20
347/// auto_apply = false
348/// ```
349#[derive(Debug, Clone, Deserialize, Serialize)]
350#[serde(default)]
351pub struct ExperimentConfig {
352    /// Enable autonomous self-experimentation. Default: `false`.
353    pub enabled: bool,
354    /// Model identifier used for evaluating experiment outcomes.
355    pub eval_model: Option<String>,
356    /// Path to a benchmark JSONL file for evaluating experiments.
357    pub benchmark_file: Option<std::path::PathBuf>,
358    #[serde(default = "default_experiment_max_experiments")]
359    pub max_experiments: u32,
360    #[serde(default = "default_experiment_max_wall_time_secs")]
361    pub max_wall_time_secs: u64,
362    #[serde(default = "default_experiment_min_improvement")]
363    pub min_improvement: f64,
364    #[serde(default = "default_experiment_eval_budget_tokens")]
365    pub eval_budget_tokens: u64,
366    pub auto_apply: bool,
367    #[serde(default)]
368    pub schedule: ExperimentSchedule,
369}
370
371impl Default for ExperimentConfig {
372    fn default() -> Self {
373        Self {
374            enabled: false,
375            eval_model: None,
376            benchmark_file: None,
377            max_experiments: default_experiment_max_experiments(),
378            max_wall_time_secs: default_experiment_max_wall_time_secs(),
379            min_improvement: default_experiment_min_improvement(),
380            eval_budget_tokens: default_experiment_eval_budget_tokens(),
381            auto_apply: false,
382            schedule: ExperimentSchedule::default(),
383        }
384    }
385}
386
387/// Configuration for `AdaptOrch` — bandit-driven topology advisor (`[orchestration.adaptorch]`).
388///
389/// # Example
390///
391/// ```toml
392/// [orchestration.adaptorch]
393/// enabled = true
394/// topology_provider = "fast"
395/// classify_timeout_secs = 4
396/// state_path = ""
397/// ```
398#[derive(Debug, Clone, Deserialize, Serialize)]
399#[serde(default)]
400pub struct AdaptOrchConfig {
401    /// Enable `AdaptOrch`. When `false`, planning uses the default `plan()` path.
402    pub enabled: bool,
403    /// Provider name from `[[llm.providers]]` for goal classification. Empty → primary provider.
404    pub topology_provider: ProviderName,
405    /// Hard timeout (seconds) for the classification LLM call.
406    #[serde(default = "default_classify_timeout_secs")]
407    pub classify_timeout_secs: u64,
408    /// Path to the persisted Beta-arm JSON state file.
409    /// Empty string → `~/.zeph/adaptorch_state.json` (resolved at runtime).
410    #[serde(default)]
411    pub state_path: String,
412    /// Maximum tokens for the classification LLM call.
413    #[serde(default = "default_max_classify_tokens")]
414    pub max_classify_tokens: u32,
415}
416
/// Fallback for `AdaptOrchConfig::classify_timeout_secs` (seconds) when the key is
/// missing from the TOML input; also used by `AdaptOrchConfig::default()`.
fn default_classify_timeout_secs() -> u64 {
    4_u64
}
420
/// Fallback for `AdaptOrchConfig::max_classify_tokens` when the key is missing from
/// the TOML input; also used by `AdaptOrchConfig::default()`.
fn default_max_classify_tokens() -> u32 {
    80_u32
}
424
425impl Default for AdaptOrchConfig {
426    fn default() -> Self {
427        Self {
428            enabled: false,
429            topology_provider: ProviderName::default(),
430            classify_timeout_secs: default_classify_timeout_secs(),
431            state_path: String::new(),
432            max_classify_tokens: default_max_classify_tokens(),
433        }
434    }
435}
436
437/// Cron scheduling configuration for automatic experiment runs.
438#[derive(Debug, Clone, Deserialize, Serialize)]
439#[serde(default)]
440pub struct ExperimentSchedule {
441    pub enabled: bool,
442    #[serde(default = "default_experiment_schedule_cron")]
443    pub cron: String,
444    #[serde(default = "default_experiment_max_experiments_per_run")]
445    pub max_experiments_per_run: u32,
446    /// Wall-time cap for a single scheduled experiment session (seconds).
447    ///
448    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
449    /// a background session cannot overlap the next cron trigger on typical schedules.
450    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
451    pub max_wall_time_secs: u64,
452}
453
454impl Default for ExperimentSchedule {
455    fn default() -> Self {
456        Self {
457            enabled: false,
458            cron: default_experiment_schedule_cron(),
459            max_experiments_per_run: default_experiment_max_experiments_per_run(),
460            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
461        }
462    }
463}
464
465impl ExperimentConfig {
466    /// Validate that numeric bounds are within sane operating limits.
467    ///
468    /// # Errors
469    ///
470    /// Returns a description string if any field is outside allowed range.
471    pub fn validate(&self) -> Result<(), String> {
472        if !(1..=1_000).contains(&self.max_experiments) {
473            return Err(format!(
474                "experiments.max_experiments must be in 1..=1000, got {}",
475                self.max_experiments
476            ));
477        }
478        if !(60..=86_400).contains(&self.max_wall_time_secs) {
479            return Err(format!(
480                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
481                self.max_wall_time_secs
482            ));
483        }
484        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
485            return Err(format!(
486                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
487                self.eval_budget_tokens
488            ));
489        }
490        if !(0.0..=100.0).contains(&self.min_improvement) {
491            return Err(format!(
492                "experiments.min_improvement must be in 0.0..=100.0, got {}",
493                self.min_improvement
494            ));
495        }
496        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
497            return Err(format!(
498                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
499                self.schedule.max_experiments_per_run
500            ));
501        }
502        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
503            return Err(format!(
504                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
505                self.schedule.max_wall_time_secs
506            ));
507        }
508        Ok(())
509    }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// True when two `f32` values differ by less than `f32::EPSILON`.
    fn nearly_equal(left: f32, right: f32) -> bool {
        (left - right).abs() < f32::EPSILON
    }

    /// A similarity threshold above the upper bound must fail validation.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let mut cfg = PlanCacheConfig::default();
        cfg.similarity_threshold = 1.1;
        assert!(
            cfg.validate().is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }

    /// `Default` must yield a completeness threshold of 0.7.
    #[test]
    fn completeness_threshold_default_is_0_7() {
        let threshold = OrchestrationConfig::default().completeness_threshold;
        assert!(
            nearly_equal(threshold, 0.7),
            "completeness_threshold default must be 0.7, got {}",
            threshold
        );
    }

    /// An explicit threshold survives deserialize → serialize → deserialize.
    #[test]
    fn completeness_threshold_serde_round_trip() {
        let toml_in = "enabled = true\ncompleteness_threshold = 0.85\n";
        let first: OrchestrationConfig = toml::from_str(toml_in).expect("deserialize");
        assert!(nearly_equal(first.completeness_threshold, 0.85));

        let serialized = toml::to_string(&first).expect("serialize");
        let second: OrchestrationConfig = toml::from_str(&serialized).expect("re-deserialize");
        assert!(nearly_equal(second.completeness_threshold, 0.85));
    }

    /// Omitting the key falls back to the 0.7 default.
    #[test]
    fn completeness_threshold_missing_uses_default() {
        let cfg: OrchestrationConfig = toml::from_str("enabled = true\n").expect("deserialize");
        let threshold = cfg.completeness_threshold;
        assert!(
            nearly_equal(threshold, 0.7),
            "missing field must use default 0.7, got {}",
            threshold
        );
    }
}