Skip to main content

zeph_config/
experiment.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Serde default for `OrchestrationConfig::planner_max_tokens`.
fn default_planner_max_tokens() -> u32 {
    const PLANNER_MAX_TOKENS: u32 = 4_096;
    PLANNER_MAX_TOKENS
}
10
/// Serde default for `OrchestrationConfig::aggregator_max_tokens`.
fn default_aggregator_max_tokens() -> u32 {
    const AGGREGATOR_MAX_TOKENS: u32 = 4_096;
    AGGREGATOR_MAX_TOKENS
}
14
/// Serde default for `OrchestrationConfig::deferral_backoff_ms` (milliseconds).
fn default_deferral_backoff_ms() -> u64 {
    const DEFERRAL_BACKOFF_MS: u64 = 100;
    DEFERRAL_BACKOFF_MS
}
18
/// Serde default for `ExperimentConfig::max_experiments`.
fn default_experiment_max_experiments() -> u32 {
    const MAX_EXPERIMENTS: u32 = 20;
    MAX_EXPERIMENTS
}
22
/// Serde default for `ExperimentConfig::max_wall_time_secs` (seconds).
fn default_experiment_max_wall_time_secs() -> u64 {
    const MAX_WALL_TIME_SECS: u64 = 3_600;
    MAX_WALL_TIME_SECS
}
26
/// Serde default for `ExperimentConfig::min_improvement`.
fn default_experiment_min_improvement() -> f64 {
    const MIN_IMPROVEMENT: f64 = 0.5;
    MIN_IMPROVEMENT
}
30
/// Serde default for `ExperimentConfig::eval_budget_tokens`.
fn default_experiment_eval_budget_tokens() -> u64 {
    const EVAL_BUDGET_TOKENS: u64 = 100_000;
    EVAL_BUDGET_TOKENS
}
34
/// Serde default for `ExperimentSchedule::cron`.
fn default_experiment_schedule_cron() -> String {
    String::from("0 3 * * *")
}
38
/// Serde default for `ExperimentSchedule::max_experiments_per_run`.
fn default_experiment_max_experiments_per_run() -> u32 {
    const MAX_EXPERIMENTS_PER_RUN: u32 = 20;
    MAX_EXPERIMENTS_PER_RUN
}
42
/// Serde default for `ExperimentSchedule::max_wall_time_secs` (seconds).
fn default_experiment_schedule_max_wall_time_secs() -> u64 {
    const SCHEDULE_MAX_WALL_TIME_SECS: u64 = 1_800;
    SCHEDULE_MAX_WALL_TIME_SECS
}
46
/// Serde default for `OrchestrationConfig::verify_max_tokens`.
fn default_verify_max_tokens() -> u32 {
    const VERIFY_MAX_TOKENS: u32 = 1_024;
    VERIFY_MAX_TOKENS
}
50
/// Serde default for `OrchestrationConfig::max_replans`.
fn default_max_replans() -> u32 {
    const MAX_REPLANS: u32 = 2;
    MAX_REPLANS
}
54
/// Serde default for `OrchestrationConfig::completeness_threshold`.
fn default_completeness_threshold() -> f32 {
    const COMPLETENESS_THRESHOLD: f32 = 0.7;
    COMPLETENESS_THRESHOLD
}
58
/// Serde default for `OrchestrationConfig::cascade_failure_threshold`.
fn default_cascade_failure_threshold() -> f32 {
    const CASCADE_FAILURE_THRESHOLD: f32 = 0.5;
    CASCADE_FAILURE_THRESHOLD
}
62
/// Serde default for `OrchestrationConfig::cascade_chain_threshold`.
fn default_cascade_chain_threshold() -> usize {
    const CASCADE_CHAIN_THRESHOLD: usize = 3;
    CASCADE_CHAIN_THRESHOLD
}
66
/// Serde default for `OrchestrationConfig::lineage_ttl_secs` (seconds).
fn default_lineage_ttl_secs() -> u64 {
    const LINEAGE_TTL_SECS: u64 = 300;
    LINEAGE_TTL_SECS
}
70
/// Serde default for `OrchestrationConfig::max_predicate_replans`.
fn default_max_predicate_replans() -> u32 {
    const MAX_PREDICATE_REPLANS: u32 = 2;
    MAX_PREDICATE_REPLANS
}
74
/// Serde default for `OrchestrationConfig::predicate_timeout_secs` (seconds).
fn default_predicate_timeout_secs() -> u64 {
    const PREDICATE_TIMEOUT_SECS: u64 = 30;
    PREDICATE_TIMEOUT_SECS
}
78
/// Serde default for `OrchestrationConfig::persistence_enabled`.
fn default_persistence_enabled() -> bool {
    const PERSISTENCE_ENABLED: bool = true;
    PERSISTENCE_ENABLED
}
82
/// Serde default for `PlanCacheConfig::similarity_threshold`.
fn default_plan_cache_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.90;
    SIMILARITY_THRESHOLD
}
86
/// Serde default for `PlanCacheConfig::ttl_days`.
fn default_plan_cache_ttl_days() -> u32 {
    const TTL_DAYS: u32 = 30;
    TTL_DAYS
}
90
/// Serde default for `PlanCacheConfig::max_templates`.
fn default_plan_cache_max_templates() -> u32 {
    const MAX_TEMPLATES: u32 = 100;
    MAX_TEMPLATES
}
94
95/// Configuration for plan template caching (`[orchestration.plan_cache]` TOML section).
96#[derive(Debug, Clone, Deserialize, Serialize)]
97#[serde(default)]
98pub struct PlanCacheConfig {
99    /// Enable plan template caching. Default: false.
100    pub enabled: bool,
101    /// Minimum cosine similarity to consider a cached template a match. Default: 0.90.
102    #[serde(default = "default_plan_cache_similarity_threshold")]
103    pub similarity_threshold: f32,
104    /// Days since last access before a template is evicted. Default: 30.
105    #[serde(default = "default_plan_cache_ttl_days")]
106    pub ttl_days: u32,
107    /// Maximum number of cached templates. Default: 100.
108    #[serde(default = "default_plan_cache_max_templates")]
109    pub max_templates: u32,
110}
111
112impl Default for PlanCacheConfig {
113    fn default() -> Self {
114        Self {
115            enabled: false,
116            similarity_threshold: default_plan_cache_similarity_threshold(),
117            ttl_days: default_plan_cache_ttl_days(),
118            max_templates: default_plan_cache_max_templates(),
119        }
120    }
121}
122
123impl PlanCacheConfig {
124    /// Validate that all fields are within sane operating limits.
125    ///
126    /// # Errors
127    ///
128    /// Returns a description string if any field is outside the allowed range.
129    pub fn validate(&self) -> Result<(), String> {
130        if !(0.5..=1.0).contains(&self.similarity_threshold) {
131            return Err(format!(
132                "plan_cache.similarity_threshold must be in [0.5, 1.0], got {}",
133                self.similarity_threshold
134            ));
135        }
136        if self.max_templates == 0 || self.max_templates > 10_000 {
137            return Err(format!(
138                "plan_cache.max_templates must be in [1, 10000], got {}",
139                self.max_templates
140            ));
141        }
142        if self.ttl_days == 0 || self.ttl_days > 365 {
143            return Err(format!(
144                "plan_cache.ttl_days must be in [1, 365], got {}",
145                self.ttl_days
146            ));
147        }
148        Ok(())
149    }
150}
151
152/// Configuration for the task orchestration subsystem (`[orchestration]` TOML section).
153#[derive(Debug, Clone, Deserialize, Serialize)]
154#[serde(default)]
155#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
156pub struct OrchestrationConfig {
157    /// Enable the orchestration subsystem.
158    pub enabled: bool,
159    /// Maximum number of tasks in a single graph.
160    pub max_tasks: u32,
161    /// Maximum number of tasks that can run in parallel.
162    pub max_parallel: u32,
163    /// Default failure strategy for all tasks unless overridden per-task.
164    pub default_failure_strategy: String,
165    /// Default number of retries for the `retry` failure strategy.
166    pub default_max_retries: u32,
167    /// Timeout in seconds for a single task. `0` means no timeout.
168    pub task_timeout_secs: u64,
169    /// Provider name from `[[llm.providers]]` for planning LLM calls.
170    /// Empty string = use the agent's primary provider.
171    #[serde(default)]
172    pub planner_provider: ProviderName,
173    /// Maximum tokens budget hint for planner responses. Reserved for future use when
174    /// per-call token limits are added to the `LlmProvider::chat` API.
175    #[serde(default = "default_planner_max_tokens")]
176    pub planner_max_tokens: u32,
177    /// Total character budget for cross-task dependency context injection.
178    pub dependency_context_budget: usize,
179    /// Whether to show a confirmation prompt before executing a plan.
180    pub confirm_before_execute: bool,
181    /// Maximum tokens budget for aggregation LLM calls. Default: 4096.
182    #[serde(default = "default_aggregator_max_tokens")]
183    pub aggregator_max_tokens: u32,
184    /// Base backoff for `ConcurrencyLimit` retries; grows exponentially (×2 each attempt) up to 5 s.
185    #[serde(default = "default_deferral_backoff_ms")]
186    pub deferral_backoff_ms: u64,
187    /// Plan template caching configuration.
188    #[serde(default)]
189    pub plan_cache: PlanCacheConfig,
190    /// Enable topology-aware concurrency selection. When true, `TopologyClassifier`
191    /// adjusts `max_parallel` based on the DAG structure. Default: false (opt-in).
192    #[serde(default)]
193    pub topology_selection: bool,
194    /// Provider name from `[[llm.providers]]` for verification LLM calls.
195    /// Empty string = use the agent's primary provider. Should be a cheap/fast provider.
196    #[serde(default)]
197    pub verify_provider: ProviderName,
198    /// Maximum tokens budget for verification LLM calls. Default: 1024.
199    #[serde(default = "default_verify_max_tokens")]
200    pub verify_max_tokens: u32,
201    /// Maximum number of replan cycles per graph execution. Default: 2.
202    ///
203    /// Prevents infinite verify-replan loops. 0 = disable replan (verification still
204    /// runs, gaps are logged only).
205    #[serde(default = "default_max_replans")]
206    pub max_replans: u32,
207    /// Enable post-task completeness verification. Default: false (opt-in).
208    ///
209    /// When true, completed tasks are evaluated by `PlanVerifier`. Task stays
210    /// `Completed` during verification; downstream tasks are unblocked immediately.
211    /// Verification is best-effort and does not gate dispatch.
212    #[serde(default)]
213    pub verify_completeness: bool,
214    /// Provider name from `[[llm.providers]]` for tool-dispatch routing.
215    /// When set, tool-heavy tasks prefer this provider over the primary.
216    /// Prefer mid-tier models (e.g., qwen2.5:14b) for reliability per arXiv:2601.16280.
217    /// Empty string = use the primary provider.
218    #[serde(default)]
219    pub tool_provider: ProviderName,
220    /// Minimum completeness score (0.0–1.0) for the plan to be accepted without
221    /// replanning. Default: 0.7. When the verifier reports `confidence <
222    /// completeness_threshold` AND gaps exist, a replan cycle is triggered.
223    /// Used by both per-task and whole-plan verification.
224    /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
225    #[serde(default = "default_completeness_threshold")]
226    pub completeness_threshold: f32,
227    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
228    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
229    /// Default: false (opt-in).
230    #[serde(default)]
231    pub cascade_routing: bool,
232    /// Failure rate threshold (0.0–1.0) above which a DAG region is considered "cascading".
233    /// Must be in (0.0, 1.0]. Default: 0.5.
234    #[serde(default = "default_cascade_failure_threshold")]
235    pub cascade_failure_threshold: f32,
236    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
237    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
238    /// end-to-end latency. Default: false (opt-in).
239    #[serde(default)]
240    pub tree_optimized_dispatch: bool,
241
242    /// `AdaptOrch` bandit-driven topology advisor. Default: disabled.
243    #[serde(default)]
244    pub adaptorch: AdaptOrchConfig,
245    /// Consecutive-chain cascade abort threshold: number of consecutive `Failed` entries
246    /// in a `depends_on` chain that triggers a DAG abort.
247    ///
248    /// `0` disables linear-chain cascade abort. Default: 3.
249    /// Must not be `1` — a threshold of 1 would abort on every single failure.
250    #[serde(default = "default_cascade_chain_threshold")]
251    pub cascade_chain_threshold: usize,
252    /// Fan-out cascade abort failure-rate threshold (0.0–1.0).
253    ///
254    /// When a DAG region's failure rate reaches this value AND the region has ≥ 3 tasks,
255    /// the DAG is aborted immediately. `0.0` disables this signal (opt-in).
256    /// Recommended production value: `0.7`.
257    #[serde(default)]
258    pub cascade_failure_rate_abort_threshold: f32,
259    /// TTL for lineage entries in seconds. Entries older than this are pruned during
260    /// chain merge. Setting this too low can prevent detection of slow-build cascades.
261    ///
262    /// Default: 300 seconds (5 minutes).
263    #[serde(default = "default_lineage_ttl_secs")]
264    pub lineage_ttl_secs: u64,
265    /// Enable per-subtask predicate verification gate.
266    ///
267    /// Requires `predicate_provider` or a primary LLM provider to be configured.
268    /// Default: false (opt-in).
269    #[serde(default)]
270    pub verify_predicate_enabled: bool,
271    /// Provider name from `[[llm.providers]]` for predicate evaluation.
272    ///
273    /// Empty string = fall back to `verify_provider`, then primary.
274    #[serde(default)]
275    pub predicate_provider: ProviderName,
276    /// Maximum number of predicate-driven task re-runs across the entire DAG.
277    ///
278    /// Independent of `max_replans` (verifier completeness budget). Default: 2.
279    #[serde(default = "default_max_predicate_replans")]
280    pub max_predicate_replans: u32,
281    /// Timeout in seconds for each predicate LLM evaluation call.
282    ///
283    /// On timeout the evaluator returns a fail-open outcome (`passed = true`,
284    /// `confidence = 0.0`) and logs a warning. Default: 30.
285    #[serde(default = "default_predicate_timeout_secs")]
286    pub predicate_timeout_secs: u64,
287    /// Persist task graph state to `SQLite` across scheduler ticks.
288    ///
289    /// When `true` and a `SemanticMemory` store is available, the scheduler
290    /// snapshots the graph once per tick and on plan completion. Graphs can
291    /// then be rehydrated via `/plan resume <id>` after a restart.
292    /// Default: `true`.
293    #[serde(default = "default_persistence_enabled")]
294    pub persistence_enabled: bool,
295    /// Provider name from `[[llm.providers]]` for scheduling-tier LLM calls
296    /// (aggregation, predicate evaluation, verification when no specific provider is set).
297    ///
298    /// Acts as fallback for `verify_provider` and `predicate_provider` when those are empty.
299    /// Does NOT affect `planner_provider` — planning is a complex task and stays on the quality
300    /// provider. Empty string = use the agent's primary provider.
301    ///
302    /// # Trade-off
303    ///
304    /// Setting this to a fast/cheap model reduces aggregation quality because `LlmAggregator`
305    /// produces user-visible output. See CHANGELOG for details.
306    #[serde(default)]
307    pub orchestrator_provider: ProviderName,
308
309    /// Default per-task cost budget in US cents. `0.0` = unlimited (no budget check).
310    ///
311    /// When a sub-agent task completes, the scheduler emits a `tracing::warn!` if the
312    /// task exceeded this budget. In MVP this is **warn-only** — hard enforcement requires
313    /// per-task `CostTracker` scoping, which is deferred post-v1.0.0.
314    ///
315    /// Individual tasks can override this via `TaskNode::token_budget_cents`.
316    /// Default: `0.0` (unlimited).
317    #[serde(default)]
318    pub default_task_budget_cents: f64,
319}
320
321impl Default for OrchestrationConfig {
322    fn default() -> Self {
323        Self {
324            enabled: false,
325            max_tasks: 20,
326            max_parallel: 4,
327            default_failure_strategy: "abort".to_string(),
328            default_max_retries: 3,
329            task_timeout_secs: 300,
330            planner_provider: ProviderName::default(),
331            planner_max_tokens: default_planner_max_tokens(),
332            dependency_context_budget: 16384,
333            confirm_before_execute: true,
334            aggregator_max_tokens: default_aggregator_max_tokens(),
335            deferral_backoff_ms: default_deferral_backoff_ms(),
336            plan_cache: PlanCacheConfig::default(),
337            topology_selection: false,
338            verify_provider: ProviderName::default(),
339            verify_max_tokens: default_verify_max_tokens(),
340            max_replans: default_max_replans(),
341            verify_completeness: false,
342            completeness_threshold: default_completeness_threshold(),
343            tool_provider: ProviderName::default(),
344            cascade_routing: false,
345            cascade_failure_threshold: default_cascade_failure_threshold(),
346            tree_optimized_dispatch: false,
347            adaptorch: AdaptOrchConfig::default(),
348            cascade_chain_threshold: default_cascade_chain_threshold(),
349            cascade_failure_rate_abort_threshold: 0.0,
350            lineage_ttl_secs: default_lineage_ttl_secs(),
351            verify_predicate_enabled: false,
352            predicate_provider: ProviderName::default(),
353            max_predicate_replans: default_max_predicate_replans(),
354            predicate_timeout_secs: default_predicate_timeout_secs(),
355            persistence_enabled: default_persistence_enabled(),
356            orchestrator_provider: ProviderName::default(),
357            default_task_budget_cents: 0.0,
358        }
359    }
360}
361
362/// Configuration for the autonomous self-experimentation engine (`[experiments]` TOML section).
363///
364/// When `enabled = true`, Zeph periodically runs A/B experiments on its own skill and
365/// prompt configurations to find improvements automatically.
366///
367/// # Example (TOML)
368///
369/// ```toml
370/// [experiments]
371/// enabled = false
372/// max_experiments = 20
373/// auto_apply = false
374/// ```
375#[derive(Debug, Clone, Deserialize, Serialize)]
376#[serde(default)]
377pub struct ExperimentConfig {
378    /// Enable autonomous self-experimentation. Default: `false`.
379    pub enabled: bool,
380    /// Model identifier used for evaluating experiment outcomes.
381    pub eval_model: Option<String>,
382    /// Path to a benchmark JSONL file for evaluating experiments.
383    pub benchmark_file: Option<std::path::PathBuf>,
384    #[serde(default = "default_experiment_max_experiments")]
385    pub max_experiments: u32,
386    #[serde(default = "default_experiment_max_wall_time_secs")]
387    pub max_wall_time_secs: u64,
388    #[serde(default = "default_experiment_min_improvement")]
389    pub min_improvement: f64,
390    #[serde(default = "default_experiment_eval_budget_tokens")]
391    pub eval_budget_tokens: u64,
392    pub auto_apply: bool,
393    #[serde(default)]
394    pub schedule: ExperimentSchedule,
395}
396
397impl Default for ExperimentConfig {
398    fn default() -> Self {
399        Self {
400            enabled: false,
401            eval_model: None,
402            benchmark_file: None,
403            max_experiments: default_experiment_max_experiments(),
404            max_wall_time_secs: default_experiment_max_wall_time_secs(),
405            min_improvement: default_experiment_min_improvement(),
406            eval_budget_tokens: default_experiment_eval_budget_tokens(),
407            auto_apply: false,
408            schedule: ExperimentSchedule::default(),
409        }
410    }
411}
412
413/// Configuration for `AdaptOrch` — bandit-driven topology advisor (`[orchestration.adaptorch]`).
414///
415/// # Example
416///
417/// ```toml
418/// [orchestration.adaptorch]
419/// enabled = true
420/// topology_provider = "fast"
421/// classify_timeout_secs = 4
422/// state_path = ""
423/// ```
424#[derive(Debug, Clone, Deserialize, Serialize)]
425#[serde(default)]
426pub struct AdaptOrchConfig {
427    /// Enable `AdaptOrch`. When `false`, planning uses the default `plan()` path.
428    pub enabled: bool,
429    /// Provider name from `[[llm.providers]]` for goal classification. Empty → primary provider.
430    pub topology_provider: ProviderName,
431    /// Hard timeout (seconds) for the classification LLM call.
432    #[serde(default = "default_classify_timeout_secs")]
433    pub classify_timeout_secs: u64,
434    /// Path to the persisted Beta-arm JSON state file.
435    /// Empty string → `~/.zeph/adaptorch_state.json` (resolved at runtime).
436    #[serde(default)]
437    pub state_path: String,
438    /// Maximum tokens for the classification LLM call.
439    #[serde(default = "default_max_classify_tokens")]
440    pub max_classify_tokens: u32,
441}
442
/// Serde default for `AdaptOrchConfig::classify_timeout_secs` (seconds).
fn default_classify_timeout_secs() -> u64 {
    const CLASSIFY_TIMEOUT_SECS: u64 = 4;
    CLASSIFY_TIMEOUT_SECS
}
446
/// Serde default for `AdaptOrchConfig::max_classify_tokens`.
fn default_max_classify_tokens() -> u32 {
    const MAX_CLASSIFY_TOKENS: u32 = 80;
    MAX_CLASSIFY_TOKENS
}
450
451impl Default for AdaptOrchConfig {
452    fn default() -> Self {
453        Self {
454            enabled: false,
455            topology_provider: ProviderName::default(),
456            classify_timeout_secs: default_classify_timeout_secs(),
457            state_path: String::new(),
458            max_classify_tokens: default_max_classify_tokens(),
459        }
460    }
461}
462
463/// Cron scheduling configuration for automatic experiment runs.
464#[derive(Debug, Clone, Deserialize, Serialize)]
465#[serde(default)]
466pub struct ExperimentSchedule {
467    pub enabled: bool,
468    #[serde(default = "default_experiment_schedule_cron")]
469    pub cron: String,
470    #[serde(default = "default_experiment_max_experiments_per_run")]
471    pub max_experiments_per_run: u32,
472    /// Wall-time cap for a single scheduled experiment session (seconds).
473    ///
474    /// Overrides `experiments.max_wall_time_secs` for scheduled runs. Defaults to 1800s so
475    /// a background session cannot overlap the next cron trigger on typical schedules.
476    #[serde(default = "default_experiment_schedule_max_wall_time_secs")]
477    pub max_wall_time_secs: u64,
478}
479
480impl Default for ExperimentSchedule {
481    fn default() -> Self {
482        Self {
483            enabled: false,
484            cron: default_experiment_schedule_cron(),
485            max_experiments_per_run: default_experiment_max_experiments_per_run(),
486            max_wall_time_secs: default_experiment_schedule_max_wall_time_secs(),
487        }
488    }
489}
490
491impl ExperimentConfig {
492    /// Validate that numeric bounds are within sane operating limits.
493    ///
494    /// # Errors
495    ///
496    /// Returns a description string if any field is outside allowed range.
497    pub fn validate(&self) -> Result<(), String> {
498        if !(1..=1_000).contains(&self.max_experiments) {
499            return Err(format!(
500                "experiments.max_experiments must be in 1..=1000, got {}",
501                self.max_experiments
502            ));
503        }
504        if !(60..=86_400).contains(&self.max_wall_time_secs) {
505            return Err(format!(
506                "experiments.max_wall_time_secs must be in 60..=86400, got {}",
507                self.max_wall_time_secs
508            ));
509        }
510        if !(1_000..=10_000_000).contains(&self.eval_budget_tokens) {
511            return Err(format!(
512                "experiments.eval_budget_tokens must be in 1000..=10000000, got {}",
513                self.eval_budget_tokens
514            ));
515        }
516        if !(0.0..=100.0).contains(&self.min_improvement) {
517            return Err(format!(
518                "experiments.min_improvement must be in 0.0..=100.0, got {}",
519                self.min_improvement
520            ));
521        }
522        if !(1..=100).contains(&self.schedule.max_experiments_per_run) {
523            return Err(format!(
524                "experiments.schedule.max_experiments_per_run must be in 1..=100, got {}",
525                self.schedule.max_experiments_per_run
526            ));
527        }
528        if !(60..=86_400).contains(&self.schedule.max_wall_time_secs) {
529            return Err(format!(
530                "experiments.schedule.max_wall_time_secs must be in 60..=86400, got {}",
531                self.schedule.max_wall_time_secs
532            ));
533        }
534        Ok(())
535    }
536}
537
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `actual` is within `f32::EPSILON` of `expected`.
    fn close_to(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < f32::EPSILON
    }

    /// A similarity threshold above the upper bound of 1.0 must fail validation.
    #[test]
    fn plan_cache_similarity_threshold_above_one_is_rejected() {
        let outcome = PlanCacheConfig {
            similarity_threshold: 1.1,
            ..PlanCacheConfig::default()
        }
        .validate();

        assert!(
            outcome.is_err(),
            "similarity_threshold = 1.1 must return a validation error"
        );
    }

    /// The programmatic default for `completeness_threshold` is 0.7.
    #[test]
    fn completeness_threshold_default_is_0_7() {
        let threshold = OrchestrationConfig::default().completeness_threshold;
        assert!(
            close_to(threshold, 0.7),
            "completeness_threshold default must be 0.7, got {}",
            threshold
        );
    }

    /// An explicit `completeness_threshold` survives deserialize → serialize → deserialize.
    #[test]
    fn completeness_threshold_serde_round_trip() {
        let toml_in = r"
            enabled = true
            completeness_threshold = 0.85
        ";
        let parsed: OrchestrationConfig = toml::from_str(toml_in).expect("deserialize");
        assert!(close_to(parsed.completeness_threshold, 0.85));

        let rendered = toml::to_string(&parsed).expect("serialize");
        let reparsed: OrchestrationConfig = toml::from_str(&rendered).expect("re-deserialize");
        assert!(close_to(reparsed.completeness_threshold, 0.85));
    }

    /// A table without `completeness_threshold` falls back to the 0.7 default.
    #[test]
    fn completeness_threshold_missing_uses_default() {
        let parsed: OrchestrationConfig =
            toml::from_str("enabled = true\n").expect("deserialize");
        let threshold = parsed.completeness_threshold;
        assert!(
            close_to(threshold, 0.7),
            "missing field must use default 0.7, got {}",
            threshold
        );
    }
}