Skip to main content

zeph_config/
learning.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Fallback for `LearningConfig::min_failures` when the key is absent from the config.
fn default_min_failures() -> u32 {
    const MIN_FAILURES: u32 = 3;
    MIN_FAILURES
}
10
/// Fallback for `LearningConfig::improve_threshold` when the key is absent from the config.
fn default_improve_threshold() -> f64 {
    const IMPROVE_THRESHOLD: f64 = 0.7;
    IMPROVE_THRESHOLD
}
14
/// Fallback for `LearningConfig::rollback_threshold` when the key is absent from the config.
fn default_rollback_threshold() -> f64 {
    const ROLLBACK_THRESHOLD: f64 = 0.5;
    ROLLBACK_THRESHOLD
}
18
/// Fallback for `LearningConfig::min_evaluations` when the key is absent from the config.
fn default_min_evaluations() -> u32 {
    const MIN_EVALUATIONS: u32 = 5;
    MIN_EVALUATIONS
}
22
/// Fallback for `LearningConfig::max_versions` when the key is absent from the config.
fn default_max_versions() -> u32 {
    const MAX_VERSIONS: u32 = 10;
    MAX_VERSIONS
}
26
/// Fallback for `LearningConfig::cooldown_minutes` when the key is absent from the config.
fn default_cooldown_minutes() -> u64 {
    const COOLDOWN_MINUTES: u64 = 60;
    COOLDOWN_MINUTES
}
30
/// Fallback for `LearningConfig::correction_detection` when the key is absent from the config.
fn default_correction_detection() -> bool {
    const CORRECTION_DETECTION: bool = true;
    CORRECTION_DETECTION
}
34
/// Fallback for `LearningConfig::correction_confidence_threshold` when the key is absent
/// from the config.
fn default_correction_confidence_threshold() -> f32 {
    const CORRECTION_CONFIDENCE_THRESHOLD: f32 = 0.6;
    CORRECTION_CONFIDENCE_THRESHOLD
}
38
/// Fallback for `LearningConfig::judge_adaptive_low` when the key is absent from the config.
fn default_judge_adaptive_low() -> f32 {
    const JUDGE_ADAPTIVE_LOW: f32 = 0.5;
    JUDGE_ADAPTIVE_LOW
}
42
/// Fallback for `LearningConfig::judge_adaptive_high` when the key is absent from the config.
fn default_judge_adaptive_high() -> f32 {
    const JUDGE_ADAPTIVE_HIGH: f32 = 0.8;
    JUDGE_ADAPTIVE_HIGH
}
46
/// Fallback for `LearningConfig::judge_llm_timeout_secs` when the key is absent from the config.
fn default_judge_llm_timeout_secs() -> u64 {
    const JUDGE_LLM_TIMEOUT_SECS: u64 = 30;
    JUDGE_LLM_TIMEOUT_SECS
}
50
/// Fallback for `LearningConfig::correction_recall_limit` when the key is absent from the config.
fn default_correction_recall_limit() -> u32 {
    const CORRECTION_RECALL_LIMIT: u32 = 3;
    CORRECTION_RECALL_LIMIT
}
54
/// Fallback for `LearningConfig::correction_min_similarity` when the key is absent from
/// the config.
fn default_correction_min_similarity() -> f32 {
    const CORRECTION_MIN_SIMILARITY: f32 = 0.75;
    CORRECTION_MIN_SIMILARITY
}
58
/// Fallback for `LearningConfig::auto_promote_min_uses` when the key is absent from the config.
fn default_auto_promote_min_uses() -> u32 {
    const AUTO_PROMOTE_MIN_USES: u32 = 50;
    AUTO_PROMOTE_MIN_USES
}
62
/// Fallback for `LearningConfig::auto_promote_threshold` when the key is absent from the config.
fn default_auto_promote_threshold() -> f64 {
    const AUTO_PROMOTE_THRESHOLD: f64 = 0.95;
    AUTO_PROMOTE_THRESHOLD
}
66
/// Fallback for `LearningConfig::auto_demote_min_uses` when the key is absent from the config.
fn default_auto_demote_min_uses() -> u32 {
    const AUTO_DEMOTE_MIN_USES: u32 = 30;
    AUTO_DEMOTE_MIN_USES
}
70
/// Fallback for `LearningConfig::auto_demote_threshold` when the key is absent from the config.
fn default_auto_demote_threshold() -> f64 {
    const AUTO_DEMOTE_THRESHOLD: f64 = 0.40;
    AUTO_DEMOTE_THRESHOLD
}
74
/// Fallback for `LearningConfig::min_sessions_before_promote` when the key is absent from
/// the config.
fn default_min_sessions_before_promote() -> u32 {
    const MIN_SESSIONS_BEFORE_PROMOTE: u32 = 2;
    MIN_SESSIONS_BEFORE_PROMOTE
}
78
/// Fallback for `LearningConfig::min_sessions_before_demote` when the key is absent from
/// the config.
fn default_min_sessions_before_demote() -> u32 {
    const MIN_SESSIONS_BEFORE_DEMOTE: u32 = 1;
    MIN_SESSIONS_BEFORE_DEMOTE
}
82
/// Fallback for `LearningConfig::max_auto_sections` when the key is absent from the config.
fn default_max_auto_sections() -> u32 {
    const MAX_AUTO_SECTIONS: u32 = 3;
    MAX_AUTO_SECTIONS
}
86
/// Fallback for `LearningConfig::arise_min_tool_calls` when the key is absent from the config.
fn default_arise_min_tool_calls() -> u32 {
    const ARISE_MIN_TOOL_CALLS: u32 = 2;
    ARISE_MIN_TOOL_CALLS
}
90
/// Fallback for `LearningConfig::stem_min_occurrences` when the key is absent from the config.
fn default_stem_min_occurrences() -> u32 {
    const STEM_MIN_OCCURRENCES: u32 = 3;
    STEM_MIN_OCCURRENCES
}
94
/// Fallback for `LearningConfig::stem_min_success_rate` when the key is absent from the config.
fn default_stem_min_success_rate() -> f64 {
    const STEM_MIN_SUCCESS_RATE: f64 = 0.8;
    STEM_MIN_SUCCESS_RATE
}
98
/// Fallback for `LearningConfig::stem_retention_days` when the key is absent from the config.
fn default_stem_retention_days() -> u32 {
    const STEM_RETENTION_DAYS: u32 = 90;
    STEM_RETENTION_DAYS
}
102
/// Fallback for `LearningConfig::stem_pattern_window_days` when the key is absent from
/// the config.
fn default_stem_pattern_window_days() -> u32 {
    const STEM_PATTERN_WINDOW_DAYS: u32 = 30;
    STEM_PATTERN_WINDOW_DAYS
}
106
/// Fallback for `LearningConfig::erl_max_heuristics_per_skill` when the key is absent from
/// the config.
fn default_erl_max_heuristics_per_skill() -> u32 {
    const ERL_MAX_HEURISTICS_PER_SKILL: u32 = 3;
    ERL_MAX_HEURISTICS_PER_SKILL
}
110
/// Fallback for `LearningConfig::erl_dedup_threshold` when the key is absent from the config.
fn default_erl_dedup_threshold() -> f32 {
    const ERL_DEDUP_THRESHOLD: f32 = 0.9;
    ERL_DEDUP_THRESHOLD
}
114
/// Fallback for `LearningConfig::erl_min_confidence` when the key is absent from the config.
fn default_erl_min_confidence() -> f64 {
    const ERL_MIN_CONFIDENCE: f64 = 0.5;
    ERL_MIN_CONFIDENCE
}
118
/// Fallback for `LearningConfig::d2skill_max_corrections` when the key is absent from
/// the config.
fn default_d2skill_max_corrections() -> u32 {
    const D2SKILL_MAX_CORRECTIONS: u32 = 3;
    D2SKILL_MAX_CORRECTIONS
}
122
123/// Strategy for detecting implicit user corrections.
124#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
125#[serde(rename_all = "lowercase")]
126pub enum DetectorMode {
127    /// Pattern-matching only — zero LLM calls. Default behavior.
128    #[default]
129    Regex,
130    /// LLM-based judge for borderline / missed cases. Invoked only when
131    /// regex confidence falls below `judge_adaptive_high` or regex returns None.
132    ///
133    /// Note: with current regex values (ExplicitRejection=0.85, SelfCorrection=0.80,
134    /// Repetition=0.75, AlternativeRequest=0.70) and `adaptive_high=0.80`,
135    /// `ExplicitRejection` and `SelfCorrection` bypass the judge (confidence >= `adaptive_high`),
136    /// while `AlternativeRequest`, `Repetition`, and regex misses go through it.
137    Judge,
138    /// ML model-backed feedback classification via `LlmClassifier`.
139    ///
140    /// Uses the provider named in `feedback_provider` (or the primary provider if empty).
141    /// Shares the same adaptive thresholds and rate limiter as `Judge` mode.
142    /// Returns `JudgeVerdict` directly, preserving `kind` and `reasoning` metadata.
143    ///
144    /// Falls back to regex-only if the provider cannot be resolved — never fails startup.
145    Model,
146}
147
/// Self-learning and skill evolution configuration, nested under `[skills.learning]` in TOML.
///
/// When `enabled = true`, Zeph tracks skill performance and can automatically improve or roll
/// back skill definitions based on usage outcomes (ARISE, STEM, `D2Skill` pipelines).
///
/// Every field has a serde default, so an empty table deserializes successfully and yields
/// the same values as [`LearningConfig::default`].
///
/// # Example (TOML)
///
/// ```toml
/// [skills.learning]
/// enabled = true
/// auto_activate = false
/// min_failures = 3
/// ```
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LearningConfig {
    /// Enable self-learning pipelines. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Automatically activate improved skill versions without user confirmation. Default: `false`.
    #[serde(default)]
    pub auto_activate: bool,
    /// Default: `3`.
    ///
    /// NOTE(review): presumably the number of failures required before an improvement is
    /// attempted — confirm against the code that reads this field.
    #[serde(default = "default_min_failures")]
    pub min_failures: u32,
    /// Default: `0.7`.
    ///
    /// NOTE(review): presumably a success-rate threshold governing skill improvement —
    /// confirm against the code that reads this field.
    #[serde(default = "default_improve_threshold")]
    pub improve_threshold: f64,
    /// Default: `0.5`.
    ///
    /// NOTE(review): presumably a success-rate threshold governing rollback of a skill
    /// version — confirm against the code that reads this field.
    #[serde(default = "default_rollback_threshold")]
    pub rollback_threshold: f64,
    /// Default: `5`.
    ///
    /// NOTE(review): presumably the minimum number of evaluations before thresholds are
    /// applied — confirm against the code that reads this field.
    #[serde(default = "default_min_evaluations")]
    pub min_evaluations: u32,
    /// Default: `10`.
    ///
    /// NOTE(review): presumably the maximum number of retained skill versions — confirm
    /// against the code that reads this field.
    #[serde(default = "default_max_versions")]
    pub max_versions: u32,
    /// Default: `60`.
    ///
    /// NOTE(review): presumably the minimum interval, in minutes, between improvement
    /// attempts — confirm against the code that reads this field.
    #[serde(default = "default_cooldown_minutes")]
    pub cooldown_minutes: u64,
    /// Default: `true`.
    ///
    /// NOTE(review): presumably toggles implicit user-correction detection — confirm
    /// against the code that reads this field.
    #[serde(default = "default_correction_detection")]
    pub correction_detection: bool,
    /// Default: `0.6`.
    ///
    /// NOTE(review): presumably the minimum detector confidence for a message to count as
    /// a correction — confirm against the code that reads this field.
    #[serde(default = "default_correction_confidence_threshold")]
    pub correction_confidence_threshold: f32,
    /// Detector strategy: "regex" (default), "judge", or "model". See [`DetectorMode`].
    #[serde(default)]
    pub detector_mode: DetectorMode,
    /// Model for the judge detector (e.g. "claude-sonnet-4-6"). Empty = use primary provider.
    #[serde(default)]
    pub judge_model: String,
    /// Provider name from `[[llm.providers]]` for `detector_mode = "model"` (`LlmClassifier`).
    ///
    /// Empty = use the primary provider. Named but not found in registry = log warning,
    /// degrade to regex-only. Never fails startup.
    #[serde(default)]
    pub feedback_provider: ProviderName,
    /// Regex confidence below this value is treated as "not a correction" — judge not invoked.
    /// Default: `0.5`.
    #[serde(default = "default_judge_adaptive_low")]
    pub judge_adaptive_low: f32,
    /// Regex confidence at or above this value is accepted without judge confirmation.
    /// Default: `0.8`.
    #[serde(default = "default_judge_adaptive_high")]
    pub judge_adaptive_high: f32,
    /// Maximum seconds to wait for the judge LLM to respond before timing out.
    /// Applies to `detector_mode = "judge"` only. Default: `30`.
    #[serde(default = "default_judge_llm_timeout_secs")]
    pub judge_llm_timeout_secs: u64,
    /// Default: `3`.
    ///
    /// NOTE(review): presumably the maximum number of stored corrections recalled per
    /// lookup — confirm against the code that reads this field.
    #[serde(default = "default_correction_recall_limit")]
    pub correction_recall_limit: u32,
    /// Default: `0.75`.
    ///
    /// NOTE(review): presumably the minimum similarity for a stored correction to be
    /// recalled — confirm against the code that reads this field.
    #[serde(default = "default_correction_min_similarity")]
    pub correction_min_similarity: f32,
    /// Default: `50`.
    ///
    /// NOTE(review): presumably the minimum number of uses before auto-promotion is
    /// considered — confirm against the code that reads this field.
    #[serde(default = "default_auto_promote_min_uses")]
    pub auto_promote_min_uses: u32,
    /// Default: `0.95`.
    ///
    /// NOTE(review): presumably the success rate required for auto-promotion — confirm
    /// against the code that reads this field.
    #[serde(default = "default_auto_promote_threshold")]
    pub auto_promote_threshold: f64,
    /// Default: `30`.
    ///
    /// NOTE(review): presumably the minimum number of uses before auto-demotion is
    /// considered — confirm against the code that reads this field.
    #[serde(default = "default_auto_demote_min_uses")]
    pub auto_demote_min_uses: u32,
    /// Default: `0.40`.
    ///
    /// NOTE(review): presumably the success rate below which auto-demotion applies —
    /// confirm against the code that reads this field.
    #[serde(default = "default_auto_demote_threshold")]
    pub auto_demote_threshold: f64,
    /// When true, auto-promote and auto-demote decisions require the skill to have been used
    /// across at least `min_sessions_before_promote` (for promotion) or
    /// `min_sessions_before_demote` (for demotion) distinct conversation sessions.
    /// Prevents trust transitions from a single long session. Default: `false`.
    #[serde(default)]
    pub cross_session_rollout: bool,
    /// Minimum number of distinct `conversation_id` values in `skill_outcomes` before
    /// auto-promotion is eligible. Only checked when `cross_session_rollout = true`.
    /// Default: `2`.
    #[serde(default = "default_min_sessions_before_promote")]
    pub min_sessions_before_promote: u32,
    /// Minimum distinct sessions before auto-demotion when `cross_session_rollout = true`.
    ///
    /// Default 1 (demotion can happen after a single bad session by default). Separate from
    /// `min_sessions_before_promote` because demotion should be fast (low threshold) while
    /// promotion benefits from conservative validation (higher threshold).
    #[serde(default = "default_min_sessions_before_demote")]
    pub min_sessions_before_demote: u32,
    /// Maximum number of top-level content sections (markdown H2 headers) allowed in
    /// auto-generated skill bodies. Bodies exceeding this limit are rejected by
    /// `validate_body_sections()`. Default: `3`.
    #[serde(default = "default_max_auto_sections")]
    pub max_auto_sections: u32,
    /// When true, auto-generated skill versions must pass a domain-conditioned evaluation
    /// before promotion. If the improved body drifts from the original skill's domain,
    /// activation is skipped (the version is still saved for manual review).
    /// Default: `false`.
    #[serde(default)]
    pub domain_success_gate: bool,

    // --- ARISE: trace-based skill improvement ---
    /// Enable ARISE trace-based skill improvement (disabled by default).
    #[serde(default)]
    pub arise_enabled: bool,
    /// Minimum tool calls in a turn to trigger ARISE trace improvement. Default: `2`.
    #[serde(default = "default_arise_min_tool_calls")]
    pub arise_min_tool_calls: u32,
    /// Provider name from `[[llm.providers]]` for ARISE trace summarization.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub arise_trace_provider: ProviderName,

    // --- STEM: pattern-to-skill conversion ---
    /// Enable STEM automatic tool pattern detection and skill generation (disabled by default).
    #[serde(default)]
    pub stem_enabled: bool,
    /// Minimum occurrences of a tool sequence before generating a skill candidate.
    /// Default: `3`.
    #[serde(default = "default_stem_min_occurrences")]
    pub stem_min_occurrences: u32,
    /// Minimum success rate of the pattern before generating a skill candidate.
    /// Default: `0.8`.
    #[serde(default = "default_stem_min_success_rate")]
    pub stem_min_success_rate: f64,
    /// Provider name from `[[llm.providers]]` for STEM skill generation.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub stem_provider: ProviderName,
    /// Days to retain rows in `skill_usage_log` before pruning. Default: `90`.
    #[serde(default = "default_stem_retention_days")]
    pub stem_retention_days: u32,
    /// Window in days for pattern detection queries (limits scan cost on large tables).
    /// Default: `30`.
    #[serde(default = "default_stem_pattern_window_days")]
    pub stem_pattern_window_days: u32,

    // --- ERL: experiential reflective learning ---
    /// Enable ERL post-task heuristic extraction (disabled by default).
    #[serde(default)]
    pub erl_enabled: bool,
    /// Provider name from `[[llm.providers]]` for ERL heuristic extraction.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub erl_extract_provider: ProviderName,
    /// Maximum heuristics prepended per skill at match time. Default: `3`.
    #[serde(default = "default_erl_max_heuristics_per_skill")]
    pub erl_max_heuristics_per_skill: u32,
    /// Text similarity threshold (Jaccard) for heuristic deduplication.
    /// When the text similarity exceeds this, increment `use_count` instead of inserting.
    /// Default: `0.9`.
    #[serde(default = "default_erl_dedup_threshold")]
    pub erl_dedup_threshold: f32,
    /// Minimum confidence to include a heuristic at match time. Default: `0.5`.
    #[serde(default = "default_erl_min_confidence")]
    pub erl_min_confidence: f64,

    // --- D2Skill: step-level error correction ---
    /// Enable `D2Skill` step-level error correction (disabled by default).
    ///
    /// Requires `arise_enabled = true` to populate corrections from ARISE traces.
    /// If `d2skill_enabled = true` and `arise_enabled = false`, existing corrections
    /// are still applied but no new ones are generated via ARISE.
    #[serde(default)]
    pub d2skill_enabled: bool,
    /// Maximum corrections to inject per failure event. Default: `3`.
    #[serde(default = "default_d2skill_max_corrections")]
    pub d2skill_max_corrections: u32,
    /// Provider name from `[[llm.providers]]` for correction extraction from ARISE traces.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub d2skill_provider: ProviderName,
}
316
317impl Default for LearningConfig {
318    fn default() -> Self {
319        Self {
320            enabled: false,
321            auto_activate: false,
322            min_failures: default_min_failures(),
323            improve_threshold: default_improve_threshold(),
324            rollback_threshold: default_rollback_threshold(),
325            min_evaluations: default_min_evaluations(),
326            max_versions: default_max_versions(),
327            cooldown_minutes: default_cooldown_minutes(),
328            correction_detection: default_correction_detection(),
329            correction_confidence_threshold: default_correction_confidence_threshold(),
330            detector_mode: DetectorMode::default(),
331            judge_model: String::new(),
332            feedback_provider: ProviderName::default(),
333            judge_adaptive_low: default_judge_adaptive_low(),
334            judge_adaptive_high: default_judge_adaptive_high(),
335            judge_llm_timeout_secs: default_judge_llm_timeout_secs(),
336            correction_recall_limit: default_correction_recall_limit(),
337            correction_min_similarity: default_correction_min_similarity(),
338            auto_promote_min_uses: default_auto_promote_min_uses(),
339            auto_promote_threshold: default_auto_promote_threshold(),
340            auto_demote_min_uses: default_auto_demote_min_uses(),
341            auto_demote_threshold: default_auto_demote_threshold(),
342            cross_session_rollout: false,
343            min_sessions_before_promote: default_min_sessions_before_promote(),
344            min_sessions_before_demote: default_min_sessions_before_demote(),
345            max_auto_sections: default_max_auto_sections(),
346            domain_success_gate: false,
347            arise_enabled: false,
348            arise_min_tool_calls: default_arise_min_tool_calls(),
349            arise_trace_provider: ProviderName::default(),
350            stem_enabled: false,
351            stem_min_occurrences: default_stem_min_occurrences(),
352            stem_min_success_rate: default_stem_min_success_rate(),
353            stem_provider: ProviderName::default(),
354            stem_retention_days: default_stem_retention_days(),
355            stem_pattern_window_days: default_stem_pattern_window_days(),
356            erl_enabled: false,
357            erl_extract_provider: ProviderName::default(),
358            erl_max_heuristics_per_skill: default_erl_max_heuristics_per_skill(),
359            erl_dedup_threshold: default_erl_dedup_threshold(),
360            erl_min_confidence: default_erl_min_confidence(),
361            d2skill_enabled: false,
362            d2skill_max_corrections: default_d2skill_max_corrections(),
363            d2skill_provider: ProviderName::default(),
364        }
365    }
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes `src` as the body of a learning config table, panicking on invalid input.
    fn parse(src: &str) -> LearningConfig {
        toml::from_str(src).unwrap()
    }

    #[test]
    fn detector_mode_default_is_regex() {
        assert_eq!(DetectorMode::default(), DetectorMode::Regex);
    }

    /// Each variant serializes to its lowercase name and deserializes back to itself.
    #[test]
    fn detector_mode_serde_roundtrip() {
        let cases = [
            (DetectorMode::Regex, "\"regex\""),
            (DetectorMode::Judge, "\"judge\""),
            (DetectorMode::Model, "\"model\""),
        ];
        for (mode, expected_str) in cases {
            let json = serde_json::to_string(&mode).unwrap();
            assert_eq!(json, expected_str, "serialize {mode:?}");
            let back: DetectorMode = serde_json::from_str(&json).unwrap();
            assert_eq!(back, mode, "deserialize {mode:?}");
        }
    }

    #[test]
    fn learning_config_default_detector_mode_is_regex() {
        assert_eq!(LearningConfig::default().detector_mode, DetectorMode::Regex);
    }

    #[test]
    fn learning_config_default_feedback_provider_is_empty() {
        assert!(LearningConfig::default().feedback_provider.is_empty());
    }

    #[test]
    fn learning_config_deserialize_model_mode() {
        let parsed = parse(
            r#"detector_mode = "model"
feedback_provider = "fast""#,
        );
        assert_eq!(parsed.detector_mode, DetectorMode::Model);
        assert_eq!(parsed.feedback_provider, "fast");
    }

    /// Omitting `feedback_provider` leaves it empty even when model mode is selected.
    #[test]
    fn learning_config_deserialize_empty_feedback_provider() {
        let parsed = parse(r#"detector_mode = "model""#);
        assert_eq!(parsed.detector_mode, DetectorMode::Model);
        assert!(
            parsed.feedback_provider.is_empty(),
            "empty feedback_provider must default to empty string (fallback to primary)"
        );
    }

    #[test]
    fn learning_config_deserialize_empty_section_uses_defaults() {
        let parsed = parse("");
        assert!(!parsed.enabled);
        assert_eq!(parsed.min_failures, 3);
        assert_eq!(parsed.detector_mode, DetectorMode::Regex);
        assert!(parsed.feedback_provider.is_empty());
    }

    #[test]
    fn judge_llm_timeout_secs_default_and_roundtrip() {
        let defaults = LearningConfig::default();
        assert_eq!(defaults.judge_llm_timeout_secs, 30);

        let overridden = parse("judge_llm_timeout_secs = 60");
        assert_eq!(overridden.judge_llm_timeout_secs, 60);
    }

    #[test]
    fn learning_config_defaults_for_new_fields() {
        let defaults = LearningConfig::default();
        assert!(!defaults.cross_session_rollout);
        assert_eq!(defaults.min_sessions_before_promote, 2);
        assert_eq!(defaults.max_auto_sections, 3);
        assert!(!defaults.domain_success_gate);
    }

    #[test]
    fn learning_config_min_sessions_before_demote_default() {
        assert_eq!(LearningConfig::default().min_sessions_before_demote, 1);
    }

    #[test]
    fn arise_stem_erl_defaults() {
        let defaults = LearningConfig::default();

        // ARISE
        assert!(!defaults.arise_enabled);
        assert_eq!(defaults.arise_min_tool_calls, 2);
        assert!(defaults.arise_trace_provider.is_empty());

        // STEM
        assert!(!defaults.stem_enabled);
        assert_eq!(defaults.stem_min_occurrences, 3);
        assert!((defaults.stem_min_success_rate - 0.8).abs() < f64::EPSILON);
        assert!(defaults.stem_provider.is_empty());
        assert_eq!(defaults.stem_retention_days, 90);
        assert_eq!(defaults.stem_pattern_window_days, 30);

        // ERL
        assert!(!defaults.erl_enabled);
        assert!(defaults.erl_extract_provider.is_empty());
        assert_eq!(defaults.erl_max_heuristics_per_skill, 3);
        assert!((defaults.erl_dedup_threshold - 0.9).abs() < f32::EPSILON);
        assert!((defaults.erl_min_confidence - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn arise_stem_erl_serde_roundtrip() {
        let parsed = parse(
            r#"
arise_enabled = true
arise_min_tool_calls = 3
arise_trace_provider = "fast"
stem_enabled = true
stem_min_occurrences = 5
stem_min_success_rate = 0.9
stem_provider = "mid"
stem_retention_days = 60
stem_pattern_window_days = 14
erl_enabled = true
erl_extract_provider = "fast"
erl_max_heuristics_per_skill = 5
erl_dedup_threshold = 0.85
erl_min_confidence = 0.6
"#,
        );

        // ARISE
        assert!(parsed.arise_enabled);
        assert_eq!(parsed.arise_min_tool_calls, 3);
        assert_eq!(parsed.arise_trace_provider, "fast");

        // STEM
        assert!(parsed.stem_enabled);
        assert_eq!(parsed.stem_min_occurrences, 5);
        assert!((parsed.stem_min_success_rate - 0.9).abs() < f64::EPSILON);
        assert_eq!(parsed.stem_provider, "mid");
        assert_eq!(parsed.stem_retention_days, 60);
        assert_eq!(parsed.stem_pattern_window_days, 14);

        // ERL
        assert!(parsed.erl_enabled);
        assert_eq!(parsed.erl_extract_provider, "fast");
        assert_eq!(parsed.erl_max_heuristics_per_skill, 5);
        assert!((parsed.erl_dedup_threshold - 0.85_f32).abs() < f32::EPSILON);
        assert!((parsed.erl_min_confidence - 0.6).abs() < f64::EPSILON);
    }

    #[test]
    fn arise_stem_erl_empty_section_uses_defaults() {
        let parsed = parse("");
        assert!(!parsed.arise_enabled);
        assert!(!parsed.stem_enabled);
        assert!(!parsed.erl_enabled);
    }

    #[test]
    fn learning_config_new_fields_serde_roundtrip() {
        let parsed = parse(
            r"
cross_session_rollout = true
min_sessions_before_promote = 5
min_sessions_before_demote = 2
max_auto_sections = 4
domain_success_gate = true
",
        );
        assert!(parsed.cross_session_rollout);
        assert_eq!(parsed.min_sessions_before_promote, 5);
        assert_eq!(parsed.min_sessions_before_demote, 2);
        assert_eq!(parsed.max_auto_sections, 4);
        assert!(parsed.domain_success_gate);
    }
}