Skip to main content

zeph_config/
learning.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Value serde uses for `LearningConfig::min_failures` when the key is missing.
fn default_min_failures() -> u32 {
    3_u32
}
10
/// Value serde uses for `LearningConfig::improve_threshold` when the key is missing.
fn default_improve_threshold() -> f64 {
    0.7_f64
}
14
/// Value serde uses for `LearningConfig::rollback_threshold` when the key is missing.
fn default_rollback_threshold() -> f64 {
    0.5_f64
}
18
/// Value serde uses for `LearningConfig::min_evaluations` when the key is missing.
fn default_min_evaluations() -> u32 {
    5_u32
}
22
/// Value serde uses for `LearningConfig::max_versions` when the key is missing.
fn default_max_versions() -> u32 {
    10_u32
}
26
/// Value serde uses for `LearningConfig::cooldown_minutes` when the key is missing.
fn default_cooldown_minutes() -> u64 {
    60_u64
}
30
/// Value serde uses for `LearningConfig::correction_detection` when the key is missing.
fn default_correction_detection() -> bool {
    // Unlike the other boolean switches in `LearningConfig`, this one starts enabled.
    true
}
34
/// Value serde uses for `LearningConfig::correction_confidence_threshold` when the key is missing.
fn default_correction_confidence_threshold() -> f32 {
    0.6_f32
}
38
/// Value serde uses for `LearningConfig::judge_adaptive_low` when the key is missing.
fn default_judge_adaptive_low() -> f32 {
    0.5_f32
}
42
/// Value serde uses for `LearningConfig::judge_adaptive_high` when the key is missing.
fn default_judge_adaptive_high() -> f32 {
    0.8_f32
}
46
/// Value serde uses for `LearningConfig::correction_recall_limit` when the key is missing.
fn default_correction_recall_limit() -> u32 {
    3_u32
}
50
/// Value serde uses for `LearningConfig::correction_min_similarity` when the key is missing.
fn default_correction_min_similarity() -> f32 {
    0.75_f32
}
54
/// Value serde uses for `LearningConfig::auto_promote_min_uses` when the key is missing.
fn default_auto_promote_min_uses() -> u32 {
    50_u32
}
58
/// Value serde uses for `LearningConfig::auto_promote_threshold` when the key is missing.
fn default_auto_promote_threshold() -> f64 {
    0.95_f64
}
62
/// Value serde uses for `LearningConfig::auto_demote_min_uses` when the key is missing.
fn default_auto_demote_min_uses() -> u32 {
    30_u32
}
66
/// Value serde uses for `LearningConfig::auto_demote_threshold` when the key is missing.
fn default_auto_demote_threshold() -> f64 {
    0.40_f64
}
70
/// Value serde uses for `LearningConfig::min_sessions_before_promote` when the key is missing.
fn default_min_sessions_before_promote() -> u32 {
    2_u32
}
74
/// Value serde uses for `LearningConfig::min_sessions_before_demote` when the key is missing.
fn default_min_sessions_before_demote() -> u32 {
    1_u32
}
78
/// Value serde uses for `LearningConfig::max_auto_sections` when the key is missing.
fn default_max_auto_sections() -> u32 {
    3_u32
}
82
/// Value serde uses for `LearningConfig::arise_min_tool_calls` when the key is missing.
fn default_arise_min_tool_calls() -> u32 {
    2_u32
}
86
/// Value serde uses for `LearningConfig::stem_min_occurrences` when the key is missing.
fn default_stem_min_occurrences() -> u32 {
    3_u32
}
90
/// Value serde uses for `LearningConfig::stem_min_success_rate` when the key is missing.
fn default_stem_min_success_rate() -> f64 {
    0.8_f64
}
94
/// Value serde uses for `LearningConfig::stem_retention_days` when the key is missing.
fn default_stem_retention_days() -> u32 {
    90_u32
}
98
/// Value serde uses for `LearningConfig::stem_pattern_window_days` when the key is missing.
fn default_stem_pattern_window_days() -> u32 {
    30_u32
}
102
/// Value serde uses for `LearningConfig::erl_max_heuristics_per_skill` when the key is missing.
fn default_erl_max_heuristics_per_skill() -> u32 {
    3_u32
}
106
/// Value serde uses for `LearningConfig::erl_dedup_threshold` when the key is missing.
fn default_erl_dedup_threshold() -> f32 {
    0.9_f32
}
110
/// Value serde uses for `LearningConfig::erl_min_confidence` when the key is missing.
fn default_erl_min_confidence() -> f64 {
    0.5_f64
}
114
/// Value serde uses for `LearningConfig::d2skill_max_corrections` when the key is missing.
fn default_d2skill_max_corrections() -> u32 {
    3_u32
}
118
119/// Strategy for detecting implicit user corrections.
120#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
121#[serde(rename_all = "lowercase")]
122pub enum DetectorMode {
123    /// Pattern-matching only — zero LLM calls. Default behavior.
124    #[default]
125    Regex,
126    /// LLM-based judge for borderline / missed cases. Invoked only when
127    /// regex confidence falls below `judge_adaptive_high` or regex returns None.
128    ///
129    /// Note: with current regex values (ExplicitRejection=0.85, SelfCorrection=0.80,
130    /// Repetition=0.75, AlternativeRequest=0.70) and `adaptive_high=0.80`,
131    /// `ExplicitRejection` and `SelfCorrection` bypass the judge (confidence >= `adaptive_high`),
132    /// while `AlternativeRequest`, `Repetition`, and regex misses go through it.
133    Judge,
134    /// ML model-backed feedback classification via `LlmClassifier`.
135    ///
136    /// Uses the provider named in `feedback_provider` (or the primary provider if empty).
137    /// Shares the same adaptive thresholds and rate limiter as `Judge` mode.
138    /// Returns `JudgeVerdict` directly, preserving `kind` and `reasoning` metadata.
139    ///
140    /// Falls back to regex-only if the provider cannot be resolved — never fails startup.
141    Model,
142}
143
144/// Self-learning and skill evolution configuration, nested under `[skills.learning]` in TOML.
145///
146/// When `enabled = true`, Zeph tracks skill performance and can automatically improve or roll
147/// back skill definitions based on usage outcomes (ARISE, STEM, `D2Skill` pipelines).
148///
149/// # Example (TOML)
150///
151/// ```toml
152/// [skills.learning]
153/// enabled = true
154/// auto_activate = false
155/// min_failures = 3
156/// ```
157#[allow(clippy::struct_excessive_bools)]
158#[derive(Debug, Clone, Deserialize, Serialize)]
159pub struct LearningConfig {
160    /// Enable self-learning pipelines. Default: `false`.
161    #[serde(default)]
162    pub enabled: bool,
163    /// Automatically activate improved skill versions without user confirmation. Default: `false`.
164    #[serde(default)]
165    pub auto_activate: bool,
166    #[serde(default = "default_min_failures")]
167    pub min_failures: u32,
168    #[serde(default = "default_improve_threshold")]
169    pub improve_threshold: f64,
170    #[serde(default = "default_rollback_threshold")]
171    pub rollback_threshold: f64,
172    #[serde(default = "default_min_evaluations")]
173    pub min_evaluations: u32,
174    #[serde(default = "default_max_versions")]
175    pub max_versions: u32,
176    #[serde(default = "default_cooldown_minutes")]
177    pub cooldown_minutes: u64,
178    #[serde(default = "default_correction_detection")]
179    pub correction_detection: bool,
180    #[serde(default = "default_correction_confidence_threshold")]
181    pub correction_confidence_threshold: f32,
182    /// Detector strategy: "regex" (default) or "judge".
183    #[serde(default)]
184    pub detector_mode: DetectorMode,
185    /// Model for the judge detector (e.g. "claude-sonnet-4-6"). Empty = use primary provider.
186    #[serde(default)]
187    pub judge_model: String,
188    /// Provider name from `[[llm.providers]]` for `detector_mode = "model"` (`LlmClassifier`).
189    ///
190    /// Empty = use the primary provider. Named but not found in registry = log warning,
191    /// degrade to regex-only. Never fails startup.
192    #[serde(default)]
193    pub feedback_provider: ProviderName,
194    /// Regex confidence below this value is treated as "not a correction" — judge not invoked.
195    #[serde(default = "default_judge_adaptive_low")]
196    pub judge_adaptive_low: f32,
197    /// Regex confidence at or above this value is accepted without judge confirmation.
198    #[serde(default = "default_judge_adaptive_high")]
199    pub judge_adaptive_high: f32,
200    #[serde(default = "default_correction_recall_limit")]
201    pub correction_recall_limit: u32,
202    #[serde(default = "default_correction_min_similarity")]
203    pub correction_min_similarity: f32,
204    #[serde(default = "default_auto_promote_min_uses")]
205    pub auto_promote_min_uses: u32,
206    #[serde(default = "default_auto_promote_threshold")]
207    pub auto_promote_threshold: f64,
208    #[serde(default = "default_auto_demote_min_uses")]
209    pub auto_demote_min_uses: u32,
210    #[serde(default = "default_auto_demote_threshold")]
211    pub auto_demote_threshold: f64,
212    /// When true, auto-promote and auto-demote decisions require the skill to have been used
213    /// across at least `min_sessions_before_promote` (for promotion) or
214    /// `min_sessions_before_demote` (for demotion) distinct conversation sessions.
215    /// Prevents trust transitions from a single long session.
216    #[serde(default)]
217    pub cross_session_rollout: bool,
218    /// Minimum number of distinct `conversation_id` values in `skill_outcomes` before
219    /// auto-promotion is eligible. Only checked when `cross_session_rollout = true`.
220    #[serde(default = "default_min_sessions_before_promote")]
221    pub min_sessions_before_promote: u32,
222    /// Minimum distinct sessions before auto-demotion when `cross_session_rollout = true`.
223    ///
224    /// Default 1 (demotion can happen after a single bad session by default). Separate from
225    /// `min_sessions_before_promote` because demotion should be fast (low threshold) while
226    /// promotion benefits from conservative validation (higher threshold).
227    #[serde(default = "default_min_sessions_before_demote")]
228    pub min_sessions_before_demote: u32,
229    /// Maximum number of top-level content sections (markdown H2 headers) allowed in
230    /// auto-generated skill bodies. Bodies exceeding this limit are rejected by
231    /// `validate_body_sections()`.
232    #[serde(default = "default_max_auto_sections")]
233    pub max_auto_sections: u32,
234    /// When true, auto-generated skill versions must pass a domain-conditioned evaluation
235    /// before promotion. If the improved body drifts from the original skill's domain,
236    /// activation is skipped (the version is still saved for manual review).
237    #[serde(default)]
238    pub domain_success_gate: bool,
239
240    // --- ARISE: trace-based skill improvement ---
241    /// Enable ARISE trace-based skill improvement (disabled by default).
242    #[serde(default)]
243    pub arise_enabled: bool,
244    /// Minimum tool calls in a turn to trigger ARISE trace improvement.
245    #[serde(default = "default_arise_min_tool_calls")]
246    pub arise_min_tool_calls: u32,
247    /// Provider name from `[[llm.providers]]` for ARISE trace summarization.
248    /// Empty = fall back to primary provider.
249    #[serde(default)]
250    pub arise_trace_provider: ProviderName,
251
252    // --- STEM: pattern-to-skill conversion ---
253    /// Enable STEM automatic tool pattern detection and skill generation (disabled by default).
254    #[serde(default)]
255    pub stem_enabled: bool,
256    /// Minimum occurrences of a tool sequence before generating a skill candidate.
257    #[serde(default = "default_stem_min_occurrences")]
258    pub stem_min_occurrences: u32,
259    /// Minimum success rate of the pattern before generating a skill candidate.
260    #[serde(default = "default_stem_min_success_rate")]
261    pub stem_min_success_rate: f64,
262    /// Provider name from `[[llm.providers]]` for STEM skill generation.
263    /// Empty = fall back to primary provider.
264    #[serde(default)]
265    pub stem_provider: ProviderName,
266    /// Days to retain rows in `skill_usage_log` before pruning.
267    #[serde(default = "default_stem_retention_days")]
268    pub stem_retention_days: u32,
269    /// Window in days for pattern detection queries (limits scan cost on large tables).
270    #[serde(default = "default_stem_pattern_window_days")]
271    pub stem_pattern_window_days: u32,
272
273    // --- ERL: experiential reflective learning ---
274    /// Enable ERL post-task heuristic extraction (disabled by default).
275    #[serde(default)]
276    pub erl_enabled: bool,
277    /// Provider name from `[[llm.providers]]` for ERL heuristic extraction.
278    /// Empty = fall back to primary provider.
279    #[serde(default)]
280    pub erl_extract_provider: ProviderName,
281    /// Maximum heuristics prepended per skill at match time.
282    #[serde(default = "default_erl_max_heuristics_per_skill")]
283    pub erl_max_heuristics_per_skill: u32,
284    /// Text similarity threshold (Jaccard) for heuristic deduplication.
285    /// When exact text match exceeds this, increment `use_count` instead of inserting.
286    #[serde(default = "default_erl_dedup_threshold")]
287    pub erl_dedup_threshold: f32,
288    /// Minimum confidence to include a heuristic at match time.
289    #[serde(default = "default_erl_min_confidence")]
290    pub erl_min_confidence: f64,
291
292    // --- D2Skill: step-level error correction ---
293    /// Enable `D2Skill` step-level error correction (disabled by default).
294    ///
295    /// Requires `arise_enabled = true` to populate corrections from ARISE traces.
296    /// If `d2skill_enabled = true` and `arise_enabled = false`, existing corrections
297    /// are still applied but no new ones are generated via ARISE.
298    #[serde(default)]
299    pub d2skill_enabled: bool,
300    /// Maximum corrections to inject per failure event.
301    #[serde(default = "default_d2skill_max_corrections")]
302    pub d2skill_max_corrections: u32,
303    /// Provider name from `[[llm.providers]]` for correction extraction from ARISE traces.
304    /// Empty = fall back to primary provider.
305    #[serde(default)]
306    pub d2skill_provider: ProviderName,
307}
308
309impl Default for LearningConfig {
310    fn default() -> Self {
311        Self {
312            enabled: false,
313            auto_activate: false,
314            min_failures: default_min_failures(),
315            improve_threshold: default_improve_threshold(),
316            rollback_threshold: default_rollback_threshold(),
317            min_evaluations: default_min_evaluations(),
318            max_versions: default_max_versions(),
319            cooldown_minutes: default_cooldown_minutes(),
320            correction_detection: default_correction_detection(),
321            correction_confidence_threshold: default_correction_confidence_threshold(),
322            detector_mode: DetectorMode::default(),
323            judge_model: String::new(),
324            feedback_provider: ProviderName::default(),
325            judge_adaptive_low: default_judge_adaptive_low(),
326            judge_adaptive_high: default_judge_adaptive_high(),
327            correction_recall_limit: default_correction_recall_limit(),
328            correction_min_similarity: default_correction_min_similarity(),
329            auto_promote_min_uses: default_auto_promote_min_uses(),
330            auto_promote_threshold: default_auto_promote_threshold(),
331            auto_demote_min_uses: default_auto_demote_min_uses(),
332            auto_demote_threshold: default_auto_demote_threshold(),
333            cross_session_rollout: false,
334            min_sessions_before_promote: default_min_sessions_before_promote(),
335            min_sessions_before_demote: default_min_sessions_before_demote(),
336            max_auto_sections: default_max_auto_sections(),
337            domain_success_gate: false,
338            arise_enabled: false,
339            arise_min_tool_calls: default_arise_min_tool_calls(),
340            arise_trace_provider: ProviderName::default(),
341            stem_enabled: false,
342            stem_min_occurrences: default_stem_min_occurrences(),
343            stem_min_success_rate: default_stem_min_success_rate(),
344            stem_provider: ProviderName::default(),
345            stem_retention_days: default_stem_retention_days(),
346            stem_pattern_window_days: default_stem_pattern_window_days(),
347            erl_enabled: false,
348            erl_extract_provider: ProviderName::default(),
349            erl_max_heuristics_per_skill: default_erl_max_heuristics_per_skill(),
350            erl_dedup_threshold: default_erl_dedup_threshold(),
351            erl_min_confidence: default_erl_min_confidence(),
352            d2skill_enabled: false,
353            d2skill_max_corrections: default_d2skill_max_corrections(),
354            d2skill_provider: ProviderName::default(),
355        }
356    }
357}
358
/// Unit tests for `DetectorMode` and `LearningConfig`: default values, lowercase enum
/// encoding, and TOML deserialization with full, partial and empty input.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detector_mode_default_is_regex() {
        assert_eq!(DetectorMode::default(), DetectorMode::Regex);
    }

    #[test]
    fn detector_mode_serde_roundtrip() {
        for (mode, expected_str) in [
            (DetectorMode::Regex, "\"regex\""),
            (DetectorMode::Judge, "\"judge\""),
            (DetectorMode::Model, "\"model\""),
        ] {
            let serialized = serde_json::to_string(&mode).unwrap();
            assert_eq!(serialized, expected_str, "serialize {mode:?}");
            let deserialized: DetectorMode = serde_json::from_str(&serialized).unwrap();
            assert_eq!(deserialized, mode, "deserialize {mode:?}");
        }
    }

    #[test]
    fn learning_config_default_detector_mode_is_regex() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.detector_mode, DetectorMode::Regex);
    }

    #[test]
    fn learning_config_default_feedback_provider_is_empty() {
        let cfg = LearningConfig::default();
        assert!(cfg.feedback_provider.is_empty());
    }

    #[test]
    fn learning_config_deserialize_model_mode() {
        let toml = r#"detector_mode = "model"
feedback_provider = "fast""#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.detector_mode, DetectorMode::Model);
        assert_eq!(cfg.feedback_provider, "fast");
    }

    #[test]
    fn learning_config_deserialize_empty_feedback_provider() {
        let toml = r#"detector_mode = "model""#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.detector_mode, DetectorMode::Model);
        assert!(
            cfg.feedback_provider.is_empty(),
            "empty feedback_provider must default to empty string (fallback to primary)"
        );
    }

    #[test]
    fn learning_config_deserialize_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled);
        assert_eq!(cfg.min_failures, 3);
        assert_eq!(cfg.detector_mode, DetectorMode::Regex);
        assert!(cfg.feedback_provider.is_empty());
    }

    #[test]
    fn learning_config_defaults_for_new_fields() {
        let cfg = LearningConfig::default();
        assert!(!cfg.cross_session_rollout);
        assert_eq!(cfg.min_sessions_before_promote, 2);
        assert_eq!(cfg.max_auto_sections, 3);
        assert!(!cfg.domain_success_gate);
    }

    #[test]
    fn learning_config_min_sessions_before_demote_default() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.min_sessions_before_demote, 1);
    }

    // The adaptive band only makes sense when the low bound sits below the high bound.
    #[test]
    fn judge_adaptive_defaults_are_ordered() {
        let cfg = LearningConfig::default();
        assert!((cfg.judge_adaptive_low - 0.5).abs() < f32::EPSILON);
        assert!((cfg.judge_adaptive_high - 0.8).abs() < f32::EPSILON);
        assert!(cfg.judge_adaptive_low < cfg.judge_adaptive_high);
    }

    #[test]
    fn arise_stem_erl_defaults() {
        let cfg = LearningConfig::default();
        assert!(!cfg.arise_enabled);
        assert_eq!(cfg.arise_min_tool_calls, 2);
        assert!(cfg.arise_trace_provider.is_empty());
        assert!(!cfg.stem_enabled);
        assert_eq!(cfg.stem_min_occurrences, 3);
        assert!((cfg.stem_min_success_rate - 0.8).abs() < f64::EPSILON);
        assert!(cfg.stem_provider.is_empty());
        assert_eq!(cfg.stem_retention_days, 90);
        assert_eq!(cfg.stem_pattern_window_days, 30);
        assert!(!cfg.erl_enabled);
        assert!(cfg.erl_extract_provider.is_empty());
        assert_eq!(cfg.erl_max_heuristics_per_skill, 3);
        assert!((cfg.erl_dedup_threshold - 0.9).abs() < f32::EPSILON);
        assert!((cfg.erl_min_confidence - 0.5).abs() < f64::EPSILON);
    }

    // Despite the name, this only checks the deserialize direction with explicit overrides.
    #[test]
    fn arise_stem_erl_serde_roundtrip() {
        let toml = r#"
arise_enabled = true
arise_min_tool_calls = 3
arise_trace_provider = "fast"
stem_enabled = true
stem_min_occurrences = 5
stem_min_success_rate = 0.9
stem_provider = "mid"
stem_retention_days = 60
stem_pattern_window_days = 14
erl_enabled = true
erl_extract_provider = "fast"
erl_max_heuristics_per_skill = 5
erl_dedup_threshold = 0.85
erl_min_confidence = 0.6
"#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.arise_enabled);
        assert_eq!(cfg.arise_min_tool_calls, 3);
        assert_eq!(cfg.arise_trace_provider, "fast");
        assert!(cfg.stem_enabled);
        assert_eq!(cfg.stem_min_occurrences, 5);
        assert!((cfg.stem_min_success_rate - 0.9).abs() < f64::EPSILON);
        assert_eq!(cfg.stem_provider, "mid");
        assert_eq!(cfg.stem_retention_days, 60);
        assert_eq!(cfg.stem_pattern_window_days, 14);
        assert!(cfg.erl_enabled);
        assert_eq!(cfg.erl_extract_provider, "fast");
        assert_eq!(cfg.erl_max_heuristics_per_skill, 5);
        assert!((cfg.erl_dedup_threshold - 0.85_f32).abs() < f32::EPSILON);
        assert!((cfg.erl_min_confidence - 0.6).abs() < f64::EPSILON);
    }

    #[test]
    fn arise_stem_erl_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.arise_enabled);
        assert!(!cfg.stem_enabled);
        assert!(!cfg.erl_enabled);
    }

    // D2Skill coverage mirrors the ARISE/STEM/ERL tests above.
    #[test]
    fn d2skill_defaults() {
        let cfg = LearningConfig::default();
        assert!(!cfg.d2skill_enabled);
        assert_eq!(cfg.d2skill_max_corrections, 3);
        assert!(cfg.d2skill_provider.is_empty());
    }

    #[test]
    fn d2skill_deserialize_overrides() {
        let toml = r#"
d2skill_enabled = true
d2skill_max_corrections = 5
d2skill_provider = "fast"
"#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.d2skill_enabled);
        assert_eq!(cfg.d2skill_max_corrections, 5);
        assert_eq!(cfg.d2skill_provider, "fast");
    }

    #[test]
    fn d2skill_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.d2skill_enabled);
        assert_eq!(cfg.d2skill_max_corrections, 3);
        assert!(cfg.d2skill_provider.is_empty());
    }

    #[test]
    fn learning_config_new_fields_serde_roundtrip() {
        let toml = r"
cross_session_rollout = true
min_sessions_before_promote = 5
min_sessions_before_demote = 2
max_auto_sections = 4
domain_success_gate = true
";
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.cross_session_rollout);
        assert_eq!(cfg.min_sessions_before_promote, 5);
        assert_eq!(cfg.min_sessions_before_demote, 2);
        assert_eq!(cfg.max_auto_sections, 4);
        assert!(cfg.domain_success_gate);
    }
}