Skip to main content

zeph_config/
learning.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::providers::ProviderName;
5use serde::{Deserialize, Serialize};
6
/// Serde default for `LearningConfig::min_failures`, used when the key is absent.
fn default_min_failures() -> u32 {
    const MIN_FAILURES: u32 = 3;
    MIN_FAILURES
}
10
/// Serde default for `LearningConfig::improve_threshold`, used when the key is absent.
fn default_improve_threshold() -> f64 {
    const IMPROVE_THRESHOLD: f64 = 0.7;
    IMPROVE_THRESHOLD
}
14
/// Serde default for `LearningConfig::rollback_threshold`, used when the key is absent.
fn default_rollback_threshold() -> f64 {
    const ROLLBACK_THRESHOLD: f64 = 0.5;
    ROLLBACK_THRESHOLD
}
18
/// Serde default for `LearningConfig::min_evaluations`, used when the key is absent.
fn default_min_evaluations() -> u32 {
    const MIN_EVALUATIONS: u32 = 5;
    MIN_EVALUATIONS
}
22
/// Serde default for `LearningConfig::max_versions`, used when the key is absent.
fn default_max_versions() -> u32 {
    const MAX_VERSIONS: u32 = 10;
    MAX_VERSIONS
}
26
/// Serde default for `LearningConfig::cooldown_minutes`, used when the key is absent.
fn default_cooldown_minutes() -> u64 {
    const COOLDOWN_MINUTES: u64 = 60;
    COOLDOWN_MINUTES
}
30
/// Serde default for `LearningConfig::correction_detection`: on unless explicitly disabled.
fn default_correction_detection() -> bool {
    const CORRECTION_DETECTION: bool = true;
    CORRECTION_DETECTION
}
34
/// Serde default for `LearningConfig::correction_confidence_threshold`, used when the key is
/// absent.
fn default_correction_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.6;
    CONFIDENCE_THRESHOLD
}
38
/// Serde default for `LearningConfig::judge_adaptive_low`, used when the key is absent.
fn default_judge_adaptive_low() -> f32 {
    const ADAPTIVE_LOW: f32 = 0.5;
    ADAPTIVE_LOW
}
42
/// Serde default for `LearningConfig::judge_adaptive_high`, used when the key is absent.
fn default_judge_adaptive_high() -> f32 {
    const ADAPTIVE_HIGH: f32 = 0.8;
    ADAPTIVE_HIGH
}
46
/// Serde default for `LearningConfig::judge_llm_timeout_secs`, used when the key is absent.
fn default_judge_llm_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}
50
/// Serde default for `LearningConfig::correction_recall_limit`, used when the key is absent.
fn default_correction_recall_limit() -> u32 {
    const RECALL_LIMIT: u32 = 3;
    RECALL_LIMIT
}
54
/// Serde default for `LearningConfig::correction_min_similarity`, used when the key is absent.
fn default_correction_min_similarity() -> f32 {
    const MIN_SIMILARITY: f32 = 0.75;
    MIN_SIMILARITY
}
58
/// Serde default for `LearningConfig::auto_promote_min_uses`, used when the key is absent.
fn default_auto_promote_min_uses() -> u32 {
    const PROMOTE_MIN_USES: u32 = 50;
    PROMOTE_MIN_USES
}
62
/// Serde default for `LearningConfig::auto_promote_threshold`, used when the key is absent.
fn default_auto_promote_threshold() -> f64 {
    const PROMOTE_THRESHOLD: f64 = 0.95;
    PROMOTE_THRESHOLD
}
66
/// Serde default for `LearningConfig::auto_demote_min_uses`, used when the key is absent.
fn default_auto_demote_min_uses() -> u32 {
    const DEMOTE_MIN_USES: u32 = 30;
    DEMOTE_MIN_USES
}
70
/// Serde default for `LearningConfig::auto_demote_threshold`, used when the key is absent.
fn default_auto_demote_threshold() -> f64 {
    const DEMOTE_THRESHOLD: f64 = 0.4;
    DEMOTE_THRESHOLD
}
74
/// Serde default for `LearningConfig::min_sessions_before_promote`, used when the key is absent.
fn default_min_sessions_before_promote() -> u32 {
    const SESSIONS_BEFORE_PROMOTE: u32 = 2;
    SESSIONS_BEFORE_PROMOTE
}
78
/// Serde default for `LearningConfig::min_sessions_before_demote`, used when the key is absent.
fn default_min_sessions_before_demote() -> u32 {
    const SESSIONS_BEFORE_DEMOTE: u32 = 1;
    SESSIONS_BEFORE_DEMOTE
}
82
/// Serde default for `LearningConfig::max_auto_sections`, used when the key is absent.
fn default_max_auto_sections() -> u32 {
    const MAX_AUTO_SECTIONS: u32 = 3;
    MAX_AUTO_SECTIONS
}
86
/// Serde default for `LearningConfig::arise_min_tool_calls`, used when the key is absent.
fn default_arise_min_tool_calls() -> u32 {
    const MIN_TOOL_CALLS: u32 = 2;
    MIN_TOOL_CALLS
}
90
/// Serde default for `LearningConfig::stem_min_occurrences`, used when the key is absent.
fn default_stem_min_occurrences() -> u32 {
    const MIN_OCCURRENCES: u32 = 3;
    MIN_OCCURRENCES
}
94
/// Serde default for `LearningConfig::stem_min_success_rate`, used when the key is absent.
fn default_stem_min_success_rate() -> f64 {
    const MIN_SUCCESS_RATE: f64 = 0.8;
    MIN_SUCCESS_RATE
}
98
/// Serde default for `LearningConfig::stem_retention_days`, used when the key is absent.
fn default_stem_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
102
/// Serde default for `LearningConfig::stem_pattern_window_days`, used when the key is absent.
fn default_stem_pattern_window_days() -> u32 {
    const PATTERN_WINDOW_DAYS: u32 = 30;
    PATTERN_WINDOW_DAYS
}
106
/// Serde default for `LearningConfig::erl_max_heuristics_per_skill`, used when the key is absent.
fn default_erl_max_heuristics_per_skill() -> u32 {
    const MAX_HEURISTICS_PER_SKILL: u32 = 3;
    MAX_HEURISTICS_PER_SKILL
}
110
/// Serde default for `LearningConfig::erl_dedup_threshold`, used when the key is absent.
fn default_erl_dedup_threshold() -> f32 {
    const ERL_DEDUP_THRESHOLD: f32 = 0.9;
    ERL_DEDUP_THRESHOLD
}
114
/// Serde default for `LearningConfig::erl_min_confidence`, used when the key is absent.
fn default_erl_min_confidence() -> f64 {
    const ERL_MIN_CONFIDENCE: f64 = 0.5;
    ERL_MIN_CONFIDENCE
}
118
/// Serde default for `LearningConfig::d2skill_max_corrections`, used when the key is absent.
fn default_d2skill_max_corrections() -> u32 {
    const MAX_CORRECTIONS: u32 = 3;
    MAX_CORRECTIONS
}
122
/// Serde default for `LearningConfig::trace_extraction_max_turns`, used when the key is absent.
fn default_trace_extraction_max_turns() -> u32 {
    const MAX_TURNS: u32 = 200;
    MAX_TURNS
}
126
/// Serde default for `LearningConfig::trace_extraction_max_sessions_queued`, used when the key
/// is absent.
fn default_trace_extraction_max_sessions_queued() -> usize {
    const MAX_SESSIONS_QUEUED: usize = 10;
    MAX_SESSIONS_QUEUED
}
130
/// Serde default for `LearningConfig::trace_extraction_max_input_bytes`, used when the key is
/// absent.
fn default_trace_extraction_max_input_bytes() -> usize {
    // 128 KB, i.e. 131_072 bytes.
    const MAX_INPUT_BYTES: usize = 128 * 1024;
    MAX_INPUT_BYTES
}
134
/// Serde default for `LearningConfig::merge_threshold`, used when the key is absent.
fn default_merge_threshold() -> f32 {
    const MERGE_THRESHOLD: f32 = 0.75;
    MERGE_THRESHOLD
}
138
/// Serde default for `LearningConfig::dedup_threshold`, used when the key is absent.
fn default_dedup_threshold() -> f32 {
    const DEDUP_THRESHOLD: f32 = 0.9;
    DEDUP_THRESHOLD
}
142
/// Serde default for `LearningConfig::skill_merge_enabled`: on unless explicitly disabled.
fn default_skill_merge_enabled() -> bool {
    const SKILL_MERGE_ENABLED: bool = true;
    SKILL_MERGE_ENABLED
}
146
/// Serde default for `LearningConfig::heuristic_promotion_threshold`, used when the key is
/// absent.
fn default_heuristic_promotion_threshold() -> u32 {
    const PROMOTION_THRESHOLD: u32 = 5;
    PROMOTION_THRESHOLD
}
150
/// Serde default for `LearningConfig::heuristic_promotion_interval_hours`, used when the key is
/// absent.
fn default_heuristic_promotion_interval_hours() -> u64 {
    const PROMOTION_INTERVAL_HOURS: u64 = 24;
    PROMOTION_INTERVAL_HOURS
}
154
/// Strategy for detecting implicit user corrections.
///
/// Serialized and deserialized as a lowercase string (`"regex"`, `"judge"`, `"model"`) via
/// `#[serde(rename_all = "lowercase")]`. The `Default` implementation yields
/// [`DetectorMode::Regex`].
///
/// The enum is `#[non_exhaustive]`, so `match` expressions in other crates need a wildcard arm.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum DetectorMode {
    /// Pattern-matching only — zero LLM calls. Default behavior.
    #[default]
    Regex,
    /// LLM-based judge for borderline / missed cases. Invoked only when
    /// regex confidence falls below `judge_adaptive_high` or regex returns None.
    ///
    /// Note: with current regex values (ExplicitRejection=0.85, SelfCorrection=0.80,
    /// Repetition=0.75, AlternativeRequest=0.70) and `adaptive_high=0.80`,
    /// `ExplicitRejection` and `SelfCorrection` bypass the judge (confidence >= `adaptive_high`),
    /// while `AlternativeRequest`, `Repetition`, and regex misses go through it.
    Judge,
    /// ML model-backed feedback classification via `LlmClassifier`.
    ///
    /// Uses the provider named in `feedback_provider` (or the primary provider if empty).
    /// Shares the same adaptive thresholds and rate limiter as `Judge` mode.
    /// Returns `JudgeVerdict` directly, preserving `kind` and `reasoning` metadata.
    ///
    /// Falls back to regex-only if the provider cannot be resolved — never fails startup.
    Model,
}
180
/// Self-learning and skill evolution configuration, nested under `[skills.learning]` in TOML.
///
/// When `enabled = true`, Zeph tracks skill performance and can automatically improve or roll
/// back skill definitions based on usage outcomes (ARISE, STEM, `D2Skill` pipelines).
///
/// Every field carries a serde default, so any key (or the whole section) may be omitted.
/// Cross-field invariants that serde cannot express are checked by
/// [`LearningConfig::validate`].
///
/// # Example (TOML)
///
/// ```toml
/// [skills.learning]
/// enabled = true
/// auto_activate = false
/// min_failures = 3
/// ```
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LearningConfig {
    /// Enable self-learning pipelines. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Automatically activate improved skill versions without user confirmation. Default: `false`.
    #[serde(default)]
    pub auto_activate: bool,
    /// Failure-count threshold. Default: `3`.
    ///
    /// NOTE(review): presumably the number of recorded failures required before an improvement
    /// attempt — confirm against the consumer of this field.
    #[serde(default = "default_min_failures")]
    pub min_failures: u32,
    /// Threshold for the improve decision. Default: `0.7`.
    ///
    /// NOTE(review): presumably a success-rate fraction in `0.0..=1.0` — confirm.
    #[serde(default = "default_improve_threshold")]
    pub improve_threshold: f64,
    /// Threshold for the rollback decision. Default: `0.5`.
    ///
    /// NOTE(review): presumably a success-rate fraction in `0.0..=1.0` — confirm.
    #[serde(default = "default_rollback_threshold")]
    pub rollback_threshold: f64,
    /// Minimum number of evaluations. Default: `5`.
    ///
    /// NOTE(review): presumably required before improve/rollback decisions are made — confirm.
    #[serde(default = "default_min_evaluations")]
    pub min_evaluations: u32,
    /// Maximum number of skill versions. Default: `10`.
    ///
    /// NOTE(review): presumably a per-skill retention cap — confirm.
    #[serde(default = "default_max_versions")]
    pub max_versions: u32,
    /// Cooldown in minutes. Default: `60`.
    ///
    /// NOTE(review): presumably the minimum time between improvement attempts — confirm.
    #[serde(default = "default_cooldown_minutes")]
    pub cooldown_minutes: u64,
    /// Enable detection of user corrections. Default: `true`.
    #[serde(default = "default_correction_detection")]
    pub correction_detection: bool,
    /// Confidence threshold for correction detection. Default: `0.6`.
    ///
    /// NOTE(review): presumably detections below this confidence are ignored — confirm.
    #[serde(default = "default_correction_confidence_threshold")]
    pub correction_confidence_threshold: f32,
    /// Detector strategy: `"regex"` (default), `"judge"`, or `"model"`.
    #[serde(default)]
    pub detector_mode: DetectorMode,
    /// Named provider from `[[llm.providers]]` for the judge detector (legacy field, prefer `judge_provider`). Empty = use primary provider.
    #[serde(default)]
    pub judge_model: String,
    /// Named provider from `[[llm.providers]]` for the judge detector (`detector_mode = "judge"`).
    ///
    /// When set, overrides the model-level fallback: the named provider is resolved and used
    /// instead of the primary provider. Empty = use primary provider (same as leaving unset).
    #[serde(default)]
    pub judge_provider: String,
    /// Provider name from `[[llm.providers]]` for `detector_mode = "model"` (`LlmClassifier`).
    ///
    /// Empty = use the primary provider. Named but not found in registry = log warning,
    /// degrade to regex-only. Never fails startup.
    #[serde(default)]
    pub feedback_provider: ProviderName,
    /// Regex confidence below this value is treated as "not a correction" — judge not invoked.
    /// Default: `0.5`.
    #[serde(default = "default_judge_adaptive_low")]
    pub judge_adaptive_low: f32,
    /// Regex confidence at or above this value is accepted without judge confirmation.
    /// Default: `0.8`.
    #[serde(default = "default_judge_adaptive_high")]
    pub judge_adaptive_high: f32,
    /// Maximum seconds to wait for the judge LLM to respond before timing out.
    /// Applies to `detector_mode = "judge"` only. Default: `30`.
    #[serde(default = "default_judge_llm_timeout_secs")]
    pub judge_llm_timeout_secs: u64,
    /// Limit on recalled corrections. Default: `3`.
    ///
    /// NOTE(review): presumably the maximum number of stored corrections retrieved per
    /// lookup — confirm.
    #[serde(default = "default_correction_recall_limit")]
    pub correction_recall_limit: u32,
    /// Minimum similarity for recalling a correction. Default: `0.75`.
    ///
    /// NOTE(review): similarity metric is not visible here — confirm.
    #[serde(default = "default_correction_min_similarity")]
    pub correction_min_similarity: f32,
    /// Minimum number of uses before auto-promotion is considered. Default: `50`.
    #[serde(default = "default_auto_promote_min_uses")]
    pub auto_promote_min_uses: u32,
    /// Threshold for auto-promotion. Default: `0.95`.
    ///
    /// NOTE(review): presumably a success-rate fraction in `0.0..=1.0` — confirm.
    #[serde(default = "default_auto_promote_threshold")]
    pub auto_promote_threshold: f64,
    /// Minimum number of uses before auto-demotion is considered. Default: `30`.
    #[serde(default = "default_auto_demote_min_uses")]
    pub auto_demote_min_uses: u32,
    /// Threshold for auto-demotion. Default: `0.40`.
    ///
    /// NOTE(review): presumably a success-rate fraction in `0.0..=1.0` — confirm.
    #[serde(default = "default_auto_demote_threshold")]
    pub auto_demote_threshold: f64,
    /// When true, auto-promote and auto-demote decisions require the skill to have been used
    /// across at least `min_sessions_before_promote` (for promotion) or
    /// `min_sessions_before_demote` (for demotion) distinct conversation sessions.
    /// Prevents trust transitions from a single long session.
    #[serde(default)]
    pub cross_session_rollout: bool,
    /// Minimum number of distinct `conversation_id` values in `skill_outcomes` before
    /// auto-promotion is eligible. Only checked when `cross_session_rollout = true`.
    /// Default: `2`.
    #[serde(default = "default_min_sessions_before_promote")]
    pub min_sessions_before_promote: u32,
    /// Minimum distinct sessions before auto-demotion when `cross_session_rollout = true`.
    ///
    /// Default 1 (demotion can happen after a single bad session by default). Separate from
    /// `min_sessions_before_promote` because demotion should be fast (low threshold) while
    /// promotion benefits from conservative validation (higher threshold).
    #[serde(default = "default_min_sessions_before_demote")]
    pub min_sessions_before_demote: u32,
    /// Maximum number of top-level content sections (markdown H2 headers) allowed in
    /// auto-generated skill bodies. Bodies exceeding this limit are rejected by
    /// `validate_body_sections()`. Default: `3`.
    #[serde(default = "default_max_auto_sections")]
    pub max_auto_sections: u32,
    /// When true, auto-generated skill versions must pass a domain-conditioned evaluation
    /// before promotion. If the improved body drifts from the original skill's domain,
    /// activation is skipped (the version is still saved for manual review).
    #[serde(default)]
    pub domain_success_gate: bool,

    // --- ARISE: trace-based skill improvement ---
    /// Enable ARISE trace-based skill improvement (disabled by default).
    #[serde(default)]
    pub arise_enabled: bool,
    /// Minimum tool calls in a turn to trigger ARISE trace improvement. Default: `2`.
    #[serde(default = "default_arise_min_tool_calls")]
    pub arise_min_tool_calls: u32,
    /// Provider name from `[[llm.providers]]` for ARISE trace summarization.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub arise_trace_provider: ProviderName,

    // --- STEM: pattern-to-skill conversion ---
    /// Enable STEM automatic tool pattern detection and skill generation (disabled by default).
    #[serde(default)]
    pub stem_enabled: bool,
    /// Minimum occurrences of a tool sequence before generating a skill candidate. Default: `3`.
    #[serde(default = "default_stem_min_occurrences")]
    pub stem_min_occurrences: u32,
    /// Minimum success rate of the pattern before generating a skill candidate. Default: `0.8`.
    #[serde(default = "default_stem_min_success_rate")]
    pub stem_min_success_rate: f64,
    /// Provider name from `[[llm.providers]]` for STEM skill generation.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub stem_provider: ProviderName,
    /// Days to retain rows in `skill_usage_log` before pruning. Default: `90`.
    #[serde(default = "default_stem_retention_days")]
    pub stem_retention_days: u32,
    /// Window in days for pattern detection queries (limits scan cost on large tables).
    /// Default: `30`.
    #[serde(default = "default_stem_pattern_window_days")]
    pub stem_pattern_window_days: u32,

    // --- ERL: experiential reflective learning ---
    /// Enable ERL post-task heuristic extraction (disabled by default).
    #[serde(default)]
    pub erl_enabled: bool,
    /// Provider name from `[[llm.providers]]` for ERL heuristic extraction.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub erl_extract_provider: ProviderName,
    /// Maximum heuristics prepended per skill at match time. Default: `3`.
    #[serde(default = "default_erl_max_heuristics_per_skill")]
    pub erl_max_heuristics_per_skill: u32,
    /// Text similarity threshold (Jaccard) for heuristic deduplication. Default: `0.9`.
    /// When the text similarity exceeds this, increment `use_count` instead of inserting.
    #[serde(default = "default_erl_dedup_threshold")]
    pub erl_dedup_threshold: f32,
    /// Minimum confidence to include a heuristic at match time. Default: `0.5`.
    #[serde(default = "default_erl_min_confidence")]
    pub erl_min_confidence: f64,

    // --- D2Skill: step-level error correction ---
    /// Enable `D2Skill` step-level error correction (disabled by default).
    ///
    /// Requires `arise_enabled = true` to populate corrections from ARISE traces.
    /// If `d2skill_enabled = true` and `arise_enabled = false`, existing corrections
    /// are still applied but no new ones are generated via ARISE.
    #[serde(default)]
    pub d2skill_enabled: bool,
    /// Maximum corrections to inject per failure event. Default: `3`.
    #[serde(default = "default_d2skill_max_corrections")]
    pub d2skill_max_corrections: u32,
    /// Provider name from `[[llm.providers]]` for correction extraction from ARISE traces.
    /// Empty = fall back to primary provider.
    #[serde(default)]
    pub d2skill_provider: ProviderName,

    // --- AutoSkill A1: Conversation trace extraction (spec 056) ---
    /// Enable background skill extraction from completed conversation traces. Default: `false`.
    #[serde(default)]
    pub trace_extraction_enabled: bool,
    /// Provider name from `[[llm.providers]]` for trace extraction LLM calls.
    /// Empty = fall back to the primary provider.
    #[serde(default)]
    pub trace_extraction_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for embedding calls during trace extraction.
    /// Must reference a provider that supports `embed()`. Empty = fall back to the primary provider.
    #[serde(default)]
    pub trace_extraction_embedding_provider: ProviderName,
    /// Maximum user messages to include per extraction session. Default: 200.
    #[serde(default = "default_trace_extraction_max_turns")]
    pub trace_extraction_max_turns: u32,
    /// Maximum concurrent background extraction tasks before dropping oldest. Default: 10.
    #[serde(default = "default_trace_extraction_max_sessions_queued")]
    pub trace_extraction_max_sessions_queued: usize,
    /// Maximum total bytes of user messages to send to the extraction LLM. Default: 131072 (128 KB).
    #[serde(default = "default_trace_extraction_max_input_bytes")]
    pub trace_extraction_max_input_bytes: usize,

    // --- AutoSkill A2: Versioned merging (spec 057) ---
    /// Enable the Merge branch in the Add/Merge/Discard decision flow. Default: `true`.
    ///
    /// When `false`, candidates in the merge zone (`merge_threshold <= sim < dedup_threshold`)
    /// are Discarded instead of merged.
    #[serde(default = "default_skill_merge_enabled")]
    pub skill_merge_enabled: bool,
    /// Provider name from `[[llm.providers]]` for LLM merge calls.
    /// Empty = fall back to the primary provider.
    #[serde(default)]
    pub skill_merge_provider: ProviderName,
    /// Minimum cosine similarity to trigger a merge with the nearest skill. Default: 0.75.
    ///
    /// Must be strictly less than `dedup_threshold` (validated at startup).
    #[serde(default = "default_merge_threshold")]
    pub merge_threshold: f32,
    /// Minimum cosine similarity to discard a candidate as a near-exact duplicate. Default: 0.90.
    ///
    /// Must be strictly greater than `merge_threshold` (validated at startup).
    #[serde(default = "default_dedup_threshold")]
    pub dedup_threshold: f32,

    // --- AutoSkill A6: Heuristic promotion from ERL (spec 061) ---
    /// Enable periodic heuristic promotion from ERL to full skills. Default: `false`.
    ///
    /// When `true`, a background task runs every `heuristic_promotion_interval_hours` hours
    /// and evaluates whether accumulated ERL heuristics are substantial enough for promotion.
    #[serde(default)]
    pub heuristic_promotion_enabled: bool,
    /// Provider name from `[[llm.providers]]` for heuristic promotion LLM calls.
    ///
    /// Use a quality provider — promotion is an offline, non-latency-sensitive analysis.
    /// Empty = fall back to the primary provider.
    #[serde(default)]
    pub heuristic_promotion_provider: ProviderName,
    /// Minimum heuristic count per skill to trigger promotion evaluation. Default: `5`.
    ///
    /// Skills with fewer heuristics (above `erl_min_confidence`) are skipped.
    #[serde(default = "default_heuristic_promotion_threshold")]
    pub heuristic_promotion_threshold: u32,
    /// Interval in hours between promotion evaluation runs. Default: `24`.
    #[serde(default = "default_heuristic_promotion_interval_hours")]
    pub heuristic_promotion_interval_hours: u64,
}
421
/// Programmatic defaults for [`LearningConfig`].
///
/// Each field is initialised from the same source its serde attribute names: the matching
/// `default_*` helper for `#[serde(default = "...")]` fields, and the type's own default
/// (`false`, empty string, `ProviderName::default()`, `DetectorMode::default()`) for plain
/// `#[serde(default)]` fields. Keep the two in sync when adding a field.
impl Default for LearningConfig {
    fn default() -> Self {
        Self {
            // --- Core learning loop ---
            enabled: false,
            auto_activate: false,
            min_failures: default_min_failures(),
            improve_threshold: default_improve_threshold(),
            rollback_threshold: default_rollback_threshold(),
            min_evaluations: default_min_evaluations(),
            max_versions: default_max_versions(),
            cooldown_minutes: default_cooldown_minutes(),
            // --- Correction detection (regex / judge / model) ---
            correction_detection: default_correction_detection(),
            correction_confidence_threshold: default_correction_confidence_threshold(),
            detector_mode: DetectorMode::default(),
            judge_model: String::new(),
            judge_provider: String::new(),
            feedback_provider: ProviderName::default(),
            judge_adaptive_low: default_judge_adaptive_low(),
            judge_adaptive_high: default_judge_adaptive_high(),
            judge_llm_timeout_secs: default_judge_llm_timeout_secs(),
            correction_recall_limit: default_correction_recall_limit(),
            correction_min_similarity: default_correction_min_similarity(),
            // --- Auto promotion / demotion ---
            auto_promote_min_uses: default_auto_promote_min_uses(),
            auto_promote_threshold: default_auto_promote_threshold(),
            auto_demote_min_uses: default_auto_demote_min_uses(),
            auto_demote_threshold: default_auto_demote_threshold(),
            cross_session_rollout: false,
            min_sessions_before_promote: default_min_sessions_before_promote(),
            min_sessions_before_demote: default_min_sessions_before_demote(),
            max_auto_sections: default_max_auto_sections(),
            domain_success_gate: false,
            // --- ARISE ---
            arise_enabled: false,
            arise_min_tool_calls: default_arise_min_tool_calls(),
            arise_trace_provider: ProviderName::default(),
            // --- STEM ---
            stem_enabled: false,
            stem_min_occurrences: default_stem_min_occurrences(),
            stem_min_success_rate: default_stem_min_success_rate(),
            stem_provider: ProviderName::default(),
            stem_retention_days: default_stem_retention_days(),
            stem_pattern_window_days: default_stem_pattern_window_days(),
            // --- ERL ---
            erl_enabled: false,
            erl_extract_provider: ProviderName::default(),
            erl_max_heuristics_per_skill: default_erl_max_heuristics_per_skill(),
            erl_dedup_threshold: default_erl_dedup_threshold(),
            erl_min_confidence: default_erl_min_confidence(),
            // --- D2Skill ---
            d2skill_enabled: false,
            d2skill_max_corrections: default_d2skill_max_corrections(),
            d2skill_provider: ProviderName::default(),
            // --- AutoSkill A1: trace extraction ---
            trace_extraction_enabled: false,
            trace_extraction_provider: ProviderName::default(),
            trace_extraction_embedding_provider: ProviderName::default(),
            trace_extraction_max_turns: default_trace_extraction_max_turns(),
            trace_extraction_max_sessions_queued: default_trace_extraction_max_sessions_queued(),
            trace_extraction_max_input_bytes: default_trace_extraction_max_input_bytes(),
            // --- AutoSkill A2: versioned merging ---
            skill_merge_enabled: default_skill_merge_enabled(),
            skill_merge_provider: ProviderName::default(),
            merge_threshold: default_merge_threshold(),
            dedup_threshold: default_dedup_threshold(),
            // --- AutoSkill A6: heuristic promotion ---
            heuristic_promotion_enabled: false,
            heuristic_promotion_provider: ProviderName::default(),
            heuristic_promotion_threshold: default_heuristic_promotion_threshold(),
            heuristic_promotion_interval_hours: default_heuristic_promotion_interval_hours(),
        }
    }
}
487
488impl LearningConfig {
489    /// Validate invariants that cannot be expressed through serde defaults alone.
490    ///
491    /// # Errors
492    ///
493    /// Returns an error string if `merge_threshold >= dedup_threshold`.
494    pub fn validate(&self) -> Result<(), String> {
495        if self.merge_threshold >= self.dedup_threshold {
496            return Err(format!(
497                "skills.learning.merge_threshold ({}) must be strictly less than dedup_threshold ({})",
498                self.merge_threshold, self.dedup_threshold
499            ));
500        }
501        Ok(())
502    }
503}
504
// Unit tests for `DetectorMode` and `LearningConfig`: programmatic defaults, TOML/JSON
// (de)serialization of individual field groups, and `LearningConfig::validate`.
#[cfg(test)]
mod tests {
    use super::*;

    // `DetectorMode::default()` must be the regex-only strategy.
    #[test]
    fn detector_mode_default_is_regex() {
        assert_eq!(DetectorMode::default(), DetectorMode::Regex);
    }

    // Each variant serializes to its lowercase name and parses back to the same variant.
    #[test]
    fn detector_mode_serde_roundtrip() {
        for (mode, expected_str) in [
            (DetectorMode::Regex, "\"regex\""),
            (DetectorMode::Judge, "\"judge\""),
            (DetectorMode::Model, "\"model\""),
        ] {
            let serialized = serde_json::to_string(&mode).unwrap();
            assert_eq!(serialized, expected_str, "serialize {mode:?}");
            let deserialized: DetectorMode = serde_json::from_str(&serialized).unwrap();
            assert_eq!(deserialized, mode, "deserialize {mode:?}");
        }
    }

    // The struct-level default picks up the enum default.
    #[test]
    fn learning_config_default_detector_mode_is_regex() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.detector_mode, DetectorMode::Regex);
    }

    // No feedback provider is configured by default.
    #[test]
    fn learning_config_default_feedback_provider_is_empty() {
        let cfg = LearningConfig::default();
        assert!(cfg.feedback_provider.is_empty());
    }

    // `detector_mode = "model"` plus an explicit `feedback_provider` are both parsed.
    #[test]
    fn learning_config_deserialize_model_mode() {
        let toml = r#"detector_mode = "model"
feedback_provider = "fast""#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.detector_mode, DetectorMode::Model);
        assert_eq!(cfg.feedback_provider, "fast");
    }

    // Omitting `feedback_provider` in model mode leaves it empty.
    #[test]
    fn learning_config_deserialize_empty_feedback_provider() {
        let toml = r#"detector_mode = "model""#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.detector_mode, DetectorMode::Model);
        assert!(
            cfg.feedback_provider.is_empty(),
            "empty feedback_provider must default to empty string (fallback to primary)"
        );
    }

    // An empty TOML document deserializes successfully using the per-field serde defaults.
    #[test]
    fn learning_config_deserialize_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled);
        assert_eq!(cfg.min_failures, 3);
        assert_eq!(cfg.detector_mode, DetectorMode::Regex);
        assert!(cfg.feedback_provider.is_empty());
    }

    // Judge timeout: default of 30 seconds, overridable from TOML.
    #[test]
    fn judge_llm_timeout_secs_default_and_roundtrip() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.judge_llm_timeout_secs, 30);
        let cfg: LearningConfig = toml::from_str("judge_llm_timeout_secs = 60").unwrap();
        assert_eq!(cfg.judge_llm_timeout_secs, 60);
    }

    // Defaults of the cross-session rollout / section-limit / domain-gate fields.
    #[test]
    fn learning_config_defaults_for_new_fields() {
        let cfg = LearningConfig::default();
        assert!(!cfg.cross_session_rollout);
        assert_eq!(cfg.min_sessions_before_promote, 2);
        assert_eq!(cfg.max_auto_sections, 3);
        assert!(!cfg.domain_success_gate);
    }

    // Demotion requires only one session by default.
    #[test]
    fn learning_config_min_sessions_before_demote_default() {
        let cfg = LearningConfig::default();
        assert_eq!(cfg.min_sessions_before_demote, 1);
    }

    // Programmatic defaults of every ARISE / STEM / ERL field.
    #[test]
    fn arise_stem_erl_defaults() {
        let cfg = LearningConfig::default();
        assert!(!cfg.arise_enabled);
        assert_eq!(cfg.arise_min_tool_calls, 2);
        assert!(cfg.arise_trace_provider.is_empty());
        assert!(!cfg.stem_enabled);
        assert_eq!(cfg.stem_min_occurrences, 3);
        assert!((cfg.stem_min_success_rate - 0.8).abs() < f64::EPSILON);
        assert!(cfg.stem_provider.is_empty());
        assert_eq!(cfg.stem_retention_days, 90);
        assert_eq!(cfg.stem_pattern_window_days, 30);
        assert!(!cfg.erl_enabled);
        assert!(cfg.erl_extract_provider.is_empty());
        assert_eq!(cfg.erl_max_heuristics_per_skill, 3);
        assert!((cfg.erl_dedup_threshold - 0.9).abs() < f32::EPSILON);
        assert!((cfg.erl_min_confidence - 0.5).abs() < f64::EPSILON);
    }

    // Every ARISE / STEM / ERL field can be overridden from TOML.
    #[test]
    fn arise_stem_erl_serde_roundtrip() {
        let toml = r#"
arise_enabled = true
arise_min_tool_calls = 3
arise_trace_provider = "fast"
stem_enabled = true
stem_min_occurrences = 5
stem_min_success_rate = 0.9
stem_provider = "mid"
stem_retention_days = 60
stem_pattern_window_days = 14
erl_enabled = true
erl_extract_provider = "fast"
erl_max_heuristics_per_skill = 5
erl_dedup_threshold = 0.85
erl_min_confidence = 0.6
"#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.arise_enabled);
        assert_eq!(cfg.arise_min_tool_calls, 3);
        assert_eq!(cfg.arise_trace_provider, "fast");
        assert!(cfg.stem_enabled);
        assert_eq!(cfg.stem_min_occurrences, 5);
        assert!((cfg.stem_min_success_rate - 0.9).abs() < f64::EPSILON);
        assert_eq!(cfg.stem_provider, "mid");
        assert_eq!(cfg.stem_retention_days, 60);
        assert_eq!(cfg.stem_pattern_window_days, 14);
        assert!(cfg.erl_enabled);
        assert_eq!(cfg.erl_extract_provider, "fast");
        assert_eq!(cfg.erl_max_heuristics_per_skill, 5);
        assert!((cfg.erl_dedup_threshold - 0.85_f32).abs() < f32::EPSILON);
        assert!((cfg.erl_min_confidence - 0.6).abs() < f64::EPSILON);
    }

    // The three pipelines stay disabled when the section is empty.
    #[test]
    fn arise_stem_erl_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.arise_enabled);
        assert!(!cfg.stem_enabled);
        assert!(!cfg.erl_enabled);
    }

    // Defaults of the AutoSkill A2 merge settings.
    #[test]
    fn autoskill_a2_defaults() {
        let cfg = LearningConfig::default();
        assert!(cfg.skill_merge_enabled);
        assert!(cfg.skill_merge_provider.is_empty());
        assert!((cfg.merge_threshold - 0.75_f32).abs() < f32::EPSILON);
        assert!((cfg.dedup_threshold - 0.90_f32).abs() < f32::EPSILON);
    }

    // The default thresholds satisfy the merge < dedup invariant.
    #[test]
    fn validate_merge_lt_dedup_ok() {
        let cfg = LearningConfig::default(); // merge=0.75, dedup=0.90
        assert!(cfg.validate().is_ok());
    }

    // Equal thresholds violate the strict ordering and must be rejected.
    #[test]
    fn validate_merge_eq_dedup_err() {
        let cfg = LearningConfig {
            merge_threshold: 0.90,
            dedup_threshold: 0.90,
            ..LearningConfig::default()
        };
        let err = cfg.validate().unwrap_err();
        assert!(
            err.contains("merge_threshold") && err.contains("dedup_threshold"),
            "unexpected error: {err}"
        );
    }

    // A merge threshold above the dedup threshold must be rejected.
    #[test]
    fn validate_merge_gt_dedup_err() {
        let cfg = LearningConfig {
            merge_threshold: 0.95,
            dedup_threshold: 0.90,
            ..LearningConfig::default()
        };
        let err = cfg.validate().unwrap_err();
        assert!(
            err.contains("merge_threshold") && err.contains("dedup_threshold"),
            "unexpected error: {err}"
        );
    }

    // `dedup_threshold`: default of 0.90, overridable from TOML.
    #[test]
    fn autoskill_a2_dedup_threshold_default_and_roundtrip() {
        let cfg = LearningConfig::default();
        assert!((cfg.dedup_threshold - 0.90_f32).abs() < f32::EPSILON);
        let cfg: LearningConfig = toml::from_str("dedup_threshold = 0.95").unwrap();
        assert!((cfg.dedup_threshold - 0.95_f32).abs() < f32::EPSILON);
    }

    // The cross-session rollout / section-limit / domain-gate fields parse from TOML.
    #[test]
    fn learning_config_new_fields_serde_roundtrip() {
        let toml = r"
cross_session_rollout = true
min_sessions_before_promote = 5
min_sessions_before_demote = 2
max_auto_sections = 4
domain_success_gate = true
";
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.cross_session_rollout);
        assert_eq!(cfg.min_sessions_before_promote, 5);
        assert_eq!(cfg.min_sessions_before_demote, 2);
        assert_eq!(cfg.max_auto_sections, 4);
        assert!(cfg.domain_success_gate);
    }

    // Embedding provider for trace extraction: empty by default, overridable from TOML.
    #[test]
    fn trace_extraction_embedding_provider_default_and_roundtrip() {
        let cfg = LearningConfig::default();
        assert!(cfg.trace_extraction_embedding_provider.is_empty());
        let cfg: LearningConfig =
            toml::from_str(r#"trace_extraction_embedding_provider = "embed-fast""#).unwrap();
        assert_eq!(cfg.trace_extraction_embedding_provider, "embed-fast");
    }

    // Programmatic defaults of the heuristic promotion settings.
    #[test]
    fn heuristic_promotion_defaults() {
        let cfg = LearningConfig::default();
        assert!(!cfg.heuristic_promotion_enabled);
        assert!(cfg.heuristic_promotion_provider.is_empty());
        assert_eq!(cfg.heuristic_promotion_threshold, 5);
        assert_eq!(cfg.heuristic_promotion_interval_hours, 24);
    }

    // Every heuristic promotion field can be overridden from TOML.
    #[test]
    fn heuristic_promotion_serde_roundtrip() {
        let toml = r#"
heuristic_promotion_enabled = true
heuristic_promotion_provider = "quality"
heuristic_promotion_threshold = 10
heuristic_promotion_interval_hours = 48
"#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert!(cfg.heuristic_promotion_enabled);
        assert_eq!(cfg.heuristic_promotion_provider, "quality");
        assert_eq!(cfg.heuristic_promotion_threshold, 10);
        assert_eq!(cfg.heuristic_promotion_interval_hours, 48);
    }

    // Serde defaults for heuristic promotion match the programmatic ones.
    #[test]
    fn heuristic_promotion_empty_section_uses_defaults() {
        let cfg: LearningConfig = toml::from_str("").unwrap();
        assert!(!cfg.heuristic_promotion_enabled);
        assert_eq!(cfg.heuristic_promotion_threshold, 5);
        assert_eq!(cfg.heuristic_promotion_interval_hours, 24);
    }

    // No judge provider is configured by default.
    #[test]
    fn judge_provider_default_is_empty() {
        let cfg = LearningConfig::default();
        assert!(cfg.judge_provider.is_empty());
    }

    // `judge_provider` is read from TOML.
    #[test]
    fn judge_provider_serde_roundtrip() {
        let cfg: LearningConfig = toml::from_str(r#"judge_provider = "quality""#).unwrap();
        assert_eq!(cfg.judge_provider, "quality");
    }

    // The legacy `judge_model` and the newer `judge_provider` may be set together.
    #[test]
    fn judge_provider_and_judge_model_coexist() {
        let toml = r#"
judge_model = "claude-sonnet-4-6"
judge_provider = "quality"
detector_mode = "judge"
"#;
        let cfg: LearningConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.judge_model, "claude-sonnet-4-6");
        assert_eq!(cfg.judge_provider, "quality");
        assert_eq!(cfg.detector_mode, DetectorMode::Judge);
    }

    // A config that only sets the legacy `judge_model` leaves `judge_provider` empty.
    #[test]
    fn judge_provider_absent_falls_back_to_empty_default() {
        let cfg: LearningConfig = toml::from_str("judge_model = \"gpt-4o\"").unwrap();
        assert!(
            cfg.judge_provider.is_empty(),
            "missing judge_provider must default to empty string"
        );
        assert_eq!(cfg.judge_model, "gpt-4o");
    }
}