Skip to main content

zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Default for `MemoryConfig::sqlite_pool_size`: `5`.
fn default_sqlite_pool_size() -> u32 {
    5_u32
}
17
/// Built-in default for the maximum history length: `100`.
fn default_max_history() -> usize {
    100_usize
}
21
/// Built-in default for the maximum title length in characters: `60`.
fn default_title_max_chars() -> usize {
    60_usize
}
25
/// Built-in default document collection name: `zeph_documents`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
29
/// Built-in default document chunk size: `1000`.
fn default_document_chunk_size() -> usize {
    1000_usize
}
33
/// Built-in default overlap between adjacent document chunks: `100`.
fn default_document_chunk_overlap() -> usize {
    100_usize
}
37
/// Built-in default `top_k` for document retrieval: `3`.
fn default_document_top_k() -> usize {
    3_usize
}
41
/// Default for `MemoryConfig::autosave_min_length`: `20`.
fn default_autosave_min_length() -> usize {
    20_usize
}
45
/// Default for `MemoryConfig::tool_call_cutoff`: `6`.
fn default_tool_call_cutoff() -> usize {
    6_usize
}
49
/// Default for `MemoryConfig::token_safety_margin`: `1.0`.
fn default_token_safety_margin() -> f32 {
    1.0_f32
}
53
/// Default for `MemoryConfig::redact_credentials`: redaction is on (`true`).
fn default_redact_credentials() -> bool {
    true
}
57
/// Default for `MemoryConfig::qdrant_url`: a Qdrant instance on localhost, port 6334.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
61
/// Default for `MemoryConfig::summarization_threshold`: `50`.
fn default_summarization_threshold() -> usize {
    50_usize
}
65
/// Default for `MemoryConfig::summarization_llm_timeout_secs`: `60` seconds.
fn default_summarization_llm_timeout_secs() -> u64 {
    60_u64
}
69
/// Default for `MemoryConfig::context_budget_tokens`: `0`.
fn default_context_budget_tokens() -> usize {
    0_usize
}
73
/// Default for `MemoryConfig::soft_compaction_threshold`: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    0.60_f32
}
77
/// Default for `MemoryConfig::hard_compaction_threshold`: `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    0.90_f32
}
81
/// Default for `MemoryConfig::compaction_preserve_tail`: `6`.
fn default_compaction_preserve_tail() -> usize {
    6_usize
}
85
/// Default for `MemoryConfig::compaction_cooldown_turns`: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    2_u8
}
89
/// Default for `MemoryConfig::auto_budget`: enabled (`true`).
fn default_auto_budget() -> bool {
    true
}
93
/// Default for `MemoryConfig::prune_protect_tokens`: `40_000`.
fn default_prune_protect_tokens() -> usize {
    40_000_usize
}
97
/// Default for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    0.35_f32
}
101
/// Built-in default temporal-decay half-life, in days: `30`.
fn default_temporal_decay_half_life_days() -> u32 {
    30_u32
}
105
/// Built-in default MMR lambda: `0.7`.
fn default_mmr_lambda() -> f32 {
    0.7_f32
}
109
/// Built-in default for the semantic-memory `enabled` flag: `true`.
fn default_semantic_enabled() -> bool {
    true
}
113
/// Built-in default recall limit: `5`.
fn default_recall_limit() -> usize {
    5_usize
}
117
/// Built-in default vector-score weight: `0.7`.
fn default_vector_weight() -> f64 {
    0.7_f64
}
121
/// Built-in default keyword-score weight: `0.3`.
fn default_keyword_weight() -> f64 {
    0.3_f64
}
125
/// Built-in default cap on graph entities per message: `10`.
fn default_graph_max_entities_per_message() -> usize {
    10_usize
}
129
/// Built-in default cap on graph edges per message: `15`.
fn default_graph_max_edges_per_message() -> usize {
    15_usize
}
133
/// Built-in default graph community refresh interval: `100`.
fn default_graph_community_refresh_interval() -> usize {
    100_usize
}
137
/// Built-in default byte limit for a community-summary prompt: `8192`.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192_usize
}
141
/// Built-in default community-summary concurrency: `4`.
fn default_graph_community_summary_concurrency() -> usize {
    4_usize
}
145
/// Built-in default LPA edge chunk size: `10_000`.
fn default_lpa_edge_chunk_size() -> usize {
    10_000_usize
}
149
/// Built-in default graph entity similarity threshold: `0.85`.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85_f32
}
153
/// Built-in default graph entity ambiguity threshold: `0.70`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70_f32
}
157
/// Built-in default graph extraction timeout, in seconds: `15`.
fn default_graph_extraction_timeout_secs() -> u64 {
    15_u64
}
161
/// Built-in default maximum graph hop count: `2`.
fn default_graph_max_hops() -> u32 {
    2_u32
}
165
/// Built-in default graph recall limit: `10`.
fn default_graph_recall_limit() -> usize {
    10_usize
}
169
/// Built-in default retention for expired graph edges, in days: `90`.
fn default_graph_expired_edge_retention_days() -> u32 {
    90_u32
}
173
/// Built-in default graph temporal decay rate: `0.0`.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0_f64
}
177
/// Built-in default graph edge history limit: `100`.
fn default_graph_edge_history_limit() -> usize {
    100_usize
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85_f32
}
185
/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    3_u32
}
189
/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1_f32
}
193
/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8_f32
}
197
/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50_usize
}
201
/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000` milliseconds.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1000_u64
}
205
/// Default for `SpreadingActivationConfig::alpha`: `0.3`.
fn default_benna_alpha() -> f32 {
    0.3_f32
}
209
/// Default for `SpreadingActivationConfig::benna_fast_rate`: `0.5`.
fn default_benna_fast_rate() -> f32 {
    0.5_f32
}
213
/// Default for `SpreadingActivationConfig::benna_slow_rate`: `0.05`.
fn default_benna_slow_rate() -> f32 {
    0.05_f32
}
217
/// Default for `WriteGateConfig::min_edge_relevance`: `0.3`.
fn default_write_gate_min_edge_relevance() -> f32 {
    0.3_f32
}
221
/// Default for `ConflictRecencyConfig::recency_slow_threshold`: `0.2`.
fn default_conflict_recency_slow_threshold() -> f32 {
    0.2_f32
}
225
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85_f32
}
229
/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    10_usize
}
233
/// Default for `NoteLinkingConfig::timeout_secs`: `5` seconds.
fn default_note_linking_timeout_secs() -> u64 {
    5_u64
}
237
/// Default for `MemoryConfig::shutdown_summary`: enabled (`true`).
fn default_shutdown_summary() -> bool {
    true
}
241
/// Default for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    4_usize
}
245
/// Default for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    20_usize
}
249
/// Default for `MemoryConfig::shutdown_summary_timeout_secs`: `30` seconds.
fn default_shutdown_summary_timeout_secs() -> u64 {
    30_u64
}
253
254fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
255where
256    D: serde::Deserializer<'de>,
257{
258    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
259    if value.is_nan() || value.is_infinite() {
260        return Err(serde::de::Error::custom(
261            "similarity_threshold must be a finite number",
262        ));
263    }
264    if !(0.5..=1.0).contains(&value) {
265        return Err(serde::de::Error::custom(
266            "similarity_threshold must be in [0.5, 1.0]",
267        ));
268    }
269    Ok(value)
270}
271
272fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
273where
274    D: serde::Deserializer<'de>,
275{
276    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
277    if value < 2 {
278        return Err(serde::de::Error::custom(
279            "promotion_min_sessions must be >= 2",
280        ));
281    }
282    Ok(value)
283}
284
285fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
286where
287    D: serde::Deserializer<'de>,
288{
289    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
290    if value == 0 {
291        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
292    }
293    Ok(value)
294}
295
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    3_u32
}
299
/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    0.92_f32
}
303
/// Default for `TierConfig::sweep_interval_secs`: `3600` seconds.
fn default_tier_sweep_interval_secs() -> u64 {
    3600_u64
}
307
/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    100_usize
}
311
/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    0.80_f32
}
315
/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    50_usize
}
319
320fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
321where
322    D: serde::Deserializer<'de>,
323{
324    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
325    if value.is_nan() || value.is_infinite() {
326        return Err(serde::de::Error::custom(
327            "scene_similarity_threshold must be a finite number",
328        ));
329    }
330    if !(0.5..=1.0).contains(&value) {
331        return Err(serde::de::Error::custom(
332            "scene_similarity_threshold must be in [0.5, 1.0]",
333        ));
334    }
335    Ok(value)
336}
337
338fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
339where
340    D: serde::Deserializer<'de>,
341{
342    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
343    if value == 0 {
344        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
345    }
346    Ok(value)
347}
348
349/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
350///
351/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
352/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
353///
354/// # Validation
355///
356/// Constraints enforced at deserialization time:
357/// - `similarity_threshold` in `[0.5, 1.0]`
358/// - `promotion_min_sessions >= 2`
359/// - `sweep_batch_size >= 1`
360/// - `scene_similarity_threshold` in `[0.5, 1.0]`
361/// - `scene_batch_size >= 1`
362#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
363#[serde(default)]
364pub struct TierConfig {
365    /// Enable the tier promotion system. When `false`, all messages remain episodic.
366    /// Default: `false`.
367    pub enabled: bool,
368    /// Minimum number of distinct sessions a fact must appear in before promotion.
369    /// Must be `>= 2`. Default: `3`.
370    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
371    pub promotion_min_sessions: u32,
372    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
373    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
374    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
375    pub similarity_threshold: f32,
376    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
377    pub sweep_interval_secs: u64,
378    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
379    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
380    pub sweep_batch_size: usize,
381    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
382    pub scene_enabled: bool,
383    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
384    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
385    pub scene_similarity_threshold: f32,
386    /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
387    #[serde(deserialize_with = "validate_scene_batch_size")]
388    pub scene_batch_size: usize,
389    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
390    /// Falls back to the primary provider when empty. Default: `""`.
391    pub scene_provider: ProviderName,
392    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
393    pub scene_sweep_interval_secs: u64,
394}
395
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200` seconds.
fn default_scene_sweep_interval_secs() -> u64 {
    7200_u64
}
399
400impl Default for TierConfig {
401    fn default() -> Self {
402        Self {
403            enabled: false,
404            promotion_min_sessions: default_tier_promotion_min_sessions(),
405            similarity_threshold: default_tier_similarity_threshold(),
406            sweep_interval_secs: default_tier_sweep_interval_secs(),
407            sweep_batch_size: default_tier_sweep_batch_size(),
408            scene_enabled: false,
409            scene_similarity_threshold: default_scene_similarity_threshold(),
410            scene_batch_size: default_scene_batch_size(),
411            scene_provider: ProviderName::default(),
412            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
413        }
414    }
415}
416
417fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
418where
419    D: serde::Deserializer<'de>,
420{
421    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
422    if value.is_nan() || value.is_infinite() {
423        return Err(serde::de::Error::custom(
424            "temporal_decay_rate must be a finite number",
425        ));
426    }
427    if !(0.0..=10.0).contains(&value) {
428        return Err(serde::de::Error::custom(
429            "temporal_decay_rate must be in [0.0, 10.0]",
430        ));
431    }
432    Ok(value)
433}
434
435fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
436where
437    D: serde::Deserializer<'de>,
438{
439    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
440    if value.is_nan() || value.is_infinite() {
441        return Err(serde::de::Error::custom(
442            "similarity_threshold must be a finite number",
443        ));
444    }
445    if !(0.0..=1.0).contains(&value) {
446        return Err(serde::de::Error::custom(
447            "similarity_threshold must be in [0.0, 1.0]",
448        ));
449    }
450    Ok(value)
451}
452
453fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
454where
455    D: serde::Deserializer<'de>,
456{
457    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
458    if value.is_nan() || value.is_infinite() {
459        return Err(serde::de::Error::custom(
460            "importance_weight must be a finite number",
461        ));
462    }
463    if value < 0.0 {
464        return Err(serde::de::Error::custom(
465            "importance_weight must be non-negative",
466        ));
467    }
468    if value > 1.0 {
469        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
470    }
471    Ok(value)
472}
473
/// Built-in default importance weight: `0.15`.
fn default_importance_weight() -> f64 {
    0.15_f64
}
477
478/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
479///
480/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
481/// Seeds are initialized from fuzzy entity matches, then activation propagates
482/// hop-by-hop with exponential decay and lateral inhibition.
483///
484/// # Validation
485///
486/// Constraints enforced at deserialization time:
487/// - `0.0 < decay_lambda <= 1.0`
488/// - `max_hops >= 1`
489/// - `activation_threshold < inhibition_threshold`
490/// - `recall_timeout_ms >= 1` (clamped to 100 with a warning if set to 0)
491#[derive(Debug, Clone, Deserialize, Serialize)]
492#[serde(default)]
493pub struct SpreadingActivationConfig {
494    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
495    pub enabled: bool,
496    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
497    #[serde(deserialize_with = "validate_decay_lambda")]
498    pub decay_lambda: f32,
499    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
500    #[serde(deserialize_with = "validate_max_hops")]
501    pub max_hops: u32,
502    /// Minimum activation score to include a node in results. Default: `0.1`.
503    pub activation_threshold: f32,
504    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
505    pub inhibition_threshold: f32,
506    /// Cap on total activated nodes per spread pass. Default: `50`.
507    pub max_activated_nodes: usize,
508    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
509    #[serde(default = "default_seed_structural_weight")]
510    pub seed_structural_weight: f32,
511    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
512    #[serde(default = "default_seed_community_cap")]
513    pub seed_community_cap: usize,
514    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
515    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
516    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
517    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
518    pub recall_timeout_ms: u64,
519    /// SYNAPSE blend coefficient for Benna-Fusi fast/slow variables (#3709).
520    ///
521    /// `blended = alpha * confidence_fast + (1 - alpha) * confidence_slow`.
522    /// Range: `[0.0, 1.0]`. Default: `0.3`.
523    #[serde(
524        default = "default_benna_alpha",
525        deserialize_with = "validate_benna_alpha"
526    )]
527    pub alpha: f32,
528    /// Benna-Fusi fast-variable learning rate applied on each confidence merge (#3709).
529    ///
530    /// `fast' = fast + eta_f * (c - fast)`. Range: `(0.0, 1.0]`. Default: `0.5`.
531    #[serde(
532        default = "default_benna_fast_rate",
533        deserialize_with = "validate_benna_rate"
534    )]
535    pub benna_fast_rate: f32,
536    /// Benna-Fusi slow-variable learning rate applied on each confidence merge (#3709).
537    ///
538    /// `slow' = slow + eta_s * (fast' - slow)`. Range: `(0.0, 1.0]`. Default: `0.05`.
539    #[serde(
540        default = "default_benna_slow_rate",
541        deserialize_with = "validate_benna_rate"
542    )]
543    pub benna_slow_rate: f32,
544}
545
546fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
547where
548    D: serde::Deserializer<'de>,
549{
550    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
551    if value.is_nan() || value.is_infinite() {
552        return Err(serde::de::Error::custom(
553            "decay_lambda must be a finite number",
554        ));
555    }
556    if !(value > 0.0 && value <= 1.0) {
557        return Err(serde::de::Error::custom(
558            "decay_lambda must be in (0.0, 1.0]",
559        ));
560    }
561    Ok(value)
562}
563
564fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
565where
566    D: serde::Deserializer<'de>,
567{
568    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
569    if value == 0 {
570        return Err(serde::de::Error::custom("max_hops must be >= 1"));
571    }
572    Ok(value)
573}
574
575fn validate_unit_f32<'de, D>(deserializer: D) -> Result<f32, D::Error>
576where
577    D: serde::Deserializer<'de>,
578{
579    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
580    if !value.is_finite() {
581        return Err(serde::de::Error::custom("value must be a finite number"));
582    }
583    if !(0.0..=1.0).contains(&value) {
584        return Err(serde::de::Error::custom("value must be in [0.0, 1.0]"));
585    }
586    Ok(value)
587}
588
589fn validate_benna_alpha<'de, D>(deserializer: D) -> Result<f32, D::Error>
590where
591    D: serde::Deserializer<'de>,
592{
593    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
594    if !value.is_finite() {
595        return Err(serde::de::Error::custom("alpha must be a finite number"));
596    }
597    if !(0.0..=1.0).contains(&value) {
598        return Err(serde::de::Error::custom("alpha must be in [0.0, 1.0]"));
599    }
600    Ok(value)
601}
602
603fn validate_benna_rate<'de, D>(deserializer: D) -> Result<f32, D::Error>
604where
605    D: serde::Deserializer<'de>,
606{
607    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
608    if !value.is_finite() {
609        return Err(serde::de::Error::custom(
610            "benna_fast_rate/benna_slow_rate must be a finite number",
611        ));
612    }
613    if !(value > 0.0 && value <= 1.0) {
614        return Err(serde::de::Error::custom(
615            "benna_fast_rate/benna_slow_rate must be in (0.0, 1.0]",
616        ));
617    }
618    Ok(value)
619}
620
621impl SpreadingActivationConfig {
622    /// Validate cross-field constraints that cannot be expressed in per-field validators.
623    ///
624    /// # Errors
625    ///
626    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
627    pub fn validate(&self) -> Result<(), String> {
628        if self.activation_threshold >= self.inhibition_threshold {
629            return Err(format!(
630                "activation_threshold ({}) must be < inhibition_threshold ({})",
631                self.activation_threshold, self.inhibition_threshold
632            ));
633        }
634        Ok(())
635    }
636}
637
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    0.4_f32
}
641
/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    3_usize
}
645
646impl Default for SpreadingActivationConfig {
647    fn default() -> Self {
648        Self {
649            enabled: false,
650            decay_lambda: default_spreading_activation_decay_lambda(),
651            max_hops: default_spreading_activation_max_hops(),
652            activation_threshold: default_spreading_activation_activation_threshold(),
653            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
654            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
655            seed_structural_weight: default_seed_structural_weight(),
656            seed_community_cap: default_seed_community_cap(),
657            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
658            alpha: default_benna_alpha(),
659            benna_fast_rate: default_benna_fast_rate(),
660            benna_slow_rate: default_benna_slow_rate(),
661        }
662    }
663}
664
665/// `MemORAI` write-gate prefilter configuration (#3709).
666///
667/// When `enabled = true`, low-signal edges (confidence below threshold + generic relation type)
668/// are silently dropped before write, reducing noise in the knowledge graph.
669///
670/// TOML path: `[memory.graph.write_gate]`
671#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
672#[serde(default)]
673pub struct WriteGateConfig {
674    /// Enable write-gate prefilter. Default: `false` (opt-in).
675    pub enabled: bool,
676    /// Minimum edge confidence to pass the gate when the relation is low-signal. Default: `0.3`.
677    ///
678    /// Range: `[0.0, 1.0]`.
679    #[serde(
680        default = "default_write_gate_min_edge_relevance",
681        deserialize_with = "validate_unit_f32"
682    )]
683    pub min_edge_relevance: f32,
684}
685
686impl Default for WriteGateConfig {
687    fn default() -> Self {
688        Self {
689            enabled: false,
690            min_edge_relevance: default_write_gate_min_edge_relevance(),
691        }
692    }
693}
694
695/// Recency fallback threshold for the conflict resolver (#3709).
696///
697/// TOML path: `[memory.graph.conflict]`
698#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
699#[serde(default)]
700pub struct ConflictRecencyConfig {
701    /// Minimum `confidence_slow` for the recency strategy to prefer an edge. Default: `0.2`.
702    ///
703    /// When two cardinality-1 heads conflict and recency is the resolution strategy,
704    /// only edges with `confidence_slow >= recency_slow_threshold` are preferred by recency;
705    /// edges below the threshold fall back to `valid_from` comparison. Range: `[0.0, 1.0]`.
706    #[serde(
707        default = "default_conflict_recency_slow_threshold",
708        deserialize_with = "validate_unit_f32"
709    )]
710    pub recency_slow_threshold: f32,
711}
712
713impl Default for ConflictRecencyConfig {
714    fn default() -> Self {
715        Self {
716            recency_slow_threshold: default_conflict_recency_slow_threshold(),
717        }
718    }
719}
720
721/// Kumiho belief revision configuration.
722#[derive(Debug, Clone, Deserialize, Serialize)]
723#[serde(default)]
724pub struct BeliefRevisionConfig {
725    /// Enable semantic contradiction detection for graph edges. Default: `false`.
726    pub enabled: bool,
727    /// Cosine similarity threshold for considering two facts as contradictory.
728    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
729    #[serde(deserialize_with = "validate_similarity_threshold")]
730    pub similarity_threshold: f32,
731}
732
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85_f32
}
736
737impl Default for BeliefRevisionConfig {
738    fn default() -> Self {
739        Self {
740            enabled: false,
741            similarity_threshold: default_belief_revision_similarity_threshold(),
742        }
743    }
744}
745
746/// D-MEM RPE-based tiered graph extraction routing configuration.
747#[derive(Debug, Clone, Deserialize, Serialize)]
748#[serde(default)]
749pub struct RpeConfig {
750    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
751    pub enabled: bool,
752    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
753    /// Default: `0.3`.
754    #[serde(deserialize_with = "validate_similarity_threshold")]
755    pub threshold: f32,
756    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
757    pub max_skip_turns: u32,
758}
759
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    0.3_f32
}
763
/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    5_u32
}
767
768impl Default for RpeConfig {
769    fn default() -> Self {
770        Self {
771            enabled: false,
772            threshold: default_rpe_threshold(),
773            max_skip_turns: default_rpe_max_skip_turns(),
774        }
775    }
776}
777
778/// Configuration for A-MEM dynamic note linking.
779///
780/// When enabled, after each graph extraction pass, entities extracted from the message are
781/// compared against the entity embedding collection. Pairs with cosine similarity above
782/// `similarity_threshold` receive a `similar_to` edge in the graph.
783#[derive(Debug, Clone, Deserialize, Serialize)]
784#[serde(default)]
785pub struct NoteLinkingConfig {
786    /// Enable A-MEM note linking after graph extraction. Default: `false`.
787    pub enabled: bool,
788    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
789    #[serde(deserialize_with = "validate_similarity_threshold")]
790    pub similarity_threshold: f32,
791    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
792    pub top_k: usize,
793    /// Timeout for the entire linking pass in seconds. Default: `5`.
794    pub timeout_secs: u64,
795}
796
797impl Default for NoteLinkingConfig {
798    fn default() -> Self {
799        Self {
800            enabled: false,
801            similarity_threshold: default_note_linking_similarity_threshold(),
802            top_k: default_note_linking_top_k(),
803            timeout_secs: default_note_linking_timeout_secs(),
804        }
805    }
806}
807
808/// Vector backend selector for embedding storage.
809#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
810#[serde(rename_all = "lowercase")]
811#[non_exhaustive]
812pub enum VectorBackend {
813    Qdrant,
814    #[default]
815    Sqlite,
816}
817
818impl VectorBackend {
819    /// Return the lowercase identifier string for this backend.
820    ///
821    /// # Examples
822    ///
823    /// ```
824    /// use zeph_config::VectorBackend;
825    ///
826    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
827    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
828    /// ```
829    #[must_use]
830    pub fn as_str(&self) -> &'static str {
831        match self {
832            Self::Qdrant => "qdrant",
833            Self::Sqlite => "sqlite",
834        }
835    }
836}
837
838/// Memory subsystem configuration, nested under `[memory]` in TOML.
839///
840/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
841/// multi-tier promotion, and all memory-related background tasks.
842///
843/// # Example (TOML)
844///
845/// ```toml
846/// [memory]
847/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
848/// qdrant_url = "http://localhost:6334"
849/// history_limit = 50
850/// summarization_threshold = 50
851/// auto_budget = true
852/// ```
853#[derive(Debug, Deserialize, Serialize)]
854#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
855pub struct MemoryConfig {
856    #[serde(default)]
857    pub compression_guidelines: CompressionGuidelinesConfig,
858    #[serde(default = "default_sqlite_path_field")]
859    pub sqlite_path: String,
860    pub history_limit: u32,
861    #[serde(default = "default_qdrant_url")]
862    pub qdrant_url: String,
863    /// Optional API key for authenticating to a remote or managed Qdrant cluster.
864    ///
865    /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
866    /// Leave `None` for local dev instances. The actual key is resolved from the vault:
867    /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
868    ///
869    /// The value is wrapped in [`Secret`] to prevent accidental logging.
870    /// `skip_serializing` prevents the key from being written back to TOML on config save.
871    #[serde(default, skip_serializing)]
872    pub qdrant_api_key: Option<Secret>,
873    #[serde(default)]
874    pub semantic: SemanticConfig,
875    #[serde(default = "default_summarization_threshold")]
876    pub summarization_threshold: usize,
877    /// LLM call timeout for summarization, in seconds. Default: `60`.
878    #[serde(default = "default_summarization_llm_timeout_secs")]
879    pub summarization_llm_timeout_secs: u64,
880    #[serde(default = "default_context_budget_tokens")]
881    pub context_budget_tokens: usize,
882    #[serde(default = "default_soft_compaction_threshold")]
883    pub soft_compaction_threshold: f32,
884    #[serde(
885        default = "default_hard_compaction_threshold",
886        alias = "compaction_threshold"
887    )]
888    pub hard_compaction_threshold: f32,
889    #[serde(default = "default_compaction_preserve_tail")]
890    pub compaction_preserve_tail: usize,
891    #[serde(default = "default_compaction_cooldown_turns")]
892    pub compaction_cooldown_turns: u8,
893    #[serde(default = "default_auto_budget")]
894    pub auto_budget: bool,
895    #[serde(default = "default_prune_protect_tokens")]
896    pub prune_protect_tokens: usize,
897    #[serde(default = "default_cross_session_score_threshold")]
898    pub cross_session_score_threshold: f32,
899    #[serde(default)]
900    pub vector_backend: VectorBackend,
901    #[serde(default = "default_token_safety_margin")]
902    pub token_safety_margin: f32,
903    #[serde(default = "default_redact_credentials")]
904    pub redact_credentials: bool,
905    #[serde(default = "default_true")]
906    pub autosave_assistant: bool,
907    #[serde(default = "default_autosave_min_length")]
908    pub autosave_min_length: usize,
909    #[serde(default = "default_tool_call_cutoff")]
910    pub tool_call_cutoff: usize,
911    #[serde(default = "default_sqlite_pool_size")]
912    pub sqlite_pool_size: u32,
913    #[serde(default)]
914    pub sessions: SessionsConfig,
915    #[serde(default)]
916    pub documents: DocumentConfig,
917    #[serde(default)]
918    pub eviction: EvictionConfig,
919    #[serde(default)]
920    pub compression: CompressionConfig,
921    #[serde(default)]
922    pub sidequest: SidequestConfig,
923    #[serde(default)]
924    pub graph: GraphConfig,
925    /// Store a lightweight session summary to the vector store on shutdown when no session
926    /// summary exists yet for this conversation. Enables cross-session recall for short or
927    /// interrupted sessions that never triggered hard compaction. Default: `true`.
928    #[serde(default = "default_shutdown_summary")]
929    pub shutdown_summary: bool,
930    /// Minimum number of user-turn messages required before a shutdown summary is generated.
931    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
932    #[serde(default = "default_shutdown_summary_min_messages")]
933    pub shutdown_summary_min_messages: usize,
934    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
935    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
936    /// Default: `20`.
937    #[serde(default = "default_shutdown_summary_max_messages")]
938    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `30`.
942    #[serde(default = "default_shutdown_summary_timeout_secs")]
943    pub shutdown_summary_timeout_secs: u64,
944    /// LLM provider used for shutdown summarization calls.
945    ///
946    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
947    /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
948    ///
949    /// Example:
950    /// ```toml
951    /// [memory]
952    /// shutdown_summary_provider = "fast"
953    /// ```
954    #[serde(default)]
955    pub shutdown_summary_provider: ProviderName,
956    /// LLM provider used for deferred tool-pair summarization (context compaction).
957    ///
958    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
959    /// provider. A mid-tier model is usually sufficient for compaction summaries.
960    ///
961    /// Example:
962    /// ```toml
963    /// [memory]
964    /// compaction_provider = "fast"
965    /// ```
966    #[serde(default)]
967    pub compaction_provider: ProviderName,
968    /// Use structured anchored summaries for context compaction.
969    ///
970    /// When enabled, hard compaction requests a JSON schema from the LLM
971    /// instead of free-form prose. Falls back to prose if the LLM fails
972    /// to produce valid JSON. Default: `false`.
973    #[serde(default)]
974    pub structured_summaries: bool,
975    /// AOI three-layer memory tier promotion system.
976    ///
977    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
978    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
979    #[serde(default)]
980    pub tiers: TierConfig,
981    /// A-MAC adaptive memory admission control.
982    ///
983    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
984    /// if its composite admission score falls below the configured threshold.
985    #[serde(default)]
986    pub admission: AdmissionConfig,
987    /// Session digest generation at session end. Default: disabled.
988    #[serde(default)]
989    pub digest: DigestConfig,
990    /// Context assembly strategy. Default: `full_history` (current behavior).
991    #[serde(default)]
992    pub context_strategy: ContextStrategy,
993    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
994    #[serde(default = "default_crossover_turn_threshold")]
995    pub crossover_turn_threshold: u32,
996    /// All-Mem lifelong memory consolidation sweep.
997    ///
998    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
999    /// messages and merges them into consolidated entries via LLM.
1000    #[serde(default)]
1001    pub consolidation: ConsolidationConfig,
1002    /// `SleepGate` forgetting sweep (#2397).
1003    ///
1004    /// When `forgetting.enabled = true`, a background loop periodically decays importance
1005    /// scores and prunes memories below the forgetting floor.
1006    #[serde(default)]
1007    pub forgetting: ForgettingConfig,
1008    /// `PostgreSQL` connection URL.
1009    ///
1010    /// Used when the binary is compiled with `--features postgres`.
1011    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
1012    /// Example: `postgres://user:pass@localhost:5432/zeph`
1013    /// Default: `None` (uses `sqlite_path` instead).
1014    #[serde(default)]
1015    pub database_url: Option<String>,
1016    /// Cost-sensitive store routing (#2444).
1017    ///
1018    /// When `store_routing.enabled = true`, query intent is classified and routed to
1019    /// the cheapest sufficient backend instead of querying all stores on every turn.
1020    #[serde(default)]
1021    pub store_routing: StoreRoutingConfig,
1022    /// Persona memory layer (#2461).
1023    ///
1024    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
1025    /// from conversation history and injected into context after the system prompt.
1026    #[serde(default)]
1027    pub persona: PersonaConfig,
1028    /// Trajectory-informed memory (#2498).
1029    #[serde(default)]
1030    pub trajectory: TrajectoryConfig,
1031    /// Category-aware memory (#2428).
1032    #[serde(default)]
1033    pub category: CategoryConfig,
1034    /// `TiMem` temporal-hierarchical memory tree (#2262).
1035    #[serde(default)]
1036    pub tree: TreeConfig,
1037    /// Time-based microcompact (#2699).
1038    ///
1039    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
1040    /// from context when the session has been idle longer than `gap_threshold_minutes`.
1041    #[serde(default)]
1042    pub microcompact: MicrocompactConfig,
1043    /// autoDream background memory consolidation (#2697).
1044    ///
1045    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
1046    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
1047    #[serde(default)]
1048    pub autodream: AutoDreamConfig,
1049    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
1050    ///
1051    /// Before inserting a new key fact, its nearest neighbour is looked up in the
1052    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
1053    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
1054    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
1055    #[serde(default = "default_key_facts_dedup_threshold")]
1056    pub key_facts_dedup_threshold: f32,
1057    /// Experience compression spectrum (#3305).
1058    ///
1059    /// Controls three-tier retrieval policy and background skill-promotion engine.
1060    #[serde(default)]
1061    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
1062    /// MemMachine-inspired retrieval-stage tuning (#3340).
1063    ///
1064    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
1065    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
1066    /// uniformly across graph, hybrid, and vector-only recall paths.
1067    ///
1068    /// # Example (TOML)
1069    ///
1070    /// ```toml
1071    /// [memory.retrieval]
1072    /// depth = 40
1073    /// search_prompt_template = ""
1074    /// context_format = "structured"
1075    /// ```
1076    #[serde(default)]
1077    pub retrieval: RetrievalConfig,
1078    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
1079    ///
1080    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
1081    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
1082    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
1083    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
1084    /// and injected before the LLM call.
1085    #[serde(default)]
1086    pub reasoning: ReasoningConfig,
1087    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
1088    ///
1089    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
1090    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
1091    ///
1092    /// # Example (TOML)
1093    ///
1094    /// ```toml
1095    /// [memory.hebbian]
1096    /// enabled = true
1097    /// hebbian_lr = 0.1
1098    /// ```
1099    #[serde(default)]
1100    pub hebbian: HebbianConfig,
1101    /// `MemCoT` rolling semantic state configuration (#3574).
1102    ///
1103    /// When `enabled = true`, each completed assistant turn spawns a background distillation
1104    /// task that compresses the response into a short semantic state buffer. The buffer is
1105    /// prepended to graph recall queries so retrieval stays contextually relevant across long
1106    /// multi-turn sessions.
1107    ///
1108    /// # Example (TOML)
1109    ///
1110    /// ```toml
1111    /// [memory.memcot]
1112    /// enabled = true
1113    /// distill_provider = "fast"
1114    /// min_assistant_chars = 200
1115    /// max_distills_per_session = 50
1116    /// ```
1117    #[serde(default)]
1118    pub memcot: MemCotConfig,
1119    /// `OmniMem` retrieval failure tracking (issue #3576).
1120    ///
1121    /// When `enabled = true`, no-hit and low-confidence recall events are logged
1122    /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
1123    ///
1124    /// # Example (TOML)
1125    ///
1126    /// ```toml
1127    /// [memory.retrieval_failures]
1128    /// enabled = true
1129    /// low_confidence_threshold = 0.3
1130    /// retention_days = 90
1131    /// ```
1132    #[serde(default)]
1133    pub retrieval_failures: RetrievalFailuresConfig,
1134    /// Write quality gate (#3629).
1135    ///
1136    /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
1137    /// writes are rejected before persistence. Evaluated after A-MAC admission control.
1138    #[serde(default)]
1139    pub quality_gate: WriteQualityGateConfig,
1140    /// `MemFlow` tiered intent-driven retrieval (issue #3712).
1141    ///
1142    /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
1143    /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
1144    /// `DeepReasoning`) with optional validation and tier escalation.
1145    #[serde(default)]
1146    pub tiered_retrieval: TieredRetrievalConfig,
1147    /// `ScrapMem` optical forgetting (issue #3713).
1148    ///
1149    /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
1150    /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1151    #[serde(default)]
1152    pub optical_forgetting: OpticalForgettingConfig,
1153    /// EM-Graph episodic event extraction and causal linking (issue #3713).
1154    ///
1155    /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1156    /// and linked via causal relationships, enabling causal-chain retrieval.
1157    #[serde(default)]
1158    pub em_graph: EmGraphConfig,
1159    /// Episodic-to-semantic consolidation daemon (issue #3799).
1160    ///
1161    /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1162    /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1163    /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1164    #[serde(default)]
1165    pub episodic_consolidation: EpisodicConsolidationConfig,
1166    /// MAGE shadow memory trajectory risk accumulator (spec 004-16).
1167    ///
1168    /// Maintains a per-session rolling risk score fed by sanitizer audit signals.
1169    /// When `shadow_memory.enabled = true`, tool execution is gated if cumulative
1170    /// trajectory risk exceeds `risk_threshold`. When `false`, all code paths are
1171    /// zero-cost no-ops.
1172    ///
1173    /// # Example (TOML)
1174    ///
1175    /// ```toml
1176    /// [memory.shadow_memory]
1177    /// enabled = true
1178    /// risk_threshold = 0.75
1179    /// risk_halflife_turns = 10
1180    /// ```
1181    #[serde(default)]
1182    pub shadow_memory: TrajectoryRiskAccumulatorConfig,
1183    /// Five-signal SYNAPSE retrieval (issue #4374).
1184    ///
1185    /// When `five_signal.enabled = true`, SYNAPSE recall weights five signals: recency,
1186    /// relevance, access frequency, causal distance, and novelty. All new signals default
1187    /// to weight `0.0`, preserving exact backward compatibility.
1188    #[serde(default)]
1189    pub five_signal: FiveSignalConfig,
1190    /// Context-Adaptive Memory fidelity scoring (CAM Phase 1, #4547).
1191    ///
1192    /// When `fidelity.enabled = true`, the heuristic fidelity scorer runs after each
1193    /// `apply_prepared_context()` call and assigns `Full / Compressed / Placeholder`
1194    /// levels to historical messages. Default: disabled.
1195    ///
1196    /// # Example (TOML)
1197    ///
1198    /// ```toml
1199    /// [memory.fidelity]
1200    /// enabled = false
1201    /// w_semantic = 0.3
1202    /// w_temporal = 0.3
1203    /// w_importance = 0.2
1204    /// w_plan = 0.2
1205    /// full_threshold = 0.7
1206    /// compressed_threshold = 0.3
1207    /// compressed_max_tokens = 50
1208    /// regrade_threshold = 0.6
1209    /// min_query_length = 8
1210    /// max_scored_messages = 500
1211    /// ```
1212    #[serde(default, skip_serializing_if = "Option::is_none")]
1213    pub fidelity: Option<crate::fidelity::FidelityConfig>,
1214}
1215
1216// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1217
1218/// `MemFlow` tiered intent-driven retrieval configuration.
1219///
1220/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
1221/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
1222/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
1223///
1224/// # Example (TOML)
1225///
1226/// ```toml
1227/// [memory.tiered_retrieval]
1228/// enabled = false
1229/// classifier_provider = ""
1230/// validator_provider = ""
1231/// token_budget = 4096
1232/// validation_enabled = false
1233/// validation_threshold = 0.6
1234/// max_escalations = 1
1235/// classifier_timeout_secs = 5
1236/// validator_timeout_secs = 5
1237///
1238/// # Signal weights (all default to 0.0; set to activate each signal)
1239/// similarity_weight = 1.0
1240/// recency_weight = 0.0
1241/// recency_half_life_days = 7
1242/// tfidf_weight = 0.0
1243/// cognitive_signal_weight = 0.0
1244/// tier_boost_weight = 0.0
1245/// semantic_tier_boost = 1.0
1246/// ```
1247#[derive(Debug, Clone, Deserialize, Serialize)]
1248#[serde(default)]
1249pub struct TieredRetrievalConfig {
1250    /// Enable `MemFlow` tiered retrieval. Default: `false`.
1251    pub enabled: bool,
1252    /// Provider name from `[[llm.providers]]` for intent classification.
1253    ///
1254    /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
1255    /// is set but the call fails, falls back to the heuristic (fail-open).
1256    pub classifier_provider: ProviderName,
1257    /// Provider name from `[[llm.providers]]` for evidence validation.
1258    ///
1259    /// When empty or when `validation_enabled = false`, no validation call is made.
1260    pub validator_provider: ProviderName,
1261    /// Maximum tokens to gather for evidence per query. Default: `4096`.
1262    pub token_budget: usize,
1263    /// Enable evidence validation and tier escalation. Default: `false`.
1264    pub validation_enabled: bool,
1265    /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
1266    pub validation_threshold: f32,
1267    /// Maximum tier escalations per query. Default: `1`.
1268    pub max_escalations: u8,
1269    /// Timeout in seconds for the classifier LLM call. Default: `5`.
1270    ///
1271    /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
1272    pub classifier_timeout_secs: u64,
1273    /// Timeout in seconds for the validator LLM call. Default: `5`.
1274    ///
1275    /// On timeout the validator is treated as sufficient (fail-open).
1276    pub validator_timeout_secs: u64,
1277
1278    // ── Signal weights ────────────────────────────────────────────────────────
1279    /// Weight applied to the raw similarity score from vector/keyword recall. Default: `1.0`.
1280    ///
1281    /// Set to `1.0` and all other weights to `0.0` to reproduce pre-signal behaviour.
1282    pub similarity_weight: f64,
1283    /// Weight applied to the recency decay signal. Default: `0.0` (disabled).
1284    pub recency_weight: f64,
1285    /// Half-life for recency decay in days. Default: `7`.
1286    ///
1287    /// A message that is `recency_half_life_days` old receives a recency score of `0.5`.
1288    /// Set `recency_weight = 0.0` to disable recency scoring entirely.
1289    pub recency_half_life_days: u32,
1290    /// Weight applied to the TF-IDF signal. Default: `0.0` (disabled).
1291    pub tfidf_weight: f64,
1292    /// Weight applied to the cognitive signal (message access frequency). Default: `0.0` (disabled).
1293    pub cognitive_signal_weight: f64,
1294    /// Weight applied to the tier boost signal for consolidated/semantic entries. Default: `0.0` (disabled).
1295    pub tier_boost_weight: f64,
1296    /// Additive score awarded to entries in the `semantic` tier when `tier_boost_weight > 0`. Default: `1.0`.
1297    ///
1298    /// The final contribution is `tier_boost_weight * semantic_tier_boost` for semantic entries
1299    /// and `0.0` for episodic entries.
1300    pub semantic_tier_boost: f64,
1301    /// Route the `DeepReasoning` tier graph step through query-conditioned recall (#3994).
1302    ///
1303    /// When `true`, the graph recall step for `IntentClass::DeepReasoning` uses
1304    /// `recall_graph_hela` (HELA spreading activation) instead of static-weight BFS,
1305    /// producing query-aligned results. Requires an embedding store. Default: `false` (opt-in).
1306    #[serde(default)]
1307    pub deep_reasoning_query_conditioned: bool,
1308}
1309
1310impl Default for TieredRetrievalConfig {
1311    fn default() -> Self {
1312        Self {
1313            enabled: false,
1314            classifier_provider: ProviderName::default(),
1315            validator_provider: ProviderName::default(),
1316            token_budget: 4096,
1317            validation_enabled: false,
1318            validation_threshold: 0.6,
1319            max_escalations: 1,
1320            classifier_timeout_secs: 5,
1321            validator_timeout_secs: 5,
1322            similarity_weight: 1.0,
1323            recency_weight: 0.0,
1324            recency_half_life_days: 7,
1325            tfidf_weight: 0.0,
1326            cognitive_signal_weight: 0.0,
1327            tier_boost_weight: 0.0,
1328            semantic_tier_boost: 1.0,
1329            deep_reasoning_query_conditioned: false,
1330        }
1331    }
1332}
1333
1334// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1335
/// `ScrapMem` optical forgetting configuration.
///
/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
/// forgetting compresses content in place based on age.
///
/// Every key is optional: the container-level `#[serde(default)]` fills any key missing from
/// the TOML table with the corresponding value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// The values shown are the defaults.
///
/// ```toml
/// [memory.optical_forgetting]
/// enabled = false
/// compress_provider = ""
/// compress_after_turns = 100
/// summarize_after_turns = 500
/// sweep_interval_secs = 3600
/// sweep_batch_size = 50
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct OpticalForgettingConfig {
    /// Enable optical forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for LLM-based content compression.
    /// Falls back to the primary provider when empty.
    pub compress_provider: ProviderName,
    /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
    pub compress_after_turns: u32,
    /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
    // NOTE(review): nothing in this type enforces `summarize_after_turns >= compress_after_turns`;
    // confirm that the sweep validates or tolerates inverted thresholds.
    pub summarize_after_turns: u32,
    /// How often the sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to compress per sweep iteration. Default: `50`.
    pub sweep_batch_size: usize,
}
1370
1371impl Default for OpticalForgettingConfig {
1372    fn default() -> Self {
1373        Self {
1374            enabled: false,
1375            compress_provider: ProviderName::default(),
1376            compress_after_turns: 100,
1377            summarize_after_turns: 500,
1378            sweep_interval_secs: 3600,
1379            sweep_batch_size: 50,
1380        }
1381    }
1382}
1383
1384// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1385
/// EM-Graph episodic event extraction and causal linking configuration.
///
/// When enabled, episodic events are extracted from conversation turns and linked
/// via causal relationships stored in `episodic_events` and `causal_links` tables.
///
/// Every key is optional: the container-level `#[serde(default)]` fills any key missing from
/// the TOML table with the corresponding value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// The values shown are the defaults.
///
/// ```toml
/// [memory.em_graph]
/// enabled = false
/// extract_provider = ""
/// max_chain_depth = 3
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EmGraphConfig {
    /// Enable EM-Graph event extraction and causal linking. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for event extraction.
    /// Falls back to the primary provider when empty.
    pub extract_provider: ProviderName,
    /// Maximum hops when traversing causal chains during recall. Default: `3`.
    pub max_chain_depth: u32,
}
1410
1411impl Default for EmGraphConfig {
1412    fn default() -> Self {
1413        Self {
1414            enabled: false,
1415            extract_provider: ProviderName::default(),
1416            max_chain_depth: 3,
1417        }
1418    }
1419}
1420
1421// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1422
/// Serde default for `EpisodicConsolidationConfig::interval_secs`: `1800` seconds (30 minutes).
fn default_episodic_consolidation_interval_secs() -> u64 {
    const THIRTY_MINUTES_SECS: u64 = 30 * 60;
    THIRTY_MINUTES_SECS
}
1426
/// Serde default for `EpisodicConsolidationConfig::batch_size`: `30` events per sweep.
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1430
/// Serde default for `EpisodicConsolidationConfig::min_age_secs`: `300` seconds (5 minutes).
fn default_episodic_consolidation_min_age_secs() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
1434
/// Serde default for `EpisodicConsolidationConfig::dedup_jaccard_threshold`: `0.6`.
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const JACCARD_DUPLICATE_THRESHOLD: f32 = 0.6;
    JACCARD_DUPLICATE_THRESHOLD
}
1438
1439// ── Five-signal SYNAPSE retrieval config (issue #4374) ────────────────────────
1440
/// Serde default for `FiveSignalConfig::w_recency`: `0.35`.
fn default_five_signal_w_recency() -> f64 {
    const RECENCY_WEIGHT: f64 = 0.35;
    RECENCY_WEIGHT
}
1444
/// Serde default for `FiveSignalConfig::w_relevance`: `0.35`.
fn default_five_signal_w_relevance() -> f64 {
    const RELEVANCE_WEIGHT: f64 = 0.35;
    RELEVANCE_WEIGHT
}
1448
/// Serde default for `FiveSignalConfig::causal_bfs_max_depth`: `10`.
fn default_causal_bfs_max_depth() -> u32 {
    const MAX_BFS_DEPTH: u32 = 10;
    MAX_BFS_DEPTH
}
1452
/// Serde default for `FiveSignalConfig::neutral_causal_distance`: `5`.
fn default_neutral_causal_distance() -> u32 {
    const NEUTRAL_DISTANCE: u32 = 5;
    NEUTRAL_DISTANCE
}
1456
/// Serde default for `FiveSignalConfig::novelty_decay_rate`: `0.1`.
fn default_novelty_decay_rate() -> f64 {
    const NOVELTY_DECAY_LAMBDA: f64 = 0.1;
    NOVELTY_DECAY_LAMBDA
}
1460
/// Serde default for `FiveSignalConsolidationConfig::interval_seconds`: `7200` seconds (2 hours).
fn default_five_signal_interval_seconds() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
1464
/// Serde default for `FiveSignalConsolidationConfig::batch_size`: `500` facts per run.
fn default_five_signal_batch_size() -> usize {
    const FACTS_PER_RUN: usize = 500;
    FACTS_PER_RUN
}
1468
/// Serde default for `FiveSignalConsolidationConfig::daemon_max_runtime_ms`: `30000` ms.
fn default_five_signal_daemon_max_runtime_ms() -> u64 {
    const THIRTY_SECONDS_MS: u64 = 30 * 1_000;
    THIRTY_SECONDS_MS
}
1472
/// Serde default for `FiveSignalConsolidationConfig::promotion_score_threshold`: `0.70`.
fn default_five_signal_promotion_score_threshold() -> f64 {
    const PROMOTION_THRESHOLD: f64 = 0.70;
    PROMOTION_THRESHOLD
}
1476
/// Serde default for `FiveSignalConsolidationConfig::demotion_score_threshold`: `0.20`.
fn default_five_signal_demotion_score_threshold() -> f64 {
    const DEMOTION_THRESHOLD: f64 = 0.20;
    DEMOTION_THRESHOLD
}
1480
/// Serde default for `FiveSignalConsolidationConfig::top_k_per_run`: `500` facts queried per run.
fn default_five_signal_top_k_per_run() -> usize {
    const TOP_K: usize = 500;
    TOP_K
}
1484
/// Five-signal SYNAPSE retrieval configuration (issue #4374).
///
/// Extends SYNAPSE recall with three additional signals — access frequency, causal
/// distance, and novelty — beyond the two-signal baseline (recency + relevance).
/// All new signal weights default to `0.0`, preserving exact backward compatibility.
///
/// Every field carries its own `#[serde(default …)]` attribute, so any subset of keys may be
/// omitted from the TOML table.
///
/// # Example (TOML)
///
/// The example below turns the feature on and assigns a weight to all five signals; it does
/// NOT show the defaults. Out of the box `enabled = false`, `w_recency` and `w_relevance`
/// are `0.35` each, and the three new weights are `0.0`.
///
/// ```toml
/// [memory.five_signal]
/// enabled = true
/// w_recency   = 0.35
/// w_relevance = 0.35
/// w_frequency = 0.15
/// w_causal    = 0.10
/// w_novelty   = 0.05
///
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConfig {
    /// Master switch. When `false`, the five-signal code path contributes zero overhead.
    #[serde(default)]
    pub enabled: bool,
    /// Weight for the recency signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_recency")]
    pub w_recency: f64,
    /// Weight for the semantic relevance signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_relevance")]
    pub w_relevance: f64,
    /// Weight for the access frequency signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_frequency: f64,
    /// Weight for the causal distance signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_causal: f64,
    /// Weight for the novelty signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_novelty: f64,
    /// Maximum BFS depth for causal distance computation. Default: `10`.
    #[serde(default = "default_causal_bfs_max_depth")]
    pub causal_bfs_max_depth: u32,
    /// Causal distance assigned when no goal entity is set or a fact lies beyond
    /// `causal_bfs_max_depth`. Default: `5`.
    #[serde(default = "default_neutral_causal_distance")]
    pub neutral_causal_distance: u32,
    /// Decay rate λ in `exp(-λ × days)` for the novelty signal. Default: `0.1`.
    #[serde(default = "default_novelty_decay_rate")]
    pub novelty_decay_rate: f64,
    /// Async consolidation daemon that promotes hot episodic facts to Qdrant.
    ///
    /// Configured under `[memory.five_signal.consolidation_daemon]`; an absent table yields
    /// `FiveSignalConsolidationConfig::default()`.
    #[serde(default)]
    pub consolidation_daemon: FiveSignalConsolidationConfig,
}
1540
1541impl Default for FiveSignalConfig {
1542    fn default() -> Self {
1543        Self {
1544            enabled: false,
1545            w_recency: default_five_signal_w_recency(),
1546            w_relevance: default_five_signal_w_relevance(),
1547            w_frequency: 0.0,
1548            w_causal: 0.0,
1549            w_novelty: 0.0,
1550            causal_bfs_max_depth: default_causal_bfs_max_depth(),
1551            neutral_causal_distance: default_neutral_causal_distance(),
1552            novelty_decay_rate: default_novelty_decay_rate(),
1553            consolidation_daemon: FiveSignalConsolidationConfig::default(),
1554        }
1555    }
1556}
1557
/// Async consolidation daemon configuration for five-signal retrieval (issue #4374).
///
/// When `enabled = true`, a background task runs at `interval_seconds` intervals,
/// evaluates the top `top_k_per_run` episodic facts by five-signal score, promotes
/// facts above `promotion_score_threshold` to Qdrant, and demotes facts below
/// `demotion_score_threshold` to `episodic_only` tier.
///
/// Every field carries its own `#[serde(default …)]` attribute, so any subset of keys may be
/// omitted from the TOML table.
///
/// # Example (TOML)
///
/// Apart from `enabled` (default `false`), the values shown are the defaults.
///
/// ```toml
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// batch_size = 500
/// daemon_max_runtime_ms = 30000
/// promotion_score_threshold = 0.70
/// demotion_score_threshold = 0.20
/// top_k_per_run = 500
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConsolidationConfig {
    /// Enable the daemon. Requires the `scheduler` feature. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Interval between daemon runs in seconds. Default: `7200` (2 hours).
    #[serde(default = "default_five_signal_interval_seconds")]
    pub interval_seconds: u64,
    /// Maximum facts processed (embed + upsert) per run. Default: `500`.
    #[serde(default = "default_five_signal_batch_size")]
    pub batch_size: usize,
    /// Hard timeout per run in milliseconds. Default: `30000`.
    #[serde(default = "default_five_signal_daemon_max_runtime_ms")]
    pub daemon_max_runtime_ms: u64,
    /// Five-signal score above which a fact is promoted to Qdrant. Default: `0.70`.
    #[serde(default = "default_five_signal_promotion_score_threshold")]
    pub promotion_score_threshold: f64,
    /// Five-signal score below which a promoted fact is demoted. Default: `0.20`.
    #[serde(default = "default_five_signal_demotion_score_threshold")]
    pub demotion_score_threshold: f64,
    /// Number of episodic facts queried per run (SQL LIMIT). Must be >= `batch_size`.
    /// Default: `500`.
    // NOTE(review): the `>= batch_size` requirement is not enforced by this type or by
    // deserialization; confirm it is validated where the daemon is constructed.
    #[serde(default = "default_five_signal_top_k_per_run")]
    pub top_k_per_run: usize,
}
1600
1601impl Default for FiveSignalConsolidationConfig {
1602    fn default() -> Self {
1603        Self {
1604            enabled: false,
1605            interval_seconds: default_five_signal_interval_seconds(),
1606            batch_size: default_five_signal_batch_size(),
1607            daemon_max_runtime_ms: default_five_signal_daemon_max_runtime_ms(),
1608            promotion_score_threshold: default_five_signal_promotion_score_threshold(),
1609            demotion_score_threshold: default_five_signal_demotion_score_threshold(),
1610            top_k_per_run: default_five_signal_top_k_per_run(),
1611        }
1612    }
1613}
1614
/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
///
/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
/// extracts durable factual statements via LLM, deduplicates them against existing
/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
///
/// Every key is optional. The numeric fields name an explicit `default_episodic_consolidation_*`
/// function; the remaining fields fall back to this type's [`Default`] implementation through
/// the container-level `#[serde(default)]`.
///
/// # Example (TOML)
///
/// The values shown are the defaults.
///
/// ```toml
/// [memory.episodic_consolidation]
/// enabled = false
/// consolidation_provider = ""
/// interval_secs = 1800
/// batch_size = 30
/// min_age_secs = 300
/// dedup_jaccard_threshold = 0.6
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EpisodicConsolidationConfig {
    /// Enable the episodic consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
    /// Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
    #[serde(default = "default_episodic_consolidation_interval_secs")]
    pub interval_secs: u64,
    /// Maximum number of episodic events to process per sweep. Default: `30`.
    #[serde(default = "default_episodic_consolidation_batch_size")]
    pub batch_size: usize,
    /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
    /// Prevents consolidating events from the active conversation.
    #[serde(default = "default_episodic_consolidation_min_age_secs")]
    pub min_age_secs: u64,
    /// Jaccard similarity threshold for deduplication against existing key facts.
    /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
    #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
    pub dedup_jaccard_threshold: f32,
}
1656
1657impl Default for EpisodicConsolidationConfig {
1658    fn default() -> Self {
1659        Self {
1660            enabled: false,
1661            consolidation_provider: ProviderName::default(),
1662            interval_secs: default_episodic_consolidation_interval_secs(),
1663            batch_size: default_episodic_consolidation_batch_size(),
1664            min_age_secs: default_episodic_consolidation_min_age_secs(),
1665            dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1666        }
1667    }
1668}
1669
/// Default low-confidence threshold for retrieval failure tracking: `0.3`.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const LOW_CONFIDENCE_THRESHOLD: f32 = 0.3;
    LOW_CONFIDENCE_THRESHOLD
}
1673
/// Default retention period for retrieval failure records: `90` days.
fn default_retrieval_failures_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
1677
/// Default channel capacity for retrieval failure tracking: `256`.
// NOTE(review): the consuming field is not visible in this part of the file; presumably a
// `RetrievalFailuresConfig` field — confirm.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CHANNEL_CAPACITY: usize = 256;
    CHANNEL_CAPACITY
}
1681
/// Default batch size for retrieval failure tracking: `16`.
// NOTE(review): the consuming field is not visible in this part of the file; presumably a
// `RetrievalFailuresConfig` field — confirm.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH_SIZE: usize = 16;
    BATCH_SIZE
}
1685
/// Default flush interval for retrieval failure tracking: `100` milliseconds.
// NOTE(review): the consuming field is not visible in this part of the file; presumably a
// `RetrievalFailuresConfig` field — confirm.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const FLUSH_INTERVAL_MS: u64 = 100;
    FLUSH_INTERVAL_MS
}
1689
/// Serde default for the `crossover_turn_threshold` memory setting: `20` turns.
fn default_crossover_turn_threshold() -> u32 {
    const CROSSOVER_TURNS: u32 = 20;
    CROSSOVER_TURNS
}
1693
/// Serde default for the `key_facts_dedup_threshold` memory setting: cosine similarity `0.95`.
fn default_key_facts_dedup_threshold() -> f32 {
    const DEDUP_COSINE_THRESHOLD: f32 = 0.95;
    DEDUP_COSINE_THRESHOLD
}
1697
1698/// Session digest configuration (#2289).
1699#[derive(Debug, Clone, Deserialize, Serialize)]
1700#[serde(default)]
1701pub struct DigestConfig {
1702    /// Enable session digest generation at session end. Default: `false`.
1703    pub enabled: bool,
1704    /// Provider name from `[[llm.providers]]` for digest generation.
1705    /// Falls back to the primary provider when `None`.
1706    #[serde(default)]
1707    pub provider: Option<ProviderName>,
1708    /// Maximum tokens for the digest text. Default: `500`.
1709    pub max_tokens: usize,
1710    /// Maximum messages to feed into the digest prompt. Default: `50`.
1711    pub max_input_messages: usize,
1712}
1713
1714impl Default for DigestConfig {
1715    fn default() -> Self {
1716        Self {
1717            enabled: false,
1718            provider: None,
1719            max_tokens: 500,
1720            max_input_messages: 50,
1721        }
1722    }
1723}
1724
1725/// Context assembly strategy (#2288).
1726#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
1727#[serde(rename_all = "snake_case")]
1728#[non_exhaustive]
1729pub enum ContextStrategy {
1730    /// Full conversation history trimmed to budget, with memory augmentation.
1731    /// This is the default and existing behavior.
1732    #[default]
1733    FullHistory,
1734    /// Drop conversation history; assemble context from summaries, semantic recall,
1735    /// cross-session memory, and session digest only.
1736    MemoryFirst,
1737    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
1738    /// `crossover_turn_threshold`.
1739    Adaptive,
1740}
1741
1742/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
1743#[derive(Debug, Clone, Deserialize, Serialize)]
1744#[serde(default)]
1745pub struct SessionsConfig {
1746    /// Maximum number of sessions returned by list operations (0 = unlimited).
1747    #[serde(default = "default_max_history")]
1748    pub max_history: usize,
1749    /// Maximum characters for auto-generated session titles.
1750    #[serde(default = "default_title_max_chars")]
1751    pub title_max_chars: usize,
1752}
1753
1754impl Default for SessionsConfig {
1755    fn default() -> Self {
1756        Self {
1757            max_history: default_max_history(),
1758            title_max_chars: default_title_max_chars(),
1759        }
1760    }
1761}
1762
1763/// Configuration for the document ingestion and RAG retrieval pipeline.
1764#[derive(Debug, Clone, Deserialize, Serialize)]
1765pub struct DocumentConfig {
1766    #[serde(default = "default_document_collection")]
1767    pub collection: String,
1768    #[serde(default = "default_document_chunk_size")]
1769    pub chunk_size: usize,
1770    #[serde(default = "default_document_chunk_overlap")]
1771    pub chunk_overlap: usize,
1772    /// Number of document chunks to inject into agent context per turn.
1773    #[serde(default = "default_document_top_k")]
1774    pub top_k: usize,
1775    /// Enable document RAG injection into agent context.
1776    #[serde(default)]
1777    pub rag_enabled: bool,
1778}
1779
1780impl Default for DocumentConfig {
1781    fn default() -> Self {
1782        Self {
1783            collection: default_document_collection(),
1784            chunk_size: default_document_chunk_size(),
1785            chunk_overlap: default_document_chunk_overlap(),
1786            top_k: default_document_top_k(),
1787            rag_enabled: false,
1788        }
1789    }
1790}
1791
1792/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1793///
1794/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1795/// re-ranking, and hybrid BM25+vector weighting.
1796///
1797/// # Example (TOML)
1798///
1799/// ```toml
1800/// [memory.semantic]
1801/// enabled = true
1802/// recall_limit = 5
1803/// vector_weight = 0.7
1804/// keyword_weight = 0.3
1805/// mmr_lambda = 0.7
1806/// ```
1807#[derive(Debug, Deserialize, Serialize)]
1808#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1809pub struct SemanticConfig {
1810    /// Enable vector-based semantic recall. Default: `true`.
1811    #[serde(default = "default_semantic_enabled")]
1812    pub enabled: bool,
1813    #[serde(default = "default_recall_limit")]
1814    pub recall_limit: usize,
1815    #[serde(default = "default_vector_weight")]
1816    pub vector_weight: f64,
1817    #[serde(default = "default_keyword_weight")]
1818    pub keyword_weight: f64,
1819    #[serde(default = "default_true")]
1820    pub temporal_decay_enabled: bool,
1821    #[serde(default = "default_temporal_decay_half_life_days")]
1822    pub temporal_decay_half_life_days: u32,
1823    #[serde(default = "default_true")]
1824    pub mmr_enabled: bool,
1825    #[serde(default = "default_mmr_lambda")]
1826    pub mmr_lambda: f32,
1827    #[serde(default = "default_true")]
1828    pub importance_enabled: bool,
1829    #[serde(
1830        default = "default_importance_weight",
1831        deserialize_with = "validate_importance_weight"
1832    )]
1833    pub importance_weight: f64,
1834    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1835    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1836    /// from contending with the guardrail at the API server level (rate limits, Ollama
1837    /// single-model lock). Falls back to the main agent provider when `None`.
1838    #[serde(default)]
1839    pub embedding_provider: Option<ProviderName>,
1840    /// Timeout in seconds applied to every `embed()` call inside `zeph-memory`.
1841    ///
1842    /// Applies to all embedding call sites: admission control, quality gate, recall,
1843    /// summarization, graph retrieval, consolidation, and tree consolidation.
1844    /// Set to a higher value when using slow remote embedding providers.
1845    /// Default: `5`.
1846    #[serde(default = "default_embed_timeout_secs")]
1847    pub embed_timeout_secs: u64,
1848}
1849
1850impl Default for SemanticConfig {
1851    fn default() -> Self {
1852        Self {
1853            enabled: default_semantic_enabled(),
1854            recall_limit: default_recall_limit(),
1855            vector_weight: default_vector_weight(),
1856            keyword_weight: default_keyword_weight(),
1857            temporal_decay_enabled: true,
1858            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1859            mmr_enabled: true,
1860            mmr_lambda: default_mmr_lambda(),
1861            importance_enabled: true,
1862            importance_weight: default_importance_weight(),
1863            embedding_provider: None,
1864            embed_timeout_secs: default_embed_timeout_secs(),
1865        }
1866    }
1867}
1868
/// Serde default for `SemanticConfig::embed_timeout_secs`: `5` seconds.
fn default_embed_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 5;
    TIMEOUT_SECS
}
1872
1873/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
1874///
1875/// Controls how each recalled memory entry is presented in the assembled prompt.
1876/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
1877/// always contain the raw message text. The format is applied exclusively during
1878/// context assembly and is never persisted.
1879///
1880/// # Token cost
1881///
1882/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
1883/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
1884#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
1885#[serde(rename_all = "snake_case")]
1886#[non_exhaustive]
1887pub enum ContextFormat {
1888    /// Emit a labeled header per snippet:
1889    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
1890    ///
1891    /// This is the default. Gives the LLM structured provenance metadata for each recalled
1892    /// memory without re-parsing the recall body.
1893    #[default]
1894    Structured,
1895    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
1896    ///
1897    /// Use `Plain` when downstream consumers rely on the old format or when token budget
1898    /// is tight and provenance headers are not needed.
1899    Plain,
1900}
1901
1902/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
1903///
1904/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
1905/// Nested under `[memory.retrieval]` in TOML.  All fields have defaults so existing
1906/// configs parse unchanged.
1907///
1908/// # Example (TOML)
1909///
1910/// ```toml
1911/// [memory.retrieval]
1912/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
1913/// # search_prompt_template = ""
1914/// # context_format = "structured"
1915/// ```
1916#[derive(Debug, Clone, Deserialize, Serialize)]
1917#[serde(default)]
1918pub struct RetrievalConfig {
1919    /// Number of ANN candidates fetched from the vector store before keyword merge,
1920    /// temporal decay, and MMR re-ranking.
1921    ///
1922    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
1923    ///   to pre-#3340 deployments.
1924    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
1925    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
1926    ///   size, or higher for better MMR diversity.
1927    ///
1928    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
1929    /// cannot saturate the requested top-k.
1930    pub depth: u32,
1931    /// Template applied to the raw user query before embedding.
1932    ///
1933    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
1934    /// Empty string (default) = identity: the query is embedded as-is.
1935    ///
1936    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
1937    /// is never wrapped.  Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
1938    pub search_prompt_template: String,
1939    /// Shape of memory snippets injected into agent context.
1940    ///
1941    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
1942    /// Default: `Structured`.
1943    pub context_format: ContextFormat,
1944    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
1945    ///
1946    /// When `true` and the query is classified as first-person, the query embedding is
1947    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
1948    /// towards persona-relevant content for self-referential queries.
1949    ///
1950    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
1951    #[serde(default = "default_query_bias_correction")]
1952    pub query_bias_correction: bool,
1953    /// Blend weight for query-bias correction (MM-F3, #3341).
1954    ///
1955    /// Controls how much the query embedding shifts towards the profile centroid.
1956    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
1957    #[serde(default = "default_query_bias_profile_weight")]
1958    pub query_bias_profile_weight: f32,
1959    /// Centroid TTL in seconds (MM-F3, #3341).
1960    ///
1961    /// The profile centroid computed from persona facts is cached for this many seconds.
1962    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
1963    #[serde(default = "default_query_bias_centroid_ttl_secs")]
1964    pub query_bias_centroid_ttl_secs: u64,
1965}
1966
/// Serde default for `RetrievalConfig::query_bias_correction`: enabled.
fn default_query_bias_correction() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1970
/// Serde default for `RetrievalConfig::query_bias_profile_weight`: `0.25`.
fn default_query_bias_profile_weight() -> f32 {
    const WEIGHT: f32 = 0.25;
    WEIGHT
}
1974
/// Serde default for `RetrievalConfig::query_bias_centroid_ttl_secs`: `300` seconds.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    const TTL_SECS: u64 = 300;
    TTL_SECS
}
1978
1979impl Default for RetrievalConfig {
1980    fn default() -> Self {
1981        Self {
1982            depth: 0,
1983            search_prompt_template: String::new(),
1984            context_format: ContextFormat::default(),
1985            query_bias_correction: default_query_bias_correction(),
1986            query_bias_profile_weight: default_query_bias_profile_weight(),
1987            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1988        }
1989    }
1990}
1991
1992/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
1993///
1994/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
1995/// recall traversal increments the `weight` column of the traversed edges, building
1996/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
1997/// runs a background sweep that identifies high-traffic entity clusters and distills
1998/// them into `graph_rules` entries via an LLM.
1999#[derive(Debug, Clone, Deserialize, Serialize)]
2000#[serde(default)]
2001pub struct HebbianConfig {
2002    /// Master switch. When `false`, no `weight` updates are written to the database
2003    /// and the consolidation loop does not start. Default: `false`.
2004    pub enabled: bool,
2005    /// Weight increment per co-activation (HL-F2, #3344).
2006    ///
2007    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
2008    /// startup when `enabled = true`. Default: `0.1`.
2009    pub hebbian_lr: f32,
2010    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
2011    ///
2012    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
2013    /// Default: `3600` (one hour).
2014    pub consolidation_interval_secs: u64,
2015    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
2016    /// candidate (HL-F3, #3345). Default: `5.0`.
2017    pub consolidation_threshold: f64,
2018    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
2019    ///
2020    /// Falls back to the main provider when `None` or unresolvable.
2021    #[serde(default)]
2022    pub consolidate_provider: Option<ProviderName>,
2023    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
2024    pub max_candidates_per_sweep: usize,
2025    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
2026    ///
2027    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
2028    /// Default: `86400` (24 hours).
2029    pub consolidation_cooldown_secs: u64,
2030    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
2031    /// Default: `30`.
2032    pub consolidation_prompt_timeout_secs: u64,
2033    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
2034    /// (HL-F4, #3345). Default: `20`.
2035    pub consolidation_max_neighbors: usize,
2036    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
2037    ///
2038    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
2039    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
2040    pub spreading_activation: bool,
2041    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
2042    pub spread_depth: u32,
2043    /// MAGMA edge-type filter for HL-F5 spreading activation.
2044    ///
2045    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
2046    /// Empty = traverse all edge types. Default: `[]`.
2047    pub spread_edge_types: Vec<EdgeType>,
2048    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
2049    ///
2050    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
2051    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
2052    pub step_budget_ms: u64,
2053    /// Timeout for the initial query embedding call in HL-F5, in seconds.
2054    ///
2055    /// `0` disables the timeout. Default: `5`.
2056    pub embed_timeout_secs: u64,
2057}
2058
2059impl Default for HebbianConfig {
2060    fn default() -> Self {
2061        Self {
2062            enabled: false,
2063            hebbian_lr: 0.1,
2064            consolidation_interval_secs: 3600,
2065            consolidation_threshold: 5.0,
2066            consolidate_provider: None,
2067            max_candidates_per_sweep: 10,
2068            consolidation_cooldown_secs: 86_400,
2069            consolidation_prompt_timeout_secs: 30,
2070            consolidation_max_neighbors: 20,
2071            spreading_activation: false,
2072            spread_depth: 2,
2073            spread_edge_types: Vec::new(),
2074            step_budget_ms: 8,
2075            embed_timeout_secs: 5,
2076        }
2077    }
2078}
2079
2080/// Compression strategy for active context compression (#1161).
2081#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
2082#[serde(tag = "strategy", rename_all = "snake_case")]
2083#[non_exhaustive]
2084pub enum CompressionStrategy {
2085    /// Compress only when reactive compaction fires (current behavior).
2086    #[default]
2087    Reactive,
2088    /// Compress proactively when context exceeds `threshold_tokens`.
2089    Proactive {
2090        /// Token count that triggers proactive compression.
2091        threshold_tokens: usize,
2092        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
2093        max_summary_tokens: usize,
2094    },
2095    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
2096    /// safety net. The `compress_context` tool is also available in all other strategies.
2097    Autonomous,
2098    /// Knowledge-block-aware compression strategy (#2510).
2099    ///
2100    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
2101    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
2102    Focus,
2103}
2104
2105/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
2106///
2107/// When `context-compression` feature is enabled, this replaces the default oldest-first
2108/// heuristic with scored eviction.
2109#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
2110#[serde(rename_all = "snake_case")]
2111#[non_exhaustive]
2112pub enum PruningStrategy {
2113    /// Oldest-first eviction — current default behavior.
2114    #[default]
2115    Reactive,
2116    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
2117    /// lowest-first. Requires `context-compression` feature.
2118    TaskAware,
2119    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
2120    /// Requires `context-compression` feature.
2121    Mig,
2122    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
2123    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
2124    /// Requires `context-compression` feature.
2125    Subgoal,
2126    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
2127    /// Requires `context-compression` feature.
2128    SubgoalMig,
2129}
2130
2131impl PruningStrategy {
2132    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
2133    #[must_use]
2134    pub fn is_subgoal(self) -> bool {
2135        matches!(self, Self::Subgoal | Self::SubgoalMig)
2136    }
2137}
2138
2139// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
2140// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
2141impl<'de> serde::Deserialize<'de> for PruningStrategy {
2142    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
2143        let s = String::deserialize(deserializer)?;
2144        s.parse().map_err(serde::de::Error::custom)
2145    }
2146}
2147
2148impl std::str::FromStr for PruningStrategy {
2149    type Err = String;
2150
2151    fn from_str(s: &str) -> Result<Self, Self::Err> {
2152        match s {
2153            "reactive" => Ok(Self::Reactive),
2154            "task_aware" | "task-aware" => Ok(Self::TaskAware),
2155            "mig" => Ok(Self::Mig),
2156            // task_aware_mig was removed (dead code — was routed to scored path only).
2157            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
2158            "task_aware_mig" | "task-aware-mig" => {
2159                tracing::warn!(
2160                    "pruning strategy `task_aware_mig` has been removed; \
2161                     falling back to `reactive`. Use `task_aware` or `mig` instead."
2162                );
2163                Ok(Self::Reactive)
2164            }
2165            "subgoal" => Ok(Self::Subgoal),
2166            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
2167            other => Err(format!(
2168                "unknown pruning strategy `{other}`, expected \
2169                 reactive|task_aware|mig|subgoal|subgoal_mig"
2170            )),
2171        }
2172    }
2173}
2174
/// Serde default for `CompressionConfig::high_density_budget`: `0.7`.
fn default_high_density_budget() -> f32 {
    const BUDGET: f32 = 0.7;
    BUDGET
}
2178
/// Serde default for `CompressionConfig::low_density_budget`: `0.3`.
fn default_low_density_budget() -> f32 {
    const BUDGET: f32 = 0.3;
    BUDGET
}
2182
2183/// Configuration for the `SleepGate` forgetting sweep (#2397).
2184///
2185/// When `enabled = true`, a background loop periodically decays importance scores
2186/// (synaptic downscaling), restores recently-accessed memories (selective replay),
2187/// and prunes memories below `forgetting_floor` (targeted forgetting).
2188#[derive(Debug, Clone, Deserialize, Serialize)]
2189#[serde(default)]
2190pub struct ForgettingConfig {
2191    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
2192    pub enabled: bool,
2193    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
2194    pub decay_rate: f32,
2195    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
2196    pub forgetting_floor: f32,
2197    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
2198    pub sweep_interval_secs: u64,
2199    /// Maximum messages to process per sweep. Default: `500`.
2200    pub sweep_batch_size: usize,
2201    /// Hours: messages accessed within this window get replay protection. Default: `24`.
2202    pub replay_window_hours: u32,
2203    /// Messages with `access_count` >= this get replay protection. Default: `3`.
2204    pub replay_min_access_count: u32,
2205    /// Hours: never prune messages accessed within this window. Default: `24`.
2206    pub protect_recent_hours: u32,
2207    /// Never prune messages with `access_count` >= this. Default: `3`.
2208    pub protect_min_access_count: u32,
2209}
2210
2211impl Default for ForgettingConfig {
2212    fn default() -> Self {
2213        Self {
2214            enabled: false,
2215            decay_rate: 0.1,
2216            forgetting_floor: 0.05,
2217            sweep_interval_secs: 7200,
2218            sweep_batch_size: 500,
2219            replay_window_hours: 24,
2220            replay_min_access_count: 3,
2221            protect_recent_hours: 24,
2222            protect_min_access_count: 3,
2223        }
2224    }
2225}
2226
2227/// Configuration for active context compression (#1161).
2228#[derive(Debug, Clone, Default, Deserialize, Serialize)]
2229#[serde(default)]
2230pub struct CompressionConfig {
2231    /// Compression strategy.
2232    #[serde(flatten)]
2233    pub strategy: CompressionStrategy,
2234    /// Tool-output pruning strategy (requires `context-compression` feature).
2235    pub pruning_strategy: PruningStrategy,
2236    /// Model to use for compression summaries.
2237    ///
2238    /// Currently unused — the primary summary provider is used regardless of this value.
2239    /// Reserved for future per-compression model selection. Setting this field has no effect.
2240    pub model: String,
2241    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
2242    /// Falls back to the primary provider when empty. Default: `""`.
2243    pub compress_provider: ProviderName,
2244    /// Compaction probe: validates summary quality before committing it (#1609).
2245    #[serde(default)]
2246    pub probe: CompactionProbeConfig,
2247    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
2248    ///
2249    /// When enabled, tool output bodies in the compaction range are saved to
2250    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
2251    /// The LLM summarizes placeholder messages; archived content is appended as
2252    /// a postfix after summarization so references survive compaction.
2253    /// Default: `false`.
2254    #[serde(default)]
2255    pub archive_tool_outputs: bool,
2256    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
2257    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
2258    /// Falls back to the primary provider when empty. Default: `""`.
2259    pub focus_scorer_provider: ProviderName,
2260    /// Token-budget fraction for high-density content in density-aware compression (#2481).
2261    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
2262    #[serde(default = "default_high_density_budget")]
2263    pub high_density_budget: f32,
2264    /// Token-budget fraction for low-density content in density-aware compression (#2481).
2265    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
2266    #[serde(default = "default_low_density_budget")]
2267    pub low_density_budget: f32,
2268    /// Typed-page classification and batch-level assertion checking (#3630).
2269    #[serde(default)]
2270    pub typed_pages: TypedPagesConfig,
2271    /// Acon tool-result compression settings (#4021).
2272    ///
2273    /// Controls per-result and batch-level token budgets for tool outputs before they enter
2274    /// message history. Distinct from `[tools.compression]` (TACO), which applies regex-based
2275    /// rule compression at the executor level.
2276    #[serde(default)]
2277    pub acon: AconConfig,
2278    /// ARC agent-initiated compaction settings (#4020).
2279    ///
2280    /// When `allow_agent_compaction = true`, the agent can call the `request_compaction`
2281    /// internal tool to trigger context summarization on demand.
2282    #[serde(default)]
2283    pub arc: ArcCompactionConfig,
2284}
2285
/// Default for `AconConfig::passthrough_threshold`: `2000` tokens.
fn default_acon_passthrough_threshold() -> usize {
    const TOKENS: usize = 2000;
    TOKENS
}
2289
/// Default for `AconConfig::summarize_threshold`: `4000` tokens.
fn default_acon_summarize_threshold() -> usize {
    const TOKENS: usize = 4000;
    TOKENS
}
2293
/// Default for `AconConfig::total_budget`: `8000` tokens.
fn default_acon_total_budget() -> usize {
    const TOKENS: usize = 8000;
    TOKENS
}
2297
2298fn validate_acon_passthrough_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2299where
2300    D: serde::Deserializer<'de>,
2301{
2302    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2303    if value == 0 {
2304        return Err(serde::de::Error::custom(
2305            "acon.passthrough_threshold must be >= 1",
2306        ));
2307    }
2308    Ok(value)
2309}
2310
2311fn validate_acon_summarize_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2312where
2313    D: serde::Deserializer<'de>,
2314{
2315    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2316    if value == 0 {
2317        return Err(serde::de::Error::custom(
2318            "acon.summarize_threshold must be >= 1",
2319        ));
2320    }
2321    Ok(value)
2322}
2323
2324fn validate_acon_total_budget<'de, D>(deserializer: D) -> Result<usize, D::Error>
2325where
2326    D: serde::Deserializer<'de>,
2327{
2328    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2329    if value == 0 {
2330        return Err(serde::de::Error::custom("acon.total_budget must be >= 1"));
2331    }
2332    Ok(value)
2333}
2334
2335/// Token budget configuration for Acon tool-result compression (#4021).
2336///
2337/// Controls per-result and batch-level token budgets for tool outputs injected into context.
2338/// Distinct from `[tools.compression]` (TACO), which applies regex-based rule compression
2339/// at the executor level.
2340///
2341/// # Invariants
2342///
2343/// The following ordering must hold: `passthrough_threshold < summarize_threshold <= total_budget`.
2344/// A config where `passthrough_threshold >= summarize_threshold` would make the summarization path
2345/// unreachable, silently producing incorrect compression behavior.
2346///
2347/// # Example (TOML)
2348///
2349/// ```toml
2350/// [memory.compression.acon]
2351/// enabled = true
2352/// passthrough_threshold = 2000
2353/// summarize_threshold = 4000
2354/// total_budget = 8000
2355/// ```
2356#[derive(Debug, Clone, Deserialize, Serialize)]
2357#[serde(default)]
2358pub struct AconConfig {
2359    /// Enable Acon tool-result compression. Default: `true`.
2360    pub enabled: bool,
2361    /// Token count below which results pass through unchanged.
2362    /// Also the truncation target: results above this get char-truncated to this size.
2363    /// Must be < `summarize_threshold`. Default: `2000`.
2364    #[serde(default = "default_acon_passthrough_threshold")]
2365    #[serde(deserialize_with = "validate_acon_passthrough_threshold")]
2366    pub passthrough_threshold: usize,
2367    /// Token count above which LLM summarization should be attempted before truncation.
2368    /// Must be > `passthrough_threshold` and <= `total_budget`. Default: `4000`.
2369    #[serde(default = "default_acon_summarize_threshold")]
2370    #[serde(deserialize_with = "validate_acon_summarize_threshold")]
2371    pub summarize_threshold: usize,
2372    /// Maximum total tokens for all tool results in a single turn.
2373    /// Must be >= `summarize_threshold`. Default: `8000`.
2374    #[serde(default = "default_acon_total_budget")]
2375    #[serde(deserialize_with = "validate_acon_total_budget")]
2376    pub total_budget: usize,
2377    /// Provider name from `[[llm.providers]]` for LLM summarization of large results.
2378    /// Falls back to the primary provider when empty. Default: `""`.
2379    #[serde(default)]
2380    pub summarize_provider: ProviderName,
2381}
2382
2383impl AconConfig {
2384    /// Validate threshold ordering invariants after deserialization.
2385    ///
2386    /// Returns an error string if `passthrough_threshold >= summarize_threshold` or
2387    /// `summarize_threshold > total_budget`.
2388    ///
2389    /// # Errors
2390    ///
2391    /// Returns a descriptive error string when any threshold invariant is violated.
2392    pub fn validate(&self) -> Result<(), String> {
2393        if self.passthrough_threshold >= self.summarize_threshold {
2394            return Err(format!(
2395                "acon: passthrough_threshold ({}) must be < summarize_threshold ({})",
2396                self.passthrough_threshold, self.summarize_threshold
2397            ));
2398        }
2399        if self.summarize_threshold > self.total_budget {
2400            return Err(format!(
2401                "acon: summarize_threshold ({}) must be <= total_budget ({})",
2402                self.summarize_threshold, self.total_budget
2403            ));
2404        }
2405        Ok(())
2406    }
2407}
2408
2409impl Default for AconConfig {
2410    fn default() -> Self {
2411        Self {
2412            enabled: true,
2413            passthrough_threshold: default_acon_passthrough_threshold(),
2414            summarize_threshold: default_acon_summarize_threshold(),
2415            total_budget: default_acon_total_budget(),
2416            summarize_provider: ProviderName::default(),
2417        }
2418    }
2419}
2420
2421/// Configuration for ARC agent-initiated compaction (#4020).
2422///
2423/// When `allow_agent_compaction = true`, the `request_compaction` internal tool is
2424/// registered and the agent can call it to trigger context summarization on demand.
2425/// Rate limiting is handled by `CompactionState` — only one compaction fires per turn.
2426///
2427/// # Example (TOML)
2428///
2429/// ```toml
2430/// [memory.compression.arc]
2431/// allow_agent_compaction = true
2432/// ```
2433#[derive(Debug, Clone, Deserialize, Serialize)]
2434#[serde(default)]
2435pub struct ArcCompactionConfig {
2436    /// Allow the agent to request compaction via the `request_compaction` tool call.
2437    /// Default: `true`.
2438    pub allow_agent_compaction: bool,
2439}
2440
2441impl Default for ArcCompactionConfig {
2442    fn default() -> Self {
2443        Self {
2444            allow_agent_compaction: true,
2445        }
2446    }
2447}
2448
2449/// Configuration for typed-page compaction invariants (#3630).
2450///
2451/// Controls classification, batch-level assertion checking, and audit logging.
2452/// All behavior is disabled by default; set `enabled = true` to activate.
2453///
2454/// # Example (TOML)
2455///
2456/// ```toml
2457/// [memory.compression.typed_pages]
2458/// enabled = true
2459/// enforcement = "active"
2460/// audit_path = ""
2461/// audit_channel_capacity = 256
2462/// ```
2463#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
2464#[serde(default)]
2465pub struct TypedPagesConfig {
2466    /// Enable typed-page classification and batch-level assertion checking.
2467    /// Default: `false`.
2468    pub enabled: bool,
2469    /// Enforcement mode:
2470    ///
2471    /// - `observe`: classify and emit audit records only; no behavioral change.
2472    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
2473    ///
2474    /// Default: `"observe"`.
2475    pub enforcement: TypedPagesEnforcement,
2476    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
2477    /// Default: `""`.
2478    ///
2479    /// # Security
2480    ///
2481    /// This field is **operator-only trusted input** read from the agent's configuration file.
2482    /// Write access to the config file implies file-system write access, so no additional
2483    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
2484    /// configuration sources.
2485    pub audit_path: String,
2486    /// Bounded channel capacity for the async audit writer. Default: `256`.
2487    pub audit_channel_capacity: usize,
2488}
2489
2490impl Default for TypedPagesConfig {
2491    fn default() -> Self {
2492        Self {
2493            enabled: false,
2494            enforcement: TypedPagesEnforcement::Observe,
2495            audit_path: String::new(),
2496            audit_channel_capacity: 256,
2497        }
2498    }
2499}
2500
2501/// Enforcement mode for typed-page compaction (#3630).
2502#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
2503#[serde(rename_all = "snake_case")]
2504#[non_exhaustive]
2505pub enum TypedPagesEnforcement {
2506    /// Classify and audit only. Zero behavioral change relative to the untyped path.
2507    #[default]
2508    Observe,
2509    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
2510    Active,
2511}
2512
/// Serde default for `SidequestConfig::interval_turns`: run eviction every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    const INTERVAL_TURNS: u32 = 4;
    INTERVAL_TURNS
}
2516
/// Serde default for `SidequestConfig::max_eviction_ratio`: evict at most half of the
/// tool outputs per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    const MAX_EVICTION_RATIO: f32 = 0.5;
    MAX_EVICTION_RATIO
}
2520
/// Serde default for `SidequestConfig::max_cursors`: at most 30 cursor entries in the
/// eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    const MAX_CURSORS: usize = 30;
    MAX_CURSORS
}
2524
/// Serde default for `SidequestConfig::min_cursor_tokens`: tool outputs under 100 tokens
/// are not eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    const MIN_CURSOR_TOKENS: usize = 100;
    MIN_CURSOR_TOKENS
}
2528
2529/// Configuration for LLM-driven side-thread tool output eviction (#1885).
2530#[derive(Debug, Clone, Deserialize, Serialize)]
2531#[serde(default)]
2532pub struct SidequestConfig {
2533    /// Enable `SideQuest` eviction. Default: `false`.
2534    pub enabled: bool,
2535    /// Run eviction every N user turns. Default: `4`.
2536    #[serde(default = "default_sidequest_interval_turns")]
2537    pub interval_turns: u32,
2538    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
2539    #[serde(default = "default_sidequest_max_eviction_ratio")]
2540    pub max_eviction_ratio: f32,
2541    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
2542    #[serde(default = "default_sidequest_max_cursors")]
2543    pub max_cursors: usize,
2544    /// Exclude tool outputs smaller than this token count from eviction candidates.
2545    /// Default: `100`.
2546    #[serde(default = "default_sidequest_min_cursor_tokens")]
2547    pub min_cursor_tokens: usize,
2548}
2549
2550impl Default for SidequestConfig {
2551    fn default() -> Self {
2552        Self {
2553            enabled: false,
2554            interval_turns: default_sidequest_interval_turns(),
2555            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
2556            max_cursors: default_sidequest_max_cursors(),
2557            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
2558        }
2559    }
2560}
2561
2562/// Graph retrieval strategy for `[memory.graph]`.
2563///
2564/// Selects the algorithm used to traverse the knowledge graph during recall.
2565/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
2566#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
2567#[serde(rename_all = "snake_case")]
2568#[non_exhaustive]
2569pub enum GraphRetrievalStrategy {
2570    /// SYNAPSE spreading activation (default, existing behavior).
2571    #[default]
2572    Synapse,
2573    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
2574    Bfs,
2575    /// A* shortest-path traversal via petgraph.
2576    #[serde(rename = "astar")]
2577    AStar,
2578    /// Concentric BFS expanding outward from seed nodes.
2579    WaterCircles,
2580    /// Beam search: keep top-K candidates per hop.
2581    BeamSearch,
2582    /// Dynamic: LLM classifier selects strategy per query.
2583    Hybrid,
2584}
2585
/// Serde default for `BeamSearchConfig::beam_width`: keep 10 candidates per hop.
fn default_beam_width() -> usize {
    const BEAM_WIDTH: usize = 10;
    BEAM_WIDTH
}
2589
2590/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
2591///
2592/// Controls the width of the beam during graph traversal: how many top candidates
2593/// are retained at each hop.
2594#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2595pub struct BeamSearchConfig {
2596    /// Number of top candidates kept per hop. Default: `10`.
2597    #[serde(default = "default_beam_width")]
2598    pub beam_width: usize,
2599}
2600
2601impl Default for BeamSearchConfig {
2602    fn default() -> Self {
2603        Self {
2604            beam_width: default_beam_width(),
2605        }
2606    }
2607}
2608
2609/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
2610///
2611/// Controls ring-by-ring concentric BFS traversal from seed nodes.
2612#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
2613pub struct WaterCirclesConfig {
2614    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
2615    #[serde(default)]
2616    pub ring_limit: usize,
2617}
2618
/// Serde default for `ExperienceConfig::evolution_sweep_interval`: 50 turns between sweeps.
fn default_evolution_sweep_interval() -> usize {
    const SWEEP_INTERVAL_TURNS: usize = 50;
    SWEEP_INTERVAL_TURNS
}
2622
/// Serde default for `ExperienceConfig::confidence_prune_threshold`: `0.1`.
fn default_confidence_prune_threshold() -> f32 {
    const PRUNE_THRESHOLD: f32 = 0.1;
    PRUNE_THRESHOLD
}
2626
2627/// Experience memory configuration for `[memory.graph.experience]`.
2628///
2629/// Controls recording of tool execution outcomes and graph evolution sweeps.
2630#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2631pub struct ExperienceConfig {
2632    /// Enable experience memory recording. Default: `false`.
2633    #[serde(default)]
2634    pub enabled: bool,
2635    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
2636    #[serde(default)]
2637    pub evolution_sweep_enabled: bool,
2638    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
2639    #[serde(default = "default_confidence_prune_threshold")]
2640    pub confidence_prune_threshold: f32,
2641    /// Number of turns between evolution sweeps. Default: `50`.
2642    #[serde(default = "default_evolution_sweep_interval")]
2643    pub evolution_sweep_interval: usize,
2644}
2645
2646impl Default for ExperienceConfig {
2647    fn default() -> Self {
2648        Self {
2649            enabled: false,
2650            evolution_sweep_enabled: false,
2651            confidence_prune_threshold: default_confidence_prune_threshold(),
2652            evolution_sweep_interval: default_evolution_sweep_interval(),
2653        }
2654    }
2655}
2656
2657/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
2658///
2659/// # Security
2660///
2661/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
2662/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
2663/// when processing conversations that may contain personal, medical, or sensitive data until
2664/// a redaction pass is implemented on the write path.
2665#[derive(Debug, Clone, Deserialize, Serialize)]
2666#[serde(default)]
2667pub struct GraphConfig {
2668    pub enabled: bool,
2669    pub extract_model: String,
2670    #[serde(default = "default_graph_max_entities_per_message")]
2671    pub max_entities_per_message: usize,
2672    #[serde(default = "default_graph_max_edges_per_message")]
2673    pub max_edges_per_message: usize,
2674    #[serde(default = "default_graph_community_refresh_interval")]
2675    pub community_refresh_interval: usize,
2676    #[serde(default = "default_graph_entity_similarity_threshold")]
2677    pub entity_similarity_threshold: f32,
2678    #[serde(default = "default_graph_extraction_timeout_secs")]
2679    pub extraction_timeout_secs: u64,
2680    #[serde(default)]
2681    pub use_embedding_resolution: bool,
2682    #[serde(default = "default_graph_entity_ambiguous_threshold")]
2683    pub entity_ambiguous_threshold: f32,
2684    #[serde(default = "default_graph_max_hops")]
2685    pub max_hops: u32,
2686    #[serde(default = "default_graph_recall_limit")]
2687    pub recall_limit: usize,
2688    /// Days to retain expired (superseded) edges before deletion. Default: 90.
2689    #[serde(default = "default_graph_expired_edge_retention_days")]
2690    pub expired_edge_retention_days: u32,
2691    /// Maximum entities to retain in the graph. 0 = unlimited.
2692    #[serde(default)]
2693    pub max_entities: usize,
2694    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
2695    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
2696    pub community_summary_max_prompt_bytes: usize,
2697    /// Maximum concurrent LLM calls during community summarization. Default: 4.
2698    #[serde(default = "default_graph_community_summary_concurrency")]
2699    pub community_summary_concurrency: usize,
2700    /// Number of edges fetched per chunk during community detection. Default: 10000.
2701    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
2702    #[serde(default = "default_lpa_edge_chunk_size")]
2703    pub lpa_edge_chunk_size: usize,
2704    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
2705    ///
2706    /// When > 0, recent edges receive a small additive score boost over older edges.
2707    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
2708    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
2709    #[serde(
2710        default = "default_graph_temporal_decay_rate",
2711        deserialize_with = "validate_temporal_decay_rate"
2712    )]
2713    pub temporal_decay_rate: f64,
2714    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
2715    ///
2716    /// Caps the result set returned for a given source entity + predicate pair. Prevents
2717    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
2718    /// or API endpoints.
2719    #[serde(default = "default_graph_edge_history_limit")]
2720    pub edge_history_limit: usize,
2721    /// A-MEM dynamic note linking configuration.
2722    ///
2723    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
2724    /// semantically similar entities via `similar_to` edges. Requires an embedding store
2725    /// (`qdrant` or `sqlite` vector backend) to be configured.
2726    #[serde(default)]
2727    pub note_linking: NoteLinkingConfig,
2728    /// SYNAPSE spreading activation retrieval configuration.
2729    ///
2730    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
2731    /// with lateral inhibition and temporal decay instead of BFS.
2732    #[serde(default)]
2733    pub spreading_activation: SpreadingActivationConfig,
2734    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
2735    ///
2736    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
2737    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
2738    #[serde(default)]
2739    pub retrieval_strategy: GraphRetrievalStrategy,
2740    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
2741    ///
2742    /// When non-empty, graph extraction (and downstream note linking and community
2743    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
2744    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
2745    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
2746    /// which causes systematic quality gate rejections. A named provider built via
2747    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
2748    /// has no quality gate attached.
2749    ///
2750    /// Falls back to the primary provider when empty. Default: `""` (use primary).
2751    #[serde(default)]
2752    pub extract_provider: ProviderName,
2753    /// Named LLM provider for hybrid strategy classification.
2754    /// Falls back to the default provider when `None`.
2755    #[serde(default)]
2756    pub strategy_classifier_provider: Option<ProviderName>,
2757    /// Beam search configuration.
2758    #[serde(default)]
2759    pub beam_search: BeamSearchConfig,
2760    /// `WaterCircles` BFS configuration.
2761    #[serde(default)]
2762    pub watercircles: WaterCirclesConfig,
2763    /// Experience memory configuration.
2764    #[serde(default)]
2765    pub experience: ExperienceConfig,
2766    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
2767    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
2768    #[serde(
2769        default = "default_link_weight_decay_lambda",
2770        deserialize_with = "validate_link_weight_decay_lambda"
2771    )]
2772    pub link_weight_decay_lambda: f64,
2773    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
2774    #[serde(default = "default_link_weight_decay_interval_secs")]
2775    pub link_weight_decay_interval_secs: u64,
2776    /// Kumiho AGM-inspired belief revision configuration.
2777    ///
2778    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
2779    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
2780    /// `superseded_by` pointer and the new edge becomes the current belief.
2781    #[serde(default)]
2782    pub belief_revision: BeliefRevisionConfig,
2783    /// D-MEM RPE-based tiered graph extraction routing.
2784    ///
2785    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
2786    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
2787    #[serde(default)]
2788    pub rpe: RpeConfig,
2789    /// `SQLite` connection pool size dedicated to graph operations.
2790    ///
2791    /// Graph tables share the same database file as messages/embeddings but use a
2792    /// separate pool to prevent pool starvation when community detection or spreading
2793    /// activation runs concurrently with regular memory operations. Default: `3`.
2794    #[serde(default = "default_graph_pool_size")]
2795    pub pool_size: u32,
2796    /// APEX-MEM append-only write path (#3631).
2797    ///
2798    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
2799    /// supersession chains instead of the legacy destructive-update path.
2800    #[serde(default)]
2801    pub apex_mem: ApexMemConfig,
2802    /// LLM call timeout per extraction request, in seconds. Default: `30`.
2803    #[serde(default = "default_graph_llm_timeout_secs")]
2804    pub llm_timeout_secs: u64,
2805    /// PRISM query-sensitive edge costing in A* graph recall.
2806    ///
2807    /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
2808    /// between the query embedding and the target entity embedding:
2809    /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
2810    /// Edges toward semantically relevant entities receive lower cost and are therefore
2811    /// preferred by A*, producing query-aligned recall paths.
2812    ///
2813    /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
2814    /// store is unavailable or a target entity has no stored embedding, falls back to the
2815    /// baseline cost `1.0 - confidence`.
2816    ///
2817    /// Default: `false` (preserves existing A* behaviour).
2818    #[serde(default)]
2819    pub query_sensitive_cost: bool,
2820
2821    /// Implicit conflict detection for SYNAPSE recall (spec 004-17, STALE/CUPMem).
2822    ///
2823    /// When enabled, write-time fuzzy predicate matching detects implicit conflicts
2824    /// between graph edges and annotates SYNAPSE recall results accordingly.
2825    #[serde(default)]
2826    pub implicit_conflict: ImplicitConflictConfig,
2827    /// `MemORAI` write-gate prefilter (#3709).
2828    ///
2829    /// When `write_gate.enabled = true`, low-signal edges are dropped before graph write,
2830    /// reducing noise. Opt-in; default is `false`.
2831    #[serde(default)]
2832    pub write_gate: WriteGateConfig,
2833    /// Conflict resolver recency-fallback threshold (#3709).
2834    ///
2835    /// Controls when the recency strategy is allowed to override `valid_from` comparison.
2836    #[serde(default)]
2837    pub conflict_recency: ConflictRecencyConfig,
2838}
2839
2840/// Similarity method for implicit conflict detection.
2841#[derive(
2842    Debug,
2843    Clone,
2844    Copy,
2845    PartialEq,
2846    Eq,
2847    Default,
2848    serde::Serialize,
2849    serde::Deserialize,
2850    schemars::JsonSchema,
2851)]
2852#[serde(rename_all = "snake_case")]
2853#[non_exhaustive]
2854pub enum SimilarityMethod {
2855    /// Normalized Levenshtein edit distance.
2856    #[default]
2857    Levenshtein,
2858    /// Cosine similarity over pre-computed predicate embeddings.
2859    Embedding,
2860    /// Either method triggers detection.
2861    Both,
2862}
2863
2864/// Resolution strategy when an implicit conflict is detected.
2865#[derive(
2866    Debug,
2867    Clone,
2868    Copy,
2869    PartialEq,
2870    Eq,
2871    Default,
2872    serde::Serialize,
2873    serde::Deserialize,
2874    schemars::JsonSchema,
2875)]
2876#[serde(rename_all = "snake_case")]
2877#[non_exhaustive]
2878pub enum ConflictResolutionStrategy {
2879    /// Mark the pair as a candidate but do not supersede either edge.
2880    #[default]
2881    FlagOnly,
2882    /// Supersede the older edge via APEX-MEM `insert_or_supersede`.
2883    Recency,
2884    /// Supersede the lower-confidence edge.
2885    Confidence,
2886    /// Delegate resolution to an LLM provider; fall back to `flag_only` on timeout.
2887    Llm,
2888}
2889
2890/// Configuration for the optional background consolidation daemon (spec 004-17).
2891#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
2892#[serde(default)]
2893pub struct ConsolidationDaemonConfig {
2894    /// Enable the background consolidation daemon.
2895    pub enabled: bool,
2896    /// How often the daemon runs, in seconds. Default: 7200 (2 hours).
2897    #[serde(default = "default_ic_daemon_interval_secs")]
2898    pub interval_seconds: u64,
2899    /// Maximum number of candidates processed per daemon run. Default: 100.
2900    #[serde(default = "default_ic_daemon_batch_size")]
2901    pub batch_size: usize,
2902}
2903
2904impl Default for ConsolidationDaemonConfig {
2905    fn default() -> Self {
2906        Self {
2907            enabled: false,
2908            interval_seconds: default_ic_daemon_interval_secs(),
2909            batch_size: default_ic_daemon_batch_size(),
2910        }
2911    }
2912}
2913
/// Serde default for `ConsolidationDaemonConfig::interval_seconds`: 7200 s (2 hours).
fn default_ic_daemon_interval_secs() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
2917
/// Serde default for `ConsolidationDaemonConfig::batch_size`: 100 candidates per run.
fn default_ic_daemon_batch_size() -> usize {
    const BATCH_SIZE: usize = 100;
    BATCH_SIZE
}
2921
2922/// Configuration for implicit conflict detection (spec 004-17, STALE/CUPMem).
2923///
2924/// Controls write-time fuzzy predicate matching and SYNAPSE recall annotation.
2925/// All detection is gated behind `enabled = false` by default — no overhead when disabled.
2926///
2927/// TOML path: `[memory.graph.implicit_conflict]`
2928///
2929/// # Examples
2930///
2931/// ```toml
2932/// [memory.graph.implicit_conflict]
2933/// enabled = true
2934/// similarity_method = "levenshtein"
2935/// conflict_similarity_threshold = 0.80
2936/// resolution_strategy = "flag_only"
2937/// candidate_ttl_days = 30
2938/// propagation_depth = 2
2939/// ```
2940#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2941#[serde(default)]
2942pub struct ImplicitConflictConfig {
2943    /// Enable implicit conflict detection. Default: `false`.
2944    pub enabled: bool,
2945    /// Similarity method used to detect candidate pairs.
2946    #[serde(default)]
2947    pub similarity_method: SimilarityMethod,
2948    /// Minimum similarity score to flag a pair as a conflict candidate. Default: 0.80.
2949    #[serde(default = "default_ic_similarity_threshold")]
2950    pub conflict_similarity_threshold: f64,
2951    /// How to resolve detected conflicts. Default: `flag_only`.
2952    #[serde(default)]
2953    pub resolution_strategy: ConflictResolutionStrategy,
2954    /// Provider name (from `[[llm.providers]]`) for LLM-mediated resolution.
2955    #[serde(default)]
2956    pub implicit_conflict_provider: crate::providers::ProviderName,
2957    /// LLM resolution timeout in milliseconds. Default: 800.
2958    #[serde(default = "default_ic_llm_timeout_ms")]
2959    pub conflict_llm_timeout_ms: u64,
2960    /// Days before an unresolved candidate entry expires. Default: 30.
2961    #[serde(default = "default_ic_candidate_ttl_days")]
2962    pub candidate_ttl_days: u32,
2963    /// SYNAPSE propagation depth for surfacing superseding facts. Default: 2.
2964    #[serde(default = "default_ic_propagation_depth")]
2965    pub propagation_depth: u32,
2966    /// Background consolidation daemon configuration.
2967    #[serde(default)]
2968    pub consolidation_daemon: ConsolidationDaemonConfig,
2969}
2970
2971impl Default for ImplicitConflictConfig {
2972    fn default() -> Self {
2973        Self {
2974            enabled: false,
2975            similarity_method: SimilarityMethod::default(),
2976            conflict_similarity_threshold: default_ic_similarity_threshold(),
2977            resolution_strategy: ConflictResolutionStrategy::default(),
2978            implicit_conflict_provider: crate::providers::ProviderName::default(),
2979            conflict_llm_timeout_ms: default_ic_llm_timeout_ms(),
2980            candidate_ttl_days: default_ic_candidate_ttl_days(),
2981            propagation_depth: default_ic_propagation_depth(),
2982            consolidation_daemon: ConsolidationDaemonConfig::default(),
2983        }
2984    }
2985}
2986
/// Serde default for `ImplicitConflictConfig::conflict_similarity_threshold`: `0.80`.
fn default_ic_similarity_threshold() -> f64 {
    const SIMILARITY_THRESHOLD: f64 = 0.80;
    SIMILARITY_THRESHOLD
}
2990
/// Serde default for `ImplicitConflictConfig::conflict_llm_timeout_ms`: 800 ms.
fn default_ic_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 800;
    LLM_TIMEOUT_MS
}
2994
/// Serde default for `ImplicitConflictConfig::candidate_ttl_days`: 30 days.
fn default_ic_candidate_ttl_days() -> u32 {
    const CANDIDATE_TTL_DAYS: u32 = 30;
    CANDIDATE_TTL_DAYS
}
2998
/// Serde default for `ImplicitConflictConfig::propagation_depth`: 2.
fn default_ic_propagation_depth() -> u32 {
    const PROPAGATION_DEPTH: u32 = 2;
    PROPAGATION_DEPTH
}
3002
/// Serde default for `GraphConfig::pool_size`: 3 connections.
fn default_graph_pool_size() -> u32 {
    const POOL_SIZE: u32 = 3;
    POOL_SIZE
}
3006
/// Serde default for `GraphConfig::llm_timeout_secs`: 30 seconds.
fn default_graph_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
3010
3011/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
3012///
3013/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
3014/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
3015/// the full supersession chain and enables conflict resolution.
3016///
3017/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
3018#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
3019#[serde(default)]
3020pub struct ApexMemConfig {
3021    /// Enable the APEX-MEM append-only write path. Default: `false`.
3022    pub enabled: bool,
3023}
3024
/// Serde default for `WriteQualityGateConfig::threshold`: `0.55`.
fn default_quality_gate_threshold() -> f32 {
    const THRESHOLD: f32 = 0.55;
    THRESHOLD
}
3028
/// Serde default for `WriteQualityGateConfig::recent_window`: the last 32 writes.
fn default_quality_gate_recent_window() -> usize {
    const RECENT_WINDOW: usize = 32;
    RECENT_WINDOW
}
3032
/// Serde default for `WriteQualityGateConfig::contradiction_grace_seconds`: 300 s.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    const GRACE_SECS: u64 = 5 * 60;
    GRACE_SECS
}
3036
/// Serde default for `WriteQualityGateConfig::information_value_weight`: `0.4`.
fn default_quality_gate_information_value_weight() -> f32 {
    const INFORMATION_VALUE_WEIGHT: f32 = 0.4;
    INFORMATION_VALUE_WEIGHT
}
3040
/// Serde default for `WriteQualityGateConfig::reference_completeness_weight`: `0.3`.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    const REFERENCE_COMPLETENESS_WEIGHT: f32 = 0.3;
    REFERENCE_COMPLETENESS_WEIGHT
}
3044
/// Serde default for `WriteQualityGateConfig::contradiction_weight`: `0.3`.
fn default_quality_gate_contradiction_weight() -> f32 {
    const CONTRADICTION_WEIGHT: f32 = 0.3;
    CONTRADICTION_WEIGHT
}
3048
/// Serde default for `WriteQualityGateConfig::rejection_rate_alarm_ratio`: `0.35`.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    const ALARM_RATIO: f32 = 0.35;
    ALARM_RATIO
}
3052
/// Serde default for `WriteQualityGateConfig::llm_timeout_ms`: 500 ms.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 500;
    LLM_TIMEOUT_MS
}
3056
/// Serde default for `WriteQualityGateConfig::llm_weight`: `0.5`.
fn default_quality_gate_llm_weight() -> f32 {
    const LLM_WEIGHT: f32 = 0.5;
    LLM_WEIGHT
}
3060
/// Serde default for `WriteQualityGateConfig::reference_check_lang_en`: checks are on.
fn default_quality_gate_reference_check_lang_en() -> bool {
    const REFERENCE_CHECK_ENABLED: bool = true;
    REFERENCE_CHECK_ENABLED
}
3064
3065/// Write quality gate configuration (`[memory.quality_gate]`).
3066///
3067/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
3068/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
3069/// scoring is opt-in via `quality_gate_provider`.
3070///
3071/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
3072#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
3073#[serde(default)]
3074pub struct WriteQualityGateConfig {
3075    /// Enable the write quality gate. Default: `false`.
3076    pub enabled: bool,
3077    /// Combined score threshold below which writes are rejected. Default: `0.55`.
3078    #[serde(default = "default_quality_gate_threshold")]
3079    pub threshold: f32,
3080    /// Number of recent writes compared for information-value scoring. Default: `32`.
3081    #[serde(default = "default_quality_gate_recent_window")]
3082    pub recent_window: usize,
3083    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
3084    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
3085    pub contradiction_grace_seconds: u64,
3086    /// Weight of `information_value` sub-score. Default: `0.4`.
3087    #[serde(default = "default_quality_gate_information_value_weight")]
3088    pub information_value_weight: f32,
3089    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
3090    #[serde(default = "default_quality_gate_reference_completeness_weight")]
3091    pub reference_completeness_weight: f32,
3092    /// Weight of `contradiction` sub-score. Default: `0.3`.
3093    #[serde(default = "default_quality_gate_contradiction_weight")]
3094    pub contradiction_weight: f32,
3095    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
3096    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
3097    pub rejection_rate_alarm_ratio: f32,
3098    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
3099    #[serde(default)]
3100    pub quality_gate_provider: ProviderName,
3101    /// LLM timeout in milliseconds. Default: `500`.
3102    #[serde(default = "default_quality_gate_llm_timeout_ms")]
3103    pub llm_timeout_ms: u64,
3104    /// LLM blend weight into final score. Default: `0.5`.
3105    #[serde(default = "default_quality_gate_llm_weight")]
3106    pub llm_weight: f32,
3107    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
3108    #[serde(default = "default_quality_gate_reference_check_lang_en")]
3109    pub reference_check_lang_en: bool,
3110}
3111
3112impl Default for WriteQualityGateConfig {
3113    fn default() -> Self {
3114        Self {
3115            enabled: false,
3116            threshold: default_quality_gate_threshold(),
3117            recent_window: default_quality_gate_recent_window(),
3118            contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
3119            information_value_weight: default_quality_gate_information_value_weight(),
3120            reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
3121            contradiction_weight: default_quality_gate_contradiction_weight(),
3122            rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
3123            quality_gate_provider: ProviderName::default(),
3124            llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
3125            llm_weight: default_quality_gate_llm_weight(),
3126            reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
3127        }
3128    }
3129}
3130
3131impl Default for GraphConfig {
3132    fn default() -> Self {
3133        Self {
3134            enabled: false,
3135            extract_model: String::new(),
3136            max_entities_per_message: default_graph_max_entities_per_message(),
3137            max_edges_per_message: default_graph_max_edges_per_message(),
3138            community_refresh_interval: default_graph_community_refresh_interval(),
3139            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
3140            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
3141            use_embedding_resolution: false,
3142            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
3143            max_hops: default_graph_max_hops(),
3144            recall_limit: default_graph_recall_limit(),
3145            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
3146            max_entities: 0,
3147            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
3148            community_summary_concurrency: default_graph_community_summary_concurrency(),
3149            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
3150            temporal_decay_rate: default_graph_temporal_decay_rate(),
3151            edge_history_limit: default_graph_edge_history_limit(),
3152            note_linking: NoteLinkingConfig::default(),
3153            spreading_activation: SpreadingActivationConfig::default(),
3154            retrieval_strategy: GraphRetrievalStrategy::default(),
3155            extract_provider: ProviderName::default(),
3156            strategy_classifier_provider: None,
3157            beam_search: BeamSearchConfig::default(),
3158            watercircles: WaterCirclesConfig::default(),
3159            experience: ExperienceConfig::default(),
3160            link_weight_decay_lambda: default_link_weight_decay_lambda(),
3161            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
3162            belief_revision: BeliefRevisionConfig::default(),
3163            rpe: RpeConfig::default(),
3164            pool_size: default_graph_pool_size(),
3165            apex_mem: ApexMemConfig::default(),
3166            llm_timeout_secs: default_graph_llm_timeout_secs(),
3167            query_sensitive_cost: false,
3168            implicit_conflict: ImplicitConflictConfig::default(),
3169            write_gate: WriteGateConfig::default(),
3170            conflict_recency: ConflictRecencyConfig::default(),
3171        }
3172    }
3173}
3174
/// Serde default for `ConsolidationConfig::confidence_threshold`: `0.7`.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7_f32
}
3178
/// Serde default for `ConsolidationConfig::sweep_interval_secs`: one hour, in seconds.
fn default_consolidation_sweep_interval_secs() -> u64 {
    // 60 minutes of 60 seconds each == 3600.
    60 * 60
}
3182
/// Serde default for `ConsolidationConfig::sweep_batch_size`: `50` messages per sweep.
fn default_consolidation_sweep_batch_size() -> usize {
    50_usize
}
3186
/// Serde default for `ConsolidationConfig::similarity_threshold`: `0.85`.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85_f32
}
3190
3191/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
3192///
3193/// When `enabled = true`, a background loop periodically clusters semantically similar messages
3194/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
3195/// they are marked as consolidated and deprioritized in recall via temporal decay.
3196#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
3197#[serde(default)]
3198pub struct ConsolidationConfig {
3199    /// Enable the consolidation background loop. Default: `false`.
3200    pub enabled: bool,
3201    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
3202    /// Falls back to the primary provider when empty. Default: `""`.
3203    #[serde(default)]
3204    pub consolidation_provider: ProviderName,
3205    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
3206    #[serde(default = "default_consolidation_confidence_threshold")]
3207    pub confidence_threshold: f32,
3208    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
3209    #[serde(default = "default_consolidation_sweep_interval_secs")]
3210    pub sweep_interval_secs: u64,
3211    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
3212    #[serde(default = "default_consolidation_sweep_batch_size")]
3213    pub sweep_batch_size: usize,
3214    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
3215    /// Default: `0.85`.
3216    #[serde(default = "default_consolidation_similarity_threshold")]
3217    pub similarity_threshold: f32,
3218    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
3219    #[serde(default = "default_consolidation_llm_timeout_secs")]
3220    pub llm_timeout_secs: u64,
3221    /// Per-call timeout for every `embed()` invocation in the consolidation sweep, in seconds.
3222    /// Default: `5`.
3223    #[serde(default = "default_embed_timeout_secs")]
3224    pub embed_timeout_secs: u64,
3225}
3226
3227impl Default for ConsolidationConfig {
3228    fn default() -> Self {
3229        Self {
3230            enabled: false,
3231            consolidation_provider: ProviderName::default(),
3232            confidence_threshold: default_consolidation_confidence_threshold(),
3233            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
3234            sweep_batch_size: default_consolidation_sweep_batch_size(),
3235            similarity_threshold: default_consolidation_similarity_threshold(),
3236            llm_timeout_secs: default_consolidation_llm_timeout_secs(),
3237            embed_timeout_secs: default_embed_timeout_secs(),
3238        }
3239    }
3240}
3241
/// Serde default for `ConsolidationConfig::llm_timeout_secs`: `30` seconds.
fn default_consolidation_llm_timeout_secs() -> u64 {
    30_u64
}
3245
/// Default `link_weight_decay_lambda`: `0.95`.
fn default_link_weight_decay_lambda() -> f64 {
    0.95_f64
}
3249
/// Default `link_weight_decay_interval_secs`: one day, in seconds.
fn default_link_weight_decay_interval_secs() -> u64 {
    // 24 h * 60 min * 60 s == 86400.
    24 * 60 * 60
}
3253
3254fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
3255where
3256    D: serde::Deserializer<'de>,
3257{
3258    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
3259    if value.is_nan() || value.is_infinite() {
3260        return Err(serde::de::Error::custom(
3261            "link_weight_decay_lambda must be a finite number",
3262        ));
3263    }
3264    if !(value > 0.0 && value <= 1.0) {
3265        return Err(serde::de::Error::custom(
3266            "link_weight_decay_lambda must be in (0.0, 1.0]",
3267        ));
3268    }
3269    Ok(value)
3270}
3271
3272fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
3273where
3274    D: serde::Deserializer<'de>,
3275{
3276    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3277    if value.is_nan() || value.is_infinite() {
3278        return Err(serde::de::Error::custom(
3279            "threshold must be a finite number",
3280        ));
3281    }
3282    if !(0.0..=1.0).contains(&value) {
3283        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
3284    }
3285    Ok(value)
3286}
3287
3288fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
3289where
3290    D: serde::Deserializer<'de>,
3291{
3292    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3293    if value.is_nan() || value.is_infinite() {
3294        return Err(serde::de::Error::custom(
3295            "fast_path_margin must be a finite number",
3296        ));
3297    }
3298    if !(0.0..=1.0).contains(&value) {
3299        return Err(serde::de::Error::custom(
3300            "fast_path_margin must be in [0.0, 1.0]",
3301        ));
3302    }
3303    Ok(value)
3304}
3305
/// Default `AdmissionConfig::threshold`: `0.40`.
fn default_admission_threshold() -> f32 {
    0.40_f32
}
3309
/// Default `AdmissionConfig::fast_path_margin`: `0.15`.
fn default_admission_fast_path_margin() -> f32 {
    0.15_f32
}
3313
/// Serde default for `AdmissionConfig::rl_min_samples`: `500` training samples.
fn default_rl_min_samples() -> u32 {
    500_u32
}
3317
/// Serde default for `AdmissionConfig::rl_retrain_interval_secs`: one hour, in seconds.
fn default_rl_retrain_interval_secs() -> u64 {
    // 60 minutes of 60 seconds each == 3600.
    60 * 60
}
3321
3322/// Admission decision strategy.
3323///
3324/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
3325/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
3326#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
3327#[serde(rename_all = "snake_case")]
3328#[non_exhaustive]
3329pub enum AdmissionStrategy {
3330    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
3331    #[default]
3332    Heuristic,
3333    /// Learned model: logistic regression trained on recall feedback.
3334    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
3335    Rl,
3336}
3337
3338fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
3339where
3340    D: serde::Deserializer<'de>,
3341{
3342    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3343    if value < 0.0 {
3344        return Err(serde::de::Error::custom(
3345            "admission weight must be non-negative (>= 0.0)",
3346        ));
3347    }
3348    Ok(value)
3349}
3350
3351/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
3352///
3353/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
3354/// All values must be non-negative.
3355#[derive(Debug, Clone, Deserialize, Serialize)]
3356#[serde(default)]
3357pub struct AdmissionWeights {
3358    /// LLM-estimated future reuse probability. Default: `0.30`.
3359    #[serde(deserialize_with = "validate_admission_weight")]
3360    pub future_utility: f32,
3361    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
3362    #[serde(deserialize_with = "validate_admission_weight")]
3363    pub factual_confidence: f32,
3364    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
3365    #[serde(deserialize_with = "validate_admission_weight")]
3366    pub semantic_novelty: f32,
3367    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
3368    #[serde(deserialize_with = "validate_admission_weight")]
3369    pub temporal_recency: f32,
3370    /// Content type prior based on role. Default: `0.15`.
3371    #[serde(deserialize_with = "validate_admission_weight")]
3372    pub content_type_prior: f32,
3373    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
3374    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
3375    /// Normalized automatically at runtime. Default: `0.0`.
3376    #[serde(deserialize_with = "validate_admission_weight")]
3377    pub goal_utility: f32,
3378}
3379
3380impl Default for AdmissionWeights {
3381    fn default() -> Self {
3382        Self {
3383            future_utility: 0.30,
3384            factual_confidence: 0.15,
3385            semantic_novelty: 0.30,
3386            temporal_recency: 0.10,
3387            content_type_prior: 0.15,
3388            goal_utility: 0.0,
3389        }
3390    }
3391}
3392
3393impl AdmissionWeights {
3394    /// Return weights normalized so they sum to 1.0.
3395    ///
3396    /// All weights are non-negative; the sum is always > 0 when defaults are used.
3397    #[must_use]
3398    pub fn normalized(&self) -> Self {
3399        let sum = self.future_utility
3400            + self.factual_confidence
3401            + self.semantic_novelty
3402            + self.temporal_recency
3403            + self.content_type_prior
3404            + self.goal_utility;
3405        if sum <= f32::EPSILON {
3406            return Self::default();
3407        }
3408        Self {
3409            future_utility: self.future_utility / sum,
3410            factual_confidence: self.factual_confidence / sum,
3411            semantic_novelty: self.semantic_novelty / sum,
3412            temporal_recency: self.temporal_recency / sum,
3413            content_type_prior: self.content_type_prior / sum,
3414            goal_utility: self.goal_utility / sum,
3415        }
3416    }
3417}
3418
3419/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
3420///
3421/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
3422/// Messages below the composite admission threshold are rejected and not persisted.
3423#[derive(Debug, Clone, Deserialize, Serialize)]
3424#[serde(default)]
3425pub struct AdmissionConfig {
3426    /// Enable A-MAC admission control. Default: `false`.
3427    pub enabled: bool,
3428    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
3429    /// Default: `0.40`.
3430    #[serde(deserialize_with = "validate_admission_threshold")]
3431    pub threshold: f32,
3432    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
3433    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
3434    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
3435    pub fast_path_margin: f32,
3436    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
3437    /// Falls back to the primary provider when empty. Default: `""`.
3438    pub admission_provider: ProviderName,
3439    /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
3440    pub weights: AdmissionWeights,
3441    /// Admission decision strategy. Default: `heuristic`.
3442    #[serde(default)]
3443    pub admission_strategy: AdmissionStrategy,
3444    /// Minimum training samples before the RL model is activated.
3445    /// Below this count the system falls back to `Heuristic`. Default: `500`.
3446    #[serde(default = "default_rl_min_samples")]
3447    pub rl_min_samples: u32,
3448    /// Background RL model retraining interval in seconds. Default: `3600`.
3449    #[serde(default = "default_rl_retrain_interval_secs")]
3450    pub rl_retrain_interval_secs: u64,
3451    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
3452    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
3453    /// Zero regression when `false`. Default: `false`.
3454    #[serde(default)]
3455    pub goal_conditioned_write: bool,
3456    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
3457    /// Used only for borderline cases (similarity within 0.1 of threshold).
3458    /// Falls back to the primary provider when empty. Default: `""`.
3459    #[serde(default)]
3460    pub goal_utility_provider: ProviderName,
3461    /// Minimum cosine similarity between goal embedding and candidate memory
3462    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
3463    #[serde(default = "default_goal_utility_threshold")]
3464    pub goal_utility_threshold: f32,
3465    /// Weight of the `goal_utility` factor in the composite admission score.
3466    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
3467    #[serde(default = "default_goal_utility_weight")]
3468    pub goal_utility_weight: f32,
3469}
3470
/// Serde default for `AdmissionConfig::goal_utility_threshold`: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    0.4_f32
}
3474
/// Serde default for `AdmissionConfig::goal_utility_weight`: `0.25`.
fn default_goal_utility_weight() -> f32 {
    0.25_f32
}
3478
3479impl Default for AdmissionConfig {
3480    fn default() -> Self {
3481        Self {
3482            enabled: false,
3483            threshold: default_admission_threshold(),
3484            fast_path_margin: default_admission_fast_path_margin(),
3485            admission_provider: ProviderName::default(),
3486            weights: AdmissionWeights::default(),
3487            admission_strategy: AdmissionStrategy::default(),
3488            rl_min_samples: default_rl_min_samples(),
3489            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
3490            goal_conditioned_write: false,
3491            goal_utility_provider: ProviderName::default(),
3492            goal_utility_threshold: default_goal_utility_threshold(),
3493            goal_utility_weight: default_goal_utility_weight(),
3494        }
3495    }
3496}
3497
3498/// Routing strategy for `[memory.store_routing]`.
3499#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
3500#[serde(rename_all = "snake_case")]
3501#[non_exhaustive]
3502pub enum StoreRoutingStrategy {
3503    /// Pure heuristic pattern matching. Zero LLM calls. Default.
3504    #[default]
3505    Heuristic,
3506    /// LLM-based classification via `routing_classifier_provider`.
3507    Llm,
3508    /// Heuristic first; escalates to LLM only when confidence is low.
3509    Hybrid,
3510}
3511
3512/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
3513///
3514/// Controls how each query is classified and routed to the appropriate memory
3515/// backend(s), avoiding unnecessary store queries for simple lookups.
3516#[derive(Debug, Clone, Deserialize, Serialize)]
3517#[serde(default)]
3518pub struct StoreRoutingConfig {
3519    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
3520    /// directly (existing behavior). Default: `false`.
3521    pub enabled: bool,
3522    /// Routing strategy. Default: `heuristic`.
3523    pub strategy: StoreRoutingStrategy,
3524    /// Provider name from `[[llm.providers]]` for LLM-based classification.
3525    /// Falls back to the primary provider when empty. Default: `""`.
3526    pub routing_classifier_provider: ProviderName,
3527    /// Route to use when the classifier is uncertain (confidence < threshold).
3528    ///
3529    /// Defaults to [`MemoryRoute::Hybrid`].
3530    pub fallback_route: MemoryRoute,
3531    /// Confidence threshold below which `HybridRouter` escalates to LLM.
3532    /// Range: `[0.0, 1.0]`. Default: `0.7`.
3533    pub confidence_threshold: f32,
3534}
3535
3536impl Default for StoreRoutingConfig {
3537    fn default() -> Self {
3538        Self {
3539            enabled: false,
3540            strategy: StoreRoutingStrategy::Heuristic,
3541            routing_classifier_provider: ProviderName::default(),
3542            fallback_route: MemoryRoute::Hybrid,
3543            confidence_threshold: 0.7,
3544        }
3545    }
3546}
3547
3548/// Persona memory layer configuration (#2461).
3549///
3550/// When `enabled = true`, user preferences and domain knowledge are extracted from
3551/// conversation history via a cheap LLM provider and injected after the system prompt.
3552#[derive(Debug, Clone, Deserialize, Serialize)]
3553#[serde(default)]
3554pub struct PersonaConfig {
3555    /// Enable persona memory extraction and injection. Default: `false`.
3556    pub enabled: bool,
3557    /// Provider name from `[[llm.providers]]` for persona extraction.
3558    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
3559    pub persona_provider: ProviderName,
3560    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
3561    pub min_confidence: f64,
3562    /// Minimum user messages before extraction runs in a session. Default: `3`.
3563    pub min_messages: usize,
3564    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
3565    pub max_messages: usize,
3566    /// LLM timeout for the extraction call in seconds. Default: `10`.
3567    pub extraction_timeout_secs: u64,
3568    /// Token budget allocated to persona context in assembly. Default: `500`.
3569    pub context_budget_tokens: usize,
3570}
3571
3572impl Default for PersonaConfig {
3573    fn default() -> Self {
3574        Self {
3575            enabled: false,
3576            persona_provider: ProviderName::default(),
3577            min_confidence: 0.6,
3578            min_messages: 3,
3579            max_messages: 10,
3580            extraction_timeout_secs: 10,
3581            context_budget_tokens: 500,
3582        }
3583    }
3584}
3585
3586/// Trajectory-informed memory configuration (#2498).
3587///
3588/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
3589/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
3590/// Procedural entries are injected into context as "past experience" during assembly.
3591#[derive(Debug, Clone, Deserialize, Serialize)]
3592#[serde(default)]
3593pub struct TrajectoryConfig {
3594    /// Enable trajectory extraction and context injection. Default: `false`.
3595    pub enabled: bool,
3596    /// Provider name from `[[llm.providers]]` for extraction.
3597    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3598    pub trajectory_provider: ProviderName,
3599    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
3600    pub context_budget_tokens: usize,
3601    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
3602    pub max_messages: usize,
3603    /// LLM timeout for the extraction call in seconds. Default: `10`.
3604    pub extraction_timeout_secs: u64,
3605    /// Number of procedural entries retrieved for context injection. Default: `5`.
3606    pub recall_top_k: usize,
3607    /// Minimum confidence score for entries included in context. Default: `0.6`.
3608    pub min_confidence: f64,
3609}
3610
3611impl Default for TrajectoryConfig {
3612    fn default() -> Self {
3613        Self {
3614            enabled: false,
3615            trajectory_provider: ProviderName::default(),
3616            context_budget_tokens: 400,
3617            max_messages: 10,
3618            extraction_timeout_secs: 10,
3619            recall_top_k: 5,
3620            min_confidence: 0.6,
3621        }
3622    }
3623}
3624
3625/// Category-aware memory configuration (#2428).
3626///
3627/// When `enabled = true`, messages are auto-tagged with a category derived from the active
3628/// skill or tool context. The category is stored in the `messages.category` column and used
3629/// as a Qdrant payload filter during recall.
3630#[derive(Debug, Clone, Deserialize, Serialize)]
3631#[serde(default)]
3632pub struct CategoryConfig {
3633    /// Enable category tagging and category-filtered recall. Default: `false`.
3634    pub enabled: bool,
3635    /// Automatically assign category from skill metadata or tool type. Default: `true`.
3636    pub auto_tag: bool,
3637}
3638
3639impl Default for CategoryConfig {
3640    fn default() -> Self {
3641        Self {
3642            enabled: false,
3643            auto_tag: true,
3644        }
3645    }
3646}
3647
3648/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
3649///
3650/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
3651/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
3652/// for complex queries.
3653#[derive(Debug, Clone, Deserialize, Serialize)]
3654#[serde(default)]
3655pub struct TreeConfig {
3656    /// Enable the memory tree and background consolidation loop. Default: `false`.
3657    pub enabled: bool,
3658    /// Provider name from `[[llm.providers]]` for node consolidation.
3659    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3660    pub consolidation_provider: ProviderName,
3661    /// Interval between consolidation sweeps in seconds. Default: `300`.
3662    pub sweep_interval_secs: u64,
3663    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
3664    pub batch_size: usize,
3665    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
3666    pub similarity_threshold: f32,
3667    /// Maximum tree depth (levels above leaves). Default: `3`.
3668    pub max_level: u32,
3669    /// Token budget allocated to tree memory in context assembly. Default: `400`.
3670    pub context_budget_tokens: usize,
3671    /// Number of tree nodes retrieved for context. Default: `5`.
3672    pub recall_top_k: usize,
3673    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
3674    pub min_cluster_size: usize,
3675}
3676
3677impl Default for TreeConfig {
3678    fn default() -> Self {
3679        Self {
3680            enabled: false,
3681            consolidation_provider: ProviderName::default(),
3682            sweep_interval_secs: 300,
3683            batch_size: 20,
3684            similarity_threshold: 0.8,
3685            max_level: 3,
3686            context_budget_tokens: 400,
3687            recall_top_k: 5,
3688            min_cluster_size: 2,
3689        }
3690    }
3691}
3692
3693/// Time-based microcompact configuration (#2699).
3694///
3695/// When `enabled = true`, low-value tool outputs are cleared from context
3696/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
3697/// The most recent `keep_recent` tool messages are preserved unconditionally.
3698#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3699#[serde(default)]
3700pub struct MicrocompactConfig {
3701    /// Enable time-based microcompaction. Default: `false`.
3702    pub enabled: bool,
3703    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
3704    pub gap_threshold_minutes: u32,
3705    /// Number of most recent compactable tool messages to preserve. Default: `3`.
3706    pub keep_recent: usize,
3707}
3708
3709impl Default for MicrocompactConfig {
3710    fn default() -> Self {
3711        Self {
3712            enabled: false,
3713            gap_threshold_minutes: 60,
3714            keep_recent: 3,
3715        }
3716    }
3717}
3718
3719/// autoDream background memory consolidation configuration (#2697).
3720///
3721/// When `enabled = true`, a constrained consolidation subagent runs after
3722/// a session ends if both `min_sessions` and `min_hours` gates pass.
3723#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3724#[serde(default)]
3725pub struct AutoDreamConfig {
3726    /// Enable autoDream consolidation. Default: `false`.
3727    pub enabled: bool,
3728    /// Minimum number of sessions between consolidations. Default: `3`.
3729    pub min_sessions: u32,
3730    /// Minimum hours between consolidations. Default: `24`.
3731    pub min_hours: u32,
3732    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
3733    /// Falls back to the primary provider when empty. Default: `""`.
3734    pub consolidation_provider: ProviderName,
3735    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
3736    pub max_iterations: u8,
3737    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
3738    #[serde(default = "default_autodream_llm_timeout_secs")]
3739    pub llm_timeout_secs: u64,
3740}
3741
3742impl Default for AutoDreamConfig {
3743    fn default() -> Self {
3744        Self {
3745            enabled: false,
3746            min_sessions: 3,
3747            min_hours: 24,
3748            consolidation_provider: ProviderName::default(),
3749            max_iterations: 8,
3750            llm_timeout_secs: default_autodream_llm_timeout_secs(),
3751        }
3752    }
3753}
3754
/// Serde default for `AutoDreamConfig::llm_timeout_secs`: `30` seconds.
fn default_autodream_llm_timeout_secs() -> u64 {
    30_u64
}
3758
3759/// `MagicDocs` auto-maintained markdown configuration (#2702).
3760///
3761/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
3762/// are registered and periodically updated by a constrained subagent.
3763#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3764#[serde(default)]
3765pub struct MagicDocsConfig {
3766    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
3767    pub enabled: bool,
3768    /// Minimum turns between updates for a given doc path. Default: `5`.
3769    pub min_turns_between_updates: u32,
3770    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
3771    /// Falls back to the primary provider when empty. Default: `""`.
3772    pub update_provider: ProviderName,
3773    /// Maximum agent loop iterations per doc update. Default: `4`.
3774    pub max_iterations: u8,
3775}
3776
3777impl Default for MagicDocsConfig {
3778    fn default() -> Self {
3779        Self {
3780            enabled: false,
3781            min_turns_between_updates: 5,
3782            update_provider: ProviderName::default(),
3783            max_iterations: 4,
3784        }
3785    }
3786}
3787
3788#[cfg(test)]
3789mod tests {
3790    use super::*;
3791
3792    // Verify that serde deserialization routes through FromStr so that removed variants
3793    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
3794    #[test]
3795    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
3796        #[derive(serde::Deserialize)]
3797        struct Wrapper {
3798            #[allow(dead_code)]
3799            pruning_strategy: PruningStrategy,
3800        }
3801        let toml = r#"pruning_strategy = "task_aware_mig""#;
3802        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
3803        assert_eq!(
3804            w.pruning_strategy,
3805            PruningStrategy::Reactive,
3806            "task_aware_mig must fall back to Reactive"
3807        );
3808    }
3809
3810    #[test]
3811    fn pruning_strategy_toml_round_trip() {
3812        #[derive(serde::Deserialize)]
3813        struct Wrapper {
3814            #[allow(dead_code)]
3815            pruning_strategy: PruningStrategy,
3816        }
3817        for (input, expected) in [
3818            ("reactive", PruningStrategy::Reactive),
3819            ("task_aware", PruningStrategy::TaskAware),
3820            ("mig", PruningStrategy::Mig),
3821        ] {
3822            let toml = format!(r#"pruning_strategy = "{input}""#);
3823            let w: Wrapper = toml::from_str(&toml)
3824                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
3825            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
3826        }
3827    }
3828
3829    #[test]
3830    fn pruning_strategy_toml_unknown_value_errors() {
3831        #[derive(serde::Deserialize)]
3832        #[allow(dead_code)]
3833        struct Wrapper {
3834            pruning_strategy: PruningStrategy,
3835        }
3836        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
3837        assert!(
3838            toml::from_str::<Wrapper>(toml).is_err(),
3839            "unknown strategy must produce an error"
3840        );
3841    }
3842
3843    #[test]
3844    fn tier_config_defaults_are_correct() {
3845        let cfg = TierConfig::default();
3846        assert!(!cfg.enabled);
3847        assert_eq!(cfg.promotion_min_sessions, 3);
3848        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
3849        assert_eq!(cfg.sweep_interval_secs, 3600);
3850        assert_eq!(cfg.sweep_batch_size, 100);
3851    }
3852
3853    #[test]
3854    fn tier_config_rejects_min_sessions_below_2() {
3855        let toml = "promotion_min_sessions = 1";
3856        assert!(toml::from_str::<TierConfig>(toml).is_err());
3857    }
3858
3859    #[test]
3860    fn tier_config_rejects_similarity_threshold_below_0_5() {
3861        let toml = "similarity_threshold = 0.4";
3862        assert!(toml::from_str::<TierConfig>(toml).is_err());
3863    }
3864
3865    #[test]
3866    fn tier_config_rejects_zero_sweep_batch_size() {
3867        let toml = "sweep_batch_size = 0";
3868        assert!(toml::from_str::<TierConfig>(toml).is_err());
3869    }
3870
3871    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
3872        let input = format!("importance_weight = {toml_val}");
3873        toml::from_str::<SemanticConfig>(&input)
3874    }
3875
3876    #[test]
3877    fn importance_weight_default_is_0_15() {
3878        let cfg = SemanticConfig::default();
3879        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
3880    }
3881
3882    #[test]
3883    fn importance_weight_valid_zero() {
3884        let cfg = deserialize_importance_weight("0.0").unwrap();
3885        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
3886    }
3887
3888    #[test]
3889    fn importance_weight_valid_one() {
3890        let cfg = deserialize_importance_weight("1.0").unwrap();
3891        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
3892    }
3893
3894    #[test]
3895    fn importance_weight_rejects_near_zero_negative() {
3896        // TOML does not have a NaN literal, but we can test via a f64 that
3897        // the validator rejects out-of-range values. Test with negative here
3898        // and rely on validate_importance_weight rejecting non-finite via
3899        // a constructed deserializer call.
3900        let result = deserialize_importance_weight("-0.01");
3901        assert!(
3902            result.is_err(),
3903            "negative importance_weight must be rejected"
3904        );
3905    }
3906
3907    #[test]
3908    fn importance_weight_rejects_negative() {
3909        let result = deserialize_importance_weight("-1.0");
3910        assert!(result.is_err(), "negative value must be rejected");
3911    }
3912
3913    #[test]
3914    fn importance_weight_rejects_greater_than_one() {
3915        let result = deserialize_importance_weight("1.01");
3916        assert!(result.is_err(), "value > 1.0 must be rejected");
3917    }
3918
3919    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────
3920
3921    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
3922    #[test]
3923    fn admission_weights_normalized_sums_to_one() {
3924        let w = AdmissionWeights {
3925            future_utility: 2.0,
3926            factual_confidence: 1.0,
3927            semantic_novelty: 3.0,
3928            temporal_recency: 1.0,
3929            content_type_prior: 3.0,
3930            goal_utility: 0.0,
3931        };
3932        let n = w.normalized();
3933        let sum = n.future_utility
3934            + n.factual_confidence
3935            + n.semantic_novelty
3936            + n.temporal_recency
3937            + n.content_type_prior;
3938        assert!(
3939            (sum - 1.0).abs() < 0.001,
3940            "normalized weights must sum to 1.0, got {sum}"
3941        );
3942    }
3943
3944    // Test: already-normalized weights are preserved.
3945    #[test]
3946    fn admission_weights_normalized_preserves_already_unit_sum() {
3947        let w = AdmissionWeights::default();
3948        let n = w.normalized();
3949        let sum = n.future_utility
3950            + n.factual_confidence
3951            + n.semantic_novelty
3952            + n.temporal_recency
3953            + n.content_type_prior;
3954        assert!(
3955            (sum - 1.0).abs() < 0.001,
3956            "default weights sum to ~1.0 after normalization"
3957        );
3958    }
3959
3960    // Test: zero weights fall back to default (no divide-by-zero panic).
3961    #[test]
3962    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
3963        let w = AdmissionWeights {
3964            future_utility: 0.0,
3965            factual_confidence: 0.0,
3966            semantic_novelty: 0.0,
3967            temporal_recency: 0.0,
3968            content_type_prior: 0.0,
3969            goal_utility: 0.0,
3970        };
3971        let n = w.normalized();
3972        let default = AdmissionWeights::default();
3973        assert!(
3974            (n.future_utility - default.future_utility).abs() < 0.001,
3975            "zero-sum weights must fall back to defaults"
3976        );
3977    }
3978
3979    // Test: AdmissionConfig default values match documented defaults.
3980    #[test]
3981    fn admission_config_defaults() {
3982        let cfg = AdmissionConfig::default();
3983        assert!(!cfg.enabled);
3984        assert!((cfg.threshold - 0.40).abs() < 0.001);
3985        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
3986        assert!(cfg.admission_provider.is_empty());
3987    }
3988
3989    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────
3990
3991    #[test]
3992    fn spreading_activation_default_recall_timeout_ms_is_1000() {
3993        let cfg = SpreadingActivationConfig::default();
3994        assert_eq!(
3995            cfg.recall_timeout_ms, 1000,
3996            "default recall_timeout_ms must be 1000ms"
3997        );
3998    }
3999
4000    #[test]
4001    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
4002        #[derive(serde::Deserialize)]
4003        struct Wrapper {
4004            recall_timeout_ms: u64,
4005        }
4006        let toml = "recall_timeout_ms = 500";
4007        let w: Wrapper = toml::from_str(toml).unwrap();
4008        assert_eq!(w.recall_timeout_ms, 500);
4009    }
4010
4011    #[test]
4012    fn spreading_activation_validate_cross_field_constraints() {
4013        let mut cfg = SpreadingActivationConfig::default();
4014        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
4015        assert!(cfg.validate().is_ok());
4016
4017        // Equal thresholds must be rejected.
4018        cfg.activation_threshold = 0.5;
4019        cfg.inhibition_threshold = 0.5;
4020        assert!(cfg.validate().is_err());
4021    }
4022
4023    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──
4024
4025    #[test]
4026    fn compression_config_focus_strategy_deserializes() {
4027        let toml = r#"strategy = "focus""#;
4028        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
4029        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
4030    }
4031
4032    #[test]
4033    fn compression_config_density_budget_defaults_on_deserialize() {
4034        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
4035        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
4036        let toml = r#"strategy = "reactive""#;
4037        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
4038        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
4039        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
4040    }
4041
4042    #[test]
4043    fn compression_config_density_budget_round_trip() {
4044        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
4045        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
4046        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
4047        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
4048    }
4049
4050    #[test]
4051    fn compression_config_focus_scorer_provider_default_empty() {
4052        let cfg = CompressionConfig::default();
4053        assert!(cfg.focus_scorer_provider.is_empty());
4054    }
4055
4056    #[test]
4057    fn compression_config_focus_scorer_provider_round_trip() {
4058        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
4059        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
4060        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
4061    }
4062}
4063
4064/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
4065///
4066/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
4067/// Successful and failed reasoning chains are compressed into short, generalizable strategy
4068/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
4069/// and injected into the prompt preamble.
4070///
4071/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
4072///
4073/// # Example
4074///
4075/// ```toml
4076/// [memory.reasoning]
4077/// enabled = true
4078/// extract_provider = "fast"
4079/// distill_provider = "fast"
4080/// top_k = 3
4081/// store_limit = 1000
4082/// ```
4083#[derive(Debug, Clone, Deserialize, Serialize)]
4084#[serde(default)]
4085pub struct ReasoningConfig {
4086    /// Enable the reasoning-bank pipeline. Default: `false`.
4087    pub enabled: bool,
4088    /// Provider name from `[[llm.providers]]` for the self-judge step.
4089    /// Falls back to the primary provider when empty. Default: `""`.
4090    pub extract_provider: ProviderName,
4091    /// Provider name from `[[llm.providers]]` for the distillation step.
4092    /// Falls back to the primary provider when empty. Default: `""`.
4093    pub distill_provider: ProviderName,
4094    /// Number of strategies retrieved per turn for context injection. Default: `3`.
4095    pub top_k: usize,
4096    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
4097    pub store_limit: usize,
4098    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
4099    pub max_messages: usize,
4100    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
4101    pub max_message_chars: usize,
4102    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
4103    pub context_budget_tokens: usize,
4104    /// Minimum number of messages required before self-judge fires. Default: `2`.
4105    pub min_messages: usize,
4106    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
4107    pub extraction_timeout_secs: u64,
4108    /// Timeout in seconds for the distillation LLM call. Default: `30`.
4109    pub distill_timeout_secs: u64,
4110    /// Maximum number of recent messages passed to the self-judge evaluator.
4111    /// Narrowing to the last user+assistant pair improves classification accuracy.
4112    /// Default: `2`.
4113    pub self_judge_window: usize,
4114    /// Minimum characters in the assistant response to trigger self-judge.
4115    /// Short or trivial responses are skipped. Default: `50`.
4116    pub min_assistant_chars: usize,
4117}
4118
4119impl Default for ReasoningConfig {
4120    fn default() -> Self {
4121        Self {
4122            enabled: false,
4123            extract_provider: ProviderName::default(),
4124            distill_provider: ProviderName::default(),
4125            top_k: 3,
4126            store_limit: 1000,
4127            max_messages: 6,
4128            max_message_chars: 2000,
4129            context_budget_tokens: 500,
4130            min_messages: 2,
4131            extraction_timeout_secs: 30,
4132            distill_timeout_secs: 30,
4133            self_judge_window: 2,
4134            min_assistant_chars: 50,
4135        }
4136    }
4137}
4138
4139// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
4140
4141/// Eviction policy variant.
4142///
4143/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid.
4144#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
4145#[serde(rename_all = "lowercase")]
4146#[non_exhaustive]
4147pub enum EvictionPolicy {
4148    /// Ebbinghaus forgetting-curve eviction.
4149    #[default]
4150    Ebbinghaus,
4151}
4152
4153/// Configuration for the memory eviction policy.
4154///
4155/// Controls which policy runs during the periodic sweep and how many entries
4156/// are retained. `zeph-memory` re-exports this type from here.
4157#[derive(Debug, Clone, Deserialize, Serialize)]
4158pub struct EvictionConfig {
4159    /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
4160    pub policy: EvictionPolicy,
4161    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
4162    pub max_entries: usize,
4163    /// How often to run the eviction sweep, in seconds.
4164    pub sweep_interval_secs: u64,
4165}
4166
4167impl Default for EvictionConfig {
4168    fn default() -> Self {
4169        Self {
4170            policy: EvictionPolicy::Ebbinghaus,
4171            max_entries: 0,
4172            sweep_interval_secs: 3600,
4173        }
4174    }
4175}
4176
4177// ── Compression guidelines config (moved from zeph-memory) ───────────────────
4178
4179/// Configuration for ACON failure-driven compression guidelines.
4180///
4181/// `zeph-memory` re-exports this type from here.
4182#[derive(Debug, Clone, Deserialize, Serialize)]
4183#[serde(default)]
4184pub struct CompressionGuidelinesConfig {
4185    /// Enable the feature. Default: `false`.
4186    pub enabled: bool,
4187    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
4188    pub update_threshold: u16,
4189    /// Maximum token budget for the guidelines document. Default: `500`.
4190    pub max_guidelines_tokens: usize,
4191    /// Maximum failure pairs consumed per update cycle. Default: `10`.
4192    pub max_pairs_per_update: usize,
4193    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
4194    pub detection_window_turns: u64,
4195    /// Interval in seconds between background updater checks. Default: `300`.
4196    pub update_interval_secs: u64,
4197    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
4198    pub max_stored_pairs: usize,
4199    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
4200    /// `None` (or `Some("")`) falls back to the primary provider.
4201    #[serde(default, skip_serializing_if = "Option::is_none")]
4202    pub guidelines_provider: Option<ProviderName>,
4203    /// Maintain separate guideline documents per content category.
4204    #[serde(default)]
4205    pub categorized_guidelines: bool,
4206}
4207
4208impl Default for CompressionGuidelinesConfig {
4209    fn default() -> Self {
4210        Self {
4211            enabled: false,
4212            update_threshold: 5,
4213            max_guidelines_tokens: 500,
4214            max_pairs_per_update: 10,
4215            detection_window_turns: 10,
4216            update_interval_secs: 300,
4217            max_stored_pairs: 100,
4218            guidelines_provider: None,
4219            categorized_guidelines: false,
4220        }
4221    }
4222}
4223
4224// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
4225
4226/// Functional category of a compaction probe question.
4227///
4228/// `zeph-memory` re-exports this type from here.
4229#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
4230#[serde(rename_all = "lowercase")]
4231#[non_exhaustive]
4232pub enum ProbeCategory {
4233    /// Did specific facts survive? (file paths, function names, values, decisions)
4234    Recall,
4235    /// Does the agent know which files/tools/URLs it used?
4236    Artifact,
4237    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
4238    Continuation,
4239    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
4240    Decision,
4241}
4242
4243/// Configuration for the compaction probe.
4244///
4245/// `zeph-memory` re-exports this type from here.
4246#[derive(Debug, Clone, Serialize, Deserialize)]
4247#[serde(default)]
4248pub struct CompactionProbeConfig {
4249    /// Enable compaction probe validation. Default: `false`.
4250    pub enabled: bool,
4251    /// Provider name from `[[llm.providers]]` for probe LLM calls.
4252    /// `None` (or `Some("")`) uses the summary provider.
4253    #[serde(default, skip_serializing_if = "Option::is_none")]
4254    pub probe_provider: Option<ProviderName>,
4255    /// Minimum score to pass without warnings. Default: `0.6`.
4256    pub threshold: f32,
4257    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
4258    pub hard_fail_threshold: f32,
4259    /// Maximum number of probe questions to generate. Default: `5`.
4260    pub max_questions: usize,
4261    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
4262    pub timeout_secs: u64,
4263    /// Optional per-category weight multipliers for the overall score.
4264    #[serde(default)]
4265    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
4266}
4267
4268impl Default for CompactionProbeConfig {
4269    fn default() -> Self {
4270        Self {
4271            enabled: false,
4272            probe_provider: None,
4273            threshold: 0.6,
4274            hard_fail_threshold: 0.35,
4275            max_questions: 5,
4276            timeout_secs: 15,
4277            category_weights: None,
4278        }
4279    }
4280}
4281
4282// ── MemCoT semantic state config ─────────────────────────────────────────────
4283
4284/// `MemCoT` semantic-state distillation configuration.
4285///
4286/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
4287/// summarizing conceptual progress across turns. This buffer is injected into graph
4288/// recall queries to improve retrieval relevance.
4289///
4290/// All LLM work (distillation) runs asynchronously — never on the turn thread.
4291/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
4292///
4293/// # Config example
4294///
4295/// ```toml
4296/// [memory.memcot]
4297/// enabled = true
4298/// distill_provider = "fast"
4299/// distill_timeout_secs = 5
4300/// min_assistant_chars = 200
4301/// min_distill_interval_secs = 30
4302/// max_distills_per_session = 50
4303/// max_state_chars = 800
4304/// recall_view = "head"
4305/// ```
4306#[derive(Debug, Clone, Serialize, Deserialize)]
4307#[serde(default)]
4308pub struct MemCotConfig {
4309    /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
4310    ///
4311    /// When `false`, the accumulator is never allocated and no LLM calls are made.
4312    pub enabled: bool,
4313    /// Provider name from `[[llm.providers]]` for distillation.
4314    ///
4315    /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
4316    /// A startup warning is emitted when the resolved model does not look fast-tier.
4317    /// Falls back to the primary provider when empty. Default: `""`.
4318    pub distill_provider: ProviderName,
4319    /// Timeout in seconds for each distillation LLM call. Default: `5`.
4320    pub distill_timeout_secs: u64,
4321    /// Minimum characters in the assistant response to trigger distillation.
4322    /// Short or trivial replies are skipped. Default: `200`.
4323    pub min_assistant_chars: usize,
4324    /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
4325    ///
4326    /// Prevents runaway costs on long sessions with rapid turns.
4327    /// Clearing `/new` resets this counter.
4328    pub min_distill_interval_secs: u64,
4329    /// Maximum distillation spawns per conversation session. Default: `50`.
4330    ///
4331    /// Once this cap is reached the accumulator stops distilling for the rest of the
4332    /// session. Counter is reset when the user sends `/new`.
4333    pub max_distills_per_session: u64,
4334    /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
4335    /// Default: `800`.
4336    pub max_state_chars: usize,
4337    /// Recall view applied when `MemCoT` is active. Default: `Head`.
4338    ///
4339    /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
4340    /// - `zoom_in`: adds source-message provenance to each returned fact.
4341    /// - `zoom_out`: expands 1-hop neighbors per returned fact.
4342    ///
4343    /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
4344    pub recall_view: RecallViewConfig,
4345    /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
4346    pub zoom_out_neighbor_cap: usize,
4347    /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
4348    /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
4349    #[serde(default, skip_serializing_if = "Vec::is_empty")]
4350    pub fast_tier_models: Vec<String>,
4351}
4352
4353/// Recall view variant exposed in config.
4354///
4355/// Maps 1-to-1 to `zeph_memory::RecallView`.
4356#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
4357#[serde(rename_all = "snake_case")]
4358#[non_exhaustive]
4359pub enum RecallViewConfig {
4360    /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
4361    #[default]
4362    Head,
4363    /// Adds source-message provenance to each returned fact.
4364    ZoomIn,
4365    /// Expands 1-hop neighbor facts per returned fact.
4366    ZoomOut,
4367}
4368
4369impl Default for MemCotConfig {
4370    fn default() -> Self {
4371        Self {
4372            enabled: false,
4373            distill_provider: ProviderName::default(),
4374            distill_timeout_secs: 5,
4375            min_assistant_chars: 200,
4376            min_distill_interval_secs: 30,
4377            max_distills_per_session: 50,
4378            max_state_chars: 800,
4379            recall_view: RecallViewConfig::Head,
4380            zoom_out_neighbor_cap: 3,
4381            fast_tier_models: Vec::new(),
4382        }
4383    }
4384}
4385
4386/// `OmniMem` retrieval failure tracking configuration (issue #3576).
4387///
4388/// Controls the async logger that records no-hit and low-confidence recall events
4389/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
4390#[derive(Debug, Clone, Deserialize, Serialize)]
4391#[serde(default)]
4392pub struct RetrievalFailuresConfig {
4393    /// Enable retrieval failure logging. Default: `false`.
4394    pub enabled: bool,
4395    /// Composite recall score below which a result is classified as low-confidence.
4396    ///
4397    /// The threshold applies to the post-reranking composite score (which incorporates
4398    /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
4399    /// the scoring pipeline in use. Default: `0.3`.
4400    #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
4401    pub low_confidence_threshold: f32,
4402    /// Days to retain failure records before automatic cleanup. Default: `90`.
4403    #[serde(default = "default_retrieval_failures_retention_days")]
4404    pub retention_days: u32,
4405    /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
4406    #[serde(default = "default_retrieval_failures_channel_capacity")]
4407    pub channel_capacity: usize,
4408    /// Maximum records collected before flushing a batch INSERT. Default: `16`.
4409    #[serde(default = "default_retrieval_failures_batch_size")]
4410    pub batch_size: usize,
4411    /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
4412    #[serde(default = "default_retrieval_failures_flush_interval_ms")]
4413    pub flush_interval_ms: u64,
4414}
4415
4416impl Default for RetrievalFailuresConfig {
4417    fn default() -> Self {
4418        Self {
4419            enabled: false,
4420            low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
4421            retention_days: default_retrieval_failures_retention_days(),
4422            channel_capacity: default_retrieval_failures_channel_capacity(),
4423            batch_size: default_retrieval_failures_batch_size(),
4424            flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
4425        }
4426    }
4427}
4428
4429// ── TrajectoryRiskAccumulator config (spec 004-16) ─────────────────────────────
4430
4431fn validate_tra_nonneg_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
4432where
4433    D: serde::Deserializer<'de>,
4434{
4435    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
4436    if value.is_nan() || value.is_infinite() || value < 0.0 {
4437        return Err(serde::de::Error::custom(
4438            "signal weight and severity multiplier values must be finite and non-negative",
4439        ));
4440    }
4441    Ok(value)
4442}
4443
4444/// Per-signal-type base weights for the trajectory risk accumulator.
4445///
4446/// Each weight is in `(0.0, 1.0]` and is multiplied by the severity multiplier
4447/// before being added to `trajectory_risk`.
4448///
4449/// # Example (TOML)
4450///
4451/// ```toml
4452/// [memory.shadow_memory.signal_weights]
4453/// prompt_injection = 0.6
4454/// ```
4455#[derive(Debug, Clone, Serialize, Deserialize)]
4456pub struct TrajectorySignalWeights {
4457    /// Weight for `PolicyViolation` signals. Default: `0.30`.
4458    #[serde(
4459        default = "default_sw_policy_violation",
4460        deserialize_with = "validate_tra_nonneg_weight"
4461    )]
4462    pub policy_violation: f64,
4463    /// Weight for `PromptInjectionPattern` signals. Default: `0.50`.
4464    #[serde(
4465        default = "default_sw_prompt_injection",
4466        deserialize_with = "validate_tra_nonneg_weight"
4467    )]
4468    pub prompt_injection: f64,
4469    /// Weight for `ToolChainAnomaly` signals. Default: `0.25`.
4470    #[serde(
4471        default = "default_sw_tool_chain_anomaly",
4472        deserialize_with = "validate_tra_nonneg_weight"
4473    )]
4474    pub tool_chain_anomaly: f64,
4475    /// Weight for `ConfidenceDrop` signals. Default: `0.15`.
4476    #[serde(
4477        default = "default_sw_confidence_drop",
4478        deserialize_with = "validate_tra_nonneg_weight"
4479    )]
4480    pub confidence_drop: f64,
4481}
4482
/// Default base weight for `PolicyViolation` signals.
fn default_sw_policy_violation() -> f64 {
    const POLICY_VIOLATION_WEIGHT: f64 = 0.30;
    POLICY_VIOLATION_WEIGHT
}
/// Default base weight for `PromptInjectionPattern` signals.
fn default_sw_prompt_injection() -> f64 {
    const PROMPT_INJECTION_WEIGHT: f64 = 0.50;
    PROMPT_INJECTION_WEIGHT
}
/// Default base weight for `ToolChainAnomaly` signals.
fn default_sw_tool_chain_anomaly() -> f64 {
    const TOOL_CHAIN_ANOMALY_WEIGHT: f64 = 0.25;
    TOOL_CHAIN_ANOMALY_WEIGHT
}
/// Default base weight for `ConfidenceDrop` signals.
fn default_sw_confidence_drop() -> f64 {
    const CONFIDENCE_DROP_WEIGHT: f64 = 0.15;
    CONFIDENCE_DROP_WEIGHT
}
4495
4496impl Default for TrajectorySignalWeights {
4497    fn default() -> Self {
4498        Self {
4499            policy_violation: default_sw_policy_violation(),
4500            prompt_injection: default_sw_prompt_injection(),
4501            tool_chain_anomaly: default_sw_tool_chain_anomaly(),
4502            confidence_drop: default_sw_confidence_drop(),
4503        }
4504    }
4505}
4506
4507/// Per-severity multipliers applied on top of signal base weights.
4508///
4509/// # Example (TOML)
4510///
4511/// ```toml
4512/// [memory.shadow_memory.severity_multipliers]
4513/// high = 3.0
4514/// ```
4515#[derive(Debug, Clone, Serialize, Deserialize)]
4516pub struct TrajectorySeverityMultipliers {
4517    /// Multiplier for low-severity signals. Default: `0.5`.
4518    #[serde(
4519        default = "default_sev_low",
4520        deserialize_with = "validate_tra_nonneg_weight"
4521    )]
4522    pub low: f64,
4523    /// Multiplier for medium-severity signals. Default: `1.0`.
4524    #[serde(
4525        default = "default_sev_medium",
4526        deserialize_with = "validate_tra_nonneg_weight"
4527    )]
4528    pub medium: f64,
4529    /// Multiplier for high-severity signals. Default: `2.0`.
4530    #[serde(
4531        default = "default_sev_high",
4532        deserialize_with = "validate_tra_nonneg_weight"
4533    )]
4534    pub high: f64,
4535}
4536
/// Default multiplier for low-severity signals.
fn default_sev_low() -> f64 {
    const LOW_SEVERITY_MULTIPLIER: f64 = 0.5;
    LOW_SEVERITY_MULTIPLIER
}
/// Default multiplier for medium-severity signals.
fn default_sev_medium() -> f64 {
    const MEDIUM_SEVERITY_MULTIPLIER: f64 = 1.0;
    MEDIUM_SEVERITY_MULTIPLIER
}
/// Default multiplier for high-severity signals.
fn default_sev_high() -> f64 {
    const HIGH_SEVERITY_MULTIPLIER: f64 = 2.0;
    HIGH_SEVERITY_MULTIPLIER
}
4546
4547impl Default for TrajectorySeverityMultipliers {
4548    fn default() -> Self {
4549        Self {
4550            low: default_sev_low(),
4551            medium: default_sev_medium(),
4552            high: default_sev_high(),
4553        }
4554    }
4555}
4556
4557/// Configuration for the MAGE trajectory risk accumulator (spec 004-16).
4558///
4559/// Controls how per-turn safety signals accumulate into a session-level risk score
4560/// and when tool execution is blocked or escalated.
4561///
4562/// # Example (TOML)
4563///
4564/// ```toml
4565/// [memory.shadow_memory]
4566/// enabled = true
4567/// risk_threshold = 0.75
4568/// escalation_threshold = 0.50
4569/// risk_halflife_turns = 10
4570/// signal_history_cap = 200
4571/// tui_show_risk_gauge = true
4572/// reset_on_compaction = false
4573/// ```
4574#[derive(Debug, Clone, Serialize, Deserialize)]
4575pub struct TrajectoryRiskAccumulatorConfig {
4576    /// Enable shadow memory. When `false`, `TrajectoryRiskAccumulator` is a zero-cost noop.
4577    #[serde(default)]
4578    pub enabled: bool,
4579    /// Block tool execution when `trajectory_risk >= risk_threshold`. Default: `0.75`.
4580    #[serde(default = "default_tra_risk_threshold")]
4581    pub risk_threshold: f64,
4582    /// Escalate to human confirmation when risk is in `[escalation_threshold, risk_threshold)`.
4583    /// Default: `0.50`.
4584    #[serde(default = "default_tra_escalation_threshold")]
4585    pub escalation_threshold: f64,
4586    /// Number of turns after which accumulated risk halves (exponential decay). Default: `10`.
4587    #[serde(default = "default_tra_risk_halflife_turns")]
4588    pub risk_halflife_turns: u32,
4589    /// Maximum number of signal events kept in the ring buffer. Default: `200`.
4590    #[serde(default = "default_tra_signal_history_cap")]
4591    pub signal_history_cap: usize,
4592    /// Show a risk gauge in the TUI security panel when the TUI is enabled. Default: `true`.
4593    #[serde(default = "default_true")]
4594    pub tui_show_risk_gauge: bool,
4595    /// Reset `trajectory_risk` to zero when a context compaction occurs. Default: `false`.
4596    #[serde(default)]
4597    pub reset_on_compaction: bool,
4598    /// Per-signal-type base weights.
4599    #[serde(default)]
4600    pub signal_weights: TrajectorySignalWeights,
4601    /// Per-severity multipliers applied on top of signal weights.
4602    #[serde(default)]
4603    pub severity_multipliers: TrajectorySeverityMultipliers,
4604}
4605
/// Default risk score at which tool execution is blocked.
fn default_tra_risk_threshold() -> f64 {
    const RISK_THRESHOLD: f64 = 0.75;
    RISK_THRESHOLD
}
/// Default risk score at which human confirmation is requested.
fn default_tra_escalation_threshold() -> f64 {
    const ESCALATION_THRESHOLD: f64 = 0.50;
    ESCALATION_THRESHOLD
}
/// Default number of turns after which accumulated risk halves.
fn default_tra_risk_halflife_turns() -> u32 {
    const RISK_HALFLIFE_TURNS: u32 = 10;
    RISK_HALFLIFE_TURNS
}
/// Default capacity of the signal-event ring buffer.
fn default_tra_signal_history_cap() -> usize {
    const SIGNAL_HISTORY_CAP: usize = 200;
    SIGNAL_HISTORY_CAP
}
4618
4619impl Default for TrajectoryRiskAccumulatorConfig {
4620    fn default() -> Self {
4621        Self {
4622            enabled: false,
4623            risk_threshold: default_tra_risk_threshold(),
4624            escalation_threshold: default_tra_escalation_threshold(),
4625            risk_halflife_turns: default_tra_risk_halflife_turns(),
4626            signal_history_cap: default_tra_signal_history_cap(),
4627            tui_show_risk_gauge: true,
4628            reset_on_compaction: false,
4629            signal_weights: TrajectorySignalWeights::default(),
4630            severity_multipliers: TrajectorySeverityMultipliers::default(),
4631        }
4632    }
4633}
4634
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// `MemCotConfig::default()` must be disabled and carry every documented default,
    /// including an empty fast-tier allowlist.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key set in the TOML document must be reflected in the parsed config.
    /// All values differ from the defaults, so a silently ignored key is detected.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        // `fast_tier_models` is absent from the document and must stay empty.
        assert!(cfg.fast_tier_models.is_empty());
    }
}
4676
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    //! Default-value and TOML deserialization tests for `ApexMemConfig`,
    //! `WriteQualityGateConfig`, and the `shutdown_summary_provider`,
    //! `compaction_provider`, and `five_signal` fields of `MemoryConfig`.

    use super::*;

    /// TOML document that sets only `history_limit`; every `MemoryConfig`
    /// test below that inspects default values parses exactly this input.
    const HISTORY_ONLY_TOML: &str = "history_limit = 50";

    /// Asserts that `actual` is within `f32::EPSILON` of `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test line rather
    /// than at this helper.
    #[track_caller]
    fn assert_f32_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < f32::EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    /// Parses `src` as a `MemoryConfig`, panicking with "must deserialize" on failure.
    #[track_caller]
    fn parse_memory_config(src: &str) -> MemoryConfig {
        toml::from_str(src).expect("must deserialize")
    }

    // --- ApexMemConfig -----------------------------------------------------

    /// `ApexMemConfig::default()` leaves the feature switched off.
    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    /// An explicit `enabled = true` in TOML is honoured.
    #[test]
    fn apex_mem_config_serde_round_trip() {
        let src = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(src).unwrap();
        assert!(cfg.enabled);
    }

    /// An empty TOML document deserializes and yields the disabled state.
    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    // --- WriteQualityGateConfig --------------------------------------------

    /// Pins every field of `WriteQualityGateConfig::default()`.
    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert_f32_close(cfg.threshold, 0.55);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert_f32_close(cfg.information_value_weight, 0.4);
        assert_f32_close(cfg.reference_completeness_weight, 0.3);
        assert_f32_close(cfg.contradiction_weight, 0.3);
        assert_f32_close(cfg.rejection_rate_alarm_ratio, 0.35);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert_f32_close(cfg.llm_weight, 0.5);
        assert!(cfg.reference_check_lang_en);
    }

    /// Every key set in the TOML fixture must land in the matching field.
    ///
    /// Each value differs from the default pinned in
    /// `write_quality_gate_config_default_disabled`, so a key that was silently
    /// ignored would be caught here.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let src = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(src).unwrap();
        assert!(cfg.enabled);
        assert_f32_close(cfg.threshold, 0.70);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        assert_f32_close(cfg.information_value_weight, 0.5);
        assert_f32_close(cfg.reference_completeness_weight, 0.25);
        assert_f32_close(cfg.contradiction_weight, 0.25);
        assert_f32_close(cfg.rejection_rate_alarm_ratio, 0.50);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert_f32_close(cfg.llm_weight, 0.3);
        assert!(!cfg.reference_check_lang_en);
    }

    /// An empty TOML document deserializes and yields default values.
    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }

    // --- MemoryConfig: provider overrides ----------------------------------

    /// `shutdown_summary_provider` is read from TOML when present.
    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let cfg = parse_memory_config(
            r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#,
        );
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    /// `shutdown_summary_provider` is the empty string when the key is absent.
    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let cfg = parse_memory_config(HISTORY_ONLY_TOML);
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    /// `compaction_provider` is read from TOML when present.
    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let cfg = parse_memory_config(
            r#"
            history_limit = 50
            compaction_provider = "mid"
        "#,
        );
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    /// `compaction_provider` is the empty string when the key is absent.
    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let cfg = parse_memory_config(HISTORY_ONLY_TOML);
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }

    // --- MemoryConfig: five-signal scoring ---------------------------------

    /// Without a `[five_signal]` table the feature is off, recency and
    /// relevance each weigh 0.35, and the remaining three weights are zero.
    #[test]
    fn five_signal_config_default_is_disabled() {
        let cfg = parse_memory_config(HISTORY_ONLY_TOML);
        assert!(!cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_recency - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_relevance - 0.35).abs() < 1e-9);
        assert!(cfg.five_signal.w_frequency.abs() < 1e-9);
        assert!(cfg.five_signal.w_causal.abs() < 1e-9);
        assert!(cfg.five_signal.w_novelty.abs() < 1e-9);
    }

    /// Every weight set in the `[five_signal]` table must land in its field.
    #[test]
    fn five_signal_config_toml_roundtrip() {
        let cfg = parse_memory_config(
            r"
            history_limit = 50
            [five_signal]
            enabled = true
            w_recency = 0.35
            w_relevance = 0.35
            w_frequency = 0.15
            w_causal = 0.10
            w_novelty = 0.05
        ",
        );
        assert!(cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_recency - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_relevance - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_frequency - 0.15).abs() < 1e-9);
        assert!((cfg.five_signal.w_causal - 0.10).abs() < 1e-9);
        assert!((cfg.five_signal.w_novelty - 0.05).abs() < 1e-9);
    }
}