Skip to main content

zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Default for `MemoryConfig::sqlite_pool_size`: `5`.
fn default_sqlite_pool_size() -> u32 {
    5_u32
}
17
/// Default `max_history` value: `100`.
fn default_max_history() -> usize {
    100_usize
}
21
/// Default `title_max_chars` value: `60`.
fn default_title_max_chars() -> usize {
    60_usize
}
25
/// Default document collection name: `zeph_documents`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
29
/// Default document chunk size: `1000`.
fn default_document_chunk_size() -> usize {
    1_000_usize
}
33
/// Default document chunk overlap: `100`.
fn default_document_chunk_overlap() -> usize {
    100_usize
}
37
/// Default document `top_k` value: `3`.
fn default_document_top_k() -> usize {
    3_usize
}
41
/// Default for `MemoryConfig::autosave_min_length`: `20`.
fn default_autosave_min_length() -> usize {
    20_usize
}
45
/// Default for `MemoryConfig::tool_call_cutoff`: `6`.
fn default_tool_call_cutoff() -> usize {
    6_usize
}
49
/// Default for `MemoryConfig::token_safety_margin`: `1.0`.
fn default_token_safety_margin() -> f32 {
    1.0_f32
}
53
/// Default for `MemoryConfig::redact_credentials`: `true`.
fn default_redact_credentials() -> bool {
    const REDACT_BY_DEFAULT: bool = true;
    REDACT_BY_DEFAULT
}
57
/// Default for `MemoryConfig::qdrant_url`: `http://localhost:6334`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
61
/// Default for `MemoryConfig::summarization_threshold`: `50`.
fn default_summarization_threshold() -> usize {
    50_usize
}
65
/// Default for `MemoryConfig::summarization_llm_timeout_secs`: `60` seconds.
fn default_summarization_llm_timeout_secs() -> u64 {
    60_u64
}
69
/// Default for `MemoryConfig::context_budget_tokens`: `0`.
fn default_context_budget_tokens() -> usize {
    0_usize
}
73
/// Default for `MemoryConfig::soft_compaction_threshold`: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    0.60_f32
}
77
/// Default for `MemoryConfig::hard_compaction_threshold` (alias `compaction_threshold`): `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    0.90_f32
}
81
/// Default for `MemoryConfig::compaction_preserve_tail`: `6`.
fn default_compaction_preserve_tail() -> usize {
    6_usize
}
85
/// Default for `MemoryConfig::compaction_cooldown_turns`: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    2_u8
}
89
/// Default for `MemoryConfig::auto_budget`: `true`.
fn default_auto_budget() -> bool {
    const AUTO_BUDGET_BY_DEFAULT: bool = true;
    AUTO_BUDGET_BY_DEFAULT
}
93
/// Default for `MemoryConfig::prune_protect_tokens`: `40_000`.
fn default_prune_protect_tokens() -> usize {
    40_000_usize
}
97
/// Default for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    0.35_f32
}
101
/// Default temporal-decay half-life, in days: `30`.
fn default_temporal_decay_half_life_days() -> u32 {
    30_u32
}
105
/// Default `mmr_lambda` value: `0.7`.
fn default_mmr_lambda() -> f32 {
    0.7_f32
}
109
/// Default `semantic_enabled` flag: `true`.
fn default_semantic_enabled() -> bool {
    const SEMANTIC_ENABLED_BY_DEFAULT: bool = true;
    SEMANTIC_ENABLED_BY_DEFAULT
}
113
/// Default `recall_limit` value: `5`.
fn default_recall_limit() -> usize {
    5_usize
}
117
/// Default `vector_weight` value: `0.7`.
fn default_vector_weight() -> f64 {
    0.7_f64
}
121
/// Default `keyword_weight` value: `0.3`.
fn default_keyword_weight() -> f64 {
    0.3_f64
}
125
/// Default graph `max_entities_per_message` value: `10`.
fn default_graph_max_entities_per_message() -> usize {
    10_usize
}
129
/// Default graph `max_edges_per_message` value: `15`.
fn default_graph_max_edges_per_message() -> usize {
    15_usize
}
133
/// Default graph `community_refresh_interval` value: `100`.
fn default_graph_community_refresh_interval() -> usize {
    100_usize
}
137
/// Default graph `community_summary_max_prompt_bytes` value: `8192` bytes.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8_192_usize
}
141
/// Default graph `community_summary_concurrency` value: `4`.
fn default_graph_community_summary_concurrency() -> usize {
    4_usize
}
145
/// Default `lpa_edge_chunk_size` value: `10_000`.
fn default_lpa_edge_chunk_size() -> usize {
    10_000_usize
}
149
/// Default graph `entity_similarity_threshold` value: `0.85`.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85_f32
}
153
/// Default graph `entity_ambiguous_threshold` value: `0.70`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70_f32
}
157
/// Default graph extraction timeout, in seconds: `15`.
fn default_graph_extraction_timeout_secs() -> u64 {
    15_u64
}
161
/// Default graph `max_hops` value: `2`.
fn default_graph_max_hops() -> u32 {
    2_u32
}
165
/// Default graph `recall_limit` value: `10`.
fn default_graph_recall_limit() -> usize {
    10_usize
}
169
/// Default retention for expired graph edges, in days: `90`.
fn default_graph_expired_edge_retention_days() -> u32 {
    90_u32
}
173
/// Default graph `temporal_decay_rate` value: `0.0`.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0_f64
}
177
/// Default graph `edge_history_limit` value: `100`.
fn default_graph_edge_history_limit() -> usize {
    100_usize
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85_f32
}
185
/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    3_u32
}
189
/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1_f32
}
193
/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8_f32
}
197
/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50_usize
}
201
/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000` milliseconds.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1_000_u64
}
205
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85_f32
}
209
/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    10_usize
}
213
/// Default for `NoteLinkingConfig::timeout_secs`: `5` seconds.
fn default_note_linking_timeout_secs() -> u64 {
    5_u64
}
217
/// Default for `MemoryConfig::shutdown_summary`: `true`.
fn default_shutdown_summary() -> bool {
    const SUMMARIZE_ON_SHUTDOWN: bool = true;
    SUMMARIZE_ON_SHUTDOWN
}
221
/// Default for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    4_usize
}
225
/// Default for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    20_usize
}
229
/// Default for `MemoryConfig::shutdown_summary_timeout_secs`: `30` seconds.
///
/// NOTE(review): the doc comment on `MemoryConfig::shutdown_summary_timeout_secs` states
/// `Default: 10`, but this function returns `30` — confirm which value is intended.
fn default_shutdown_summary_timeout_secs() -> u64 {
    30_u64
}
233
234fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
235where
236    D: serde::Deserializer<'de>,
237{
238    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
239    if value.is_nan() || value.is_infinite() {
240        return Err(serde::de::Error::custom(
241            "similarity_threshold must be a finite number",
242        ));
243    }
244    if !(0.5..=1.0).contains(&value) {
245        return Err(serde::de::Error::custom(
246            "similarity_threshold must be in [0.5, 1.0]",
247        ));
248    }
249    Ok(value)
250}
251
252fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
253where
254    D: serde::Deserializer<'de>,
255{
256    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
257    if value < 2 {
258        return Err(serde::de::Error::custom(
259            "promotion_min_sessions must be >= 2",
260        ));
261    }
262    Ok(value)
263}
264
265fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
266where
267    D: serde::Deserializer<'de>,
268{
269    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
270    if value == 0 {
271        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
272    }
273    Ok(value)
274}
275
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    3_u32
}
279
/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    0.92_f32
}
283
/// Default for `TierConfig::sweep_interval_secs`: `3600` seconds.
fn default_tier_sweep_interval_secs() -> u64 {
    3_600_u64
}
287
/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    100_usize
}
291
/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    0.80_f32
}
295
/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    50_usize
}
299
300fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
301where
302    D: serde::Deserializer<'de>,
303{
304    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
305    if value.is_nan() || value.is_infinite() {
306        return Err(serde::de::Error::custom(
307            "scene_similarity_threshold must be a finite number",
308        ));
309    }
310    if !(0.5..=1.0).contains(&value) {
311        return Err(serde::de::Error::custom(
312            "scene_similarity_threshold must be in [0.5, 1.0]",
313        ));
314    }
315    Ok(value)
316}
317
318fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
319where
320    D: serde::Deserializer<'de>,
321{
322    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
323    if value == 0 {
324        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
325    }
326    Ok(value)
327}
328
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// The container-level `#[serde(default)]` means any key omitted from the TOML table is
/// filled in from this type's `Default` implementation.
///
/// # Validation
///
/// Constraints enforced at deserialization time (via per-field `deserialize_with` hooks):
/// - `similarity_threshold` finite and in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` finite and in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// `sweep_interval_secs` and `scene_sweep_interval_secs` have no validator attached.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep.
    /// Must be `>= 1`. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
375
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200` seconds.
fn default_scene_sweep_interval_secs() -> u64 {
    7_200_u64
}
379
380impl Default for TierConfig {
381    fn default() -> Self {
382        Self {
383            enabled: false,
384            promotion_min_sessions: default_tier_promotion_min_sessions(),
385            similarity_threshold: default_tier_similarity_threshold(),
386            sweep_interval_secs: default_tier_sweep_interval_secs(),
387            sweep_batch_size: default_tier_sweep_batch_size(),
388            scene_enabled: false,
389            scene_similarity_threshold: default_scene_similarity_threshold(),
390            scene_batch_size: default_scene_batch_size(),
391            scene_provider: ProviderName::default(),
392            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
393        }
394    }
395}
396
397fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
398where
399    D: serde::Deserializer<'de>,
400{
401    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
402    if value.is_nan() || value.is_infinite() {
403        return Err(serde::de::Error::custom(
404            "temporal_decay_rate must be a finite number",
405        ));
406    }
407    if !(0.0..=10.0).contains(&value) {
408        return Err(serde::de::Error::custom(
409            "temporal_decay_rate must be in [0.0, 10.0]",
410        ));
411    }
412    Ok(value)
413}
414
415fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
416where
417    D: serde::Deserializer<'de>,
418{
419    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
420    if value.is_nan() || value.is_infinite() {
421        return Err(serde::de::Error::custom(
422            "similarity_threshold must be a finite number",
423        ));
424    }
425    if !(0.0..=1.0).contains(&value) {
426        return Err(serde::de::Error::custom(
427            "similarity_threshold must be in [0.0, 1.0]",
428        ));
429    }
430    Ok(value)
431}
432
433fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
434where
435    D: serde::Deserializer<'de>,
436{
437    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
438    if value.is_nan() || value.is_infinite() {
439        return Err(serde::de::Error::custom(
440            "importance_weight must be a finite number",
441        ));
442    }
443    if value < 0.0 {
444        return Err(serde::de::Error::custom(
445            "importance_weight must be non-negative",
446        ));
447    }
448    if value > 1.0 {
449        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
450    }
451    Ok(value)
452}
453
/// Default `importance_weight` value: `0.15`.
fn default_importance_weight() -> f64 {
    0.15_f64
}
457
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Enforced at deserialization time by per-field `deserialize_with` hooks:
/// - `0.0 < decay_lambda <= 1.0` (and finite)
/// - `max_hops >= 1`
///
/// Enforced only by [`SpreadingActivationConfig::validate`], not by the deserializer:
/// - `activation_threshold < inhibition_threshold`
///
/// Not checked by this type:
/// - `recall_timeout_ms` — documented as clamped to 100ms at runtime when below 1; the
///   clamp is not implemented here.
/// - `seed_structural_weight` — documented range `[0.0, 1.0]`, but no validator is attached.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
500
501fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
502where
503    D: serde::Deserializer<'de>,
504{
505    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
506    if value.is_nan() || value.is_infinite() {
507        return Err(serde::de::Error::custom(
508            "decay_lambda must be a finite number",
509        ));
510    }
511    if !(value > 0.0 && value <= 1.0) {
512        return Err(serde::de::Error::custom(
513            "decay_lambda must be in (0.0, 1.0]",
514        ));
515    }
516    Ok(value)
517}
518
519fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
520where
521    D: serde::Deserializer<'de>,
522{
523    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
524    if value == 0 {
525        return Err(serde::de::Error::custom("max_hops must be >= 1"));
526    }
527    Ok(value)
528}
529
530impl SpreadingActivationConfig {
531    /// Validate cross-field constraints that cannot be expressed in per-field validators.
532    ///
533    /// # Errors
534    ///
535    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
536    pub fn validate(&self) -> Result<(), String> {
537        if self.activation_threshold >= self.inhibition_threshold {
538            return Err(format!(
539                "activation_threshold ({}) must be < inhibition_threshold ({})",
540                self.activation_threshold, self.inhibition_threshold
541            ));
542        }
543        Ok(())
544    }
545}
546
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    0.4_f32
}
550
/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    3_usize
}
554
555impl Default for SpreadingActivationConfig {
556    fn default() -> Self {
557        Self {
558            enabled: false,
559            decay_lambda: default_spreading_activation_decay_lambda(),
560            max_hops: default_spreading_activation_max_hops(),
561            activation_threshold: default_spreading_activation_activation_threshold(),
562            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
563            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
564            seed_structural_weight: default_seed_structural_weight(),
565            seed_community_cap: default_seed_community_cap(),
566            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
567        }
568    }
569}
570
/// Kumiho belief revision configuration.
///
/// The container-level `#[serde(default)]` fills omitted keys from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision.
    /// Must be finite and in `[0.0, 1.0]` (checked at deserialization time). Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
582
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85_f32
}
586
587impl Default for BeliefRevisionConfig {
588    fn default() -> Self {
589        Self {
590            enabled: false,
591            similarity_threshold: default_belief_revision_similarity_threshold(),
592        }
593    }
594}
595
/// D-MEM RPE-based tiered graph extraction routing configuration.
///
/// The container-level `#[serde(default)]` fills omitted keys from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    ///
    /// NOTE(review): this field reuses the shared `validate_similarity_threshold` hook, so
    /// validation errors for it are worded in terms of `similarity_threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}
609
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    0.3_f32
}
613
/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    5_u32
}
617
618impl Default for RpeConfig {
619    fn default() -> Self {
620        Self {
621            enabled: false,
622            threshold: default_rpe_threshold(),
623            max_skip_turns: default_rpe_max_skip_turns(),
624        }
625    }
626}
627
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// The container-level `#[serde(default)]` fills omitted keys from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge.
    /// Must be finite and in `[0.0, 1.0]` (checked at deserialization time). Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
646
647impl Default for NoteLinkingConfig {
648    fn default() -> Self {
649        Self {
650            enabled: false,
651            similarity_threshold: default_note_linking_similarity_threshold(),
652            top_k: default_note_linking_top_k(),
653            timeout_secs: default_note_linking_timeout_secs(),
654        }
655    }
656}
657
/// Vector backend selector for embedding storage.
///
/// Variants are (de)serialized by their lowercase names (`"qdrant"`, `"sqlite"`) because of
/// `#[serde(rename_all = "lowercase")]`. [`VectorBackend::Sqlite`] is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Qdrant backend; serialized as `"qdrant"`.
    Qdrant,
    /// SQLite backend; serialized as `"sqlite"`. This is the default variant.
    #[default]
    Sqlite,
}
666
667impl VectorBackend {
668    /// Return the lowercase identifier string for this backend.
669    ///
670    /// # Examples
671    ///
672    /// ```
673    /// use zeph_config::VectorBackend;
674    ///
675    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
676    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
677    /// ```
678    #[must_use]
679    pub fn as_str(&self) -> &'static str {
680        match self {
681            Self::Qdrant => "qdrant",
682            Self::Sqlite => "sqlite",
683        }
684    }
685}
686
687/// Memory subsystem configuration, nested under `[memory]` in TOML.
688///
689/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
690/// multi-tier promotion, and all memory-related background tasks.
691///
692/// # Example (TOML)
693///
694/// ```toml
695/// [memory]
696/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
697/// qdrant_url = "http://localhost:6334"
698/// history_limit = 50
699/// summarization_threshold = 50
700/// auto_budget = true
701/// ```
702#[derive(Debug, Deserialize, Serialize)]
703#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
704pub struct MemoryConfig {
705    #[serde(default)]
706    pub compression_guidelines: CompressionGuidelinesConfig,
707    #[serde(default = "default_sqlite_path_field")]
708    pub sqlite_path: String,
709    pub history_limit: u32,
710    #[serde(default = "default_qdrant_url")]
711    pub qdrant_url: String,
712    /// Optional API key for authenticating to a remote or managed Qdrant cluster.
713    ///
714    /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
715    /// Leave `None` for local dev instances. The actual key is resolved from the vault:
716    /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
717    ///
718    /// The value is wrapped in [`Secret`] to prevent accidental logging.
719    /// `skip_serializing` prevents the key from being written back to TOML on config save.
720    #[serde(default, skip_serializing)]
721    pub qdrant_api_key: Option<Secret>,
722    #[serde(default)]
723    pub semantic: SemanticConfig,
724    #[serde(default = "default_summarization_threshold")]
725    pub summarization_threshold: usize,
726    /// LLM call timeout for summarization, in seconds. Default: `60`.
727    #[serde(default = "default_summarization_llm_timeout_secs")]
728    pub summarization_llm_timeout_secs: u64,
729    #[serde(default = "default_context_budget_tokens")]
730    pub context_budget_tokens: usize,
731    #[serde(default = "default_soft_compaction_threshold")]
732    pub soft_compaction_threshold: f32,
733    #[serde(
734        default = "default_hard_compaction_threshold",
735        alias = "compaction_threshold"
736    )]
737    pub hard_compaction_threshold: f32,
738    #[serde(default = "default_compaction_preserve_tail")]
739    pub compaction_preserve_tail: usize,
740    #[serde(default = "default_compaction_cooldown_turns")]
741    pub compaction_cooldown_turns: u8,
742    #[serde(default = "default_auto_budget")]
743    pub auto_budget: bool,
744    #[serde(default = "default_prune_protect_tokens")]
745    pub prune_protect_tokens: usize,
746    #[serde(default = "default_cross_session_score_threshold")]
747    pub cross_session_score_threshold: f32,
748    #[serde(default)]
749    pub vector_backend: VectorBackend,
750    #[serde(default = "default_token_safety_margin")]
751    pub token_safety_margin: f32,
752    #[serde(default = "default_redact_credentials")]
753    pub redact_credentials: bool,
754    #[serde(default = "default_true")]
755    pub autosave_assistant: bool,
756    #[serde(default = "default_autosave_min_length")]
757    pub autosave_min_length: usize,
758    #[serde(default = "default_tool_call_cutoff")]
759    pub tool_call_cutoff: usize,
760    #[serde(default = "default_sqlite_pool_size")]
761    pub sqlite_pool_size: u32,
762    #[serde(default)]
763    pub sessions: SessionsConfig,
764    #[serde(default)]
765    pub documents: DocumentConfig,
766    #[serde(default)]
767    pub eviction: EvictionConfig,
768    #[serde(default)]
769    pub compression: CompressionConfig,
770    #[serde(default)]
771    pub sidequest: SidequestConfig,
772    #[serde(default)]
773    pub graph: GraphConfig,
774    /// Store a lightweight session summary to the vector store on shutdown when no session
775    /// summary exists yet for this conversation. Enables cross-session recall for short or
776    /// interrupted sessions that never triggered hard compaction. Default: `true`.
777    #[serde(default = "default_shutdown_summary")]
778    pub shutdown_summary: bool,
779    /// Minimum number of user-turn messages required before a shutdown summary is generated.
780    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
781    #[serde(default = "default_shutdown_summary_min_messages")]
782    pub shutdown_summary_min_messages: usize,
783    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
784    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
785    /// Default: `20`.
786    #[serde(default = "default_shutdown_summary_max_messages")]
787    pub shutdown_summary_max_messages: usize,
788    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
789    /// Applies independently to the structured call and to the plain-text fallback.
790    /// Default: `10`.
791    #[serde(default = "default_shutdown_summary_timeout_secs")]
792    pub shutdown_summary_timeout_secs: u64,
793    /// LLM provider used for shutdown summarization calls.
794    ///
795    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
796    /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
797    ///
798    /// Example:
799    /// ```toml
800    /// [memory]
801    /// shutdown_summary_provider = "fast"
802    /// ```
803    #[serde(default)]
804    pub shutdown_summary_provider: ProviderName,
805    /// LLM provider used for deferred tool-pair summarization (context compaction).
806    ///
807    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
808    /// provider. A mid-tier model is usually sufficient for compaction summaries.
809    ///
810    /// Example:
811    /// ```toml
812    /// [memory]
813    /// compaction_provider = "fast"
814    /// ```
815    #[serde(default)]
816    pub compaction_provider: ProviderName,
817    /// Use structured anchored summaries for context compaction.
818    ///
819    /// When enabled, hard compaction requests a JSON schema from the LLM
820    /// instead of free-form prose. Falls back to prose if the LLM fails
821    /// to produce valid JSON. Default: `false`.
822    #[serde(default)]
823    pub structured_summaries: bool,
824    /// AOI three-layer memory tier promotion system.
825    ///
826    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
827    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
828    #[serde(default)]
829    pub tiers: TierConfig,
830    /// A-MAC adaptive memory admission control.
831    ///
832    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
833    /// if its composite admission score falls below the configured threshold.
834    #[serde(default)]
835    pub admission: AdmissionConfig,
836    /// Session digest generation at session end. Default: disabled.
837    #[serde(default)]
838    pub digest: DigestConfig,
839    /// Context assembly strategy. Default: `full_history` (current behavior).
840    #[serde(default)]
841    pub context_strategy: ContextStrategy,
842    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
843    #[serde(default = "default_crossover_turn_threshold")]
844    pub crossover_turn_threshold: u32,
845    /// All-Mem lifelong memory consolidation sweep.
846    ///
847    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
848    /// messages and merges them into consolidated entries via LLM.
849    #[serde(default)]
850    pub consolidation: ConsolidationConfig,
851    /// `SleepGate` forgetting sweep (#2397).
852    ///
853    /// When `forgetting.enabled = true`, a background loop periodically decays importance
854    /// scores and prunes memories below the forgetting floor.
855    #[serde(default)]
856    pub forgetting: ForgettingConfig,
857    /// `PostgreSQL` connection URL.
858    ///
859    /// Used when the binary is compiled with `--features postgres`.
860    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
861    /// Example: `postgres://user:pass@localhost:5432/zeph`
862    /// Default: `None` (uses `sqlite_path` instead).
863    #[serde(default)]
864    pub database_url: Option<String>,
865    /// Cost-sensitive store routing (#2444).
866    ///
867    /// When `store_routing.enabled = true`, query intent is classified and routed to
868    /// the cheapest sufficient backend instead of querying all stores on every turn.
869    #[serde(default)]
870    pub store_routing: StoreRoutingConfig,
871    /// Persona memory layer (#2461).
872    ///
873    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
874    /// from conversation history and injected into context after the system prompt.
875    #[serde(default)]
876    pub persona: PersonaConfig,
877    /// Trajectory-informed memory (#2498).
878    #[serde(default)]
879    pub trajectory: TrajectoryConfig,
880    /// Category-aware memory (#2428).
881    #[serde(default)]
882    pub category: CategoryConfig,
883    /// `TiMem` temporal-hierarchical memory tree (#2262).
884    #[serde(default)]
885    pub tree: TreeConfig,
886    /// Time-based microcompact (#2699).
887    ///
888    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
889    /// from context when the session has been idle longer than `gap_threshold_minutes`.
890    #[serde(default)]
891    pub microcompact: MicrocompactConfig,
892    /// autoDream background memory consolidation (#2697).
893    ///
894    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
895    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
896    #[serde(default)]
897    pub autodream: AutoDreamConfig,
898    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
899    ///
900    /// Before inserting a new key fact, its nearest neighbour is looked up in the
901    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
902    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
903    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
904    #[serde(default = "default_key_facts_dedup_threshold")]
905    pub key_facts_dedup_threshold: f32,
906    /// Experience compression spectrum (#3305).
907    ///
908    /// Controls three-tier retrieval policy and background skill-promotion engine.
909    #[serde(default)]
910    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
911    /// MemMachine-inspired retrieval-stage tuning (#3340).
912    ///
913    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
914    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
915    /// uniformly across graph, hybrid, and vector-only recall paths.
916    ///
917    /// # Example (TOML)
918    ///
919    /// ```toml
920    /// [memory.retrieval]
921    /// depth = 40
922    /// search_prompt_template = ""
923    /// context_format = "structured"
924    /// ```
925    #[serde(default)]
926    pub retrieval: RetrievalConfig,
927    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
928    ///
929    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
930    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
931    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
932    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
933    /// and injected before the LLM call.
934    #[serde(default)]
935    pub reasoning: ReasoningConfig,
936    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
937    ///
938    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
939    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
940    ///
941    /// # Example (TOML)
942    ///
943    /// ```toml
944    /// [memory.hebbian]
945    /// enabled = true
946    /// hebbian_lr = 0.1
947    /// ```
948    #[serde(default)]
949    pub hebbian: HebbianConfig,
950    /// `MemCoT` rolling semantic state configuration (#3574).
951    ///
952    /// When `enabled = true`, each completed assistant turn spawns a background distillation
953    /// task that compresses the response into a short semantic state buffer. The buffer is
954    /// prepended to graph recall queries so retrieval stays contextually relevant across long
955    /// multi-turn sessions.
956    ///
957    /// # Example (TOML)
958    ///
959    /// ```toml
960    /// [memory.memcot]
961    /// enabled = true
962    /// distill_provider = "fast"
963    /// min_assistant_chars = 200
964    /// max_distills_per_session = 50
965    /// ```
966    #[serde(default)]
967    pub memcot: MemCotConfig,
968    /// `OmniMem` retrieval failure tracking (issue #3576).
969    ///
970    /// When `enabled = true`, no-hit and low-confidence recall events are logged
971    /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
972    ///
973    /// # Example (TOML)
974    ///
975    /// ```toml
976    /// [memory.retrieval_failures]
977    /// enabled = true
978    /// low_confidence_threshold = 0.3
979    /// retention_days = 90
980    /// ```
981    #[serde(default)]
982    pub retrieval_failures: RetrievalFailuresConfig,
983    /// Write quality gate (#3629).
984    ///
985    /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
986    /// writes are rejected before persistence. Evaluated after A-MAC admission control.
987    #[serde(default)]
988    pub quality_gate: WriteQualityGateConfig,
989    /// `MemFlow` tiered intent-driven retrieval (issue #3712).
990    ///
991    /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
992    /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
993    /// `DeepReasoning`) with optional validation and tier escalation.
994    #[serde(default)]
995    pub tiered_retrieval: TieredRetrievalConfig,
996    /// `ScrapMem` optical forgetting (issue #3713).
997    ///
998    /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
999    /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1000    #[serde(default)]
1001    pub optical_forgetting: OpticalForgettingConfig,
1002    /// EM-Graph episodic event extraction and causal linking (issue #3713).
1003    ///
1004    /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1005    /// and linked via causal relationships, enabling causal-chain retrieval.
1006    #[serde(default)]
1007    pub em_graph: EmGraphConfig,
1008    /// Episodic-to-semantic consolidation daemon (issue #3799).
1009    ///
1010    /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1011    /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1012    /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1013    #[serde(default)]
1014    pub episodic_consolidation: EpisodicConsolidationConfig,
1015}
1016
1017// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1018
/// `MemFlow` tiered intent-driven retrieval configuration.
///
/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
///
/// The container-level `#[serde(default)]` fills every key that is missing from the TOML
/// table with the corresponding value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// The numeric and boolean values shown are the built-in defaults.
///
/// ```toml
/// [memory.tiered_retrieval]
/// enabled = false
/// classifier_provider = ""
/// validator_provider = ""
/// token_budget = 4096
/// validation_enabled = false
/// validation_threshold = 0.6
/// max_escalations = 1
/// classifier_timeout_secs = 5
/// validator_timeout_secs = 5
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TieredRetrievalConfig {
    /// Enable `MemFlow` tiered retrieval. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for intent classification.
    ///
    /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
    /// is set but the call fails, falls back to the heuristic (fail-open).
    pub classifier_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for evidence validation.
    ///
    /// When empty or when `validation_enabled = false`, no validation call is made.
    pub validator_provider: ProviderName,
    /// Maximum tokens to gather for evidence per query. Default: `4096`.
    pub token_budget: usize,
    /// Enable evidence validation and tier escalation. Default: `false`.
    pub validation_enabled: bool,
    /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
    ///
    /// NOTE(review): no range check is visible on this field; presumably expected to lie
    /// in `[0.0, 1.0]` — confirm against the validator.
    pub validation_threshold: f32,
    /// Maximum tier escalations per query. Default: `1`.
    pub max_escalations: u8,
    /// Timeout in seconds for the classifier LLM call. Default: `5`.
    ///
    /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
    pub classifier_timeout_secs: u64,
    /// Timeout in seconds for the validator LLM call. Default: `5`.
    ///
    /// On timeout the validator is treated as sufficient (fail-open).
    pub validator_timeout_secs: u64,
}
1070
1071impl Default for TieredRetrievalConfig {
1072    fn default() -> Self {
1073        Self {
1074            enabled: false,
1075            classifier_provider: ProviderName::default(),
1076            validator_provider: ProviderName::default(),
1077            token_budget: 4096,
1078            validation_enabled: false,
1079            validation_threshold: 0.6,
1080            max_escalations: 1,
1081            classifier_timeout_secs: 5,
1082            validator_timeout_secs: 5,
1083        }
1084    }
1085}
1086
1087// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1088
/// `ScrapMem` optical forgetting configuration.
///
/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
/// forgetting compresses content in place based on age.
///
/// The container-level `#[serde(default)]` fills every key that is missing from the TOML
/// table with the corresponding value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// The numeric and boolean values shown are the built-in defaults.
///
/// ```toml
/// [memory.optical_forgetting]
/// enabled = false
/// compress_provider = ""
/// compress_after_turns = 100
/// summarize_after_turns = 500
/// sweep_interval_secs = 3600
/// sweep_batch_size = 50
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct OpticalForgettingConfig {
    /// Enable optical forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for LLM-based content compression.
    /// Falls back to the primary provider when empty.
    pub compress_provider: ProviderName,
    /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
    pub compress_after_turns: u32,
    /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
    ///
    /// NOTE(review): nothing in this struct enforces
    /// `summarize_after_turns >= compress_after_turns` — confirm whether the sweep validates it.
    pub summarize_after_turns: u32,
    /// How often the sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to compress per sweep iteration. Default: `50`.
    pub sweep_batch_size: usize,
}
1123
1124impl Default for OpticalForgettingConfig {
1125    fn default() -> Self {
1126        Self {
1127            enabled: false,
1128            compress_provider: ProviderName::default(),
1129            compress_after_turns: 100,
1130            summarize_after_turns: 500,
1131            sweep_interval_secs: 3600,
1132            sweep_batch_size: 50,
1133        }
1134    }
1135}
1136
1137// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1138
/// EM-Graph episodic event extraction and causal linking configuration.
///
/// When enabled, episodic events are extracted from conversation turns and linked
/// via causal relationships stored in `episodic_events` and `causal_links` tables.
///
/// The container-level `#[serde(default)]` fills every key that is missing from the TOML
/// table with the corresponding value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// The numeric and boolean values shown are the built-in defaults.
///
/// ```toml
/// [memory.em_graph]
/// enabled = false
/// extract_provider = ""
/// max_chain_depth = 3
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EmGraphConfig {
    /// Enable EM-Graph event extraction and causal linking. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for event extraction.
    /// Falls back to the primary provider when empty.
    pub extract_provider: ProviderName,
    /// Maximum hops when traversing causal chains during recall. Default: `3`.
    pub max_chain_depth: u32,
}
1163
1164impl Default for EmGraphConfig {
1165    fn default() -> Self {
1166        Self {
1167            enabled: false,
1168            extract_provider: ProviderName::default(),
1169            max_chain_depth: 3,
1170        }
1171    }
1172}
1173
1174// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1175
/// Serde default for `EpisodicConsolidationConfig::interval_secs`: 30 minutes, in seconds.
fn default_episodic_consolidation_interval_secs() -> u64 {
    30 * 60
}
1179
/// Serde default for `EpisodicConsolidationConfig::batch_size`: 30 events per sweep.
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1183
/// Serde default for `EpisodicConsolidationConfig::min_age_secs`: 5 minutes, in seconds.
fn default_episodic_consolidation_min_age_secs() -> u64 {
    5 * 60
}
1187
/// Serde default for `EpisodicConsolidationConfig::dedup_jaccard_threshold`: `0.6`.
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const JACCARD_DUPLICATE_THRESHOLD: f32 = 0.6;
    JACCARD_DUPLICATE_THRESHOLD
}
1191
/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
///
/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
/// extracts durable factual statements via LLM, deduplicates them against existing
/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
///
/// Missing keys are filled from this type's [`Default`] implementation (container-level
/// `#[serde(default)]`). The per-field `#[serde(default = "…")]` attributes name the same
/// helper functions that the `Default` implementation calls.
///
/// # Example (TOML)
///
/// The numeric and boolean values shown are the built-in defaults.
///
/// ```toml
/// [memory.episodic_consolidation]
/// enabled = false
/// consolidation_provider = ""
/// interval_secs = 1800
/// batch_size = 30
/// min_age_secs = 300
/// dedup_jaccard_threshold = 0.6
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EpisodicConsolidationConfig {
    /// Enable the episodic consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
    /// Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
    #[serde(default = "default_episodic_consolidation_interval_secs")]
    pub interval_secs: u64,
    /// Maximum number of episodic events to process per sweep. Default: `30`.
    #[serde(default = "default_episodic_consolidation_batch_size")]
    pub batch_size: usize,
    /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
    /// Prevents consolidating events from the active conversation.
    #[serde(default = "default_episodic_consolidation_min_age_secs")]
    pub min_age_secs: u64,
    /// Jaccard similarity threshold for deduplication against existing key facts.
    /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
    #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
    pub dedup_jaccard_threshold: f32,
}
1233
1234impl Default for EpisodicConsolidationConfig {
1235    fn default() -> Self {
1236        Self {
1237            enabled: false,
1238            consolidation_provider: ProviderName::default(),
1239            interval_secs: default_episodic_consolidation_interval_secs(),
1240            batch_size: default_episodic_consolidation_batch_size(),
1241            min_age_secs: default_episodic_consolidation_min_age_secs(),
1242            dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1243        }
1244    }
1245}
1246
/// Default low-confidence threshold (`0.3`) for retrieval-failure tracking.
///
/// Matches the `low_confidence_threshold = 0.3` shown in the `[memory.retrieval_failures]`
/// TOML example elsewhere in this file; the consuming field is outside this view.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const LOW_CONFIDENCE: f32 = 0.3;
    LOW_CONFIDENCE
}
1250
/// Default retention period (`90` days) for retrieval-failure records.
///
/// Matches the `retention_days = 90` shown in the `[memory.retrieval_failures]` TOML
/// example elsewhere in this file; the consuming field is outside this view.
fn default_retrieval_failures_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
1254
/// Default channel capacity (`256`) for retrieval-failure tracking.
///
/// NOTE(review): presumably the bound of the queue that feeds the asynchronous failure
/// logger; the consuming field is outside this view — confirm.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CHANNEL_CAPACITY: usize = 256;
    CHANNEL_CAPACITY
}
1258
/// Default batch size (`16`) for retrieval-failure tracking.
///
/// NOTE(review): presumably the number of failure records written per flush; the
/// consuming field is outside this view — confirm.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH_SIZE: usize = 16;
    BATCH_SIZE
}
1262
/// Default flush interval (`100` ms) for retrieval-failure tracking.
///
/// NOTE(review): presumably the maximum delay before buffered failure records are
/// written; the consuming field is outside this view — confirm.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const FLUSH_INTERVAL_MS: u64 = 100;
    FLUSH_INTERVAL_MS
}
1266
/// Default `crossover_turn_threshold` (`20` turns).
///
/// `ContextStrategy::Adaptive` documents this threshold as the turn count beyond which
/// context assembly switches from `FullHistory` to `MemoryFirst`.
fn default_crossover_turn_threshold() -> u32 {
    const CROSSOVER_TURNS: u32 = 20;
    CROSSOVER_TURNS
}
1270
/// Serde default for `key_facts_dedup_threshold`: a cosine similarity of `0.95`, at or
/// above which a new key fact is treated as a near-duplicate and skipped.
fn default_key_facts_dedup_threshold() -> f32 {
    const NEAR_DUPLICATE_COSINE: f32 = 0.95;
    NEAR_DUPLICATE_COSINE
}
1274
/// Session digest configuration (#2289).
///
/// Missing keys are filled from this type's [`Default`] implementation (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when `None`. Default: `None`.
    #[serde(default)]
    pub provider: Option<ProviderName>,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
1290
1291impl Default for DigestConfig {
1292    fn default() -> Self {
1293        Self {
1294            enabled: false,
1295            provider: None,
1296            max_tokens: 500,
1297            max_input_messages: 50,
1298        }
1299    }
1300}
1301
/// Context assembly strategy (#2288).
///
/// Serialized in `snake_case` (`rename_all`), so the accepted configuration values are
/// `"full_history"`, `"memory_first"`, and `"adaptive"`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
1317
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
///
/// Missing keys fall back to the same helper functions that the [`Default`]
/// implementation uses.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
1329
1330impl Default for SessionsConfig {
1331    fn default() -> Self {
1332        Self {
1333            max_history: default_max_history(),
1334            title_max_chars: default_title_max_chars(),
1335        }
1336    }
1337}
1338
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Every field carries its own serde default, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Name of the collection that holds document chunks. Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Size of each document chunk. Default: `1000`.
    ///
    /// NOTE(review): the unit (characters vs. tokens) is not stated in this file —
    /// confirm against the chunker.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, presumably in the same unit as `chunk_size`
    /// (confirm against the chunker). Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
1355
1356impl Default for DocumentConfig {
1357    fn default() -> Self {
1358        Self {
1359            collection: default_document_collection(),
1360            chunk_size: default_document_chunk_size(),
1361            chunk_overlap: default_document_chunk_overlap(),
1362            top_k: default_document_top_k(),
1363            rag_enabled: false,
1364        }
1365    }
1366}
1367
1368/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1369///
1370/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1371/// re-ranking, and hybrid BM25+vector weighting.
1372///
1373/// # Example (TOML)
1374///
1375/// ```toml
1376/// [memory.semantic]
1377/// enabled = true
1378/// recall_limit = 5
1379/// vector_weight = 0.7
1380/// keyword_weight = 0.3
1381/// mmr_lambda = 0.7
1382/// ```
1383#[derive(Debug, Deserialize, Serialize)]
1384#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1385pub struct SemanticConfig {
1386    /// Enable vector-based semantic recall. Default: `true`.
1387    #[serde(default = "default_semantic_enabled")]
1388    pub enabled: bool,
1389    #[serde(default = "default_recall_limit")]
1390    pub recall_limit: usize,
1391    #[serde(default = "default_vector_weight")]
1392    pub vector_weight: f64,
1393    #[serde(default = "default_keyword_weight")]
1394    pub keyword_weight: f64,
1395    #[serde(default = "default_true")]
1396    pub temporal_decay_enabled: bool,
1397    #[serde(default = "default_temporal_decay_half_life_days")]
1398    pub temporal_decay_half_life_days: u32,
1399    #[serde(default = "default_true")]
1400    pub mmr_enabled: bool,
1401    #[serde(default = "default_mmr_lambda")]
1402    pub mmr_lambda: f32,
1403    #[serde(default = "default_true")]
1404    pub importance_enabled: bool,
1405    #[serde(
1406        default = "default_importance_weight",
1407        deserialize_with = "validate_importance_weight"
1408    )]
1409    pub importance_weight: f64,
1410    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1411    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1412    /// from contending with the guardrail at the API server level (rate limits, Ollama
1413    /// single-model lock). Falls back to the main agent provider when `None`.
1414    #[serde(default)]
1415    pub embed_provider: Option<ProviderName>,
1416}
1417
1418impl Default for SemanticConfig {
1419    fn default() -> Self {
1420        Self {
1421            enabled: default_semantic_enabled(),
1422            recall_limit: default_recall_limit(),
1423            vector_weight: default_vector_weight(),
1424            keyword_weight: default_keyword_weight(),
1425            temporal_decay_enabled: true,
1426            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1427            mmr_enabled: true,
1428            mmr_lambda: default_mmr_lambda(),
1429            importance_enabled: true,
1430            importance_weight: default_importance_weight(),
1431            embed_provider: None,
1432        }
1433    }
1434}
1435
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// Serialized in `snake_case` (`rename_all`), so the accepted configuration values are
/// `"structured"` and `"plain"`.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default. Gives the LLM structured provenance metadata for each recalled
    /// memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1463
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML.  All fields have defaults so existing
/// configs parse unchanged: keys without a field-level default are filled from this
/// type's [`Default`] implementation via the container-level `#[serde(default)]`.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped.  Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    ///
    /// NOTE(review): the clamp is not applied by this struct; presumably it happens at the
    /// use site — confirm.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1528
/// Serde default for `RetrievalConfig::query_bias_correction`: enabled.
fn default_query_bias_correction() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1532
/// Serde default for `RetrievalConfig::query_bias_profile_weight`: `0.25`.
fn default_query_bias_profile_weight() -> f32 {
    const PROFILE_BLEND_WEIGHT: f32 = 0.25;
    PROFILE_BLEND_WEIGHT
}
1536
/// Serde default for `RetrievalConfig::query_bias_centroid_ttl_secs`: 5 minutes, in seconds.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    5 * 60
}
1540
1541impl Default for RetrievalConfig {
1542    fn default() -> Self {
1543        Self {
1544            depth: 0,
1545            search_prompt_template: String::new(),
1546            context_format: ContextFormat::default(),
1547            query_bias_correction: default_query_bias_correction(),
1548            query_bias_profile_weight: default_query_bias_profile_weight(),
1549            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1550        }
1551    }
1552}
1553
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// The container-level `#[serde(default)]` fills every key that is missing from the TOML
/// table with the corresponding value from this type's [`Default`] implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when `None` or unresolvable. Default: `None`.
    #[serde(default)]
    pub consolidate_provider: Option<ProviderName>,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    ///
    /// NOTE(review): the clamp is not applied by this struct; presumably it happens at the
    /// use site — confirm.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<EdgeType>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
    /// Timeout for the initial query embedding call in HL-F5, in seconds.
    ///
    /// `0` disables the timeout. Default: `5`.
    pub embed_timeout_secs: u64,
}
1620
1621impl Default for HebbianConfig {
1622    fn default() -> Self {
1623        Self {
1624            enabled: false,
1625            hebbian_lr: 0.1,
1626            consolidation_interval_secs: 3600,
1627            consolidation_threshold: 5.0,
1628            consolidate_provider: None,
1629            max_candidates_per_sweep: 10,
1630            consolidation_cooldown_secs: 86_400,
1631            consolidation_prompt_timeout_secs: 30,
1632            consolidation_max_neighbors: 20,
1633            spreading_activation: false,
1634            spread_depth: 2,
1635            spread_edge_types: Vec::new(),
1636            step_budget_ms: 8,
1637            embed_timeout_secs: 5,
1638        }
1639    }
1640}
1641
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged for serde: the variant name is stored in `snake_case` under a
/// `strategy` key (e.g. `strategy = "proactive"`), and the fields of `Proactive` appear
/// as sibling keys of that tag.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1665
1666/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
1667///
1668/// When `context-compression` feature is enabled, this replaces the default oldest-first
1669/// heuristic with scored eviction.
1670#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
1671#[serde(rename_all = "snake_case")]
1672pub enum PruningStrategy {
1673    /// Oldest-first eviction — current default behavior.
1674    #[default]
1675    Reactive,
1676    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
1677    /// lowest-first. Requires `context-compression` feature.
1678    TaskAware,
1679    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
1680    /// Requires `context-compression` feature.
1681    Mig,
1682    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
1683    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
1684    /// Requires `context-compression` feature.
1685    Subgoal,
1686    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
1687    /// Requires `context-compression` feature.
1688    SubgoalMig,
1689}
1690
1691impl PruningStrategy {
1692    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1693    #[must_use]
1694    pub fn is_subgoal(self) -> bool {
1695        matches!(self, Self::Subgoal | Self::SubgoalMig)
1696    }
1697}
1698
1699// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1700// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1701impl<'de> serde::Deserialize<'de> for PruningStrategy {
1702    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1703        let s = String::deserialize(deserializer)?;
1704        s.parse().map_err(serde::de::Error::custom)
1705    }
1706}
1707
1708impl std::str::FromStr for PruningStrategy {
1709    type Err = String;
1710
1711    fn from_str(s: &str) -> Result<Self, Self::Err> {
1712        match s {
1713            "reactive" => Ok(Self::Reactive),
1714            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1715            "mig" => Ok(Self::Mig),
1716            // task_aware_mig was removed (dead code — was routed to scored path only).
1717            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1718            "task_aware_mig" | "task-aware-mig" => {
1719                tracing::warn!(
1720                    "pruning strategy `task_aware_mig` has been removed; \
1721                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1722                );
1723                Ok(Self::Reactive)
1724            }
1725            "subgoal" => Ok(Self::Subgoal),
1726            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1727            other => Err(format!(
1728                "unknown pruning strategy `{other}`, expected \
1729                 reactive|task_aware|mig|subgoal|subgoal_mig"
1730            )),
1731        }
1732    }
1733}
1734
/// Serde default for `CompressionConfig::high_density_budget` (`0.7`).
fn default_high_density_budget() -> f32 {
    const HIGH_DENSITY_SHARE: f32 = 0.7;
    HIGH_DENSITY_SHARE
}
1738
/// Serde default for `CompressionConfig::low_density_budget` (`0.3`).
fn default_low_density_budget() -> f32 {
    const LOW_DENSITY_SHARE: f32 = 0.3;
    LOW_DENSITY_SHARE
}
1742
1743/// Configuration for the `SleepGate` forgetting sweep (#2397).
1744///
1745/// When `enabled = true`, a background loop periodically decays importance scores
1746/// (synaptic downscaling), restores recently-accessed memories (selective replay),
1747/// and prunes memories below `forgetting_floor` (targeted forgetting).
1748#[derive(Debug, Clone, Deserialize, Serialize)]
1749#[serde(default)]
1750pub struct ForgettingConfig {
1751    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
1752    pub enabled: bool,
1753    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
1754    pub decay_rate: f32,
1755    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
1756    pub forgetting_floor: f32,
1757    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
1758    pub sweep_interval_secs: u64,
1759    /// Maximum messages to process per sweep. Default: `500`.
1760    pub sweep_batch_size: usize,
1761    /// Hours: messages accessed within this window get replay protection. Default: `24`.
1762    pub replay_window_hours: u32,
1763    /// Messages with `access_count` >= this get replay protection. Default: `3`.
1764    pub replay_min_access_count: u32,
1765    /// Hours: never prune messages accessed within this window. Default: `24`.
1766    pub protect_recent_hours: u32,
1767    /// Never prune messages with `access_count` >= this. Default: `3`.
1768    pub protect_min_access_count: u32,
1769}
1770
1771impl Default for ForgettingConfig {
1772    fn default() -> Self {
1773        Self {
1774            enabled: false,
1775            decay_rate: 0.1,
1776            forgetting_floor: 0.05,
1777            sweep_interval_secs: 7200,
1778            sweep_batch_size: 500,
1779            replay_window_hours: 24,
1780            replay_min_access_count: 3,
1781            protect_recent_hours: 24,
1782            protect_min_access_count: 3,
1783        }
1784    }
1785}
1786
1787/// Configuration for active context compression (#1161).
1788#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1789#[serde(default)]
1790pub struct CompressionConfig {
1791    /// Compression strategy.
1792    #[serde(flatten)]
1793    pub strategy: CompressionStrategy,
1794    /// Tool-output pruning strategy (requires `context-compression` feature).
1795    pub pruning_strategy: PruningStrategy,
1796    /// Model to use for compression summaries.
1797    ///
1798    /// Currently unused — the primary summary provider is used regardless of this value.
1799    /// Reserved for future per-compression model selection. Setting this field has no effect.
1800    pub model: String,
1801    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1802    /// Falls back to the primary provider when empty. Default: `""`.
1803    pub compress_provider: ProviderName,
1804    /// Compaction probe: validates summary quality before committing it (#1609).
1805    #[serde(default)]
1806    pub probe: CompactionProbeConfig,
1807    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1808    ///
1809    /// When enabled, tool output bodies in the compaction range are saved to
1810    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1811    /// The LLM summarizes placeholder messages; archived content is appended as
1812    /// a postfix after summarization so references survive compaction.
1813    /// Default: `false`.
1814    #[serde(default)]
1815    pub archive_tool_outputs: bool,
1816    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
1817    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
1818    /// Falls back to the primary provider when empty. Default: `""`.
1819    pub focus_scorer_provider: ProviderName,
1820    /// Token-budget fraction for high-density content in density-aware compression (#2481).
1821    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1822    #[serde(default = "default_high_density_budget")]
1823    pub high_density_budget: f32,
1824    /// Token-budget fraction for low-density content in density-aware compression (#2481).
1825    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1826    #[serde(default = "default_low_density_budget")]
1827    pub low_density_budget: f32,
1828    /// Typed-page classification and batch-level assertion checking (#3630).
1829    #[serde(default)]
1830    pub typed_pages: TypedPagesConfig,
1831}
1832
1833/// Configuration for typed-page compaction invariants (#3630).
1834///
1835/// Controls classification, batch-level assertion checking, and audit logging.
1836/// All behavior is disabled by default; set `enabled = true` to activate.
1837///
1838/// # Example (TOML)
1839///
1840/// ```toml
1841/// [memory.compression.typed_pages]
1842/// enabled = true
1843/// enforcement = "active"
1844/// audit_path = ""
1845/// audit_channel_capacity = 256
1846/// ```
1847#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
1848#[serde(default)]
1849pub struct TypedPagesConfig {
1850    /// Enable typed-page classification and batch-level assertion checking.
1851    /// Default: `false`.
1852    pub enabled: bool,
1853    /// Enforcement mode:
1854    ///
1855    /// - `observe`: classify and emit audit records only; no behavioral change.
1856    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
1857    ///
1858    /// Default: `"observe"`.
1859    pub enforcement: TypedPagesEnforcement,
1860    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
1861    /// Default: `""`.
1862    ///
1863    /// # Security
1864    ///
1865    /// This field is **operator-only trusted input** read from the agent's configuration file.
1866    /// Write access to the config file implies file-system write access, so no additional
1867    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
1868    /// configuration sources.
1869    pub audit_path: String,
1870    /// Bounded channel capacity for the async audit writer. Default: `256`.
1871    pub audit_channel_capacity: usize,
1872}
1873
1874impl Default for TypedPagesConfig {
1875    fn default() -> Self {
1876        Self {
1877            enabled: false,
1878            enforcement: TypedPagesEnforcement::Observe,
1879            audit_path: String::new(),
1880            audit_channel_capacity: 256,
1881        }
1882    }
1883}
1884
1885/// Enforcement mode for typed-page compaction (#3630).
1886#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
1887#[serde(rename_all = "snake_case")]
1888pub enum TypedPagesEnforcement {
1889    /// Classify and audit only. Zero behavioral change relative to the untyped path.
1890    #[default]
1891    Observe,
1892    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
1893    Active,
1894}
1895
/// Serde default for `SidequestConfig::interval_turns`: run eviction every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    4_u32
}
1899
/// Serde default for `SidequestConfig::max_eviction_ratio`: at most half of the tool
/// outputs per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5_f32
}
1903
/// Serde default for `SidequestConfig::max_cursors`: 30 cursor entries in the eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    30_usize
}
1907
/// Serde default for `SidequestConfig::min_cursor_tokens`: outputs under 100 tokens are not
/// eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    100_usize
}
1911
1912/// Configuration for LLM-driven side-thread tool output eviction (#1885).
1913#[derive(Debug, Clone, Deserialize, Serialize)]
1914#[serde(default)]
1915pub struct SidequestConfig {
1916    /// Enable `SideQuest` eviction. Default: `false`.
1917    pub enabled: bool,
1918    /// Run eviction every N user turns. Default: `4`.
1919    #[serde(default = "default_sidequest_interval_turns")]
1920    pub interval_turns: u32,
1921    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
1922    #[serde(default = "default_sidequest_max_eviction_ratio")]
1923    pub max_eviction_ratio: f32,
1924    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
1925    #[serde(default = "default_sidequest_max_cursors")]
1926    pub max_cursors: usize,
1927    /// Exclude tool outputs smaller than this token count from eviction candidates.
1928    /// Default: `100`.
1929    #[serde(default = "default_sidequest_min_cursor_tokens")]
1930    pub min_cursor_tokens: usize,
1931}
1932
1933impl Default for SidequestConfig {
1934    fn default() -> Self {
1935        Self {
1936            enabled: false,
1937            interval_turns: default_sidequest_interval_turns(),
1938            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1939            max_cursors: default_sidequest_max_cursors(),
1940            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1941        }
1942    }
1943}
1944
1945/// Graph retrieval strategy for `[memory.graph]`.
1946///
1947/// Selects the algorithm used to traverse the knowledge graph during recall.
1948/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
1949#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
1950#[serde(rename_all = "snake_case")]
1951pub enum GraphRetrievalStrategy {
1952    /// SYNAPSE spreading activation (default, existing behavior).
1953    #[default]
1954    Synapse,
1955    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
1956    Bfs,
1957    /// A* shortest-path traversal via petgraph.
1958    #[serde(rename = "astar")]
1959    AStar,
1960    /// Concentric BFS expanding outward from seed nodes.
1961    WaterCircles,
1962    /// Beam search: keep top-K candidates per hop.
1963    BeamSearch,
1964    /// Dynamic: LLM classifier selects strategy per query.
1965    Hybrid,
1966}
1967
/// Serde default for `BeamSearchConfig::beam_width`: keep 10 candidates per hop.
fn default_beam_width() -> usize {
    10_usize
}
1971
1972/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
1973///
1974/// Controls the width of the beam during graph traversal: how many top candidates
1975/// are retained at each hop.
1976#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
1977pub struct BeamSearchConfig {
1978    /// Number of top candidates kept per hop. Default: `10`.
1979    #[serde(default = "default_beam_width")]
1980    pub beam_width: usize,
1981}
1982
1983impl Default for BeamSearchConfig {
1984    fn default() -> Self {
1985        Self {
1986            beam_width: default_beam_width(),
1987        }
1988    }
1989}
1990
1991/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
1992///
1993/// Controls ring-by-ring concentric BFS traversal from seed nodes.
1994#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
1995pub struct WaterCirclesConfig {
1996    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
1997    #[serde(default)]
1998    pub ring_limit: usize,
1999}
2000
/// Serde default for `ExperienceConfig::evolution_sweep_interval`: 50 turns between sweeps.
fn default_evolution_sweep_interval() -> usize {
    50_usize
}
2004
/// Serde default for `ExperienceConfig::confidence_prune_threshold` (`0.1`).
fn default_confidence_prune_threshold() -> f32 {
    0.1_f32
}
2008
2009/// Experience memory configuration for `[memory.graph.experience]`.
2010///
2011/// Controls recording of tool execution outcomes and graph evolution sweeps.
2012#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2013pub struct ExperienceConfig {
2014    /// Enable experience memory recording. Default: `false`.
2015    #[serde(default)]
2016    pub enabled: bool,
2017    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
2018    #[serde(default)]
2019    pub evolution_sweep_enabled: bool,
2020    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
2021    #[serde(default = "default_confidence_prune_threshold")]
2022    pub confidence_prune_threshold: f32,
2023    /// Number of turns between evolution sweeps. Default: `50`.
2024    #[serde(default = "default_evolution_sweep_interval")]
2025    pub evolution_sweep_interval: usize,
2026}
2027
2028impl Default for ExperienceConfig {
2029    fn default() -> Self {
2030        Self {
2031            enabled: false,
2032            evolution_sweep_enabled: false,
2033            confidence_prune_threshold: default_confidence_prune_threshold(),
2034            evolution_sweep_interval: default_evolution_sweep_interval(),
2035        }
2036    }
2037}
2038
2039/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
2040///
2041/// # Security
2042///
2043/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
2044/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
2045/// when processing conversations that may contain personal, medical, or sensitive data until
2046/// a redaction pass is implemented on the write path.
2047#[derive(Debug, Clone, Deserialize, Serialize)]
2048#[serde(default)]
2049pub struct GraphConfig {
2050    pub enabled: bool,
2051    pub extract_model: String,
2052    #[serde(default = "default_graph_max_entities_per_message")]
2053    pub max_entities_per_message: usize,
2054    #[serde(default = "default_graph_max_edges_per_message")]
2055    pub max_edges_per_message: usize,
2056    #[serde(default = "default_graph_community_refresh_interval")]
2057    pub community_refresh_interval: usize,
2058    #[serde(default = "default_graph_entity_similarity_threshold")]
2059    pub entity_similarity_threshold: f32,
2060    #[serde(default = "default_graph_extraction_timeout_secs")]
2061    pub extraction_timeout_secs: u64,
2062    #[serde(default)]
2063    pub use_embedding_resolution: bool,
2064    #[serde(default = "default_graph_entity_ambiguous_threshold")]
2065    pub entity_ambiguous_threshold: f32,
2066    #[serde(default = "default_graph_max_hops")]
2067    pub max_hops: u32,
2068    #[serde(default = "default_graph_recall_limit")]
2069    pub recall_limit: usize,
2070    /// Days to retain expired (superseded) edges before deletion. Default: 90.
2071    #[serde(default = "default_graph_expired_edge_retention_days")]
2072    pub expired_edge_retention_days: u32,
2073    /// Maximum entities to retain in the graph. 0 = unlimited.
2074    #[serde(default)]
2075    pub max_entities: usize,
2076    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
2077    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
2078    pub community_summary_max_prompt_bytes: usize,
2079    /// Maximum concurrent LLM calls during community summarization. Default: 4.
2080    #[serde(default = "default_graph_community_summary_concurrency")]
2081    pub community_summary_concurrency: usize,
2082    /// Number of edges fetched per chunk during community detection. Default: 10000.
2083    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
2084    #[serde(default = "default_lpa_edge_chunk_size")]
2085    pub lpa_edge_chunk_size: usize,
2086    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
2087    ///
2088    /// When > 0, recent edges receive a small additive score boost over older edges.
2089    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
2090    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
2091    #[serde(
2092        default = "default_graph_temporal_decay_rate",
2093        deserialize_with = "validate_temporal_decay_rate"
2094    )]
2095    pub temporal_decay_rate: f64,
2096    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
2097    ///
2098    /// Caps the result set returned for a given source entity + predicate pair. Prevents
2099    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
2100    /// or API endpoints.
2101    #[serde(default = "default_graph_edge_history_limit")]
2102    pub edge_history_limit: usize,
2103    /// A-MEM dynamic note linking configuration.
2104    ///
2105    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
2106    /// semantically similar entities via `similar_to` edges. Requires an embedding store
2107    /// (`qdrant` or `sqlite` vector backend) to be configured.
2108    #[serde(default)]
2109    pub note_linking: NoteLinkingConfig,
2110    /// SYNAPSE spreading activation retrieval configuration.
2111    ///
2112    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
2113    /// with lateral inhibition and temporal decay instead of BFS.
2114    #[serde(default)]
2115    pub spreading_activation: SpreadingActivationConfig,
2116    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
2117    ///
2118    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
2119    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
2120    #[serde(default)]
2121    pub retrieval_strategy: GraphRetrievalStrategy,
2122    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
2123    ///
2124    /// When non-empty, graph extraction (and downstream note linking and community
2125    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
2126    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
2127    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
2128    /// which causes systematic quality gate rejections. A named provider built via
2129    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
2130    /// has no quality gate attached.
2131    ///
2132    /// Falls back to the primary provider when empty. Default: `""` (use primary).
2133    #[serde(default)]
2134    pub extract_provider: ProviderName,
2135    /// Named LLM provider for hybrid strategy classification.
2136    /// Falls back to the default provider when `None`.
2137    #[serde(default)]
2138    pub strategy_classifier_provider: Option<ProviderName>,
2139    /// Beam search configuration.
2140    #[serde(default)]
2141    pub beam_search: BeamSearchConfig,
2142    /// `WaterCircles` BFS configuration.
2143    #[serde(default)]
2144    pub watercircles: WaterCirclesConfig,
2145    /// Experience memory configuration.
2146    #[serde(default)]
2147    pub experience: ExperienceConfig,
2148    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
2149    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
2150    #[serde(
2151        default = "default_link_weight_decay_lambda",
2152        deserialize_with = "validate_link_weight_decay_lambda"
2153    )]
2154    pub link_weight_decay_lambda: f64,
2155    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
2156    #[serde(default = "default_link_weight_decay_interval_secs")]
2157    pub link_weight_decay_interval_secs: u64,
2158    /// Kumiho AGM-inspired belief revision configuration.
2159    ///
2160    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
2161    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
2162    /// `superseded_by` pointer and the new edge becomes the current belief.
2163    #[serde(default)]
2164    pub belief_revision: BeliefRevisionConfig,
2165    /// D-MEM RPE-based tiered graph extraction routing.
2166    ///
2167    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
2168    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
2169    #[serde(default)]
2170    pub rpe: RpeConfig,
2171    /// `SQLite` connection pool size dedicated to graph operations.
2172    ///
2173    /// Graph tables share the same database file as messages/embeddings but use a
2174    /// separate pool to prevent pool starvation when community detection or spreading
2175    /// activation runs concurrently with regular memory operations. Default: `3`.
2176    #[serde(default = "default_graph_pool_size")]
2177    pub pool_size: u32,
2178    /// APEX-MEM append-only write path (#3631).
2179    ///
2180    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
2181    /// supersession chains instead of the legacy destructive-update path.
2182    #[serde(default)]
2183    pub apex_mem: ApexMemConfig,
2184    /// LLM call timeout per extraction request, in seconds. Default: `30`.
2185    #[serde(default = "default_graph_llm_timeout_secs")]
2186    pub llm_timeout_secs: u64,
2187    /// PRISM query-sensitive edge costing in A* graph recall.
2188    ///
2189    /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
2190    /// between the query embedding and the target entity embedding:
2191    /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
2192    /// Edges toward semantically relevant entities receive lower cost and are therefore
2193    /// preferred by A*, producing query-aligned recall paths.
2194    ///
2195    /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
2196    /// store is unavailable or a target entity has no stored embedding, falls back to the
2197    /// baseline cost `1.0 - confidence`.
2198    ///
2199    /// Default: `false` (preserves existing A* behaviour).
2200    #[serde(default)]
2201    pub query_sensitive_cost: bool,
2202}
2203
/// Serde default for `GraphConfig::pool_size`: 3 connections in the graph-only pool.
fn default_graph_pool_size() -> u32 {
    3_u32
}
2207
/// Serde default for `GraphConfig::llm_timeout_secs`: 30 seconds per extraction request.
fn default_graph_llm_timeout_secs() -> u64 {
    30_u64
}
2211
2212/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
2213///
2214/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
2215/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
2216/// the full supersession chain and enables conflict resolution.
2217///
2218/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
2219#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
2220#[serde(default)]
2221pub struct ApexMemConfig {
2222    /// Enable the APEX-MEM append-only write path. Default: `false`.
2223    pub enabled: bool,
2224}
2225
/// Serde default for `WriteQualityGateConfig::threshold` (`0.55`).
fn default_quality_gate_threshold() -> f32 {
    0.55_f32
}
2229
/// Serde default for `WriteQualityGateConfig::recent_window`: compare against 32 recent writes.
fn default_quality_gate_recent_window() -> usize {
    32_usize
}
2233
/// Serde default for `WriteQualityGateConfig::contradiction_grace_seconds`: 300 seconds.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    300_u64
}
2237
/// Serde default for `WriteQualityGateConfig::information_value_weight` (`0.4`).
fn default_quality_gate_information_value_weight() -> f32 {
    0.4_f32
}
2241
/// Serde default for `WriteQualityGateConfig::reference_completeness_weight` (`0.3`).
fn default_quality_gate_reference_completeness_weight() -> f32 {
    0.3_f32
}
2245
/// Serde default for `WriteQualityGateConfig::contradiction_weight` (`0.3`).
fn default_quality_gate_contradiction_weight() -> f32 {
    0.3_f32
}
2249
/// Serde default for `WriteQualityGateConfig::rejection_rate_alarm_ratio` (`0.35`).
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    0.35_f32
}
2253
/// Serde default for `WriteQualityGateConfig::llm_timeout_ms`: 500 milliseconds.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    500_u64
}
2257
/// Serde default for `WriteQualityGateConfig::llm_weight` (`0.5`).
fn default_quality_gate_llm_weight() -> f32 {
    0.5_f32
}
2261
/// Serde default for `WriteQualityGateConfig::reference_check_lang_en`: the check is on.
fn default_quality_gate_reference_check_lang_en() -> bool {
    const REFERENCE_CHECK_ON: bool = true;
    REFERENCE_CHECK_ON
}
2265
2266/// Write quality gate configuration (`[memory.quality_gate]`).
2267///
2268/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
2269/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
2270/// scoring is opt-in via `quality_gate_provider`.
2271///
2272/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
2273#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2274#[serde(default)]
2275pub struct WriteQualityGateConfig {
2276    /// Enable the write quality gate. Default: `false`.
2277    pub enabled: bool,
2278    /// Combined score threshold below which writes are rejected. Default: `0.55`.
2279    #[serde(default = "default_quality_gate_threshold")]
2280    pub threshold: f32,
2281    /// Number of recent writes compared for information-value scoring. Default: `32`.
2282    #[serde(default = "default_quality_gate_recent_window")]
2283    pub recent_window: usize,
2284    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
2285    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
2286    pub contradiction_grace_seconds: u64,
2287    /// Weight of `information_value` sub-score. Default: `0.4`.
2288    #[serde(default = "default_quality_gate_information_value_weight")]
2289    pub information_value_weight: f32,
2290    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
2291    #[serde(default = "default_quality_gate_reference_completeness_weight")]
2292    pub reference_completeness_weight: f32,
2293    /// Weight of `contradiction` sub-score. Default: `0.3`.
2294    #[serde(default = "default_quality_gate_contradiction_weight")]
2295    pub contradiction_weight: f32,
2296    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
2297    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
2298    pub rejection_rate_alarm_ratio: f32,
2299    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
2300    #[serde(default)]
2301    pub quality_gate_provider: ProviderName,
2302    /// LLM timeout in milliseconds. Default: `500`.
2303    #[serde(default = "default_quality_gate_llm_timeout_ms")]
2304    pub llm_timeout_ms: u64,
2305    /// LLM blend weight into final score. Default: `0.5`.
2306    #[serde(default = "default_quality_gate_llm_weight")]
2307    pub llm_weight: f32,
2308    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
2309    #[serde(default = "default_quality_gate_reference_check_lang_en")]
2310    pub reference_check_lang_en: bool,
2311}
2312
2313impl Default for WriteQualityGateConfig {
2314    fn default() -> Self {
2315        Self {
2316            enabled: false,
2317            threshold: default_quality_gate_threshold(),
2318            recent_window: default_quality_gate_recent_window(),
2319            contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
2320            information_value_weight: default_quality_gate_information_value_weight(),
2321            reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
2322            contradiction_weight: default_quality_gate_contradiction_weight(),
2323            rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
2324            quality_gate_provider: ProviderName::default(),
2325            llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
2326            llm_weight: default_quality_gate_llm_weight(),
2327            reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
2328        }
2329    }
2330}
2331
2332impl Default for GraphConfig {
2333    fn default() -> Self {
2334        Self {
2335            enabled: false,
2336            extract_model: String::new(),
2337            max_entities_per_message: default_graph_max_entities_per_message(),
2338            max_edges_per_message: default_graph_max_edges_per_message(),
2339            community_refresh_interval: default_graph_community_refresh_interval(),
2340            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
2341            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
2342            use_embedding_resolution: false,
2343            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
2344            max_hops: default_graph_max_hops(),
2345            recall_limit: default_graph_recall_limit(),
2346            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
2347            max_entities: 0,
2348            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
2349            community_summary_concurrency: default_graph_community_summary_concurrency(),
2350            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
2351            temporal_decay_rate: default_graph_temporal_decay_rate(),
2352            edge_history_limit: default_graph_edge_history_limit(),
2353            note_linking: NoteLinkingConfig::default(),
2354            spreading_activation: SpreadingActivationConfig::default(),
2355            retrieval_strategy: GraphRetrievalStrategy::default(),
2356            extract_provider: ProviderName::default(),
2357            strategy_classifier_provider: None,
2358            beam_search: BeamSearchConfig::default(),
2359            watercircles: WaterCirclesConfig::default(),
2360            experience: ExperienceConfig::default(),
2361            link_weight_decay_lambda: default_link_weight_decay_lambda(),
2362            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
2363            belief_revision: BeliefRevisionConfig::default(),
2364            rpe: RpeConfig::default(),
2365            pool_size: default_graph_pool_size(),
2366            apex_mem: ApexMemConfig::default(),
2367            llm_timeout_secs: default_graph_llm_timeout_secs(),
2368            query_sensitive_cost: false,
2369        }
2370    }
2371}
2372
/// Default for `ConsolidationConfig::confidence_threshold`: `0.7`.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7_f32
}
2376
/// Default for `ConsolidationConfig::sweep_interval_secs`: one hour, in seconds.
fn default_consolidation_sweep_interval_secs() -> u64 {
    60 * 60
}
2380
/// Default for `ConsolidationConfig::sweep_batch_size`: `50`.
fn default_consolidation_sweep_batch_size() -> usize {
    50_usize
}
2384
/// Default for `ConsolidationConfig::similarity_threshold`: `0.85`.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85_f32
}
2388
2389/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
2390///
2391/// When `enabled = true`, a background loop periodically clusters semantically similar messages
2392/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
2393/// they are marked as consolidated and deprioritized in recall via temporal decay.
2394#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2395#[serde(default)]
2396pub struct ConsolidationConfig {
2397    /// Enable the consolidation background loop. Default: `false`.
2398    pub enabled: bool,
2399    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
2400    /// Falls back to the primary provider when empty. Default: `""`.
2401    #[serde(default)]
2402    pub consolidation_provider: ProviderName,
2403    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
2404    #[serde(default = "default_consolidation_confidence_threshold")]
2405    pub confidence_threshold: f32,
2406    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
2407    #[serde(default = "default_consolidation_sweep_interval_secs")]
2408    pub sweep_interval_secs: u64,
2409    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
2410    #[serde(default = "default_consolidation_sweep_batch_size")]
2411    pub sweep_batch_size: usize,
2412    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
2413    /// Default: `0.85`.
2414    #[serde(default = "default_consolidation_similarity_threshold")]
2415    pub similarity_threshold: f32,
2416    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
2417    #[serde(default = "default_consolidation_llm_timeout_secs")]
2418    pub llm_timeout_secs: u64,
2419}
2420
2421impl Default for ConsolidationConfig {
2422    fn default() -> Self {
2423        Self {
2424            enabled: false,
2425            consolidation_provider: ProviderName::default(),
2426            confidence_threshold: default_consolidation_confidence_threshold(),
2427            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
2428            sweep_batch_size: default_consolidation_sweep_batch_size(),
2429            similarity_threshold: default_consolidation_similarity_threshold(),
2430            llm_timeout_secs: default_consolidation_llm_timeout_secs(),
2431        }
2432    }
2433}
2434
/// Default for `ConsolidationConfig::llm_timeout_secs`: `30` seconds.
fn default_consolidation_llm_timeout_secs() -> u64 {
    30_u64
}
2438
/// Default for `GraphConfig::link_weight_decay_lambda`: `0.95`.
fn default_link_weight_decay_lambda() -> f64 {
    0.95_f64
}
2442
/// Default for `GraphConfig::link_weight_decay_interval_secs`: one day, in seconds.
fn default_link_weight_decay_interval_secs() -> u64 {
    24 * 60 * 60
}
2446
2447fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
2448where
2449    D: serde::Deserializer<'de>,
2450{
2451    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
2452    if value.is_nan() || value.is_infinite() {
2453        return Err(serde::de::Error::custom(
2454            "link_weight_decay_lambda must be a finite number",
2455        ));
2456    }
2457    if !(value > 0.0 && value <= 1.0) {
2458        return Err(serde::de::Error::custom(
2459            "link_weight_decay_lambda must be in (0.0, 1.0]",
2460        ));
2461    }
2462    Ok(value)
2463}
2464
2465fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
2466where
2467    D: serde::Deserializer<'de>,
2468{
2469    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2470    if value.is_nan() || value.is_infinite() {
2471        return Err(serde::de::Error::custom(
2472            "threshold must be a finite number",
2473        ));
2474    }
2475    if !(0.0..=1.0).contains(&value) {
2476        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
2477    }
2478    Ok(value)
2479}
2480
2481fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
2482where
2483    D: serde::Deserializer<'de>,
2484{
2485    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2486    if value.is_nan() || value.is_infinite() {
2487        return Err(serde::de::Error::custom(
2488            "fast_path_margin must be a finite number",
2489        ));
2490    }
2491    if !(0.0..=1.0).contains(&value) {
2492        return Err(serde::de::Error::custom(
2493            "fast_path_margin must be in [0.0, 1.0]",
2494        ));
2495    }
2496    Ok(value)
2497}
2498
/// Default for `AdmissionConfig::threshold`: `0.40`.
fn default_admission_threshold() -> f32 {
    0.40_f32
}
2502
/// Default for `AdmissionConfig::fast_path_margin`: `0.15`.
fn default_admission_fast_path_margin() -> f32 {
    0.15_f32
}
2506
/// Default for `AdmissionConfig::rl_min_samples`: `500`.
fn default_rl_min_samples() -> u32 {
    500_u32
}
2510
/// Default for `AdmissionConfig::rl_retrain_interval_secs`: one hour, in seconds.
fn default_rl_retrain_interval_secs() -> u64 {
    60 * 60
}
2514
2515/// Admission decision strategy.
2516///
2517/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
2518/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
2519#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
2520#[serde(rename_all = "snake_case")]
2521pub enum AdmissionStrategy {
2522    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
2523    #[default]
2524    Heuristic,
2525    /// Learned model: logistic regression trained on recall feedback.
2526    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
2527    Rl,
2528}
2529
2530fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
2531where
2532    D: serde::Deserializer<'de>,
2533{
2534    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2535    if value < 0.0 {
2536        return Err(serde::de::Error::custom(
2537            "admission weight must be non-negative (>= 0.0)",
2538        ));
2539    }
2540    Ok(value)
2541}
2542
2543/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
2544///
2545/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
2546/// All values must be non-negative.
2547#[derive(Debug, Clone, Deserialize, Serialize)]
2548#[serde(default)]
2549pub struct AdmissionWeights {
2550    /// LLM-estimated future reuse probability. Default: `0.30`.
2551    #[serde(deserialize_with = "validate_admission_weight")]
2552    pub future_utility: f32,
2553    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
2554    #[serde(deserialize_with = "validate_admission_weight")]
2555    pub factual_confidence: f32,
2556    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
2557    #[serde(deserialize_with = "validate_admission_weight")]
2558    pub semantic_novelty: f32,
2559    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
2560    #[serde(deserialize_with = "validate_admission_weight")]
2561    pub temporal_recency: f32,
2562    /// Content type prior based on role. Default: `0.15`.
2563    #[serde(deserialize_with = "validate_admission_weight")]
2564    pub content_type_prior: f32,
2565    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
2566    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
2567    /// Normalized automatically at runtime. Default: `0.0`.
2568    #[serde(deserialize_with = "validate_admission_weight")]
2569    pub goal_utility: f32,
2570}
2571
2572impl Default for AdmissionWeights {
2573    fn default() -> Self {
2574        Self {
2575            future_utility: 0.30,
2576            factual_confidence: 0.15,
2577            semantic_novelty: 0.30,
2578            temporal_recency: 0.10,
2579            content_type_prior: 0.15,
2580            goal_utility: 0.0,
2581        }
2582    }
2583}
2584
2585impl AdmissionWeights {
2586    /// Return weights normalized so they sum to 1.0.
2587    ///
2588    /// All weights are non-negative; the sum is always > 0 when defaults are used.
2589    #[must_use]
2590    pub fn normalized(&self) -> Self {
2591        let sum = self.future_utility
2592            + self.factual_confidence
2593            + self.semantic_novelty
2594            + self.temporal_recency
2595            + self.content_type_prior
2596            + self.goal_utility;
2597        if sum <= f32::EPSILON {
2598            return Self::default();
2599        }
2600        Self {
2601            future_utility: self.future_utility / sum,
2602            factual_confidence: self.factual_confidence / sum,
2603            semantic_novelty: self.semantic_novelty / sum,
2604            temporal_recency: self.temporal_recency / sum,
2605            content_type_prior: self.content_type_prior / sum,
2606            goal_utility: self.goal_utility / sum,
2607        }
2608    }
2609}
2610
2611/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
2612///
2613/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
2614/// Messages below the composite admission threshold are rejected and not persisted.
2615#[derive(Debug, Clone, Deserialize, Serialize)]
2616#[serde(default)]
2617pub struct AdmissionConfig {
2618    /// Enable A-MAC admission control. Default: `false`.
2619    pub enabled: bool,
2620    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
2621    /// Default: `0.40`.
2622    #[serde(deserialize_with = "validate_admission_threshold")]
2623    pub threshold: f32,
2624    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
2625    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
2626    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
2627    pub fast_path_margin: f32,
2628    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
2629    /// Falls back to the primary provider when empty. Default: `""`.
2630    pub admission_provider: ProviderName,
2631    /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
2632    pub weights: AdmissionWeights,
2633    /// Admission decision strategy. Default: `heuristic`.
2634    #[serde(default)]
2635    pub admission_strategy: AdmissionStrategy,
2636    /// Minimum training samples before the RL model is activated.
2637    /// Below this count the system falls back to `Heuristic`. Default: `500`.
2638    #[serde(default = "default_rl_min_samples")]
2639    pub rl_min_samples: u32,
2640    /// Background RL model retraining interval in seconds. Default: `3600`.
2641    #[serde(default = "default_rl_retrain_interval_secs")]
2642    pub rl_retrain_interval_secs: u64,
2643    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
2644    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
2645    /// Zero regression when `false`. Default: `false`.
2646    #[serde(default)]
2647    pub goal_conditioned_write: bool,
2648    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
2649    /// Used only for borderline cases (similarity within 0.1 of threshold).
2650    /// Falls back to the primary provider when empty. Default: `""`.
2651    #[serde(default)]
2652    pub goal_utility_provider: ProviderName,
2653    /// Minimum cosine similarity between goal embedding and candidate memory
2654    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
2655    #[serde(default = "default_goal_utility_threshold")]
2656    pub goal_utility_threshold: f32,
2657    /// Weight of the `goal_utility` factor in the composite admission score.
2658    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
2659    #[serde(default = "default_goal_utility_weight")]
2660    pub goal_utility_weight: f32,
2661}
2662
/// Default for `AdmissionConfig::goal_utility_threshold`: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    0.4_f32
}
2666
/// Default for `AdmissionConfig::goal_utility_weight`: `0.25`.
fn default_goal_utility_weight() -> f32 {
    0.25_f32
}
2670
2671impl Default for AdmissionConfig {
2672    fn default() -> Self {
2673        Self {
2674            enabled: false,
2675            threshold: default_admission_threshold(),
2676            fast_path_margin: default_admission_fast_path_margin(),
2677            admission_provider: ProviderName::default(),
2678            weights: AdmissionWeights::default(),
2679            admission_strategy: AdmissionStrategy::default(),
2680            rl_min_samples: default_rl_min_samples(),
2681            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
2682            goal_conditioned_write: false,
2683            goal_utility_provider: ProviderName::default(),
2684            goal_utility_threshold: default_goal_utility_threshold(),
2685            goal_utility_weight: default_goal_utility_weight(),
2686        }
2687    }
2688}
2689
2690/// Routing strategy for `[memory.store_routing]`.
2691#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
2692#[serde(rename_all = "snake_case")]
2693pub enum StoreRoutingStrategy {
2694    /// Pure heuristic pattern matching. Zero LLM calls. Default.
2695    #[default]
2696    Heuristic,
2697    /// LLM-based classification via `routing_classifier_provider`.
2698    Llm,
2699    /// Heuristic first; escalates to LLM only when confidence is low.
2700    Hybrid,
2701}
2702
2703/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
2704///
2705/// Controls how each query is classified and routed to the appropriate memory
2706/// backend(s), avoiding unnecessary store queries for simple lookups.
2707#[derive(Debug, Clone, Deserialize, Serialize)]
2708#[serde(default)]
2709pub struct StoreRoutingConfig {
2710    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
2711    /// directly (existing behavior). Default: `false`.
2712    pub enabled: bool,
2713    /// Routing strategy. Default: `heuristic`.
2714    pub strategy: StoreRoutingStrategy,
2715    /// Provider name from `[[llm.providers]]` for LLM-based classification.
2716    /// Falls back to the primary provider when empty. Default: `""`.
2717    pub routing_classifier_provider: ProviderName,
2718    /// Route to use when the classifier is uncertain (confidence < threshold).
2719    ///
2720    /// Defaults to [`MemoryRoute::Hybrid`].
2721    pub fallback_route: MemoryRoute,
2722    /// Confidence threshold below which `HybridRouter` escalates to LLM.
2723    /// Range: `[0.0, 1.0]`. Default: `0.7`.
2724    pub confidence_threshold: f32,
2725}
2726
2727impl Default for StoreRoutingConfig {
2728    fn default() -> Self {
2729        Self {
2730            enabled: false,
2731            strategy: StoreRoutingStrategy::Heuristic,
2732            routing_classifier_provider: ProviderName::default(),
2733            fallback_route: MemoryRoute::Hybrid,
2734            confidence_threshold: 0.7,
2735        }
2736    }
2737}
2738
2739/// Persona memory layer configuration (#2461).
2740///
2741/// When `enabled = true`, user preferences and domain knowledge are extracted from
2742/// conversation history via a cheap LLM provider and injected after the system prompt.
2743#[derive(Debug, Clone, Deserialize, Serialize)]
2744#[serde(default)]
2745pub struct PersonaConfig {
2746    /// Enable persona memory extraction and injection. Default: `false`.
2747    pub enabled: bool,
2748    /// Provider name from `[[llm.providers]]` for persona extraction.
2749    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
2750    pub persona_provider: ProviderName,
2751    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
2752    pub min_confidence: f64,
2753    /// Minimum user messages before extraction runs in a session. Default: `3`.
2754    pub min_messages: usize,
2755    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
2756    pub max_messages: usize,
2757    /// LLM timeout for the extraction call in seconds. Default: `10`.
2758    pub extraction_timeout_secs: u64,
2759    /// Token budget allocated to persona context in assembly. Default: `500`.
2760    pub context_budget_tokens: usize,
2761}
2762
2763impl Default for PersonaConfig {
2764    fn default() -> Self {
2765        Self {
2766            enabled: false,
2767            persona_provider: ProviderName::default(),
2768            min_confidence: 0.6,
2769            min_messages: 3,
2770            max_messages: 10,
2771            extraction_timeout_secs: 10,
2772            context_budget_tokens: 500,
2773        }
2774    }
2775}
2776
2777/// Trajectory-informed memory configuration (#2498).
2778///
2779/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
2780/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
2781/// Procedural entries are injected into context as "past experience" during assembly.
2782#[derive(Debug, Clone, Deserialize, Serialize)]
2783#[serde(default)]
2784pub struct TrajectoryConfig {
2785    /// Enable trajectory extraction and context injection. Default: `false`.
2786    pub enabled: bool,
2787    /// Provider name from `[[llm.providers]]` for extraction.
2788    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
2789    pub trajectory_provider: ProviderName,
2790    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
2791    pub context_budget_tokens: usize,
2792    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
2793    pub max_messages: usize,
2794    /// LLM timeout for the extraction call in seconds. Default: `10`.
2795    pub extraction_timeout_secs: u64,
2796    /// Number of procedural entries retrieved for context injection. Default: `5`.
2797    pub recall_top_k: usize,
2798    /// Minimum confidence score for entries included in context. Default: `0.6`.
2799    pub min_confidence: f64,
2800}
2801
2802impl Default for TrajectoryConfig {
2803    fn default() -> Self {
2804        Self {
2805            enabled: false,
2806            trajectory_provider: ProviderName::default(),
2807            context_budget_tokens: 400,
2808            max_messages: 10,
2809            extraction_timeout_secs: 10,
2810            recall_top_k: 5,
2811            min_confidence: 0.6,
2812        }
2813    }
2814}
2815
2816/// Category-aware memory configuration (#2428).
2817///
2818/// When `enabled = true`, messages are auto-tagged with a category derived from the active
2819/// skill or tool context. The category is stored in the `messages.category` column and used
2820/// as a Qdrant payload filter during recall.
2821#[derive(Debug, Clone, Deserialize, Serialize)]
2822#[serde(default)]
2823pub struct CategoryConfig {
2824    /// Enable category tagging and category-filtered recall. Default: `false`.
2825    pub enabled: bool,
2826    /// Automatically assign category from skill metadata or tool type. Default: `true`.
2827    pub auto_tag: bool,
2828}
2829
2830impl Default for CategoryConfig {
2831    fn default() -> Self {
2832        Self {
2833            enabled: false,
2834            auto_tag: true,
2835        }
2836    }
2837}
2838
2839/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
2840///
2841/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
2842/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
2843/// for complex queries.
2844#[derive(Debug, Clone, Deserialize, Serialize)]
2845#[serde(default)]
2846pub struct TreeConfig {
2847    /// Enable the memory tree and background consolidation loop. Default: `false`.
2848    pub enabled: bool,
2849    /// Provider name from `[[llm.providers]]` for node consolidation.
2850    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
2851    pub consolidation_provider: ProviderName,
2852    /// Interval between consolidation sweeps in seconds. Default: `300`.
2853    pub sweep_interval_secs: u64,
2854    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
2855    pub batch_size: usize,
2856    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
2857    pub similarity_threshold: f32,
2858    /// Maximum tree depth (levels above leaves). Default: `3`.
2859    pub max_level: u32,
2860    /// Token budget allocated to tree memory in context assembly. Default: `400`.
2861    pub context_budget_tokens: usize,
2862    /// Number of tree nodes retrieved for context. Default: `5`.
2863    pub recall_top_k: usize,
2864    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
2865    pub min_cluster_size: usize,
2866}
2867
2868impl Default for TreeConfig {
2869    fn default() -> Self {
2870        Self {
2871            enabled: false,
2872            consolidation_provider: ProviderName::default(),
2873            sweep_interval_secs: 300,
2874            batch_size: 20,
2875            similarity_threshold: 0.8,
2876            max_level: 3,
2877            context_budget_tokens: 400,
2878            recall_top_k: 5,
2879            min_cluster_size: 2,
2880        }
2881    }
2882}
2883
2884/// Time-based microcompact configuration (#2699).
2885///
2886/// When `enabled = true`, low-value tool outputs are cleared from context
2887/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
2888/// The most recent `keep_recent` tool messages are preserved unconditionally.
2889#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2890#[serde(default)]
2891pub struct MicrocompactConfig {
2892    /// Enable time-based microcompaction. Default: `false`.
2893    pub enabled: bool,
2894    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
2895    pub gap_threshold_minutes: u32,
2896    /// Number of most recent compactable tool messages to preserve. Default: `3`.
2897    pub keep_recent: usize,
2898}
2899
2900impl Default for MicrocompactConfig {
2901    fn default() -> Self {
2902        Self {
2903            enabled: false,
2904            gap_threshold_minutes: 60,
2905            keep_recent: 3,
2906        }
2907    }
2908}
2909
2910/// autoDream background memory consolidation configuration (#2697).
2911///
2912/// When `enabled = true`, a constrained consolidation subagent runs after
2913/// a session ends if both `min_sessions` and `min_hours` gates pass.
2914#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2915#[serde(default)]
2916pub struct AutoDreamConfig {
2917    /// Enable autoDream consolidation. Default: `false`.
2918    pub enabled: bool,
2919    /// Minimum number of sessions between consolidations. Default: `3`.
2920    pub min_sessions: u32,
2921    /// Minimum hours between consolidations. Default: `24`.
2922    pub min_hours: u32,
2923    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
2924    /// Falls back to the primary provider when empty. Default: `""`.
2925    pub consolidation_provider: ProviderName,
2926    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
2927    pub max_iterations: u8,
2928    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
2929    #[serde(default = "default_autodream_llm_timeout_secs")]
2930    pub llm_timeout_secs: u64,
2931}
2932
2933impl Default for AutoDreamConfig {
2934    fn default() -> Self {
2935        Self {
2936            enabled: false,
2937            min_sessions: 3,
2938            min_hours: 24,
2939            consolidation_provider: ProviderName::default(),
2940            max_iterations: 8,
2941            llm_timeout_secs: default_autodream_llm_timeout_secs(),
2942        }
2943    }
2944}
2945
/// Default for `AutoDreamConfig::llm_timeout_secs`: `30` seconds.
fn default_autodream_llm_timeout_secs() -> u64 {
    30_u64
}
2949
2950/// `MagicDocs` auto-maintained markdown configuration (#2702).
2951///
2952/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
2953/// are registered and periodically updated by a constrained subagent.
2954#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2955#[serde(default)]
2956pub struct MagicDocsConfig {
2957    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
2958    pub enabled: bool,
2959    /// Minimum turns between updates for a given doc path. Default: `5`.
2960    pub min_turns_between_updates: u32,
2961    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
2962    /// Falls back to the primary provider when empty. Default: `""`.
2963    pub update_provider: ProviderName,
2964    /// Maximum agent loop iterations per doc update. Default: `4`.
2965    pub max_iterations: u8,
2966}
2967
2968impl Default for MagicDocsConfig {
2969    fn default() -> Self {
2970        Self {
2971            enabled: false,
2972            min_turns_between_updates: 5,
2973            update_provider: ProviderName::default(),
2974            max_iterations: 4,
2975        }
2976    }
2977}
2978
2979#[cfg(test)]
2980mod tests {
2981    use super::*;
2982
2983    // Verify that serde deserialization routes through FromStr so that removed variants
2984    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
2985    #[test]
2986    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
2987        #[derive(serde::Deserialize)]
2988        struct Wrapper {
2989            #[allow(dead_code)]
2990            pruning_strategy: PruningStrategy,
2991        }
2992        let toml = r#"pruning_strategy = "task_aware_mig""#;
2993        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
2994        assert_eq!(
2995            w.pruning_strategy,
2996            PruningStrategy::Reactive,
2997            "task_aware_mig must fall back to Reactive"
2998        );
2999    }
3000
3001    #[test]
3002    fn pruning_strategy_toml_round_trip() {
3003        #[derive(serde::Deserialize)]
3004        struct Wrapper {
3005            #[allow(dead_code)]
3006            pruning_strategy: PruningStrategy,
3007        }
3008        for (input, expected) in [
3009            ("reactive", PruningStrategy::Reactive),
3010            ("task_aware", PruningStrategy::TaskAware),
3011            ("mig", PruningStrategy::Mig),
3012        ] {
3013            let toml = format!(r#"pruning_strategy = "{input}""#);
3014            let w: Wrapper = toml::from_str(&toml)
3015                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
3016            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
3017        }
3018    }
3019
3020    #[test]
3021    fn pruning_strategy_toml_unknown_value_errors() {
3022        #[derive(serde::Deserialize)]
3023        #[allow(dead_code)]
3024        struct Wrapper {
3025            pruning_strategy: PruningStrategy,
3026        }
3027        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
3028        assert!(
3029            toml::from_str::<Wrapper>(toml).is_err(),
3030            "unknown strategy must produce an error"
3031        );
3032    }
3033
3034    #[test]
3035    fn tier_config_defaults_are_correct() {
3036        let cfg = TierConfig::default();
3037        assert!(!cfg.enabled);
3038        assert_eq!(cfg.promotion_min_sessions, 3);
3039        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
3040        assert_eq!(cfg.sweep_interval_secs, 3600);
3041        assert_eq!(cfg.sweep_batch_size, 100);
3042    }
3043
3044    #[test]
3045    fn tier_config_rejects_min_sessions_below_2() {
3046        let toml = "promotion_min_sessions = 1";
3047        assert!(toml::from_str::<TierConfig>(toml).is_err());
3048    }
3049
3050    #[test]
3051    fn tier_config_rejects_similarity_threshold_below_0_5() {
3052        let toml = "similarity_threshold = 0.4";
3053        assert!(toml::from_str::<TierConfig>(toml).is_err());
3054    }
3055
3056    #[test]
3057    fn tier_config_rejects_zero_sweep_batch_size() {
3058        let toml = "sweep_batch_size = 0";
3059        assert!(toml::from_str::<TierConfig>(toml).is_err());
3060    }
3061
3062    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
3063        let input = format!("importance_weight = {toml_val}");
3064        toml::from_str::<SemanticConfig>(&input)
3065    }
3066
3067    #[test]
3068    fn importance_weight_default_is_0_15() {
3069        let cfg = SemanticConfig::default();
3070        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
3071    }
3072
3073    #[test]
3074    fn importance_weight_valid_zero() {
3075        let cfg = deserialize_importance_weight("0.0").unwrap();
3076        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
3077    }
3078
3079    #[test]
3080    fn importance_weight_valid_one() {
3081        let cfg = deserialize_importance_weight("1.0").unwrap();
3082        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
3083    }
3084
3085    #[test]
3086    fn importance_weight_rejects_near_zero_negative() {
3087        // TOML does not have a NaN literal, but we can test via a f64 that
3088        // the validator rejects out-of-range values. Test with negative here
3089        // and rely on validate_importance_weight rejecting non-finite via
3090        // a constructed deserializer call.
3091        let result = deserialize_importance_weight("-0.01");
3092        assert!(
3093            result.is_err(),
3094            "negative importance_weight must be rejected"
3095        );
3096    }
3097
3098    #[test]
3099    fn importance_weight_rejects_negative() {
3100        let result = deserialize_importance_weight("-1.0");
3101        assert!(result.is_err(), "negative value must be rejected");
3102    }
3103
3104    #[test]
3105    fn importance_weight_rejects_greater_than_one() {
3106        let result = deserialize_importance_weight("1.01");
3107        assert!(result.is_err(), "value > 1.0 must be rejected");
3108    }
3109
3110    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────
3111
// Test: weights whose raw sum differs from 1.0 are rescaled so that they sum to 1.0.
// `goal_utility` is 0.0 in the input and is not part of the checked sum.
#[test]
fn admission_weights_normalized_sums_to_one() {
    let raw = AdmissionWeights {
        future_utility: 2.0,
        factual_confidence: 1.0,
        semantic_novelty: 3.0,
        temporal_recency: 1.0,
        content_type_prior: 3.0,
        goal_utility: 0.0,
    };
    let scaled = raw.normalized();
    let sum = scaled.future_utility
        + scaled.factual_confidence
        + scaled.semantic_novelty
        + scaled.temporal_recency
        + scaled.content_type_prior;
    let deviation = (sum - 1.0).abs();
    assert!(
        deviation < 0.001,
        "normalized weights must sum to 1.0, got {sum}"
    );
}
3134
// Test: normalizing the default weights yields a sum of ~1.0.
// `goal_utility` is not part of the checked sum.
#[test]
fn admission_weights_normalized_preserves_already_unit_sum() {
    let scaled = AdmissionWeights::default().normalized();
    let sum = scaled.future_utility
        + scaled.factual_confidence
        + scaled.semantic_novelty
        + scaled.temporal_recency
        + scaled.content_type_prior;
    let deviation = (sum - 1.0).abs();
    assert!(
        deviation < 0.001,
        "default weights sum to ~1.0 after normalization"
    );
}
3150
// Test: all-zero weights fall back to the defaults instead of dividing by zero.
// Only `future_utility` is compared against the default value.
#[test]
fn admission_weights_normalized_zero_sum_falls_back_to_default() {
    let all_zero = AdmissionWeights {
        future_utility: 0.0,
        factual_confidence: 0.0,
        semantic_novelty: 0.0,
        temporal_recency: 0.0,
        content_type_prior: 0.0,
        goal_utility: 0.0,
    };
    let normalized = all_zero.normalized();
    let expected = AdmissionWeights::default();
    let drift = (normalized.future_utility - expected.future_utility).abs();
    assert!(drift < 0.001, "zero-sum weights must fall back to defaults");
}
3169
// Test: `AdmissionConfig::default()` is disabled, uses threshold 0.40 and
// fast-path margin 0.15, and names no admission provider.
#[test]
fn admission_config_defaults() {
    let defaults = AdmissionConfig::default();
    assert!(!defaults.enabled);

    let threshold_delta = (defaults.threshold - 0.40).abs();
    assert!(threshold_delta < 0.001);

    let margin_delta = (defaults.fast_path_margin - 0.15).abs();
    assert!(margin_delta < 0.001);

    assert!(defaults.admission_provider.is_empty());
}
3179
3180    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────
3181
// The default recall timeout of `SpreadingActivationConfig` is 1000 ms.
#[test]
fn spreading_activation_default_recall_timeout_ms_is_1000() {
    let defaults = SpreadingActivationConfig::default();
    let timeout_ms = defaults.recall_timeout_ms;
    assert_eq!(timeout_ms, 1000, "default recall_timeout_ms must be 1000ms");
}
3190
// Parses `recall_timeout_ms` from TOML into a local single-field wrapper struct.
// Note: the wrapper is defined inside this test; `SpreadingActivationConfig`
// itself is not deserialized here.
#[test]
fn spreading_activation_toml_recall_timeout_ms_round_trip() {
    #[derive(serde::Deserialize)]
    struct Wrapper {
        recall_timeout_ms: u64,
    }

    let input = "recall_timeout_ms = 500";
    let parsed: Wrapper = toml::from_str(input).unwrap();
    assert_eq!(parsed.recall_timeout_ms, 500);
}
3201
// `validate()` accepts the default thresholds and rejects equal
// activation/inhibition thresholds.
#[test]
fn spreading_activation_validate_cross_field_constraints() {
    let mut config = SpreadingActivationConfig::default();

    // Defaults: activation_threshold (0.1) is below inhibition_threshold (0.8).
    let default_check = config.validate();
    assert!(default_check.is_ok());

    // Setting both thresholds to the same value must be refused.
    config.activation_threshold = 0.5;
    config.inhibition_threshold = 0.5;
    let equal_check = config.validate();
    assert!(equal_check.is_err());
}
3213
3214    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──
3215
// `strategy = "focus"` must deserialize to `CompressionStrategy::Focus`.
#[test]
fn compression_config_focus_strategy_deserializes() {
    let input = r#"strategy = "focus""#;
    let parsed: CompressionConfig = toml::from_str(input).unwrap();
    assert_eq!(parsed.strategy, CompressionStrategy::Focus);
}
3222
// When both density budget fields are omitted from the TOML input, deserialization
// must fill in the serde defaults (0.7 high / 0.3 low). These come from
// `#[serde(default = "...")]`, which applies while deserializing rather than
// through `Default::default()`.
#[test]
fn compression_config_density_budget_defaults_on_deserialize() {
    let input = r#"strategy = "reactive""#;
    let parsed: CompressionConfig = toml::from_str(input).unwrap();

    let high_delta = (parsed.high_density_budget - 0.7).abs();
    assert!(high_delta < 1e-6);

    let low_delta = (parsed.low_density_budget - 0.3).abs();
    assert!(low_delta < 1e-6);
}
3232
// Explicit density budgets in the TOML input must be read back as given.
#[test]
fn compression_config_density_budget_round_trip() {
    let input = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
    let parsed: CompressionConfig = toml::from_str(input).unwrap();

    let high_delta = (parsed.high_density_budget - 0.6).abs();
    assert!(high_delta < f32::EPSILON);

    let low_delta = (parsed.low_density_budget - 0.4).abs();
    assert!(low_delta < f32::EPSILON);
}
3240
// `CompressionConfig::default()` names no focus scorer provider.
#[test]
fn compression_config_focus_scorer_provider_default_empty() {
    let defaults = CompressionConfig::default();
    assert!(defaults.focus_scorer_provider.is_empty());
}
3246
// An explicit `focus_scorer_provider` in the TOML input must be read back as given.
#[test]
fn compression_config_focus_scorer_provider_round_trip() {
    let input = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
    let parsed: CompressionConfig = toml::from_str(input).unwrap();
    let provider = parsed.focus_scorer_provider.as_str();
    assert_eq!(provider, "fast");
}
3253}
3254
3255/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
3256///
3257/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
3258/// Successful and failed reasoning chains are compressed into short, generalizable strategy
3259/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
3260/// and injected into the prompt preamble.
3261///
3262/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
3263///
3264/// # Example
3265///
3266/// ```toml
3267/// [memory.reasoning]
3268/// enabled = true
3269/// extract_provider = "fast"
3270/// distill_provider = "fast"
3271/// top_k = 3
3272/// store_limit = 1000
3273/// ```
3274#[derive(Debug, Clone, Deserialize, Serialize)]
3275#[serde(default)]
3276pub struct ReasoningConfig {
3277    /// Enable the reasoning-bank pipeline. Default: `false`.
3278    pub enabled: bool,
3279    /// Provider name from `[[llm.providers]]` for the self-judge step.
3280    /// Falls back to the primary provider when empty. Default: `""`.
3281    pub extract_provider: ProviderName,
3282    /// Provider name from `[[llm.providers]]` for the distillation step.
3283    /// Falls back to the primary provider when empty. Default: `""`.
3284    pub distill_provider: ProviderName,
3285    /// Number of strategies retrieved per turn for context injection. Default: `3`.
3286    pub top_k: usize,
3287    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
3288    pub store_limit: usize,
3289    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
3290    pub max_messages: usize,
3291    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
3292    pub max_message_chars: usize,
3293    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
3294    pub context_budget_tokens: usize,
3295    /// Minimum number of messages required before self-judge fires. Default: `2`.
3296    pub min_messages: usize,
3297    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
3298    pub extraction_timeout_secs: u64,
3299    /// Timeout in seconds for the distillation LLM call. Default: `30`.
3300    pub distill_timeout_secs: u64,
3301    /// Maximum number of recent messages passed to the self-judge evaluator.
3302    /// Narrowing to the last user+assistant pair improves classification accuracy.
3303    /// Default: `2`.
3304    pub self_judge_window: usize,
3305    /// Minimum characters in the assistant response to trigger self-judge.
3306    /// Short or trivial responses are skipped. Default: `50`.
3307    pub min_assistant_chars: usize,
3308}
3309
3310impl Default for ReasoningConfig {
3311    fn default() -> Self {
3312        Self {
3313            enabled: false,
3314            extract_provider: ProviderName::default(),
3315            distill_provider: ProviderName::default(),
3316            top_k: 3,
3317            store_limit: 1000,
3318            max_messages: 6,
3319            max_message_chars: 2000,
3320            context_budget_tokens: 500,
3321            min_messages: 2,
3322            extraction_timeout_secs: 30,
3323            distill_timeout_secs: 30,
3324            self_judge_window: 2,
3325            min_assistant_chars: 50,
3326        }
3327    }
3328}
3329
3330// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
3331
3332/// Eviction policy variant.
3333///
3334/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid.
3335#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
3336#[serde(rename_all = "lowercase")]
3337pub enum EvictionPolicy {
3338    /// Ebbinghaus forgetting-curve eviction.
3339    #[default]
3340    Ebbinghaus,
3341}
3342
3343/// Configuration for the memory eviction policy.
3344///
3345/// Controls which policy runs during the periodic sweep and how many entries
3346/// are retained. `zeph-memory` re-exports this type from here.
3347#[derive(Debug, Clone, Deserialize, Serialize)]
3348pub struct EvictionConfig {
3349    /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
3350    pub policy: EvictionPolicy,
3351    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
3352    pub max_entries: usize,
3353    /// How often to run the eviction sweep, in seconds.
3354    pub sweep_interval_secs: u64,
3355}
3356
3357impl Default for EvictionConfig {
3358    fn default() -> Self {
3359        Self {
3360            policy: EvictionPolicy::Ebbinghaus,
3361            max_entries: 0,
3362            sweep_interval_secs: 3600,
3363        }
3364    }
3365}
3366
3367// ── Compression guidelines config (moved from zeph-memory) ───────────────────
3368
3369/// Configuration for ACON failure-driven compression guidelines.
3370///
3371/// `zeph-memory` re-exports this type from here.
3372#[derive(Debug, Clone, Deserialize, Serialize)]
3373#[serde(default)]
3374pub struct CompressionGuidelinesConfig {
3375    /// Enable the feature. Default: `false`.
3376    pub enabled: bool,
3377    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
3378    pub update_threshold: u16,
3379    /// Maximum token budget for the guidelines document. Default: `500`.
3380    pub max_guidelines_tokens: usize,
3381    /// Maximum failure pairs consumed per update cycle. Default: `10`.
3382    pub max_pairs_per_update: usize,
3383    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
3384    pub detection_window_turns: u64,
3385    /// Interval in seconds between background updater checks. Default: `300`.
3386    pub update_interval_secs: u64,
3387    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
3388    pub max_stored_pairs: usize,
3389    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
3390    /// `None` (or `Some("")`) falls back to the primary provider.
3391    #[serde(default, skip_serializing_if = "Option::is_none")]
3392    pub guidelines_provider: Option<ProviderName>,
3393    /// Maintain separate guideline documents per content category.
3394    #[serde(default)]
3395    pub categorized_guidelines: bool,
3396}
3397
3398impl Default for CompressionGuidelinesConfig {
3399    fn default() -> Self {
3400        Self {
3401            enabled: false,
3402            update_threshold: 5,
3403            max_guidelines_tokens: 500,
3404            max_pairs_per_update: 10,
3405            detection_window_turns: 10,
3406            update_interval_secs: 300,
3407            max_stored_pairs: 100,
3408            guidelines_provider: None,
3409            categorized_guidelines: false,
3410        }
3411    }
3412}
3413
3414// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
3415
3416/// Functional category of a compaction probe question.
3417///
3418/// `zeph-memory` re-exports this type from here.
3419#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
3420#[serde(rename_all = "lowercase")]
3421pub enum ProbeCategory {
3422    /// Did specific facts survive? (file paths, function names, values, decisions)
3423    Recall,
3424    /// Does the agent know which files/tools/URLs it used?
3425    Artifact,
3426    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
3427    Continuation,
3428    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
3429    Decision,
3430}
3431
3432/// Configuration for the compaction probe.
3433///
3434/// `zeph-memory` re-exports this type from here.
3435#[derive(Debug, Clone, Serialize, Deserialize)]
3436#[serde(default)]
3437pub struct CompactionProbeConfig {
3438    /// Enable compaction probe validation. Default: `false`.
3439    pub enabled: bool,
3440    /// Provider name from `[[llm.providers]]` for probe LLM calls.
3441    /// `None` (or `Some("")`) uses the summary provider.
3442    #[serde(default, skip_serializing_if = "Option::is_none")]
3443    pub probe_provider: Option<ProviderName>,
3444    /// Minimum score to pass without warnings. Default: `0.6`.
3445    pub threshold: f32,
3446    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
3447    pub hard_fail_threshold: f32,
3448    /// Maximum number of probe questions to generate. Default: `5`.
3449    pub max_questions: usize,
3450    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
3451    pub timeout_secs: u64,
3452    /// Optional per-category weight multipliers for the overall score.
3453    #[serde(default)]
3454    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
3455}
3456
3457impl Default for CompactionProbeConfig {
3458    fn default() -> Self {
3459        Self {
3460            enabled: false,
3461            probe_provider: None,
3462            threshold: 0.6,
3463            hard_fail_threshold: 0.35,
3464            max_questions: 5,
3465            timeout_secs: 15,
3466            category_weights: None,
3467        }
3468    }
3469}
3470
3471// ── MemCoT semantic state config ─────────────────────────────────────────────
3472
3473/// `MemCoT` semantic-state distillation configuration.
3474///
3475/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
3476/// summarizing conceptual progress across turns. This buffer is injected into graph
3477/// recall queries to improve retrieval relevance.
3478///
3479/// All LLM work (distillation) runs asynchronously — never on the turn thread.
3480/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
3481///
3482/// # Config example
3483///
3484/// ```toml
3485/// [memory.memcot]
3486/// enabled = true
3487/// distill_provider = "fast"
3488/// distill_timeout_secs = 5
3489/// min_assistant_chars = 200
3490/// min_distill_interval_secs = 30
3491/// max_distills_per_session = 50
3492/// max_state_chars = 800
3493/// recall_view = "head"
3494/// ```
3495#[derive(Debug, Clone, Serialize, Deserialize)]
3496#[serde(default)]
3497pub struct MemCotConfig {
3498    /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
3499    ///
3500    /// When `false`, the accumulator is never allocated and no LLM calls are made.
3501    pub enabled: bool,
3502    /// Provider name from `[[llm.providers]]` for distillation.
3503    ///
3504    /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
3505    /// A startup warning is emitted when the resolved model does not look fast-tier.
3506    /// Falls back to the primary provider when empty. Default: `""`.
3507    pub distill_provider: ProviderName,
3508    /// Timeout in seconds for each distillation LLM call. Default: `5`.
3509    pub distill_timeout_secs: u64,
3510    /// Minimum characters in the assistant response to trigger distillation.
3511    /// Short or trivial replies are skipped. Default: `200`.
3512    pub min_assistant_chars: usize,
3513    /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
3514    ///
3515    /// Prevents runaway costs on long sessions with rapid turns.
3516    /// Clearing `/new` resets this counter.
3517    pub min_distill_interval_secs: u64,
3518    /// Maximum distillation spawns per conversation session. Default: `50`.
3519    ///
3520    /// Once this cap is reached the accumulator stops distilling for the rest of the
3521    /// session. Counter is reset when the user sends `/new`.
3522    pub max_distills_per_session: u64,
3523    /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
3524    /// Default: `800`.
3525    pub max_state_chars: usize,
3526    /// Recall view applied when `MemCoT` is active. Default: `Head`.
3527    ///
3528    /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
3529    /// - `zoom_in`: adds source-message provenance to each returned fact.
3530    /// - `zoom_out`: expands 1-hop neighbors per returned fact.
3531    ///
3532    /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
3533    pub recall_view: RecallViewConfig,
3534    /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
3535    pub zoom_out_neighbor_cap: usize,
3536    /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
3537    /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
3538    #[serde(default, skip_serializing_if = "Vec::is_empty")]
3539    pub fast_tier_models: Vec<String>,
3540}
3541
3542/// Recall view variant exposed in config.
3543///
3544/// Maps 1-to-1 to `zeph_memory::RecallView`.
3545#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
3546#[serde(rename_all = "snake_case")]
3547pub enum RecallViewConfig {
3548    /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
3549    #[default]
3550    Head,
3551    /// Adds source-message provenance to each returned fact.
3552    ZoomIn,
3553    /// Expands 1-hop neighbor facts per returned fact.
3554    ZoomOut,
3555}
3556
3557impl Default for MemCotConfig {
3558    fn default() -> Self {
3559        Self {
3560            enabled: false,
3561            distill_provider: ProviderName::default(),
3562            distill_timeout_secs: 5,
3563            min_assistant_chars: 200,
3564            min_distill_interval_secs: 30,
3565            max_distills_per_session: 50,
3566            max_state_chars: 800,
3567            recall_view: RecallViewConfig::Head,
3568            zoom_out_neighbor_cap: 3,
3569            fast_tier_models: Vec::new(),
3570        }
3571    }
3572}
3573
3574/// `OmniMem` retrieval failure tracking configuration (issue #3576).
3575///
3576/// Controls the async logger that records no-hit and low-confidence recall events
3577/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
3578#[derive(Debug, Clone, Deserialize, Serialize)]
3579#[serde(default)]
3580pub struct RetrievalFailuresConfig {
3581    /// Enable retrieval failure logging. Default: `false`.
3582    pub enabled: bool,
3583    /// Composite recall score below which a result is classified as low-confidence.
3584    ///
3585    /// The threshold applies to the post-reranking composite score (which incorporates
3586    /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
3587    /// the scoring pipeline in use. Default: `0.3`.
3588    #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
3589    pub low_confidence_threshold: f32,
3590    /// Days to retain failure records before automatic cleanup. Default: `90`.
3591    #[serde(default = "default_retrieval_failures_retention_days")]
3592    pub retention_days: u32,
3593    /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
3594    #[serde(default = "default_retrieval_failures_channel_capacity")]
3595    pub channel_capacity: usize,
3596    /// Maximum records collected before flushing a batch INSERT. Default: `16`.
3597    #[serde(default = "default_retrieval_failures_batch_size")]
3598    pub batch_size: usize,
3599    /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
3600    #[serde(default = "default_retrieval_failures_flush_interval_ms")]
3601    pub flush_interval_ms: u64,
3602}
3603
3604impl Default for RetrievalFailuresConfig {
3605    fn default() -> Self {
3606        Self {
3607            enabled: false,
3608            low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
3609            retention_days: default_retrieval_failures_retention_days(),
3610            channel_capacity: default_retrieval_failures_channel_capacity(),
3611            batch_size: default_retrieval_failures_batch_size(),
3612            flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
3613        }
3614    }
3615}
3616
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    // `MemCotConfig::default()` must be disabled and carry the documented defaults
    // for every field, including an empty fast-tier allowlist.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        assert!(cfg.fast_tier_models.is_empty());
    }

    // Every field set in the TOML input must be read back as given; the omitted
    // `fast_tier_models` must fall back to its empty default.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        assert!(cfg.fast_tier_models.is_empty());
    }
}
3658
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    // ── ApexMemConfig ────────────────────────────────────────────────────────

    // APEX-MEM is opt-in: the default configuration must be disabled.
    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    // `enabled = true` in TOML must be honoured.
    #[test]
    fn apex_mem_config_serde_round_trip() {
        let toml = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
    }

    // An empty TOML document must deserialize to the disabled default.
    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    // ── WriteQualityGateConfig ───────────────────────────────────────────────

    // The default quality gate is disabled and carries the expected tunables.
    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert!((cfg.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert!((cfg.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(cfg.reference_check_lang_en);
    }

    // Every field set in the TOML input must be read back as given, including the
    // three component weights, the alarm ratio, and the LLM weight.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let toml = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert!((cfg.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        assert!((cfg.information_value_weight - 0.5).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.50).abs() < f32::EPSILON);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert!((cfg.llm_weight - 0.3).abs() < f32::EPSILON);
        assert!(!cfg.reference_check_lang_en);
    }

    // An empty TOML document must deserialize to the disabled default.
    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }

    // ── MemoryConfig provider fields ─────────────────────────────────────────

    // `shutdown_summary_provider` must be read from TOML when present.
    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    // `shutdown_summary_provider` must default to the empty string when omitted.
    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    // `compaction_provider` must be read from TOML when present.
    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            compaction_provider = "mid"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    // `compaction_provider` must default to the empty string when omitted.
    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }
}