zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Fallback for `MemoryConfig::sqlite_pool_size` when the key is omitted: `5`.
fn default_sqlite_pool_size() -> u32 {
    5_u32
}
17
/// Built-in default for the maximum history size: `100`.
fn default_max_history() -> usize {
    100_usize
}
21
/// Built-in default for the maximum title length, in characters: `60`.
fn default_title_max_chars() -> usize {
    60_usize
}
25
/// Built-in default document collection name: `"zeph_documents"`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
29
/// Built-in default document chunk size: `1000`.
fn default_document_chunk_size() -> usize {
    1_000_usize
}
33
/// Built-in default overlap between adjacent document chunks: `100`.
fn default_document_chunk_overlap() -> usize {
    100_usize
}
37
/// Built-in default `top_k` for document lookups: `3`.
fn default_document_top_k() -> usize {
    3_usize
}
41
/// Fallback for `MemoryConfig::autosave_min_length` when the key is omitted: `20`.
fn default_autosave_min_length() -> usize {
    20_usize
}
45
/// Fallback for `MemoryConfig::tool_call_cutoff` when the key is omitted: `6`.
fn default_tool_call_cutoff() -> usize {
    6_usize
}
49
/// Fallback for `MemoryConfig::token_safety_margin` when the key is omitted: `1.0`.
fn default_token_safety_margin() -> f32 {
    1.0_f32
}
53
/// Fallback for `MemoryConfig::redact_credentials` when the key is omitted: `true`.
fn default_redact_credentials() -> bool {
    const REDACT_BY_DEFAULT: bool = true;
    REDACT_BY_DEFAULT
}
57
/// Fallback for `MemoryConfig::qdrant_url` when the key is omitted: `"http://localhost:6334"`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
61
/// Fallback for `MemoryConfig::summarization_threshold` when the key is omitted: `50`.
fn default_summarization_threshold() -> usize {
    50_usize
}
65
/// Fallback for `MemoryConfig::summarization_llm_timeout_secs`, in seconds: `60`.
fn default_summarization_llm_timeout_secs() -> u64 {
    60_u64
}
69
/// Fallback for `MemoryConfig::context_budget_tokens` when the key is omitted: `0`.
fn default_context_budget_tokens() -> usize {
    0_usize
}
73
/// Fallback for `MemoryConfig::soft_compaction_threshold` when the key is omitted: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    0.6_f32
}
77
/// Fallback for `MemoryConfig::hard_compaction_threshold` (also accepted under the alias
/// `compaction_threshold`) when the key is omitted: `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    0.9_f32
}
81
/// Fallback for `MemoryConfig::compaction_preserve_tail` when the key is omitted: `6`.
fn default_compaction_preserve_tail() -> usize {
    6_usize
}
85
/// Fallback for `MemoryConfig::compaction_cooldown_turns` when the key is omitted: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    2_u8
}
89
/// Fallback for `MemoryConfig::auto_budget` when the key is omitted: `true`.
fn default_auto_budget() -> bool {
    const AUTO_BUDGET_BY_DEFAULT: bool = true;
    AUTO_BUDGET_BY_DEFAULT
}
93
/// Fallback for `MemoryConfig::prune_protect_tokens` when the key is omitted: `40_000`.
fn default_prune_protect_tokens() -> usize {
    40_000_usize
}
97
/// Fallback for `MemoryConfig::cross_session_score_threshold` when the key is omitted: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    0.35_f32
}
101
/// Built-in default temporal-decay half-life, in days: `30`.
fn default_temporal_decay_half_life_days() -> u32 {
    30_u32
}
105
/// Built-in default MMR lambda: `0.7`.
fn default_mmr_lambda() -> f32 {
    0.7_f32
}
109
/// Built-in default for the semantic `enabled` flag: `true`.
fn default_semantic_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
113
/// Built-in default recall limit: `5`.
fn default_recall_limit() -> usize {
    5_usize
}
117
/// Built-in default vector weight: `0.7`.
fn default_vector_weight() -> f64 {
    0.7_f64
}
121
/// Built-in default keyword weight: `0.3`.
fn default_keyword_weight() -> f64 {
    0.3_f64
}
125
/// Built-in default cap on graph entities per message: `10`.
fn default_graph_max_entities_per_message() -> usize {
    10_usize
}
129
/// Built-in default cap on graph edges per message: `15`.
fn default_graph_max_edges_per_message() -> usize {
    15_usize
}
133
/// Built-in default graph community refresh interval: `100`.
fn default_graph_community_refresh_interval() -> usize {
    100_usize
}
137
/// Built-in default prompt size limit for community summaries, in bytes: `8192`.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8_192_usize
}
141
/// Built-in default concurrency for community summaries: `4`.
fn default_graph_community_summary_concurrency() -> usize {
    4_usize
}
145
/// Built-in default LPA edge chunk size: `10_000`.
fn default_lpa_edge_chunk_size() -> usize {
    10_000_usize
}
149
/// Built-in default graph entity similarity threshold: `0.85`.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85_f32
}
153
/// Built-in default graph entity ambiguity threshold: `0.70`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.7_f32
}
157
/// Built-in default graph extraction timeout, in seconds: `15`.
fn default_graph_extraction_timeout_secs() -> u64 {
    15_u64
}
161
/// Built-in default maximum graph hop count: `2`.
fn default_graph_max_hops() -> u32 {
    2_u32
}
165
/// Built-in default graph recall limit: `10`.
fn default_graph_recall_limit() -> usize {
    10_usize
}
169
/// Built-in default retention for expired graph edges, in days: `90`.
fn default_graph_expired_edge_retention_days() -> u32 {
    90_u32
}
173
/// Built-in default graph temporal decay rate: `0.0`.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0_f64
}
177
/// Built-in default graph edge history limit: `100`.
fn default_graph_edge_history_limit() -> usize {
    100_usize
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85_f32
}
185
/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    3_u32
}
189
/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1_f32
}
193
/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8_f32
}
197
/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50_usize
}
201
/// Default for `SpreadingActivationConfig::recall_timeout_ms`, in milliseconds: `1000`.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1_000_u64
}
205
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85_f32
}
209
/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    10_usize
}
213
/// Default for `NoteLinkingConfig::timeout_secs`, in seconds: `5`.
fn default_note_linking_timeout_secs() -> u64 {
    5_u64
}
217
/// Fallback for `MemoryConfig::shutdown_summary` when the key is omitted: `true`.
fn default_shutdown_summary() -> bool {
    const SUMMARIZE_ON_SHUTDOWN: bool = true;
    SUMMARIZE_ON_SHUTDOWN
}
221
/// Fallback for `MemoryConfig::shutdown_summary_min_messages` when the key is omitted: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    4_usize
}
225
/// Fallback for `MemoryConfig::shutdown_summary_max_messages` when the key is omitted: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    20_usize
}
229
/// Fallback for `MemoryConfig::shutdown_summary_timeout_secs`, in seconds: `30`.
fn default_shutdown_summary_timeout_secs() -> u64 {
    30_u64
}
233
234fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
235where
236    D: serde::Deserializer<'de>,
237{
238    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
239    if value.is_nan() || value.is_infinite() {
240        return Err(serde::de::Error::custom(
241            "similarity_threshold must be a finite number",
242        ));
243    }
244    if !(0.5..=1.0).contains(&value) {
245        return Err(serde::de::Error::custom(
246            "similarity_threshold must be in [0.5, 1.0]",
247        ));
248    }
249    Ok(value)
250}
251
252fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
253where
254    D: serde::Deserializer<'de>,
255{
256    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
257    if value < 2 {
258        return Err(serde::de::Error::custom(
259            "promotion_min_sessions must be >= 2",
260        ));
261    }
262    Ok(value)
263}
264
265fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
266where
267    D: serde::Deserializer<'de>,
268{
269    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
270    if value == 0 {
271        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
272    }
273    Ok(value)
274}
275
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    3_u32
}
279
/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    0.92_f32
}
283
/// Default for `TierConfig::sweep_interval_secs`, in seconds: `3600`.
fn default_tier_sweep_interval_secs() -> u64 {
    3_600_u64
}
287
/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    100_usize
}
291
/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    0.8_f32
}
295
/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    50_usize
}
299
300fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
301where
302    D: serde::Deserializer<'de>,
303{
304    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
305    if value.is_nan() || value.is_infinite() {
306        return Err(serde::de::Error::custom(
307            "scene_similarity_threshold must be a finite number",
308        ));
309    }
310    if !(0.5..=1.0).contains(&value) {
311        return Err(serde::de::Error::custom(
312            "scene_similarity_threshold must be in [0.5, 1.0]",
313        ));
314    }
315    Ok(value)
316}
317
318fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
319where
320    D: serde::Deserializer<'de>,
321{
322    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
323    if value == 0 {
324        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
325    }
326    Ok(value)
327}
328
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// Because the struct carries `#[serde(default)]`, any key missing from the TOML section is
/// taken from [`TierConfig::default`].
///
/// # Validation
///
/// Constraints enforced at deserialization time (via the per-field `deserialize_with` hooks):
/// - `similarity_threshold` finite and in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` finite and in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// `sweep_interval_secs` and `scene_sweep_interval_secs` have no validator attached.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep.
    /// Must be `>= 1`. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
375
/// Default for `TierConfig::scene_sweep_interval_secs`, in seconds: `7200`.
fn default_scene_sweep_interval_secs() -> u64 {
    7_200_u64
}
379
380impl Default for TierConfig {
381    fn default() -> Self {
382        Self {
383            enabled: false,
384            promotion_min_sessions: default_tier_promotion_min_sessions(),
385            similarity_threshold: default_tier_similarity_threshold(),
386            sweep_interval_secs: default_tier_sweep_interval_secs(),
387            sweep_batch_size: default_tier_sweep_batch_size(),
388            scene_enabled: false,
389            scene_similarity_threshold: default_scene_similarity_threshold(),
390            scene_batch_size: default_scene_batch_size(),
391            scene_provider: ProviderName::default(),
392            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
393        }
394    }
395}
396
397fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
398where
399    D: serde::Deserializer<'de>,
400{
401    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
402    if value.is_nan() || value.is_infinite() {
403        return Err(serde::de::Error::custom(
404            "temporal_decay_rate must be a finite number",
405        ));
406    }
407    if !(0.0..=10.0).contains(&value) {
408        return Err(serde::de::Error::custom(
409            "temporal_decay_rate must be in [0.0, 10.0]",
410        ));
411    }
412    Ok(value)
413}
414
415fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
416where
417    D: serde::Deserializer<'de>,
418{
419    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
420    if value.is_nan() || value.is_infinite() {
421        return Err(serde::de::Error::custom(
422            "similarity_threshold must be a finite number",
423        ));
424    }
425    if !(0.0..=1.0).contains(&value) {
426        return Err(serde::de::Error::custom(
427            "similarity_threshold must be in [0.0, 1.0]",
428        ));
429    }
430    Ok(value)
431}
432
433fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
434where
435    D: serde::Deserializer<'de>,
436{
437    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
438    if value.is_nan() || value.is_infinite() {
439        return Err(serde::de::Error::custom(
440            "importance_weight must be a finite number",
441        ));
442    }
443    if value < 0.0 {
444        return Err(serde::de::Error::custom(
445            "importance_weight must be non-negative",
446        ));
447    }
448    if value > 1.0 {
449        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
450    }
451    Ok(value)
452}
453
/// Built-in default importance weight: `0.15`.
fn default_importance_weight() -> f64 {
    0.15_f64
}
457
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Constraints enforced at deserialization time (per-field `deserialize_with` hooks):
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Constraints that are NOT checked while deserializing:
/// - `activation_threshold < inhibition_threshold` is a cross-field rule checked only by
///   [`SpreadingActivationConfig::validate`], which must be called explicitly.
/// - `recall_timeout_ms` has no validator; per its field documentation, values below 1 are
///   clamped to 100 ms at runtime.
/// - `seed_structural_weight` documents a `[0.0, 1.0]` range but has no validator attached.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
500
501fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
502where
503    D: serde::Deserializer<'de>,
504{
505    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
506    if value.is_nan() || value.is_infinite() {
507        return Err(serde::de::Error::custom(
508            "decay_lambda must be a finite number",
509        ));
510    }
511    if !(value > 0.0 && value <= 1.0) {
512        return Err(serde::de::Error::custom(
513            "decay_lambda must be in (0.0, 1.0]",
514        ));
515    }
516    Ok(value)
517}
518
519fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
520where
521    D: serde::Deserializer<'de>,
522{
523    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
524    if value == 0 {
525        return Err(serde::de::Error::custom("max_hops must be >= 1"));
526    }
527    Ok(value)
528}
529
530impl SpreadingActivationConfig {
531    /// Validate cross-field constraints that cannot be expressed in per-field validators.
532    ///
533    /// # Errors
534    ///
535    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
536    pub fn validate(&self) -> Result<(), String> {
537        if self.activation_threshold >= self.inhibition_threshold {
538            return Err(format!(
539                "activation_threshold ({}) must be < inhibition_threshold ({})",
540                self.activation_threshold, self.inhibition_threshold
541            ));
542        }
543        Ok(())
544    }
545}
546
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    0.4_f32
}
550
/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    3_usize
}
554
555impl Default for SpreadingActivationConfig {
556    fn default() -> Self {
557        Self {
558            enabled: false,
559            decay_lambda: default_spreading_activation_decay_lambda(),
560            max_hops: default_spreading_activation_max_hops(),
561            activation_threshold: default_spreading_activation_activation_threshold(),
562            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
563            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
564            seed_structural_weight: default_seed_structural_weight(),
565            seed_community_cap: default_seed_community_cap(),
566            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
567        }
568    }
569}
570
/// Kumiho belief revision configuration.
///
/// Because the struct carries `#[serde(default)]`, any key missing from the TOML section is
/// taken from [`BeliefRevisionConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision.
    /// Must be finite and in `[0.0, 1.0]` (checked at deserialization time). Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
582
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85_f32
}
586
587impl Default for BeliefRevisionConfig {
588    fn default() -> Self {
589        Self {
590            enabled: false,
591            similarity_threshold: default_belief_revision_similarity_threshold(),
592        }
593    }
594}
595
596/// D-MEM RPE-based tiered graph extraction routing configuration.
597#[derive(Debug, Clone, Deserialize, Serialize)]
598#[serde(default)]
599pub struct RpeConfig {
600    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
601    pub enabled: bool,
602    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
603    /// Default: `0.3`.
604    #[serde(deserialize_with = "validate_similarity_threshold")]
605    pub threshold: f32,
606    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
607    pub max_skip_turns: u32,
608}
609
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    0.3_f32
}
613
/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    5_u32
}
617
618impl Default for RpeConfig {
619    fn default() -> Self {
620        Self {
621            enabled: false,
622            threshold: default_rpe_threshold(),
623            max_skip_turns: default_rpe_max_skip_turns(),
624        }
625    }
626}
627
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// Because the struct carries `#[serde(default)]`, any key missing from the TOML section is
/// taken from [`NoteLinkingConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge.
    /// Must be finite and in `[0.0, 1.0]` (checked at deserialization time). Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
646
647impl Default for NoteLinkingConfig {
648    fn default() -> Self {
649        Self {
650            enabled: false,
651            similarity_threshold: default_note_linking_similarity_threshold(),
652            top_k: default_note_linking_top_k(),
653            timeout_secs: default_note_linking_timeout_secs(),
654        }
655    }
656}
657
/// Vector backend selector for embedding storage.
///
/// Variants are (de)serialized by their lowercase names (`"qdrant"`, `"sqlite"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum VectorBackend {
    /// Qdrant backend; written as `"qdrant"` in configuration.
    Qdrant,
    /// SQLite backend; written as `"sqlite"` in configuration. This is the default variant.
    #[default]
    Sqlite,
}
667
668impl VectorBackend {
669    /// Return the lowercase identifier string for this backend.
670    ///
671    /// # Examples
672    ///
673    /// ```
674    /// use zeph_config::VectorBackend;
675    ///
676    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
677    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
678    /// ```
679    #[must_use]
680    pub fn as_str(&self) -> &'static str {
681        match self {
682            Self::Qdrant => "qdrant",
683            Self::Sqlite => "sqlite",
684        }
685    }
686}
687
688/// Memory subsystem configuration, nested under `[memory]` in TOML.
689///
690/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
691/// multi-tier promotion, and all memory-related background tasks.
692///
693/// # Example (TOML)
694///
695/// ```toml
696/// [memory]
697/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
698/// qdrant_url = "http://localhost:6334"
699/// history_limit = 50
700/// summarization_threshold = 50
701/// auto_budget = true
702/// ```
703#[derive(Debug, Deserialize, Serialize)]
704#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
705pub struct MemoryConfig {
706    #[serde(default)]
707    pub compression_guidelines: CompressionGuidelinesConfig,
708    #[serde(default = "default_sqlite_path_field")]
709    pub sqlite_path: String,
710    pub history_limit: u32,
711    #[serde(default = "default_qdrant_url")]
712    pub qdrant_url: String,
713    /// Optional API key for authenticating to a remote or managed Qdrant cluster.
714    ///
715    /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
716    /// Leave `None` for local dev instances. The actual key is resolved from the vault:
717    /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
718    ///
719    /// The value is wrapped in [`Secret`] to prevent accidental logging.
720    /// `skip_serializing` prevents the key from being written back to TOML on config save.
721    #[serde(default, skip_serializing)]
722    pub qdrant_api_key: Option<Secret>,
723    #[serde(default)]
724    pub semantic: SemanticConfig,
725    #[serde(default = "default_summarization_threshold")]
726    pub summarization_threshold: usize,
727    /// LLM call timeout for summarization, in seconds. Default: `60`.
728    #[serde(default = "default_summarization_llm_timeout_secs")]
729    pub summarization_llm_timeout_secs: u64,
730    #[serde(default = "default_context_budget_tokens")]
731    pub context_budget_tokens: usize,
732    #[serde(default = "default_soft_compaction_threshold")]
733    pub soft_compaction_threshold: f32,
734    #[serde(
735        default = "default_hard_compaction_threshold",
736        alias = "compaction_threshold"
737    )]
738    pub hard_compaction_threshold: f32,
739    #[serde(default = "default_compaction_preserve_tail")]
740    pub compaction_preserve_tail: usize,
741    #[serde(default = "default_compaction_cooldown_turns")]
742    pub compaction_cooldown_turns: u8,
743    #[serde(default = "default_auto_budget")]
744    pub auto_budget: bool,
745    #[serde(default = "default_prune_protect_tokens")]
746    pub prune_protect_tokens: usize,
747    #[serde(default = "default_cross_session_score_threshold")]
748    pub cross_session_score_threshold: f32,
749    #[serde(default)]
750    pub vector_backend: VectorBackend,
751    #[serde(default = "default_token_safety_margin")]
752    pub token_safety_margin: f32,
753    #[serde(default = "default_redact_credentials")]
754    pub redact_credentials: bool,
755    #[serde(default = "default_true")]
756    pub autosave_assistant: bool,
757    #[serde(default = "default_autosave_min_length")]
758    pub autosave_min_length: usize,
759    #[serde(default = "default_tool_call_cutoff")]
760    pub tool_call_cutoff: usize,
761    #[serde(default = "default_sqlite_pool_size")]
762    pub sqlite_pool_size: u32,
763    #[serde(default)]
764    pub sessions: SessionsConfig,
765    #[serde(default)]
766    pub documents: DocumentConfig,
767    #[serde(default)]
768    pub eviction: EvictionConfig,
769    #[serde(default)]
770    pub compression: CompressionConfig,
771    #[serde(default)]
772    pub sidequest: SidequestConfig,
773    #[serde(default)]
774    pub graph: GraphConfig,
775    /// Store a lightweight session summary to the vector store on shutdown when no session
776    /// summary exists yet for this conversation. Enables cross-session recall for short or
777    /// interrupted sessions that never triggered hard compaction. Default: `true`.
778    #[serde(default = "default_shutdown_summary")]
779    pub shutdown_summary: bool,
780    /// Minimum number of user-turn messages required before a shutdown summary is generated.
781    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
782    #[serde(default = "default_shutdown_summary_min_messages")]
783    pub shutdown_summary_min_messages: usize,
784    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
785    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
786    /// Default: `20`.
787    #[serde(default = "default_shutdown_summary_max_messages")]
788    pub shutdown_summary_max_messages: usize,
789    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
790    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `30`.
792    #[serde(default = "default_shutdown_summary_timeout_secs")]
793    pub shutdown_summary_timeout_secs: u64,
794    /// LLM provider used for shutdown summarization calls.
795    ///
796    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
797    /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
798    ///
799    /// Example:
800    /// ```toml
801    /// [memory]
802    /// shutdown_summary_provider = "fast"
803    /// ```
804    #[serde(default)]
805    pub shutdown_summary_provider: ProviderName,
806    /// LLM provider used for deferred tool-pair summarization (context compaction).
807    ///
808    /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
809    /// provider. A mid-tier model is usually sufficient for compaction summaries.
810    ///
811    /// Example:
812    /// ```toml
813    /// [memory]
814    /// compaction_provider = "fast"
815    /// ```
816    #[serde(default)]
817    pub compaction_provider: ProviderName,
818    /// Use structured anchored summaries for context compaction.
819    ///
820    /// When enabled, hard compaction requests a JSON schema from the LLM
821    /// instead of free-form prose. Falls back to prose if the LLM fails
822    /// to produce valid JSON. Default: `false`.
823    #[serde(default)]
824    pub structured_summaries: bool,
825    /// AOI three-layer memory tier promotion system.
826    ///
827    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
828    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
829    #[serde(default)]
830    pub tiers: TierConfig,
831    /// A-MAC adaptive memory admission control.
832    ///
833    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
834    /// if its composite admission score falls below the configured threshold.
835    #[serde(default)]
836    pub admission: AdmissionConfig,
837    /// Session digest generation at session end. Default: disabled.
838    #[serde(default)]
839    pub digest: DigestConfig,
840    /// Context assembly strategy. Default: `full_history` (current behavior).
841    #[serde(default)]
842    pub context_strategy: ContextStrategy,
843    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
844    #[serde(default = "default_crossover_turn_threshold")]
845    pub crossover_turn_threshold: u32,
846    /// All-Mem lifelong memory consolidation sweep.
847    ///
848    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
849    /// messages and merges them into consolidated entries via LLM.
850    #[serde(default)]
851    pub consolidation: ConsolidationConfig,
852    /// `SleepGate` forgetting sweep (#2397).
853    ///
854    /// When `forgetting.enabled = true`, a background loop periodically decays importance
855    /// scores and prunes memories below the forgetting floor.
856    #[serde(default)]
857    pub forgetting: ForgettingConfig,
858    /// `PostgreSQL` connection URL.
859    ///
860    /// Used when the binary is compiled with `--features postgres`.
861    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
862    /// Example: `postgres://user:pass@localhost:5432/zeph`
863    /// Default: `None` (uses `sqlite_path` instead).
864    #[serde(default)]
865    pub database_url: Option<String>,
866    /// Cost-sensitive store routing (#2444).
867    ///
868    /// When `store_routing.enabled = true`, query intent is classified and routed to
869    /// the cheapest sufficient backend instead of querying all stores on every turn.
870    #[serde(default)]
871    pub store_routing: StoreRoutingConfig,
872    /// Persona memory layer (#2461).
873    ///
874    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
875    /// from conversation history and injected into context after the system prompt.
876    #[serde(default)]
877    pub persona: PersonaConfig,
878    /// Trajectory-informed memory (#2498).
879    #[serde(default)]
880    pub trajectory: TrajectoryConfig,
881    /// Category-aware memory (#2428).
882    #[serde(default)]
883    pub category: CategoryConfig,
884    /// `TiMem` temporal-hierarchical memory tree (#2262).
885    #[serde(default)]
886    pub tree: TreeConfig,
887    /// Time-based microcompact (#2699).
888    ///
889    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
890    /// from context when the session has been idle longer than `gap_threshold_minutes`.
891    #[serde(default)]
892    pub microcompact: MicrocompactConfig,
893    /// autoDream background memory consolidation (#2697).
894    ///
895    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
896    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
897    #[serde(default)]
898    pub autodream: AutoDreamConfig,
899    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
900    ///
901    /// Before inserting a new key fact, its nearest neighbour is looked up in the
902    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
903    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
904    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
905    #[serde(default = "default_key_facts_dedup_threshold")]
906    pub key_facts_dedup_threshold: f32,
907    /// Experience compression spectrum (#3305).
908    ///
909    /// Controls three-tier retrieval policy and background skill-promotion engine.
910    #[serde(default)]
911    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
912    /// MemMachine-inspired retrieval-stage tuning (#3340).
913    ///
914    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
915    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
916    /// uniformly across graph, hybrid, and vector-only recall paths.
917    ///
918    /// # Example (TOML)
919    ///
920    /// ```toml
921    /// [memory.retrieval]
922    /// depth = 40
923    /// search_prompt_template = ""
924    /// context_format = "structured"
925    /// ```
926    #[serde(default)]
927    pub retrieval: RetrievalConfig,
928    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
929    ///
930    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
931    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
932    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
933    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
934    /// and injected before the LLM call.
935    #[serde(default)]
936    pub reasoning: ReasoningConfig,
937    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
938    ///
939    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
940    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
941    ///
942    /// # Example (TOML)
943    ///
944    /// ```toml
945    /// [memory.hebbian]
946    /// enabled = true
947    /// hebbian_lr = 0.1
948    /// ```
949    #[serde(default)]
950    pub hebbian: HebbianConfig,
951    /// `MemCoT` rolling semantic state configuration (#3574).
952    ///
953    /// When `enabled = true`, each completed assistant turn spawns a background distillation
954    /// task that compresses the response into a short semantic state buffer. The buffer is
955    /// prepended to graph recall queries so retrieval stays contextually relevant across long
956    /// multi-turn sessions.
957    ///
958    /// # Example (TOML)
959    ///
960    /// ```toml
961    /// [memory.memcot]
962    /// enabled = true
963    /// distill_provider = "fast"
964    /// min_assistant_chars = 200
965    /// max_distills_per_session = 50
966    /// ```
967    #[serde(default)]
968    pub memcot: MemCotConfig,
969    /// `OmniMem` retrieval failure tracking (issue #3576).
970    ///
971    /// When `enabled = true`, no-hit and low-confidence recall events are logged
972    /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
973    ///
974    /// # Example (TOML)
975    ///
976    /// ```toml
977    /// [memory.retrieval_failures]
978    /// enabled = true
979    /// low_confidence_threshold = 0.3
980    /// retention_days = 90
981    /// ```
982    #[serde(default)]
983    pub retrieval_failures: RetrievalFailuresConfig,
984    /// Write quality gate (#3629).
985    ///
986    /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
987    /// writes are rejected before persistence. Evaluated after A-MAC admission control.
988    #[serde(default)]
989    pub quality_gate: WriteQualityGateConfig,
990    /// `MemFlow` tiered intent-driven retrieval (issue #3712).
991    ///
992    /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
993    /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
994    /// `DeepReasoning`) with optional validation and tier escalation.
995    #[serde(default)]
996    pub tiered_retrieval: TieredRetrievalConfig,
997    /// `ScrapMem` optical forgetting (issue #3713).
998    ///
999    /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
1000    /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1001    #[serde(default)]
1002    pub optical_forgetting: OpticalForgettingConfig,
1003    /// EM-Graph episodic event extraction and causal linking (issue #3713).
1004    ///
1005    /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1006    /// and linked via causal relationships, enabling causal-chain retrieval.
1007    #[serde(default)]
1008    pub em_graph: EmGraphConfig,
1009    /// Episodic-to-semantic consolidation daemon (issue #3799).
1010    ///
1011    /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1012    /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1013    /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1014    #[serde(default)]
1015    pub episodic_consolidation: EpisodicConsolidationConfig,
1016    /// MAGE shadow memory trajectory risk accumulator (spec 004-16).
1017    ///
1018    /// Maintains a per-session rolling risk score fed by sanitizer audit signals.
1019    /// When `shadow_memory.enabled = true`, tool execution is gated if cumulative
1020    /// trajectory risk exceeds `risk_threshold`. When `false`, all code paths are
1021    /// zero-cost no-ops.
1022    ///
1023    /// # Example (TOML)
1024    ///
1025    /// ```toml
1026    /// [memory.shadow_memory]
1027    /// enabled = true
1028    /// risk_threshold = 0.75
1029    /// risk_halflife_turns = 10
1030    /// ```
1031    #[serde(default)]
1032    pub shadow_memory: TrajectoryRiskAccumulatorConfig,
1033    /// Five-signal SYNAPSE retrieval (issue #4374).
1034    ///
1035    /// When `five_signal.enabled = true`, SYNAPSE recall weights five signals: recency,
1036    /// relevance, access frequency, causal distance, and novelty. All new signals default
1037    /// to weight `0.0`, preserving exact backward compatibility.
1038    #[serde(default)]
1039    pub five_signal: FiveSignalConfig,
1040    /// Context-Adaptive Memory fidelity scoring (CAM Phase 1, #4547).
1041    ///
1042    /// When `fidelity.enabled = true`, the heuristic fidelity scorer runs after each
1043    /// `apply_prepared_context()` call and assigns `Full / Compressed / Placeholder`
1044    /// levels to historical messages. Default: disabled.
1045    ///
1046    /// # Example (TOML)
1047    ///
1048    /// ```toml
1049    /// [memory.fidelity]
1050    /// enabled = false
1051    /// w_semantic = 0.3
1052    /// w_temporal = 0.3
1053    /// w_importance = 0.2
1054    /// w_plan = 0.2
1055    /// full_threshold = 0.7
1056    /// compressed_threshold = 0.3
1057    /// compressed_max_tokens = 50
1058    /// regrade_threshold = 0.6
1059    /// min_query_length = 8
1060    /// max_scored_messages = 500
1061    /// ```
1062    #[serde(default, skip_serializing_if = "Option::is_none")]
1063    pub fidelity: Option<crate::fidelity::FidelityConfig>,
1064}
1065
1066// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1067
1068/// `MemFlow` tiered intent-driven retrieval configuration.
1069///
1070/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
1071/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
1072/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
1073///
1074/// # Example (TOML)
1075///
1076/// ```toml
1077/// [memory.tiered_retrieval]
1078/// enabled = false
1079/// classifier_provider = ""
1080/// validator_provider = ""
1081/// token_budget = 4096
1082/// validation_enabled = false
1083/// validation_threshold = 0.6
1084/// max_escalations = 1
1085/// classifier_timeout_secs = 5
1086/// validator_timeout_secs = 5
1087///
1088/// # Signal weights (all default to 0.0; set to activate each signal)
1089/// similarity_weight = 1.0
1090/// recency_weight = 0.0
1091/// recency_half_life_days = 7
1092/// tfidf_weight = 0.0
1093/// cognitive_signal_weight = 0.0
1094/// tier_boost_weight = 0.0
1095/// semantic_tier_boost = 1.0
1096/// ```
1097#[derive(Debug, Clone, Deserialize, Serialize)]
1098#[serde(default)]
1099pub struct TieredRetrievalConfig {
1100    /// Enable `MemFlow` tiered retrieval. Default: `false`.
1101    pub enabled: bool,
1102    /// Provider name from `[[llm.providers]]` for intent classification.
1103    ///
1104    /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
1105    /// is set but the call fails, falls back to the heuristic (fail-open).
1106    pub classifier_provider: ProviderName,
1107    /// Provider name from `[[llm.providers]]` for evidence validation.
1108    ///
1109    /// When empty or when `validation_enabled = false`, no validation call is made.
1110    pub validator_provider: ProviderName,
1111    /// Maximum tokens to gather for evidence per query. Default: `4096`.
1112    pub token_budget: usize,
1113    /// Enable evidence validation and tier escalation. Default: `false`.
1114    pub validation_enabled: bool,
1115    /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
1116    pub validation_threshold: f32,
1117    /// Maximum tier escalations per query. Default: `1`.
1118    pub max_escalations: u8,
1119    /// Timeout in seconds for the classifier LLM call. Default: `5`.
1120    ///
1121    /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
1122    pub classifier_timeout_secs: u64,
1123    /// Timeout in seconds for the validator LLM call. Default: `5`.
1124    ///
1125    /// On timeout the validator is treated as sufficient (fail-open).
1126    pub validator_timeout_secs: u64,
1127
1128    // ── Signal weights ────────────────────────────────────────────────────────
1129    /// Weight applied to the raw similarity score from vector/keyword recall. Default: `1.0`.
1130    ///
1131    /// Set to `1.0` and all other weights to `0.0` to reproduce pre-signal behaviour.
1132    pub similarity_weight: f64,
1133    /// Weight applied to the recency decay signal. Default: `0.0` (disabled).
1134    pub recency_weight: f64,
1135    /// Half-life for recency decay in days. Default: `7`.
1136    ///
1137    /// A message that is `recency_half_life_days` old receives a recency score of `0.5`.
1138    /// Set `recency_weight = 0.0` to disable recency scoring entirely.
1139    pub recency_half_life_days: u32,
1140    /// Weight applied to the TF-IDF signal. Default: `0.0` (disabled).
1141    pub tfidf_weight: f64,
1142    /// Weight applied to the cognitive signal (message access frequency). Default: `0.0` (disabled).
1143    pub cognitive_signal_weight: f64,
1144    /// Weight applied to the tier boost signal for consolidated/semantic entries. Default: `0.0` (disabled).
1145    pub tier_boost_weight: f64,
1146    /// Additive score awarded to entries in the `semantic` tier when `tier_boost_weight > 0`. Default: `1.0`.
1147    ///
1148    /// The final contribution is `tier_boost_weight * semantic_tier_boost` for semantic entries
1149    /// and `0.0` for episodic entries.
1150    pub semantic_tier_boost: f64,
1151}
1152
1153impl Default for TieredRetrievalConfig {
1154    fn default() -> Self {
1155        Self {
1156            enabled: false,
1157            classifier_provider: ProviderName::default(),
1158            validator_provider: ProviderName::default(),
1159            token_budget: 4096,
1160            validation_enabled: false,
1161            validation_threshold: 0.6,
1162            max_escalations: 1,
1163            classifier_timeout_secs: 5,
1164            validator_timeout_secs: 5,
1165            similarity_weight: 1.0,
1166            recency_weight: 0.0,
1167            recency_half_life_days: 7,
1168            tfidf_weight: 0.0,
1169            cognitive_signal_weight: 0.0,
1170            tier_boost_weight: 0.0,
1171            semantic_tier_boost: 1.0,
1172        }
1173    }
1174}
1175
1176// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1177
1178/// `ScrapMem` optical forgetting configuration.
1179///
1180/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
1181/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
1182/// forgetting compresses content in place based on age.
1183///
1184/// # Example (TOML)
1185///
1186/// ```toml
1187/// [memory.optical_forgetting]
1188/// enabled = false
1189/// compress_provider = ""
1190/// compress_after_turns = 100
1191/// summarize_after_turns = 500
1192/// sweep_interval_secs = 3600
1193/// sweep_batch_size = 50
1194/// ```
1195#[derive(Debug, Clone, Deserialize, Serialize)]
1196#[serde(default)]
1197pub struct OpticalForgettingConfig {
1198    /// Enable optical forgetting sweep. Default: `false`.
1199    pub enabled: bool,
1200    /// Provider name from `[[llm.providers]]` for LLM-based content compression.
1201    /// Falls back to the primary provider when empty.
1202    pub compress_provider: ProviderName,
1203    /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
1204    pub compress_after_turns: u32,
1205    /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
1206    pub summarize_after_turns: u32,
1207    /// How often the sweep runs, in seconds. Default: `3600`.
1208    pub sweep_interval_secs: u64,
1209    /// Maximum messages to compress per sweep iteration. Default: `50`.
1210    pub sweep_batch_size: usize,
1211}
1212
1213impl Default for OpticalForgettingConfig {
1214    fn default() -> Self {
1215        Self {
1216            enabled: false,
1217            compress_provider: ProviderName::default(),
1218            compress_after_turns: 100,
1219            summarize_after_turns: 500,
1220            sweep_interval_secs: 3600,
1221            sweep_batch_size: 50,
1222        }
1223    }
1224}
1225
1226// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1227
1228/// EM-Graph episodic event extraction and causal linking configuration.
1229///
1230/// When enabled, episodic events are extracted from conversation turns and linked
1231/// via causal relationships stored in `episodic_events` and `causal_links` tables.
1232///
1233/// # Example (TOML)
1234///
1235/// ```toml
1236/// [memory.em_graph]
1237/// enabled = false
1238/// extract_provider = ""
1239/// max_chain_depth = 3
1240/// ```
1241#[derive(Debug, Clone, Deserialize, Serialize)]
1242#[serde(default)]
1243pub struct EmGraphConfig {
1244    /// Enable EM-Graph event extraction and causal linking. Default: `false`.
1245    pub enabled: bool,
1246    /// Provider name from `[[llm.providers]]` for event extraction.
1247    /// Falls back to the primary provider when empty.
1248    pub extract_provider: ProviderName,
1249    /// Maximum hops when traversing causal chains during recall. Default: `3`.
1250    pub max_chain_depth: u32,
1251}
1252
1253impl Default for EmGraphConfig {
1254    fn default() -> Self {
1255        Self {
1256            enabled: false,
1257            extract_provider: ProviderName::default(),
1258            max_chain_depth: 3,
1259        }
1260    }
1261}
1262
1263// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1264
/// Default interval between episodic consolidation sweeps: 30 minutes, in seconds.
fn default_episodic_consolidation_interval_secs() -> u64 {
    const THIRTY_MINUTES_SECS: u64 = 30 * 60;
    THIRTY_MINUTES_SECS
}
1268
/// Default cap on episodic events handled per consolidation sweep (`30`).
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1272
/// Default minimum age before an episodic event is eligible: 5 minutes, in seconds.
fn default_episodic_consolidation_min_age_secs() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
1276
/// Default Jaccard similarity threshold for consolidation deduplication (`0.6`).
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const DEDUP_JACCARD: f32 = 0.6;
    DEDUP_JACCARD
}
1280
1281// ── Five-signal SYNAPSE retrieval config (issue #4374) ────────────────────────
1282
/// Default weight of the recency signal in five-signal scoring (`0.35`).
fn default_five_signal_w_recency() -> f64 {
    const RECENCY_WEIGHT: f64 = 0.35;
    RECENCY_WEIGHT
}
1286
/// Default weight of the semantic relevance signal in five-signal scoring (`0.35`).
fn default_five_signal_w_relevance() -> f64 {
    const RELEVANCE_WEIGHT: f64 = 0.35;
    RELEVANCE_WEIGHT
}
1290
/// Default maximum BFS depth for causal distance computation (`10`).
fn default_causal_bfs_max_depth() -> u32 {
    const MAX_BFS_DEPTH: u32 = 10;
    MAX_BFS_DEPTH
}
1294
/// Default neutral causal distance (`5`), used when no real distance is available.
fn default_neutral_causal_distance() -> u32 {
    const NEUTRAL_DISTANCE: u32 = 5;
    NEUTRAL_DISTANCE
}
1298
/// Default decay rate λ of the novelty signal (`0.1`).
fn default_novelty_decay_rate() -> f64 {
    const NOVELTY_LAMBDA: f64 = 0.1;
    NOVELTY_LAMBDA
}
1302
/// Default interval between five-signal consolidation daemon runs: 2 hours, in seconds.
fn default_five_signal_interval_seconds() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
1306
/// Default cap on facts processed per five-signal consolidation run (`500`).
fn default_five_signal_batch_size() -> usize {
    const FACTS_PER_RUN: usize = 500;
    FACTS_PER_RUN
}
1310
/// Default hard timeout for one consolidation daemon run: 30 seconds, in milliseconds.
fn default_five_signal_daemon_max_runtime_ms() -> u64 {
    const THIRTY_SECONDS_MS: u64 = 30 * 1_000;
    THIRTY_SECONDS_MS
}
1314
/// Default five-signal score above which a fact is promoted (`0.70`).
fn default_five_signal_promotion_score_threshold() -> f64 {
    const PROMOTION_SCORE: f64 = 0.70;
    PROMOTION_SCORE
}
1318
/// Default five-signal score below which a promoted fact is demoted (`0.20`).
fn default_five_signal_demotion_score_threshold() -> f64 {
    const DEMOTION_SCORE: f64 = 0.20;
    DEMOTION_SCORE
}
1322
/// Default number of episodic facts queried per consolidation run (`500`).
fn default_five_signal_top_k_per_run() -> usize {
    const TOP_K: usize = 500;
    TOP_K
}
1326
1327/// Five-signal SYNAPSE retrieval configuration (issue #4374).
1328///
1329/// Extends SYNAPSE recall with three additional signals — access frequency, causal
1330/// distance, and novelty — beyond the two-signal baseline (recency + relevance).
1331/// All new signal weights default to `0.0`, preserving exact backward compatibility.
1332///
1333/// # Example (TOML)
1334///
1335/// ```toml
1336/// [memory.five_signal]
1337/// enabled = true
1338/// w_recency   = 0.35
1339/// w_relevance = 0.35
1340/// w_frequency = 0.15
1341/// w_causal    = 0.10
1342/// w_novelty   = 0.05
1343///
1344/// [memory.five_signal.consolidation_daemon]
1345/// enabled = true
1346/// interval_seconds = 7200
1347/// ```
1348#[derive(Debug, Clone, Deserialize, Serialize)]
1349pub struct FiveSignalConfig {
1350    /// Master switch. When `false`, the five-signal code path contributes zero overhead.
1351    #[serde(default)]
1352    pub enabled: bool,
1353    /// Weight for the recency signal. Default: `0.35`.
1354    #[serde(default = "default_five_signal_w_recency")]
1355    pub w_recency: f64,
1356    /// Weight for the semantic relevance signal. Default: `0.35`.
1357    #[serde(default = "default_five_signal_w_relevance")]
1358    pub w_relevance: f64,
1359    /// Weight for the access frequency signal. Default: `0.0` (baseline-compatible).
1360    #[serde(default)]
1361    pub w_frequency: f64,
1362    /// Weight for the causal distance signal. Default: `0.0` (baseline-compatible).
1363    #[serde(default)]
1364    pub w_causal: f64,
1365    /// Weight for the novelty signal. Default: `0.0` (baseline-compatible).
1366    #[serde(default)]
1367    pub w_novelty: f64,
1368    /// Maximum BFS depth for causal distance computation. Default: `10`.
1369    #[serde(default = "default_causal_bfs_max_depth")]
1370    pub causal_bfs_max_depth: u32,
1371    /// Causal distance assigned when no goal entity is set or a fact lies beyond
1372    /// `causal_bfs_max_depth`. Default: `5`.
1373    #[serde(default = "default_neutral_causal_distance")]
1374    pub neutral_causal_distance: u32,
1375    /// Decay rate λ in `exp(-λ × days)` for the novelty signal. Default: `0.1`.
1376    #[serde(default = "default_novelty_decay_rate")]
1377    pub novelty_decay_rate: f64,
1378    /// Async consolidation daemon that promotes hot episodic facts to Qdrant.
1379    #[serde(default)]
1380    pub consolidation_daemon: FiveSignalConsolidationConfig,
1381}
1382
1383impl Default for FiveSignalConfig {
1384    fn default() -> Self {
1385        Self {
1386            enabled: false,
1387            w_recency: default_five_signal_w_recency(),
1388            w_relevance: default_five_signal_w_relevance(),
1389            w_frequency: 0.0,
1390            w_causal: 0.0,
1391            w_novelty: 0.0,
1392            causal_bfs_max_depth: default_causal_bfs_max_depth(),
1393            neutral_causal_distance: default_neutral_causal_distance(),
1394            novelty_decay_rate: default_novelty_decay_rate(),
1395            consolidation_daemon: FiveSignalConsolidationConfig::default(),
1396        }
1397    }
1398}
1399
1400/// Async consolidation daemon configuration for five-signal retrieval (issue #4374).
1401///
1402/// When `enabled = true`, a background task runs at `interval_seconds` intervals,
1403/// evaluates the top `top_k_per_run` episodic facts by five-signal score, promotes
1404/// facts above `promotion_score_threshold` to Qdrant, and demotes facts below
1405/// `demotion_score_threshold` to `episodic_only` tier.
1406///
1407/// # Example (TOML)
1408///
1409/// ```toml
1410/// [memory.five_signal.consolidation_daemon]
1411/// enabled = true
1412/// interval_seconds = 7200
1413/// batch_size = 500
1414/// promotion_score_threshold = 0.70
1415/// demotion_score_threshold = 0.20
1416/// ```
1417#[derive(Debug, Clone, Deserialize, Serialize)]
1418pub struct FiveSignalConsolidationConfig {
1419    /// Enable the daemon. Requires the `scheduler` feature. Default: `false`.
1420    #[serde(default)]
1421    pub enabled: bool,
1422    /// Interval between daemon runs in seconds. Default: `7200` (2 hours).
1423    #[serde(default = "default_five_signal_interval_seconds")]
1424    pub interval_seconds: u64,
1425    /// Maximum facts processed (embed + upsert) per run. Default: `500`.
1426    #[serde(default = "default_five_signal_batch_size")]
1427    pub batch_size: usize,
1428    /// Hard timeout per run in milliseconds. Default: `30000`.
1429    #[serde(default = "default_five_signal_daemon_max_runtime_ms")]
1430    pub daemon_max_runtime_ms: u64,
1431    /// Five-signal score above which a fact is promoted to Qdrant. Default: `0.70`.
1432    #[serde(default = "default_five_signal_promotion_score_threshold")]
1433    pub promotion_score_threshold: f64,
1434    /// Five-signal score below which a promoted fact is demoted. Default: `0.20`.
1435    #[serde(default = "default_five_signal_demotion_score_threshold")]
1436    pub demotion_score_threshold: f64,
1437    /// Number of episodic facts queried per run (SQL LIMIT). Must be >= `batch_size`.
1438    /// Default: `500`.
1439    #[serde(default = "default_five_signal_top_k_per_run")]
1440    pub top_k_per_run: usize,
1441}
1442
1443impl Default for FiveSignalConsolidationConfig {
1444    fn default() -> Self {
1445        Self {
1446            enabled: false,
1447            interval_seconds: default_five_signal_interval_seconds(),
1448            batch_size: default_five_signal_batch_size(),
1449            daemon_max_runtime_ms: default_five_signal_daemon_max_runtime_ms(),
1450            promotion_score_threshold: default_five_signal_promotion_score_threshold(),
1451            demotion_score_threshold: default_five_signal_demotion_score_threshold(),
1452            top_k_per_run: default_five_signal_top_k_per_run(),
1453        }
1454    }
1455}
1456
1457/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
1458///
1459/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
1460/// extracts durable factual statements via LLM, deduplicates them against existing
1461/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
1462/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
1463///
1464/// # Example (TOML)
1465///
1466/// ```toml
1467/// [memory.episodic_consolidation]
1468/// enabled = false
1469/// consolidation_provider = ""
1470/// interval_secs = 1800
1471/// batch_size = 30
1472/// min_age_secs = 300
1473/// dedup_jaccard_threshold = 0.6
1474/// ```
1475#[derive(Debug, Clone, Deserialize, Serialize)]
1476#[serde(default)]
1477pub struct EpisodicConsolidationConfig {
1478    /// Enable the episodic consolidation daemon. Default: `false`.
1479    pub enabled: bool,
1480    /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
1481    /// Falls back to the primary provider when empty.
1482    pub consolidation_provider: ProviderName,
1483    /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
1484    #[serde(default = "default_episodic_consolidation_interval_secs")]
1485    pub interval_secs: u64,
1486    /// Maximum number of episodic events to process per sweep. Default: `30`.
1487    #[serde(default = "default_episodic_consolidation_batch_size")]
1488    pub batch_size: usize,
1489    /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
1490    /// Prevents consolidating events from the active conversation.
1491    #[serde(default = "default_episodic_consolidation_min_age_secs")]
1492    pub min_age_secs: u64,
1493    /// Jaccard similarity threshold for deduplication against existing key facts.
1494    /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
1495    #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
1496    pub dedup_jaccard_threshold: f32,
1497}
1498
1499impl Default for EpisodicConsolidationConfig {
1500    fn default() -> Self {
1501        Self {
1502            enabled: false,
1503            consolidation_provider: ProviderName::default(),
1504            interval_secs: default_episodic_consolidation_interval_secs(),
1505            batch_size: default_episodic_consolidation_batch_size(),
1506            min_age_secs: default_episodic_consolidation_min_age_secs(),
1507            dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1508        }
1509    }
1510}
1511
/// Default low-confidence threshold for retrieval failure tracking (`0.3`).
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const LOW_CONFIDENCE: f32 = 0.3;
    LOW_CONFIDENCE
}
1515
/// Default retention period for retrieval failure tracking, in days (`90`).
fn default_retrieval_failures_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
1519
/// Default channel capacity for retrieval failure tracking (`256`).
// NOTE(review): the config struct consuming this default is outside this section.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CHANNEL_CAPACITY: usize = 256;
    CHANNEL_CAPACITY
}
1523
/// Default batch size for retrieval failure tracking (`16`).
// NOTE(review): the config struct consuming this default is outside this section.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH_SIZE: usize = 16;
    BATCH_SIZE
}
1527
/// Default flush interval for retrieval failure tracking, in milliseconds (`100`).
// NOTE(review): the config struct consuming this default is outside this section.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const FLUSH_INTERVAL_MS: u64 = 100;
    FLUSH_INTERVAL_MS
}
1531
/// Default crossover turn threshold (`20`).
// NOTE(review): presumably the default for the `crossover_turn_threshold` setting
// referenced by `ContextStrategy::Adaptive`; the consuming field is outside this section.
fn default_crossover_turn_threshold() -> u32 {
    const CROSSOVER_TURNS: u32 = 20;
    CROSSOVER_TURNS
}
1535
/// Default cosine similarity threshold for key-fact deduplication (`0.95`).
fn default_key_facts_dedup_threshold() -> f32 {
    const DEDUP_COSINE: f32 = 0.95;
    DEDUP_COSINE
}
1539
1540/// Session digest configuration (#2289).
1541#[derive(Debug, Clone, Deserialize, Serialize)]
1542#[serde(default)]
1543pub struct DigestConfig {
1544    /// Enable session digest generation at session end. Default: `false`.
1545    pub enabled: bool,
1546    /// Provider name from `[[llm.providers]]` for digest generation.
1547    /// Falls back to the primary provider when `None`.
1548    #[serde(default)]
1549    pub provider: Option<ProviderName>,
1550    /// Maximum tokens for the digest text. Default: `500`.
1551    pub max_tokens: usize,
1552    /// Maximum messages to feed into the digest prompt. Default: `50`.
1553    pub max_input_messages: usize,
1554}
1555
1556impl Default for DigestConfig {
1557    fn default() -> Self {
1558        Self {
1559            enabled: false,
1560            provider: None,
1561            max_tokens: 500,
1562            max_input_messages: 50,
1563        }
1564    }
1565}
1566
1567/// Context assembly strategy (#2288).
1568#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
1569#[serde(rename_all = "snake_case")]
1570#[non_exhaustive]
1571pub enum ContextStrategy {
1572    /// Full conversation history trimmed to budget, with memory augmentation.
1573    /// This is the default and existing behavior.
1574    #[default]
1575    FullHistory,
1576    /// Drop conversation history; assemble context from summaries, semantic recall,
1577    /// cross-session memory, and session digest only.
1578    MemoryFirst,
1579    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
1580    /// `crossover_turn_threshold`.
1581    Adaptive,
1582}
1583
1584/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
1585#[derive(Debug, Clone, Deserialize, Serialize)]
1586#[serde(default)]
1587pub struct SessionsConfig {
1588    /// Maximum number of sessions returned by list operations (0 = unlimited).
1589    #[serde(default = "default_max_history")]
1590    pub max_history: usize,
1591    /// Maximum characters for auto-generated session titles.
1592    #[serde(default = "default_title_max_chars")]
1593    pub title_max_chars: usize,
1594}
1595
1596impl Default for SessionsConfig {
1597    fn default() -> Self {
1598        Self {
1599            max_history: default_max_history(),
1600            title_max_chars: default_title_max_chars(),
1601        }
1602    }
1603}
1604
1605/// Configuration for the document ingestion and RAG retrieval pipeline.
1606#[derive(Debug, Clone, Deserialize, Serialize)]
1607pub struct DocumentConfig {
1608    #[serde(default = "default_document_collection")]
1609    pub collection: String,
1610    #[serde(default = "default_document_chunk_size")]
1611    pub chunk_size: usize,
1612    #[serde(default = "default_document_chunk_overlap")]
1613    pub chunk_overlap: usize,
1614    /// Number of document chunks to inject into agent context per turn.
1615    #[serde(default = "default_document_top_k")]
1616    pub top_k: usize,
1617    /// Enable document RAG injection into agent context.
1618    #[serde(default)]
1619    pub rag_enabled: bool,
1620}
1621
1622impl Default for DocumentConfig {
1623    fn default() -> Self {
1624        Self {
1625            collection: default_document_collection(),
1626            chunk_size: default_document_chunk_size(),
1627            chunk_overlap: default_document_chunk_overlap(),
1628            top_k: default_document_top_k(),
1629            rag_enabled: false,
1630        }
1631    }
1632}
1633
1634/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1635///
1636/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1637/// re-ranking, and hybrid BM25+vector weighting.
1638///
1639/// # Example (TOML)
1640///
1641/// ```toml
1642/// [memory.semantic]
1643/// enabled = true
1644/// recall_limit = 5
1645/// vector_weight = 0.7
1646/// keyword_weight = 0.3
1647/// mmr_lambda = 0.7
1648/// ```
1649#[derive(Debug, Deserialize, Serialize)]
1650#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1651pub struct SemanticConfig {
1652    /// Enable vector-based semantic recall. Default: `true`.
1653    #[serde(default = "default_semantic_enabled")]
1654    pub enabled: bool,
1655    #[serde(default = "default_recall_limit")]
1656    pub recall_limit: usize,
1657    #[serde(default = "default_vector_weight")]
1658    pub vector_weight: f64,
1659    #[serde(default = "default_keyword_weight")]
1660    pub keyword_weight: f64,
1661    #[serde(default = "default_true")]
1662    pub temporal_decay_enabled: bool,
1663    #[serde(default = "default_temporal_decay_half_life_days")]
1664    pub temporal_decay_half_life_days: u32,
1665    #[serde(default = "default_true")]
1666    pub mmr_enabled: bool,
1667    #[serde(default = "default_mmr_lambda")]
1668    pub mmr_lambda: f32,
1669    #[serde(default = "default_true")]
1670    pub importance_enabled: bool,
1671    #[serde(
1672        default = "default_importance_weight",
1673        deserialize_with = "validate_importance_weight"
1674    )]
1675    pub importance_weight: f64,
1676    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1677    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1678    /// from contending with the guardrail at the API server level (rate limits, Ollama
1679    /// single-model lock). Falls back to the main agent provider when `None`.
1680    #[serde(default)]
1681    pub embedding_provider: Option<ProviderName>,
1682    /// Timeout in seconds applied to every `embed()` call inside `zeph-memory`.
1683    ///
1684    /// Applies to all embedding call sites: admission control, quality gate, recall,
1685    /// summarization, graph retrieval, consolidation, and tree consolidation.
1686    /// Set to a higher value when using slow remote embedding providers.
1687    /// Default: `5`.
1688    #[serde(default = "default_embed_timeout_secs")]
1689    pub embed_timeout_secs: u64,
1690}
1691
1692impl Default for SemanticConfig {
1693    fn default() -> Self {
1694        Self {
1695            enabled: default_semantic_enabled(),
1696            recall_limit: default_recall_limit(),
1697            vector_weight: default_vector_weight(),
1698            keyword_weight: default_keyword_weight(),
1699            temporal_decay_enabled: true,
1700            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1701            mmr_enabled: true,
1702            mmr_lambda: default_mmr_lambda(),
1703            importance_enabled: true,
1704            importance_weight: default_importance_weight(),
1705            embedding_provider: None,
1706            embed_timeout_secs: default_embed_timeout_secs(),
1707        }
1708    }
1709}
1710
/// Default timeout, in seconds, applied to each `embed()` call (`5`).
fn default_embed_timeout_secs() -> u64 {
    const EMBED_TIMEOUT_SECS: u64 = 5;
    EMBED_TIMEOUT_SECS
}
1714
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// Variants are (de)serialized in `snake_case`, i.e. `"structured"` and `"plain"`.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default (marked `#[default]`). Gives the LLM structured provenance
    /// metadata for each recalled memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1743
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML. All fields have defaults so existing
/// configs parse unchanged: the container-level `#[serde(default)]` fills any missing
/// key from this type's `Default` impl, and the query-bias fields additionally name
/// their own `default_query_bias_*` helper.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped. Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    // NOTE(review): the clamp is not applied in this type; presumably enforced by the consumer.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1808
/// Query-bias correction is enabled unless the config says otherwise.
fn default_query_bias_correction() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1812
/// Default blend weight towards the profile centroid (`0.25`).
fn default_query_bias_profile_weight() -> f32 {
    const PROFILE_WEIGHT: f32 = 0.25;
    PROFILE_WEIGHT
}
1816
/// Default lifetime of the cached profile centroid: 300 seconds (5 minutes).
fn default_query_bias_centroid_ttl_secs() -> u64 {
    const CENTROID_TTL_SECS: u64 = 300;
    CENTROID_TTL_SECS
}
1820
1821impl Default for RetrievalConfig {
1822    fn default() -> Self {
1823        Self {
1824            depth: 0,
1825            search_prompt_template: String::new(),
1826            context_format: ContextFormat::default(),
1827            query_bias_correction: default_query_bias_correction(),
1828            query_bias_profile_weight: default_query_bias_profile_weight(),
1829            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1830        }
1831    }
1832}
1833
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when `None` or unresolvable. Default: `None`.
    #[serde(default)]
    pub consolidate_provider: Option<ProviderName>,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    // NOTE(review): the clamp is not applied in this type; presumably enforced by the consumer.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<EdgeType>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
    /// Timeout for the initial query embedding call in HL-F5, in seconds.
    ///
    /// `0` disables the timeout. Default: `5`.
    pub embed_timeout_secs: u64,
}
1900
1901impl Default for HebbianConfig {
1902    fn default() -> Self {
1903        Self {
1904            enabled: false,
1905            hebbian_lr: 0.1,
1906            consolidation_interval_secs: 3600,
1907            consolidation_threshold: 5.0,
1908            consolidate_provider: None,
1909            max_candidates_per_sweep: 10,
1910            consolidation_cooldown_secs: 86_400,
1911            consolidation_prompt_timeout_secs: 30,
1912            consolidation_max_neighbors: 20,
1913            spreading_activation: false,
1914            spread_depth: 2,
1915            spread_edge_types: Vec::new(),
1916            step_budget_ms: 8,
1917            embed_timeout_secs: 5,
1918        }
1919    }
1920}
1921
/// Compression strategy for active context compression (#1161).
///
/// Serialized as an internally tagged enum: the variant is carried in a `strategy` key
/// using `snake_case` names (`"reactive"`, `"proactive"`, `"autonomous"`, `"focus"`), and
/// the fields of `Proactive` sit next to that key.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
#[non_exhaustive]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior). This is the default.
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1946
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// Only `Serialize` is derived (variants are written in `snake_case`); `Deserialize` is
/// implemented by hand in this file and goes through `FromStr`.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1972
1973impl PruningStrategy {
1974    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1975    #[must_use]
1976    pub fn is_subgoal(self) -> bool {
1977        matches!(self, Self::Subgoal | Self::SubgoalMig)
1978    }
1979}
1980
1981// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1982// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1983impl<'de> serde::Deserialize<'de> for PruningStrategy {
1984    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1985        let s = String::deserialize(deserializer)?;
1986        s.parse().map_err(serde::de::Error::custom)
1987    }
1988}
1989
1990impl std::str::FromStr for PruningStrategy {
1991    type Err = String;
1992
1993    fn from_str(s: &str) -> Result<Self, Self::Err> {
1994        match s {
1995            "reactive" => Ok(Self::Reactive),
1996            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1997            "mig" => Ok(Self::Mig),
1998            // task_aware_mig was removed (dead code — was routed to scored path only).
1999            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
2000            "task_aware_mig" | "task-aware-mig" => {
2001                tracing::warn!(
2002                    "pruning strategy `task_aware_mig` has been removed; \
2003                     falling back to `reactive`. Use `task_aware` or `mig` instead."
2004                );
2005                Ok(Self::Reactive)
2006            }
2007            "subgoal" => Ok(Self::Subgoal),
2008            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
2009            other => Err(format!(
2010                "unknown pruning strategy `{other}`, expected \
2011                 reactive|task_aware|mig|subgoal|subgoal_mig"
2012            )),
2013        }
2014    }
2015}
2016
/// Default token-budget fraction for high-density content (`0.7`).
fn default_high_density_budget() -> f32 {
    const HIGH_DENSITY_FRACTION: f32 = 0.7;
    HIGH_DENSITY_FRACTION
}
2020
/// Default token-budget fraction for low-density content (`0.3`).
fn default_low_density_budget() -> f32 {
    const LOW_DENSITY_FRACTION: f32 = 0.3;
    LOW_DENSITY_FRACTION
}
2024
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    // NOTE(review): the range is documented only; no validation is attached to this field.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    // NOTE(review): the range is documented only; no validation is attached to this field.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
2052
2053impl Default for ForgettingConfig {
2054    fn default() -> Self {
2055        Self {
2056            enabled: false,
2057            decay_rate: 0.1,
2058            forgetting_floor: 0.05,
2059            sweep_interval_secs: 7200,
2060            sweep_batch_size: 500,
2061            replay_window_hours: 24,
2062            replay_min_access_count: 3,
2063            protect_recent_hours: 24,
2064            protect_min_access_count: 3,
2065        }
2066    }
2067}
2068
2069/// Configuration for active context compression (#1161).
2070#[derive(Debug, Clone, Default, Deserialize, Serialize)]
2071#[serde(default)]
2072pub struct CompressionConfig {
2073    /// Compression strategy.
2074    #[serde(flatten)]
2075    pub strategy: CompressionStrategy,
2076    /// Tool-output pruning strategy (requires `context-compression` feature).
2077    pub pruning_strategy: PruningStrategy,
2078    /// Model to use for compression summaries.
2079    ///
2080    /// Currently unused — the primary summary provider is used regardless of this value.
2081    /// Reserved for future per-compression model selection. Setting this field has no effect.
2082    pub model: String,
2083    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
2084    /// Falls back to the primary provider when empty. Default: `""`.
2085    pub compress_provider: ProviderName,
2086    /// Compaction probe: validates summary quality before committing it (#1609).
2087    #[serde(default)]
2088    pub probe: CompactionProbeConfig,
2089    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
2090    ///
2091    /// When enabled, tool output bodies in the compaction range are saved to
2092    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
2093    /// The LLM summarizes placeholder messages; archived content is appended as
2094    /// a postfix after summarization so references survive compaction.
2095    /// Default: `false`.
2096    #[serde(default)]
2097    pub archive_tool_outputs: bool,
2098    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
2099    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
2100    /// Falls back to the primary provider when empty. Default: `""`.
2101    pub focus_scorer_provider: ProviderName,
2102    /// Token-budget fraction for high-density content in density-aware compression (#2481).
2103    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
2104    #[serde(default = "default_high_density_budget")]
2105    pub high_density_budget: f32,
2106    /// Token-budget fraction for low-density content in density-aware compression (#2481).
2107    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
2108    #[serde(default = "default_low_density_budget")]
2109    pub low_density_budget: f32,
2110    /// Typed-page classification and batch-level assertion checking (#3630).
2111    #[serde(default)]
2112    pub typed_pages: TypedPagesConfig,
2113    /// Acon tool-result compression settings (#4021).
2114    ///
2115    /// Controls per-result and batch-level token budgets for tool outputs before they enter
2116    /// message history. Distinct from `[tools.compression]` (TACO), which applies regex-based
2117    /// rule compression at the executor level.
2118    #[serde(default)]
2119    pub acon: AconConfig,
2120    /// ARC agent-initiated compaction settings (#4020).
2121    ///
2122    /// When `allow_agent_compaction = true`, the agent can call the `request_compaction`
2123    /// internal tool to trigger context summarization on demand.
2124    #[serde(default)]
2125    pub arc: ArcCompactionConfig,
2126}
2127
/// Default Acon pass-through threshold, in tokens (`2000`).
fn default_acon_passthrough_threshold() -> usize {
    const PASSTHROUGH_TOKENS: usize = 2_000;
    PASSTHROUGH_TOKENS
}
2131
/// Default Acon summarization threshold, in tokens (`4000`).
fn default_acon_summarize_threshold() -> usize {
    const SUMMARIZE_TOKENS: usize = 4_000;
    SUMMARIZE_TOKENS
}
2135
/// Default Acon per-turn budget for all tool results, in tokens (`8000`).
fn default_acon_total_budget() -> usize {
    const TOTAL_BUDGET_TOKENS: usize = 8_000;
    TOTAL_BUDGET_TOKENS
}
2139
2140fn validate_acon_passthrough_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2141where
2142    D: serde::Deserializer<'de>,
2143{
2144    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2145    if value == 0 {
2146        return Err(serde::de::Error::custom(
2147            "acon.passthrough_threshold must be >= 1",
2148        ));
2149    }
2150    Ok(value)
2151}
2152
2153fn validate_acon_summarize_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2154where
2155    D: serde::Deserializer<'de>,
2156{
2157    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2158    if value == 0 {
2159        return Err(serde::de::Error::custom(
2160            "acon.summarize_threshold must be >= 1",
2161        ));
2162    }
2163    Ok(value)
2164}
2165
2166fn validate_acon_total_budget<'de, D>(deserializer: D) -> Result<usize, D::Error>
2167where
2168    D: serde::Deserializer<'de>,
2169{
2170    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2171    if value == 0 {
2172        return Err(serde::de::Error::custom("acon.total_budget must be >= 1"));
2173    }
2174    Ok(value)
2175}
2176
/// Token budget configuration for Acon tool-result compression (#4021).
///
/// Controls per-result and batch-level token budgets for tool outputs injected into context.
/// Distinct from `[tools.compression]` (TACO), which applies regex-based rule compression
/// at the executor level.
///
/// # Invariants
///
/// The following ordering must hold: `passthrough_threshold < summarize_threshold <= total_budget`.
/// A config where `passthrough_threshold >= summarize_threshold` would make the summarization path
/// unreachable, silently producing incorrect compression behavior.
///
/// Two layers of checking exist: each threshold is rejected at deserialization time when
/// it is `0` (via the `validate_acon_*` hooks named in the field attributes), while the
/// cross-field ordering is checked by [`AconConfig::validate`], which must be called
/// explicitly.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.acon]
/// enabled = true
/// passthrough_threshold = 2000
/// summarize_threshold = 4000
/// total_budget = 8000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AconConfig {
    /// Enable Acon tool-result compression. Default: `true`.
    pub enabled: bool,
    /// Token count below which results pass through unchanged.
    /// Also the truncation target: results above this get char-truncated to this size.
    /// Must be >= 1 and < `summarize_threshold`. Default: `2000`.
    #[serde(default = "default_acon_passthrough_threshold")]
    #[serde(deserialize_with = "validate_acon_passthrough_threshold")]
    pub passthrough_threshold: usize,
    /// Token count above which LLM summarization should be attempted before truncation.
    /// Must be >= 1, > `passthrough_threshold` and <= `total_budget`. Default: `4000`.
    #[serde(default = "default_acon_summarize_threshold")]
    #[serde(deserialize_with = "validate_acon_summarize_threshold")]
    pub summarize_threshold: usize,
    /// Maximum total tokens for all tool results in a single turn.
    /// Must be >= 1 and >= `summarize_threshold`. Default: `8000`.
    #[serde(default = "default_acon_total_budget")]
    #[serde(deserialize_with = "validate_acon_total_budget")]
    pub total_budget: usize,
    /// Provider name from `[[llm.providers]]` for LLM summarization of large results.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub summarize_provider: ProviderName,
}
2224
2225impl AconConfig {
2226    /// Validate threshold ordering invariants after deserialization.
2227    ///
2228    /// Returns an error string if `passthrough_threshold >= summarize_threshold` or
2229    /// `summarize_threshold > total_budget`.
2230    ///
2231    /// # Errors
2232    ///
2233    /// Returns a descriptive error string when any threshold invariant is violated.
2234    pub fn validate(&self) -> Result<(), String> {
2235        if self.passthrough_threshold >= self.summarize_threshold {
2236            return Err(format!(
2237                "acon: passthrough_threshold ({}) must be < summarize_threshold ({})",
2238                self.passthrough_threshold, self.summarize_threshold
2239            ));
2240        }
2241        if self.summarize_threshold > self.total_budget {
2242            return Err(format!(
2243                "acon: summarize_threshold ({}) must be <= total_budget ({})",
2244                self.summarize_threshold, self.total_budget
2245            ));
2246        }
2247        Ok(())
2248    }
2249}
2250
2251impl Default for AconConfig {
2252    fn default() -> Self {
2253        Self {
2254            enabled: true,
2255            passthrough_threshold: default_acon_passthrough_threshold(),
2256            summarize_threshold: default_acon_summarize_threshold(),
2257            total_budget: default_acon_total_budget(),
2258            summarize_provider: ProviderName::default(),
2259        }
2260    }
2261}
2262
/// Configuration for ARC agent-initiated compaction (#4020).
///
/// When `allow_agent_compaction = true`, the `request_compaction` internal tool is
/// registered and the agent can call it to trigger context summarization on demand.
/// Rate limiting is handled by `CompactionState` — only one compaction fires per turn.
///
/// The key is optional when deserializing: the container-level `#[serde(default)]`
/// falls back to this type's `Default` impl.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.arc]
/// allow_agent_compaction = true
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ArcCompactionConfig {
    /// Allow the agent to request compaction via the `request_compaction` tool call.
    /// Default: `true`.
    pub allow_agent_compaction: bool,
}
2282
2283impl Default for ArcCompactionConfig {
2284    fn default() -> Self {
2285        Self {
2286            allow_agent_compaction: true,
2287        }
2288    }
2289}
2290
/// Configuration for typed-page compaction invariants (#3630).
///
/// Controls classification, batch-level assertion checking, and audit logging.
/// All behavior is disabled by default; set `enabled = true` to activate.
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// fills missing fields from this type's `Default` impl.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.typed_pages]
/// enabled = true
/// enforcement = "active"
/// audit_path = ""
/// audit_channel_capacity = 256
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(default)]
pub struct TypedPagesConfig {
    /// Enable typed-page classification and batch-level assertion checking.
    /// Default: `false`.
    pub enabled: bool,
    /// Enforcement mode:
    ///
    /// - `observe`: classify and emit audit records only; no behavioral change.
    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
    ///
    /// Default: `"observe"`.
    pub enforcement: TypedPagesEnforcement,
    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
    /// Default: `""`.
    ///
    /// # Security
    ///
    /// This field is **operator-only trusted input** read from the agent's configuration file.
    /// Write access to the config file implies file-system write access, so no additional
    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
    /// configuration sources.
    pub audit_path: String,
    /// Bounded channel capacity for the async audit writer. Default: `256`.
    pub audit_channel_capacity: usize,
}
2331
2332impl Default for TypedPagesConfig {
2333    fn default() -> Self {
2334        Self {
2335            enabled: false,
2336            enforcement: TypedPagesEnforcement::Observe,
2337            audit_path: String::new(),
2338            audit_channel_capacity: 256,
2339        }
2340    }
2341}
2342
/// Enforcement mode for typed-page compaction (#3630).
///
/// Variants are (de)serialized in `snake_case`, i.e. `"observe"` and `"active"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum TypedPagesEnforcement {
    /// Classify and audit only. Zero behavioral change relative to the untyped path.
    /// This is the default.
    #[default]
    Observe,
    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
    Active,
}
2354
/// Default `SideQuest` cadence: run eviction every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    const INTERVAL_TURNS: u32 = 4;
    INTERVAL_TURNS
}
2358
/// Default maximum fraction of tool outputs evicted per `SideQuest` pass (`0.5`).
fn default_sidequest_max_eviction_ratio() -> f32 {
    const MAX_EVICTION_RATIO: f32 = 0.5;
    MAX_EVICTION_RATIO
}
2362
/// Default maximum number of cursor entries in the eviction prompt (`30`).
fn default_sidequest_max_cursors() -> usize {
    const MAX_CURSORS: usize = 30;
    MAX_CURSORS
}
2366
/// Default token count below which tool outputs are not eviction candidates (`100`).
fn default_sidequest_min_cursor_tokens() -> usize {
    const MIN_CURSOR_TOKENS: usize = 100;
    MIN_CURSOR_TOKENS
}
2370
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Every key is optional when deserializing: the container-level `#[serde(default)]`
/// falls back to this type's `Default` impl, and each tunable also names its own
/// `default_sidequest_*` helper.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
2391
2392impl Default for SidequestConfig {
2393    fn default() -> Self {
2394        Self {
2395            enabled: false,
2396            interval_turns: default_sidequest_interval_turns(),
2397            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
2398            max_cursors: default_sidequest_max_cursors(),
2399            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
2400        }
2401    }
2402}
2403
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
///
/// Variants are (de)serialized in `snake_case`: `"synapse"`, `"bfs"`, `"astar"`,
/// `"water_circles"`, `"beam_search"`, `"hybrid"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior).
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
    Bfs,
    /// A* shortest-path traversal via petgraph.
    // Explicit rename: the automatic `snake_case` conversion would yield `a_star`.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query.
    Hybrid,
}
2427
/// Serde default for `BeamSearchConfig::beam_width`: ten candidates kept per hop.
fn default_beam_width() -> usize {
    const BEAM_WIDTH: usize = 10;
    BEAM_WIDTH
}
2431
/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    ///
    /// When the key is absent from the config, `default_beam_width()` supplies the value.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
2442
2443impl Default for BeamSearchConfig {
2444    fn default() -> Self {
2445        Self {
2446            beam_width: default_beam_width(),
2447        }
2448    }
2449}
2450
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes.
///
/// `Default` is derived, so an absent section yields `ring_limit = 0` (auto).
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}
2460
/// Serde default for `ExperienceConfig::evolution_sweep_interval`: fifty turns between sweeps.
fn default_evolution_sweep_interval() -> usize {
    const TURNS_BETWEEN_SWEEPS: usize = 50;
    TURNS_BETWEEN_SWEEPS
}
2464
/// Serde default for `ExperienceConfig::confidence_prune_threshold`.
fn default_confidence_prune_threshold() -> f32 {
    const PRUNE_THRESHOLD: f32 = 0.1;
    PRUNE_THRESHOLD
}
2468
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
///
/// Every field carries its own serde default, so any subset of keys may be omitted.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    ///
    /// NOTE(review): no range validation is attached to this field at deserialization time.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
2487
2488impl Default for ExperienceConfig {
2489    fn default() -> Self {
2490        Self {
2491            enabled: false,
2492            evolution_sweep_enabled: false,
2493            confidence_prune_threshold: default_confidence_prune_threshold(),
2494            evolution_sweep_interval: default_evolution_sweep_interval(),
2495        }
2496    }
2497}
2498
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// The container-level `#[serde(default)]` fills any omitted key from
/// `GraphConfig::default()`; fields that carry their own `#[serde(default = "...")]`
/// use that helper instead.
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Whether the graph memory subsystem is enabled. Default: `false`.
    pub enabled: bool,
    /// Presumably the model identifier used for extraction — NOTE(review): confirm how the
    /// consumer interprets an empty value. Default: `""`.
    pub extract_model: String,
    /// Presumably the cap on entities taken from a single message — TODO confirm against the
    /// extraction pipeline. Default: `default_graph_max_entities_per_message()`.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Presumably the cap on edges taken from a single message — TODO confirm against the
    /// extraction pipeline. Default: `default_graph_max_edges_per_message()`.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Community refresh cadence — NOTE(review): the unit (messages, turns, ...) is not
    /// visible here; confirm. Default: `default_graph_community_refresh_interval()`.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used for entity resolution, going by the name — TODO confirm
    /// range and comparison direction. Default: `default_graph_entity_similarity_threshold()`.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Extraction timeout in seconds (per the `_secs` suffix). NOTE(review): relationship to
    /// `llm_timeout_secs` should be confirmed. Default: `default_graph_extraction_timeout_secs()`.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Presumably toggles embedding-based entity resolution — TODO confirm. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Threshold at which an entity match is treated as ambiguous, going by the name —
    /// TODO confirm. Default: `default_graph_entity_ambiguous_threshold()`.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Presumably the hop limit for graph traversal during recall — TODO confirm.
    /// Default: `default_graph_max_hops()`.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Presumably the maximum number of facts returned by graph recall — TODO confirm.
    /// Default: `default_graph_recall_limit()`.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    ///
    /// Values read from config pass through `validate_temporal_decay_rate`.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
    ///
    /// When non-empty, graph extraction (and downstream note linking and community
    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
    /// which causes systematic quality gate rejections. A named provider built via
    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
    /// has no quality gate attached.
    ///
    /// Falls back to the primary provider when empty. Default: `""` (use primary).
    #[serde(default)]
    pub extract_provider: ProviderName,
    /// Named LLM provider for hybrid strategy classification.
    /// Falls back to the default provider when `None`. Default: `None`.
    #[serde(default)]
    pub strategy_classifier_provider: Option<ProviderName>,
    /// Beam search configuration (`[memory.graph.beam_search]`).
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration (`[memory.graph.watercircles]`).
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration (`[memory.graph.experience]`).
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    ///
    /// The range is enforced at deserialization by `validate_link_weight_decay_lambda`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
    /// APEX-MEM append-only write path (#3631).
    ///
    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
    /// supersession chains instead of the legacy destructive-update path.
    #[serde(default)]
    pub apex_mem: ApexMemConfig,
    /// LLM call timeout per extraction request, in seconds. Default: `30`.
    #[serde(default = "default_graph_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
    /// PRISM query-sensitive edge costing in A* graph recall.
    ///
    /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
    /// between the query embedding and the target entity embedding:
    /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
    /// Edges toward semantically relevant entities receive lower cost and are therefore
    /// preferred by A*, producing query-aligned recall paths.
    ///
    /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
    /// store is unavailable or a target entity has no stored embedding, falls back to the
    /// baseline cost `1.0 - confidence`.
    ///
    /// Default: `false` (preserves existing A* behaviour).
    #[serde(default)]
    pub query_sensitive_cost: bool,

    /// Implicit conflict detection for SYNAPSE recall (spec 004-17, STALE/CUPMem).
    ///
    /// When enabled, write-time fuzzy predicate matching detects implicit conflicts
    /// between graph edges and annotates SYNAPSE recall results accordingly.
    #[serde(default)]
    pub implicit_conflict: ImplicitConflictConfig,
}
2670
/// Similarity method for implicit conflict detection.
///
/// Serialized in `snake_case` (`levenshtein`, `embedding`, `both`). The enum is
/// `#[non_exhaustive]`, so downstream matches need a wildcard arm.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    serde::Serialize,
    serde::Deserialize,
    schemars::JsonSchema,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum SimilarityMethod {
    /// Normalized Levenshtein edit distance. This is the default.
    #[default]
    Levenshtein,
    /// Cosine similarity over pre-computed predicate embeddings.
    Embedding,
    /// Either method triggers detection.
    Both,
}
2694
/// Resolution strategy when an implicit conflict is detected.
///
/// Serialized in `snake_case` (`flag_only`, `recency`, `confidence`, `llm`). The enum is
/// `#[non_exhaustive]`, so downstream matches need a wildcard arm.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    serde::Serialize,
    serde::Deserialize,
    schemars::JsonSchema,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ConflictResolutionStrategy {
    /// Mark the pair as a candidate but do not supersede either edge. This is the default.
    #[default]
    FlagOnly,
    /// Supersede the older edge via APEX-MEM `insert_or_supersede`.
    Recency,
    /// Supersede the lower-confidence edge.
    Confidence,
    /// Delegate resolution to an LLM provider; fall back to `flag_only` on timeout.
    Llm,
}
2720
/// Configuration for the optional background consolidation daemon (spec 004-17).
///
/// Embedded in `ImplicitConflictConfig` as the `consolidation_daemon` field. Omitted keys
/// are filled from `ConsolidationDaemonConfig::default()` via the container-level
/// `#[serde(default)]`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ConsolidationDaemonConfig {
    /// Enable the background consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// How often the daemon runs, in seconds. Default: 7200 (2 hours).
    #[serde(default = "default_ic_daemon_interval_secs")]
    pub interval_seconds: u64,
    /// Maximum number of candidates processed per daemon run. Default: 100.
    #[serde(default = "default_ic_daemon_batch_size")]
    pub batch_size: usize,
}
2734
2735impl Default for ConsolidationDaemonConfig {
2736    fn default() -> Self {
2737        Self {
2738            enabled: false,
2739            interval_seconds: default_ic_daemon_interval_secs(),
2740            batch_size: default_ic_daemon_batch_size(),
2741        }
2742    }
2743}
2744
/// Serde default for `ConsolidationDaemonConfig::interval_seconds`: two hours, in seconds.
fn default_ic_daemon_interval_secs() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
2748
/// Serde default for `ConsolidationDaemonConfig::batch_size`.
fn default_ic_daemon_batch_size() -> usize {
    const CANDIDATES_PER_RUN: usize = 100;
    CANDIDATES_PER_RUN
}
2752
/// Configuration for implicit conflict detection (spec 004-17, STALE/CUPMem).
///
/// Controls write-time fuzzy predicate matching and SYNAPSE recall annotation.
/// All detection is gated behind `enabled`, which defaults to `false` — no overhead when
/// disabled.
///
/// TOML path: `[memory.graph.implicit_conflict]`
///
/// # Examples
///
/// ```toml
/// [memory.graph.implicit_conflict]
/// enabled = true
/// similarity_method = "levenshtein"
/// conflict_similarity_threshold = 0.80
/// resolution_strategy = "flag_only"
/// candidate_ttl_days = 30
/// propagation_depth = 2
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ImplicitConflictConfig {
    /// Enable implicit conflict detection. Default: `false`.
    pub enabled: bool,
    /// Similarity method used to detect candidate pairs. Default: `levenshtein`.
    #[serde(default)]
    pub similarity_method: SimilarityMethod,
    /// Minimum similarity score to flag a pair as a conflict candidate. Default: 0.80.
    ///
    /// NOTE(review): no range validation is attached to this field at deserialization time.
    #[serde(default = "default_ic_similarity_threshold")]
    pub conflict_similarity_threshold: f64,
    /// How to resolve detected conflicts. Default: `flag_only`.
    #[serde(default)]
    pub resolution_strategy: ConflictResolutionStrategy,
    /// Provider name (from `[[llm.providers]]`) for LLM-mediated resolution.
    #[serde(default)]
    pub implicit_conflict_provider: crate::providers::ProviderName,
    /// LLM resolution timeout in milliseconds. Default: 800.
    #[serde(default = "default_ic_llm_timeout_ms")]
    pub conflict_llm_timeout_ms: u64,
    /// Days before an unresolved candidate entry expires. Default: 30.
    #[serde(default = "default_ic_candidate_ttl_days")]
    pub candidate_ttl_days: u32,
    /// SYNAPSE propagation depth for surfacing superseding facts. Default: 2.
    #[serde(default = "default_ic_propagation_depth")]
    pub propagation_depth: u32,
    /// Background consolidation daemon configuration.
    #[serde(default)]
    pub consolidation_daemon: ConsolidationDaemonConfig,
}
2801
2802impl Default for ImplicitConflictConfig {
2803    fn default() -> Self {
2804        Self {
2805            enabled: false,
2806            similarity_method: SimilarityMethod::default(),
2807            conflict_similarity_threshold: default_ic_similarity_threshold(),
2808            resolution_strategy: ConflictResolutionStrategy::default(),
2809            implicit_conflict_provider: crate::providers::ProviderName::default(),
2810            conflict_llm_timeout_ms: default_ic_llm_timeout_ms(),
2811            candidate_ttl_days: default_ic_candidate_ttl_days(),
2812            propagation_depth: default_ic_propagation_depth(),
2813            consolidation_daemon: ConsolidationDaemonConfig::default(),
2814        }
2815    }
2816}
2817
/// Serde default for `ImplicitConflictConfig::conflict_similarity_threshold`.
fn default_ic_similarity_threshold() -> f64 {
    const SIMILARITY_THRESHOLD: f64 = 0.80;
    SIMILARITY_THRESHOLD
}
2821
/// Serde default for `ImplicitConflictConfig::conflict_llm_timeout_ms`, in milliseconds.
fn default_ic_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 800;
    LLM_TIMEOUT_MS
}
2825
/// Serde default for `ImplicitConflictConfig::candidate_ttl_days`.
fn default_ic_candidate_ttl_days() -> u32 {
    const CANDIDATE_TTL_DAYS: u32 = 30;
    CANDIDATE_TTL_DAYS
}
2829
/// Serde default for `ImplicitConflictConfig::propagation_depth`.
fn default_ic_propagation_depth() -> u32 {
    const PROPAGATION_DEPTH: u32 = 2;
    PROPAGATION_DEPTH
}
2833
/// Serde default for `GraphConfig::pool_size`: three connections for the graph pool.
fn default_graph_pool_size() -> u32 {
    const GRAPH_POOL_SIZE: u32 = 3;
    GRAPH_POOL_SIZE
}
2837
/// Serde default for `GraphConfig::llm_timeout_secs`, in seconds.
fn default_graph_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
2841
/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
///
/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
/// the full supersession chain and enables conflict resolution.
///
/// `Default` is derived, so an absent section leaves the feature off.
///
/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ApexMemConfig {
    /// Enable the APEX-MEM append-only write path. Default: `false`.
    pub enabled: bool,
}
2855
/// Serde default for `WriteQualityGateConfig::threshold`.
fn default_quality_gate_threshold() -> f32 {
    const REJECT_BELOW: f32 = 0.55;
    REJECT_BELOW
}
2859
/// Serde default for `WriteQualityGateConfig::recent_window`.
fn default_quality_gate_recent_window() -> usize {
    const RECENT_WRITES: usize = 32;
    RECENT_WRITES
}
2863
/// Serde default for `WriteQualityGateConfig::contradiction_grace_seconds`: five minutes.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
2867
/// Serde default for `WriteQualityGateConfig::information_value_weight`.
fn default_quality_gate_information_value_weight() -> f32 {
    const INFORMATION_VALUE_WEIGHT: f32 = 0.4;
    INFORMATION_VALUE_WEIGHT
}
2871
/// Serde default for `WriteQualityGateConfig::reference_completeness_weight`.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    const REFERENCE_COMPLETENESS_WEIGHT: f32 = 0.3;
    REFERENCE_COMPLETENESS_WEIGHT
}
2875
/// Serde default for `WriteQualityGateConfig::contradiction_weight`.
fn default_quality_gate_contradiction_weight() -> f32 {
    const CONTRADICTION_WEIGHT: f32 = 0.3;
    CONTRADICTION_WEIGHT
}
2879
/// Serde default for `WriteQualityGateConfig::rejection_rate_alarm_ratio`.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    const ALARM_RATIO: f32 = 0.35;
    ALARM_RATIO
}
2883
/// Serde default for `WriteQualityGateConfig::llm_timeout_ms`, in milliseconds.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 500;
    LLM_TIMEOUT_MS
}
2887
/// Serde default for `WriteQualityGateConfig::llm_weight`.
fn default_quality_gate_llm_weight() -> f32 {
    const LLM_BLEND_WEIGHT: f32 = 0.5;
    LLM_BLEND_WEIGHT
}
2891
/// Serde default for `WriteQualityGateConfig::reference_check_lang_en`: checks are on.
fn default_quality_gate_reference_check_lang_en() -> bool {
    const REFERENCE_CHECK_ENABLED: bool = true;
    REFERENCE_CHECK_ENABLED
}
2895
/// Write quality gate configuration (`[memory.quality_gate]`).
///
/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
/// scoring is opt-in via `quality_gate_provider`.
///
/// Omitted keys are filled from `WriteQualityGateConfig::default()` (container-level
/// `#[serde(default)]`) or from the field's own default helper where one is declared.
/// NOTE(review): none of the `f32` fields carry range validation at deserialization time.
///
/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct WriteQualityGateConfig {
    /// Enable the write quality gate. Default: `false`.
    pub enabled: bool,
    /// Combined score threshold below which writes are rejected. Default: `0.55`.
    #[serde(default = "default_quality_gate_threshold")]
    pub threshold: f32,
    /// Number of recent writes compared for information-value scoring. Default: `32`.
    #[serde(default = "default_quality_gate_recent_window")]
    pub recent_window: usize,
    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
    pub contradiction_grace_seconds: u64,
    /// Weight of `information_value` sub-score. Default: `0.4`.
    #[serde(default = "default_quality_gate_information_value_weight")]
    pub information_value_weight: f32,
    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_reference_completeness_weight")]
    pub reference_completeness_weight: f32,
    /// Weight of `contradiction` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_contradiction_weight")]
    pub contradiction_weight: f32,
    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
    pub rejection_rate_alarm_ratio: f32,
    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
    #[serde(default)]
    pub quality_gate_provider: ProviderName,
    /// LLM timeout in milliseconds. Default: `500`.
    #[serde(default = "default_quality_gate_llm_timeout_ms")]
    pub llm_timeout_ms: u64,
    /// LLM blend weight into final score. Default: `0.5`.
    #[serde(default = "default_quality_gate_llm_weight")]
    pub llm_weight: f32,
    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
    #[serde(default = "default_quality_gate_reference_check_lang_en")]
    pub reference_check_lang_en: bool,
}
2942
2943impl Default for WriteQualityGateConfig {
2944    fn default() -> Self {
2945        Self {
2946            enabled: false,
2947            threshold: default_quality_gate_threshold(),
2948            recent_window: default_quality_gate_recent_window(),
2949            contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
2950            information_value_weight: default_quality_gate_information_value_weight(),
2951            reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
2952            contradiction_weight: default_quality_gate_contradiction_weight(),
2953            rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
2954            quality_gate_provider: ProviderName::default(),
2955            llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
2956            llm_weight: default_quality_gate_llm_weight(),
2957            reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
2958        }
2959    }
2960}
2961
2962impl Default for GraphConfig {
2963    fn default() -> Self {
2964        Self {
2965            enabled: false,
2966            extract_model: String::new(),
2967            max_entities_per_message: default_graph_max_entities_per_message(),
2968            max_edges_per_message: default_graph_max_edges_per_message(),
2969            community_refresh_interval: default_graph_community_refresh_interval(),
2970            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
2971            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
2972            use_embedding_resolution: false,
2973            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
2974            max_hops: default_graph_max_hops(),
2975            recall_limit: default_graph_recall_limit(),
2976            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
2977            max_entities: 0,
2978            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
2979            community_summary_concurrency: default_graph_community_summary_concurrency(),
2980            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
2981            temporal_decay_rate: default_graph_temporal_decay_rate(),
2982            edge_history_limit: default_graph_edge_history_limit(),
2983            note_linking: NoteLinkingConfig::default(),
2984            spreading_activation: SpreadingActivationConfig::default(),
2985            retrieval_strategy: GraphRetrievalStrategy::default(),
2986            extract_provider: ProviderName::default(),
2987            strategy_classifier_provider: None,
2988            beam_search: BeamSearchConfig::default(),
2989            watercircles: WaterCirclesConfig::default(),
2990            experience: ExperienceConfig::default(),
2991            link_weight_decay_lambda: default_link_weight_decay_lambda(),
2992            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
2993            belief_revision: BeliefRevisionConfig::default(),
2994            rpe: RpeConfig::default(),
2995            pool_size: default_graph_pool_size(),
2996            apex_mem: ApexMemConfig::default(),
2997            llm_timeout_secs: default_graph_llm_timeout_secs(),
2998            query_sensitive_cost: false,
2999            implicit_conflict: ImplicitConflictConfig::default(),
3000        }
3001    }
3002}
3003
/// Serde default for `ConsolidationConfig::confidence_threshold`.
fn default_consolidation_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.7;
    CONFIDENCE_THRESHOLD
}
3007
/// Serde default for `ConsolidationConfig::sweep_interval_secs`: one hour, in seconds.
fn default_consolidation_sweep_interval_secs() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
3011
/// Serde default for `ConsolidationConfig::sweep_batch_size`.
fn default_consolidation_sweep_batch_size() -> usize {
    const MESSAGES_PER_SWEEP: usize = 50;
    MESSAGES_PER_SWEEP
}
3015
/// Serde default for `ConsolidationConfig::similarity_threshold`.
fn default_consolidation_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
3019
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// Omitted keys are filled from `ConsolidationConfig::default()` (container-level
/// `#[serde(default)]`) or from the field's own default helper where one is declared.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
    #[serde(default = "default_consolidation_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
    /// Per-call timeout for every `embed()` invocation in the consolidation sweep, in seconds.
    /// Default: `5`.
    #[serde(default = "default_embed_timeout_secs")]
    pub embed_timeout_secs: u64,
}
3055
3056impl Default for ConsolidationConfig {
3057    fn default() -> Self {
3058        Self {
3059            enabled: false,
3060            consolidation_provider: ProviderName::default(),
3061            confidence_threshold: default_consolidation_confidence_threshold(),
3062            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
3063            sweep_batch_size: default_consolidation_sweep_batch_size(),
3064            similarity_threshold: default_consolidation_similarity_threshold(),
3065            llm_timeout_secs: default_consolidation_llm_timeout_secs(),
3066            embed_timeout_secs: default_embed_timeout_secs(),
3067        }
3068    }
3069}
3070
/// Serde default for `ConsolidationConfig::llm_timeout_secs`, in seconds.
fn default_consolidation_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
3074
/// Serde default for `GraphConfig::link_weight_decay_lambda`.
fn default_link_weight_decay_lambda() -> f64 {
    const DECAY_LAMBDA: f64 = 0.95;
    DECAY_LAMBDA
}
3078
/// Serde default for `GraphConfig::link_weight_decay_interval_secs`: 24 hours, in seconds.
fn default_link_weight_decay_interval_secs() -> u64 {
    const ONE_DAY_SECS: u64 = 24 * 60 * 60;
    ONE_DAY_SECS
}
3082
3083fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
3084where
3085    D: serde::Deserializer<'de>,
3086{
3087    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
3088    if value.is_nan() || value.is_infinite() {
3089        return Err(serde::de::Error::custom(
3090            "link_weight_decay_lambda must be a finite number",
3091        ));
3092    }
3093    if !(value > 0.0 && value <= 1.0) {
3094        return Err(serde::de::Error::custom(
3095            "link_weight_decay_lambda must be in (0.0, 1.0]",
3096        ));
3097    }
3098    Ok(value)
3099}
3100
3101fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
3102where
3103    D: serde::Deserializer<'de>,
3104{
3105    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3106    if value.is_nan() || value.is_infinite() {
3107        return Err(serde::de::Error::custom(
3108            "threshold must be a finite number",
3109        ));
3110    }
3111    if !(0.0..=1.0).contains(&value) {
3112        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
3113    }
3114    Ok(value)
3115}
3116
3117fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
3118where
3119    D: serde::Deserializer<'de>,
3120{
3121    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3122    if value.is_nan() || value.is_infinite() {
3123        return Err(serde::de::Error::custom(
3124            "fast_path_margin must be a finite number",
3125        ));
3126    }
3127    if !(0.0..=1.0).contains(&value) {
3128        return Err(serde::de::Error::custom(
3129            "fast_path_margin must be in [0.0, 1.0]",
3130        ));
3131    }
3132    Ok(value)
3133}
3134
/// Default admission threshold: `0.40`.
///
/// NOTE(review): the field this backs is declared outside this excerpt — confirm usage.
fn default_admission_threshold() -> f32 {
    const ADMISSION_THRESHOLD: f32 = 0.40;
    ADMISSION_THRESHOLD
}
3138
/// Default admission fast-path margin: `0.15`.
///
/// NOTE(review): the field this backs is declared outside this excerpt — confirm usage.
fn default_admission_fast_path_margin() -> f32 {
    const FAST_PATH_MARGIN: f32 = 0.15;
    FAST_PATH_MARGIN
}
3142
/// Default for the `rl_min_samples` setting: `500`.
///
/// NOTE(review): the field this backs is declared outside this excerpt — confirm usage.
fn default_rl_min_samples() -> u32 {
    const MIN_SAMPLES: u32 = 500;
    MIN_SAMPLES
}
3146
/// Default RL retrain interval: one hour, in seconds.
///
/// NOTE(review): the field this backs is declared outside this excerpt — confirm usage.
fn default_rl_retrain_interval_secs() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
3150
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in `snake_case` (`heuristic`, `rl`). The enum is `#[non_exhaustive]`, so
/// downstream matches need a wildcard arm.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
3166
3167fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
3168where
3169    D: serde::Deserializer<'de>,
3170{
3171    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3172    if value < 0.0 {
3173        return Err(serde::de::Error::custom(
3174            "admission weight must be non-negative (>= 0.0)",
3175        ));
3176    }
3177    Ok(value)
3178}
3179
3180/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
3181///
3182/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
3183/// All values must be non-negative.
3184#[derive(Debug, Clone, Deserialize, Serialize)]
3185#[serde(default)]
3186pub struct AdmissionWeights {
3187    /// LLM-estimated future reuse probability. Default: `0.30`.
3188    #[serde(deserialize_with = "validate_admission_weight")]
3189    pub future_utility: f32,
3190    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
3191    #[serde(deserialize_with = "validate_admission_weight")]
3192    pub factual_confidence: f32,
3193    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
3194    #[serde(deserialize_with = "validate_admission_weight")]
3195    pub semantic_novelty: f32,
3196    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
3197    #[serde(deserialize_with = "validate_admission_weight")]
3198    pub temporal_recency: f32,
3199    /// Content type prior based on role. Default: `0.15`.
3200    #[serde(deserialize_with = "validate_admission_weight")]
3201    pub content_type_prior: f32,
3202    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
3203    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
3204    /// Normalized automatically at runtime. Default: `0.0`.
3205    #[serde(deserialize_with = "validate_admission_weight")]
3206    pub goal_utility: f32,
3207}
3208
3209impl Default for AdmissionWeights {
3210    fn default() -> Self {
3211        Self {
3212            future_utility: 0.30,
3213            factual_confidence: 0.15,
3214            semantic_novelty: 0.30,
3215            temporal_recency: 0.10,
3216            content_type_prior: 0.15,
3217            goal_utility: 0.0,
3218        }
3219    }
3220}
3221
3222impl AdmissionWeights {
3223    /// Return weights normalized so they sum to 1.0.
3224    ///
3225    /// All weights are non-negative; the sum is always > 0 when defaults are used.
3226    #[must_use]
3227    pub fn normalized(&self) -> Self {
3228        let sum = self.future_utility
3229            + self.factual_confidence
3230            + self.semantic_novelty
3231            + self.temporal_recency
3232            + self.content_type_prior
3233            + self.goal_utility;
3234        if sum <= f32::EPSILON {
3235            return Self::default();
3236        }
3237        Self {
3238            future_utility: self.future_utility / sum,
3239            factual_confidence: self.factual_confidence / sum,
3240            semantic_novelty: self.semantic_novelty / sum,
3241            temporal_recency: self.temporal_recency / sum,
3242            content_type_prior: self.content_type_prior / sum,
3243            goal_utility: self.goal_utility / sum,
3244        }
3245    }
3246}
3247
3248/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
3249///
3250/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
3251/// Messages below the composite admission threshold are rejected and not persisted.
3252#[derive(Debug, Clone, Deserialize, Serialize)]
3253#[serde(default)]
3254pub struct AdmissionConfig {
3255    /// Enable A-MAC admission control. Default: `false`.
3256    pub enabled: bool,
3257    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
3258    /// Default: `0.40`.
3259    #[serde(deserialize_with = "validate_admission_threshold")]
3260    pub threshold: f32,
3261    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
3262    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
3263    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
3264    pub fast_path_margin: f32,
3265    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
3266    /// Falls back to the primary provider when empty. Default: `""`.
3267    pub admission_provider: ProviderName,
3268    /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
3269    pub weights: AdmissionWeights,
3270    /// Admission decision strategy. Default: `heuristic`.
3271    #[serde(default)]
3272    pub admission_strategy: AdmissionStrategy,
3273    /// Minimum training samples before the RL model is activated.
3274    /// Below this count the system falls back to `Heuristic`. Default: `500`.
3275    #[serde(default = "default_rl_min_samples")]
3276    pub rl_min_samples: u32,
3277    /// Background RL model retraining interval in seconds. Default: `3600`.
3278    #[serde(default = "default_rl_retrain_interval_secs")]
3279    pub rl_retrain_interval_secs: u64,
3280    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
3281    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
3282    /// Zero regression when `false`. Default: `false`.
3283    #[serde(default)]
3284    pub goal_conditioned_write: bool,
3285    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
3286    /// Used only for borderline cases (similarity within 0.1 of threshold).
3287    /// Falls back to the primary provider when empty. Default: `""`.
3288    #[serde(default)]
3289    pub goal_utility_provider: ProviderName,
3290    /// Minimum cosine similarity between goal embedding and candidate memory
3291    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
3292    #[serde(default = "default_goal_utility_threshold")]
3293    pub goal_utility_threshold: f32,
3294    /// Weight of the `goal_utility` factor in the composite admission score.
3295    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
3296    #[serde(default = "default_goal_utility_weight")]
3297    pub goal_utility_weight: f32,
3298}
3299
/// Default goal-utility similarity threshold: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    const THRESHOLD: f32 = 0.4;
    THRESHOLD
}
3303
/// Default weight of the `goal_utility` factor: `0.25`.
fn default_goal_utility_weight() -> f32 {
    const WEIGHT: f32 = 0.25;
    WEIGHT
}
3307
3308impl Default for AdmissionConfig {
3309    fn default() -> Self {
3310        Self {
3311            enabled: false,
3312            threshold: default_admission_threshold(),
3313            fast_path_margin: default_admission_fast_path_margin(),
3314            admission_provider: ProviderName::default(),
3315            weights: AdmissionWeights::default(),
3316            admission_strategy: AdmissionStrategy::default(),
3317            rl_min_samples: default_rl_min_samples(),
3318            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
3319            goal_conditioned_write: false,
3320            goal_utility_provider: ProviderName::default(),
3321            goal_utility_threshold: default_goal_utility_threshold(),
3322            goal_utility_weight: default_goal_utility_weight(),
3323        }
3324    }
3325}
3326
3327/// Routing strategy for `[memory.store_routing]`.
3328#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
3329#[serde(rename_all = "snake_case")]
3330#[non_exhaustive]
3331pub enum StoreRoutingStrategy {
3332    /// Pure heuristic pattern matching. Zero LLM calls. Default.
3333    #[default]
3334    Heuristic,
3335    /// LLM-based classification via `routing_classifier_provider`.
3336    Llm,
3337    /// Heuristic first; escalates to LLM only when confidence is low.
3338    Hybrid,
3339}
3340
3341/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
3342///
3343/// Controls how each query is classified and routed to the appropriate memory
3344/// backend(s), avoiding unnecessary store queries for simple lookups.
3345#[derive(Debug, Clone, Deserialize, Serialize)]
3346#[serde(default)]
3347pub struct StoreRoutingConfig {
3348    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
3349    /// directly (existing behavior). Default: `false`.
3350    pub enabled: bool,
3351    /// Routing strategy. Default: `heuristic`.
3352    pub strategy: StoreRoutingStrategy,
3353    /// Provider name from `[[llm.providers]]` for LLM-based classification.
3354    /// Falls back to the primary provider when empty. Default: `""`.
3355    pub routing_classifier_provider: ProviderName,
3356    /// Route to use when the classifier is uncertain (confidence < threshold).
3357    ///
3358    /// Defaults to [`MemoryRoute::Hybrid`].
3359    pub fallback_route: MemoryRoute,
3360    /// Confidence threshold below which `HybridRouter` escalates to LLM.
3361    /// Range: `[0.0, 1.0]`. Default: `0.7`.
3362    pub confidence_threshold: f32,
3363}
3364
3365impl Default for StoreRoutingConfig {
3366    fn default() -> Self {
3367        Self {
3368            enabled: false,
3369            strategy: StoreRoutingStrategy::Heuristic,
3370            routing_classifier_provider: ProviderName::default(),
3371            fallback_route: MemoryRoute::Hybrid,
3372            confidence_threshold: 0.7,
3373        }
3374    }
3375}
3376
3377/// Persona memory layer configuration (#2461).
3378///
3379/// When `enabled = true`, user preferences and domain knowledge are extracted from
3380/// conversation history via a cheap LLM provider and injected after the system prompt.
3381#[derive(Debug, Clone, Deserialize, Serialize)]
3382#[serde(default)]
3383pub struct PersonaConfig {
3384    /// Enable persona memory extraction and injection. Default: `false`.
3385    pub enabled: bool,
3386    /// Provider name from `[[llm.providers]]` for persona extraction.
3387    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
3388    pub persona_provider: ProviderName,
3389    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
3390    pub min_confidence: f64,
3391    /// Minimum user messages before extraction runs in a session. Default: `3`.
3392    pub min_messages: usize,
3393    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
3394    pub max_messages: usize,
3395    /// LLM timeout for the extraction call in seconds. Default: `10`.
3396    pub extraction_timeout_secs: u64,
3397    /// Token budget allocated to persona context in assembly. Default: `500`.
3398    pub context_budget_tokens: usize,
3399}
3400
3401impl Default for PersonaConfig {
3402    fn default() -> Self {
3403        Self {
3404            enabled: false,
3405            persona_provider: ProviderName::default(),
3406            min_confidence: 0.6,
3407            min_messages: 3,
3408            max_messages: 10,
3409            extraction_timeout_secs: 10,
3410            context_budget_tokens: 500,
3411        }
3412    }
3413}
3414
3415/// Trajectory-informed memory configuration (#2498).
3416///
3417/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
3418/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
3419/// Procedural entries are injected into context as "past experience" during assembly.
3420#[derive(Debug, Clone, Deserialize, Serialize)]
3421#[serde(default)]
3422pub struct TrajectoryConfig {
3423    /// Enable trajectory extraction and context injection. Default: `false`.
3424    pub enabled: bool,
3425    /// Provider name from `[[llm.providers]]` for extraction.
3426    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3427    pub trajectory_provider: ProviderName,
3428    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
3429    pub context_budget_tokens: usize,
3430    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
3431    pub max_messages: usize,
3432    /// LLM timeout for the extraction call in seconds. Default: `10`.
3433    pub extraction_timeout_secs: u64,
3434    /// Number of procedural entries retrieved for context injection. Default: `5`.
3435    pub recall_top_k: usize,
3436    /// Minimum confidence score for entries included in context. Default: `0.6`.
3437    pub min_confidence: f64,
3438}
3439
3440impl Default for TrajectoryConfig {
3441    fn default() -> Self {
3442        Self {
3443            enabled: false,
3444            trajectory_provider: ProviderName::default(),
3445            context_budget_tokens: 400,
3446            max_messages: 10,
3447            extraction_timeout_secs: 10,
3448            recall_top_k: 5,
3449            min_confidence: 0.6,
3450        }
3451    }
3452}
3453
3454/// Category-aware memory configuration (#2428).
3455///
3456/// When `enabled = true`, messages are auto-tagged with a category derived from the active
3457/// skill or tool context. The category is stored in the `messages.category` column and used
3458/// as a Qdrant payload filter during recall.
3459#[derive(Debug, Clone, Deserialize, Serialize)]
3460#[serde(default)]
3461pub struct CategoryConfig {
3462    /// Enable category tagging and category-filtered recall. Default: `false`.
3463    pub enabled: bool,
3464    /// Automatically assign category from skill metadata or tool type. Default: `true`.
3465    pub auto_tag: bool,
3466}
3467
3468impl Default for CategoryConfig {
3469    fn default() -> Self {
3470        Self {
3471            enabled: false,
3472            auto_tag: true,
3473        }
3474    }
3475}
3476
3477/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
3478///
3479/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
3480/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
3481/// for complex queries.
3482#[derive(Debug, Clone, Deserialize, Serialize)]
3483#[serde(default)]
3484pub struct TreeConfig {
3485    /// Enable the memory tree and background consolidation loop. Default: `false`.
3486    pub enabled: bool,
3487    /// Provider name from `[[llm.providers]]` for node consolidation.
3488    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3489    pub consolidation_provider: ProviderName,
3490    /// Interval between consolidation sweeps in seconds. Default: `300`.
3491    pub sweep_interval_secs: u64,
3492    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
3493    pub batch_size: usize,
3494    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
3495    pub similarity_threshold: f32,
3496    /// Maximum tree depth (levels above leaves). Default: `3`.
3497    pub max_level: u32,
3498    /// Token budget allocated to tree memory in context assembly. Default: `400`.
3499    pub context_budget_tokens: usize,
3500    /// Number of tree nodes retrieved for context. Default: `5`.
3501    pub recall_top_k: usize,
3502    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
3503    pub min_cluster_size: usize,
3504}
3505
3506impl Default for TreeConfig {
3507    fn default() -> Self {
3508        Self {
3509            enabled: false,
3510            consolidation_provider: ProviderName::default(),
3511            sweep_interval_secs: 300,
3512            batch_size: 20,
3513            similarity_threshold: 0.8,
3514            max_level: 3,
3515            context_budget_tokens: 400,
3516            recall_top_k: 5,
3517            min_cluster_size: 2,
3518        }
3519    }
3520}
3521
3522/// Time-based microcompact configuration (#2699).
3523///
3524/// When `enabled = true`, low-value tool outputs are cleared from context
3525/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
3526/// The most recent `keep_recent` tool messages are preserved unconditionally.
3527#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3528#[serde(default)]
3529pub struct MicrocompactConfig {
3530    /// Enable time-based microcompaction. Default: `false`.
3531    pub enabled: bool,
3532    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
3533    pub gap_threshold_minutes: u32,
3534    /// Number of most recent compactable tool messages to preserve. Default: `3`.
3535    pub keep_recent: usize,
3536}
3537
3538impl Default for MicrocompactConfig {
3539    fn default() -> Self {
3540        Self {
3541            enabled: false,
3542            gap_threshold_minutes: 60,
3543            keep_recent: 3,
3544        }
3545    }
3546}
3547
3548/// autoDream background memory consolidation configuration (#2697).
3549///
3550/// When `enabled = true`, a constrained consolidation subagent runs after
3551/// a session ends if both `min_sessions` and `min_hours` gates pass.
3552#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3553#[serde(default)]
3554pub struct AutoDreamConfig {
3555    /// Enable autoDream consolidation. Default: `false`.
3556    pub enabled: bool,
3557    /// Minimum number of sessions between consolidations. Default: `3`.
3558    pub min_sessions: u32,
3559    /// Minimum hours between consolidations. Default: `24`.
3560    pub min_hours: u32,
3561    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
3562    /// Falls back to the primary provider when empty. Default: `""`.
3563    pub consolidation_provider: ProviderName,
3564    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
3565    pub max_iterations: u8,
3566    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
3567    #[serde(default = "default_autodream_llm_timeout_secs")]
3568    pub llm_timeout_secs: u64,
3569}
3570
3571impl Default for AutoDreamConfig {
3572    fn default() -> Self {
3573        Self {
3574            enabled: false,
3575            min_sessions: 3,
3576            min_hours: 24,
3577            consolidation_provider: ProviderName::default(),
3578            max_iterations: 8,
3579            llm_timeout_secs: default_autodream_llm_timeout_secs(),
3580        }
3581    }
3582}
3583
/// Default autoDream LLM call timeout in seconds: `30`.
fn default_autodream_llm_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}
3587
3588/// `MagicDocs` auto-maintained markdown configuration (#2702).
3589///
3590/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
3591/// are registered and periodically updated by a constrained subagent.
3592#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3593#[serde(default)]
3594pub struct MagicDocsConfig {
3595    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
3596    pub enabled: bool,
3597    /// Minimum turns between updates for a given doc path. Default: `5`.
3598    pub min_turns_between_updates: u32,
3599    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
3600    /// Falls back to the primary provider when empty. Default: `""`.
3601    pub update_provider: ProviderName,
3602    /// Maximum agent loop iterations per doc update. Default: `4`.
3603    pub max_iterations: u8,
3604}
3605
3606impl Default for MagicDocsConfig {
3607    fn default() -> Self {
3608        Self {
3609            enabled: false,
3610            min_turns_between_updates: 5,
3611            update_provider: ProviderName::default(),
3612            max_iterations: 4,
3613        }
3614    }
3615}
3616
#[cfg(test)]
mod tests {
    use super::*;

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Helper: deserialize a `SemanticConfig` from a single `importance_weight = <value>` line.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // Boundary case: a value just below the lower bound must be rejected.
        // Non-finite inputs (TOML `nan` / `inf`) are not exercised by this test.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Helper: sum of all six admission weights, including `goal_utility`.
    fn admission_weight_sum(w: &AdmissionWeights) -> f32 {
        w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility
    }

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // A non-zero `goal_utility` is used so the sixth factor is exercised too.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 2.0,
        };
        let n = w.normalized();
        let sum = admission_weight_sum(&n);
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = admission_weight_sum(&n);
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    // Every field is compared, not just the first one.
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        for (field, got, want) in [
            ("future_utility", n.future_utility, default.future_utility),
            (
                "factual_confidence",
                n.factual_confidence,
                default.factual_confidence,
            ),
            (
                "semantic_novelty",
                n.semantic_novelty,
                default.semantic_novelty,
            ),
            (
                "temporal_recency",
                n.temporal_recency,
                default.temporal_recency,
            ),
            (
                "content_type_prior",
                n.content_type_prior,
                default.content_type_prior,
            ),
            ("goal_utility", n.goal_utility, default.goal_utility),
        ] {
            assert!(
                (got - want).abs() < 0.001,
                "zero-sum weights must fall back to defaults (`{field}`: got {got}, want {want})"
            );
        }
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
3892
/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
///
/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
/// Successful and failed reasoning chains are compressed into short, generalizable strategy
/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
/// and injected into the prompt preamble.
///
/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
///
/// The struct carries `#[serde(default)]`, so any key omitted from the table takes its
/// value from the `Default` implementation; an empty table is a valid configuration.
///
/// # Example
///
/// ```toml
/// [memory.reasoning]
/// enabled = true
/// extract_provider = "fast"
/// distill_provider = "fast"
/// top_k = 3
/// store_limit = 1000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ReasoningConfig {
    /// Enable the reasoning-bank pipeline. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the self-judge step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub extract_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for the distillation step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Number of strategies retrieved per turn for context injection. Default: `3`.
    pub top_k: usize,
    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
    pub store_limit: usize,
    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
    ///
    /// NOTE(review): this description overlaps with `self_judge_window` below; which of the
    /// two limits is applied at which pipeline stage is not visible from this file — confirm
    /// against the consumer before relying on either.
    pub max_messages: usize,
    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
    pub max_message_chars: usize,
    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
    pub context_budget_tokens: usize,
    /// Minimum number of messages required before self-judge fires. Default: `2`.
    pub min_messages: usize,
    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
    pub extraction_timeout_secs: u64,
    /// Timeout in seconds for the distillation LLM call. Default: `30`.
    pub distill_timeout_secs: u64,
    /// Maximum number of recent messages passed to the self-judge evaluator.
    /// Narrowing to the last user+assistant pair improves classification accuracy.
    /// Default: `2`.
    pub self_judge_window: usize,
    /// Minimum characters in the assistant response to trigger self-judge.
    /// Short or trivial responses are skipped. Default: `50`.
    pub min_assistant_chars: usize,
}
3947
3948impl Default for ReasoningConfig {
3949    fn default() -> Self {
3950        Self {
3951            enabled: false,
3952            extract_provider: ProviderName::default(),
3953            distill_provider: ProviderName::default(),
3954            top_k: 3,
3955            store_limit: 1000,
3956            max_messages: 6,
3957            max_message_chars: 2000,
3958            context_budget_tokens: 500,
3959            min_messages: 2,
3960            extraction_timeout_secs: 30,
3961            distill_timeout_secs: 30,
3962            self_judge_window: 2,
3963            min_assistant_chars: 50,
3964        }
3965    }
3966}
3967
3968// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
3969
/// Eviction policy variant.
///
/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid
/// (variant names are lowercased via `rename_all = "lowercase"`).
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a
/// wildcard arm and keep compiling when further policies are added.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum EvictionPolicy {
    /// Ebbinghaus forgetting-curve eviction. This is the default variant.
    #[default]
    Ebbinghaus,
}
3981
3982/// Configuration for the memory eviction policy.
3983///
3984/// Controls which policy runs during the periodic sweep and how many entries
3985/// are retained. `zeph-memory` re-exports this type from here.
3986#[derive(Debug, Clone, Deserialize, Serialize)]
3987pub struct EvictionConfig {
3988    /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
3989    pub policy: EvictionPolicy,
3990    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
3991    pub max_entries: usize,
3992    /// How often to run the eviction sweep, in seconds.
3993    pub sweep_interval_secs: u64,
3994}
3995
3996impl Default for EvictionConfig {
3997    fn default() -> Self {
3998        Self {
3999            policy: EvictionPolicy::Ebbinghaus,
4000            max_entries: 0,
4001            sweep_interval_secs: 3600,
4002        }
4003    }
4004}
4005
4006// ── Compression guidelines config (moved from zeph-memory) ───────────────────
4007
/// Configuration for ACON failure-driven compression guidelines.
///
/// `zeph-memory` re-exports this type from here.
///
/// The struct carries `#[serde(default)]`, so keys omitted from the table take their
/// values from the `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionGuidelinesConfig {
    /// Enable the feature. Default: `false`.
    pub enabled: bool,
    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
    pub update_threshold: u16,
    /// Maximum token budget for the guidelines document. Default: `500`.
    pub max_guidelines_tokens: usize,
    /// Maximum failure pairs consumed per update cycle. Default: `10`.
    pub max_pairs_per_update: usize,
    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
    pub detection_window_turns: u64,
    /// Interval in seconds between background updater checks. Default: `300`.
    pub update_interval_secs: u64,
    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
    pub max_stored_pairs: usize,
    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
    /// `None` (or `Some("")`) falls back to the primary provider. Default: `None`.
    ///
    /// The key is left out of serialized output when the value is `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub guidelines_provider: Option<ProviderName>,
    /// Maintain separate guideline documents per content category. Default: `false`.
    #[serde(default)]
    pub categorized_guidelines: bool,
}
4036
4037impl Default for CompressionGuidelinesConfig {
4038    fn default() -> Self {
4039        Self {
4040            enabled: false,
4041            update_threshold: 5,
4042            max_guidelines_tokens: 500,
4043            max_pairs_per_update: 10,
4044            detection_window_turns: 10,
4045            update_interval_secs: 300,
4046            max_stored_pairs: 100,
4047            guidelines_provider: None,
4048            categorized_guidelines: false,
4049        }
4050    }
4051}
4052
4053// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
4054
/// Functional category of a compaction probe question.
///
/// `zeph-memory` re-exports this type from here.
///
/// Variants serialize as their lowercase names (`"recall"`, `"artifact"`,
/// `"continuation"`, `"decision"`). The type derives `Eq + Hash`, which lets it serve
/// as a map key (see `CompactionProbeConfig::category_weights`). It is
/// `#[non_exhaustive]`, so external `match` expressions need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ProbeCategory {
    /// Did specific facts survive? (file paths, function names, values, decisions)
    Recall,
    /// Does the agent know which files/tools/URLs it used?
    Artifact,
    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
    Continuation,
    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
    Decision,
}
4071
/// Configuration for the compaction probe.
///
/// `zeph-memory` re-exports this type from here.
///
/// The struct carries `#[serde(default)]`, so keys omitted from the table take their
/// values from the `Default` implementation.
///
/// NOTE(review): nothing in this type checks that `hard_fail_threshold` is below
/// `threshold`, or that either lies in `[0, 1]` — confirm whether the consumer validates.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionProbeConfig {
    /// Enable compaction probe validation. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for probe LLM calls.
    /// `None` (or `Some("")`) uses the summary provider. Default: `None`.
    ///
    /// The key is left out of serialized output when the value is `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub probe_provider: Option<ProviderName>,
    /// Minimum score to pass without warnings. Default: `0.6`.
    pub threshold: f32,
    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
    pub hard_fail_threshold: f32,
    /// Maximum number of probe questions to generate. Default: `5`.
    pub max_questions: usize,
    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
    pub timeout_secs: u64,
    /// Optional per-category weight multipliers for the overall score, keyed by
    /// [`ProbeCategory`]. Default: `None`.
    #[serde(default)]
    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
}
4096
4097impl Default for CompactionProbeConfig {
4098    fn default() -> Self {
4099        Self {
4100            enabled: false,
4101            probe_provider: None,
4102            threshold: 0.6,
4103            hard_fail_threshold: 0.35,
4104            max_questions: 5,
4105            timeout_secs: 15,
4106            category_weights: None,
4107        }
4108    }
4109}
4110
4111// ── MemCoT semantic state config ─────────────────────────────────────────────
4112
/// `MemCoT` semantic-state distillation configuration.
///
/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
/// summarizing conceptual progress across turns. This buffer is injected into graph
/// recall queries to improve retrieval relevance.
///
/// All LLM work (distillation) runs asynchronously — never on the turn thread.
/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
///
/// The struct carries `#[serde(default)]`, so keys omitted from the table (for instance
/// `zoom_out_neighbor_cap` and `fast_tier_models`, which the example below leaves out)
/// take their values from the `Default` implementation.
///
/// # Config example
///
/// ```toml
/// [memory.memcot]
/// enabled = true
/// distill_provider = "fast"
/// distill_timeout_secs = 5
/// min_assistant_chars = 200
/// min_distill_interval_secs = 30
/// max_distills_per_session = 50
/// max_state_chars = 800
/// recall_view = "head"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MemCotConfig {
    /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
    ///
    /// When `false`, the accumulator is never allocated and no LLM calls are made.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for distillation.
    ///
    /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
    /// A startup warning is emitted when the resolved model does not look fast-tier.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Timeout in seconds for each distillation LLM call. Default: `5`.
    pub distill_timeout_secs: u64,
    /// Minimum characters in the assistant response to trigger distillation.
    /// Short or trivial replies are skipped. Default: `200`.
    pub min_assistant_chars: usize,
    /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
    ///
    /// Prevents runaway costs on long sessions with rapid turns.
    /// Sending `/new` resets this counter.
    pub min_distill_interval_secs: u64,
    /// Maximum distillation spawns per conversation session. Default: `50`.
    ///
    /// Once this cap is reached the accumulator stops distilling for the rest of the
    /// session. Counter is reset when the user sends `/new`.
    pub max_distills_per_session: u64,
    /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
    /// Default: `800`.
    pub max_state_chars: usize,
    /// Recall view applied when `MemCoT` is active. Default: `Head`.
    ///
    /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
    /// - `zoom_in`: adds source-message provenance to each returned fact.
    /// - `zoom_out`: expands 1-hop neighbors per returned fact.
    ///
    /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
    pub recall_view: RecallViewConfig,
    /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
    pub zoom_out_neighbor_cap: usize,
    /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
    /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
    ///
    /// The key is left out of serialized output when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub fast_tier_models: Vec<String>,
}
4181
/// Recall view variant exposed in config.
///
/// Maps 1-to-1 to `zeph_memory::RecallView`.
///
/// Variants serialize in `snake_case`: `"head"`, `"zoom_in"`, `"zoom_out"`.
/// The enum is `#[non_exhaustive]`, so external `match` expressions need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum RecallViewConfig {
    /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
    /// This is the default variant.
    #[default]
    Head,
    /// Adds source-message provenance to each returned fact.
    ZoomIn,
    /// Expands 1-hop neighbor facts per returned fact.
    ZoomOut,
}
4197
4198impl Default for MemCotConfig {
4199    fn default() -> Self {
4200        Self {
4201            enabled: false,
4202            distill_provider: ProviderName::default(),
4203            distill_timeout_secs: 5,
4204            min_assistant_chars: 200,
4205            min_distill_interval_secs: 30,
4206            max_distills_per_session: 50,
4207            max_state_chars: 800,
4208            recall_view: RecallViewConfig::Head,
4209            zoom_out_neighbor_cap: 3,
4210            fast_tier_models: Vec::new(),
4211        }
4212    }
4213}
4214
/// `OmniMem` retrieval failure tracking configuration (issue #3576).
///
/// Controls the async logger that records no-hit and low-confidence recall events
/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
///
/// Every key is optional: the struct carries `#[serde(default)]`, and each tunable
/// additionally names its own default function, which the `Default` implementation
/// calls as well.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalFailuresConfig {
    /// Enable retrieval failure logging. Default: `false`.
    pub enabled: bool,
    /// Composite recall score below which a result is classified as low-confidence.
    ///
    /// The threshold applies to the post-reranking composite score (which incorporates
    /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
    /// the scoring pipeline in use. Default: `0.3`.
    #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
    pub low_confidence_threshold: f32,
    /// Days to retain failure records before automatic cleanup. Default: `90`.
    #[serde(default = "default_retrieval_failures_retention_days")]
    pub retention_days: u32,
    /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
    #[serde(default = "default_retrieval_failures_channel_capacity")]
    pub channel_capacity: usize,
    /// Maximum records collected before flushing a batch INSERT. Default: `16`.
    #[serde(default = "default_retrieval_failures_batch_size")]
    pub batch_size: usize,
    /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
    #[serde(default = "default_retrieval_failures_flush_interval_ms")]
    pub flush_interval_ms: u64,
}
4244
4245impl Default for RetrievalFailuresConfig {
4246    fn default() -> Self {
4247        Self {
4248            enabled: false,
4249            low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
4250            retention_days: default_retrieval_failures_retention_days(),
4251            channel_capacity: default_retrieval_failures_channel_capacity(),
4252            batch_size: default_retrieval_failures_batch_size(),
4253            flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
4254        }
4255    }
4256}
4257
4258// ── TrajectoryRiskAccumulator config (spec 004-16) ─────────────────────────────
4259
4260fn validate_tra_nonneg_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
4261where
4262    D: serde::Deserializer<'de>,
4263{
4264    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
4265    if value.is_nan() || value.is_infinite() || value < 0.0 {
4266        return Err(serde::de::Error::custom(
4267            "signal weight and severity multiplier values must be finite and non-negative",
4268        ));
4269    }
4270    Ok(value)
4271}
4272
/// Per-signal-type base weights for the trajectory risk accumulator.
///
/// Each weight is expected to lie in `(0.0, 1.0]` and is multiplied by the severity
/// multiplier before being added to `trajectory_risk`.
///
/// Deserialization itself is more permissive than that range: every field goes through
/// `validate_tra_nonneg_weight`, which only rejects NaN, infinite, and negative values,
/// so `0.0` and values above `1.0` are accepted. A key that is omitted takes the
/// per-field default listed below.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory.signal_weights]
/// prompt_injection = 0.6
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectorySignalWeights {
    /// Weight for `PolicyViolation` signals. Default: `0.30`.
    #[serde(
        default = "default_sw_policy_violation",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub policy_violation: f64,
    /// Weight for `PromptInjectionPattern` signals. Default: `0.50`.
    #[serde(
        default = "default_sw_prompt_injection",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub prompt_injection: f64,
    /// Weight for `ToolChainAnomaly` signals. Default: `0.25`.
    #[serde(
        default = "default_sw_tool_chain_anomaly",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub tool_chain_anomaly: f64,
    /// Weight for `ConfidenceDrop` signals. Default: `0.15`.
    #[serde(
        default = "default_sw_confidence_drop",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub confidence_drop: f64,
}
4311
/// Serde default for `TrajectorySignalWeights::policy_violation`.
fn default_sw_policy_violation() -> f64 {
    const POLICY_VIOLATION_WEIGHT: f64 = 0.30;
    POLICY_VIOLATION_WEIGHT
}
/// Serde default for `TrajectorySignalWeights::prompt_injection`.
fn default_sw_prompt_injection() -> f64 {
    const PROMPT_INJECTION_WEIGHT: f64 = 0.50;
    PROMPT_INJECTION_WEIGHT
}
/// Serde default for `TrajectorySignalWeights::tool_chain_anomaly`.
fn default_sw_tool_chain_anomaly() -> f64 {
    const TOOL_CHAIN_ANOMALY_WEIGHT: f64 = 0.25;
    TOOL_CHAIN_ANOMALY_WEIGHT
}
/// Serde default for `TrajectorySignalWeights::confidence_drop`.
fn default_sw_confidence_drop() -> f64 {
    const CONFIDENCE_DROP_WEIGHT: f64 = 0.15;
    CONFIDENCE_DROP_WEIGHT
}
4324
4325impl Default for TrajectorySignalWeights {
4326    fn default() -> Self {
4327        Self {
4328            policy_violation: default_sw_policy_violation(),
4329            prompt_injection: default_sw_prompt_injection(),
4330            tool_chain_anomaly: default_sw_tool_chain_anomaly(),
4331            confidence_drop: default_sw_confidence_drop(),
4332        }
4333    }
4334}
4335
/// Per-severity multipliers applied on top of signal base weights.
///
/// Every field is validated on deserialization by `validate_tra_nonneg_weight`, which
/// rejects NaN, infinite, and negative values. A key that is omitted takes the
/// per-field default listed below.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory.severity_multipliers]
/// high = 3.0
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectorySeverityMultipliers {
    /// Multiplier for low-severity signals. Default: `0.5`.
    #[serde(
        default = "default_sev_low",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub low: f64,
    /// Multiplier for medium-severity signals. Default: `1.0`.
    #[serde(
        default = "default_sev_medium",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub medium: f64,
    /// Multiplier for high-severity signals. Default: `2.0`.
    #[serde(
        default = "default_sev_high",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub high: f64,
}
4365
/// Serde default for `TrajectorySeverityMultipliers::low`.
fn default_sev_low() -> f64 {
    const LOW_SEVERITY_MULTIPLIER: f64 = 0.5;
    LOW_SEVERITY_MULTIPLIER
}
/// Serde default for `TrajectorySeverityMultipliers::medium`.
fn default_sev_medium() -> f64 {
    const MEDIUM_SEVERITY_MULTIPLIER: f64 = 1.0;
    MEDIUM_SEVERITY_MULTIPLIER
}
/// Serde default for `TrajectorySeverityMultipliers::high`.
fn default_sev_high() -> f64 {
    const HIGH_SEVERITY_MULTIPLIER: f64 = 2.0;
    HIGH_SEVERITY_MULTIPLIER
}
4375
4376impl Default for TrajectorySeverityMultipliers {
4377    fn default() -> Self {
4378        Self {
4379            low: default_sev_low(),
4380            medium: default_sev_medium(),
4381            high: default_sev_high(),
4382        }
4383    }
4384}
4385
/// Configuration for the MAGE trajectory risk accumulator (spec 004-16).
///
/// Controls how per-turn safety signals accumulate into a session-level risk score
/// and when tool execution is blocked or escalated.
///
/// There is no container-level `#[serde(default)]` here; instead every field declares
/// its own default, so any subset of keys (including none) deserializes.
///
/// NOTE(review): the two thresholds are plain `f64` fields with no validator attached in
/// this type, so nothing here enforces `escalation_threshold <= risk_threshold` or a
/// `[0, 1]` range — confirm whether the consumer checks this.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory]
/// enabled = true
/// risk_threshold = 0.75
/// escalation_threshold = 0.50
/// risk_halflife_turns = 10
/// signal_history_cap = 200
/// tui_show_risk_gauge = true
/// reset_on_compaction = false
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectoryRiskAccumulatorConfig {
    /// Enable shadow memory. When `false`, `TrajectoryRiskAccumulator` is a zero-cost noop.
    /// Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Block tool execution when `trajectory_risk >= risk_threshold`. Default: `0.75`.
    #[serde(default = "default_tra_risk_threshold")]
    pub risk_threshold: f64,
    /// Escalate to human confirmation when risk is in `[escalation_threshold, risk_threshold)`.
    /// Default: `0.50`.
    #[serde(default = "default_tra_escalation_threshold")]
    pub escalation_threshold: f64,
    /// Number of turns after which accumulated risk halves (exponential decay). Default: `10`.
    #[serde(default = "default_tra_risk_halflife_turns")]
    pub risk_halflife_turns: u32,
    /// Maximum number of signal events kept in the ring buffer. Default: `200`.
    #[serde(default = "default_tra_signal_history_cap")]
    pub signal_history_cap: usize,
    /// Show a risk gauge in the TUI security panel when the TUI is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub tui_show_risk_gauge: bool,
    /// Reset `trajectory_risk` to zero when a context compaction occurs. Default: `false`.
    #[serde(default)]
    pub reset_on_compaction: bool,
    /// Per-signal-type base weights. Defaults to `TrajectorySignalWeights::default()`.
    #[serde(default)]
    pub signal_weights: TrajectorySignalWeights,
    /// Per-severity multipliers applied on top of signal weights.
    /// Defaults to `TrajectorySeverityMultipliers::default()`.
    #[serde(default)]
    pub severity_multipliers: TrajectorySeverityMultipliers,
}
4434
/// Serde default for `TrajectoryRiskAccumulatorConfig::risk_threshold`.
fn default_tra_risk_threshold() -> f64 {
    const BLOCK_THRESHOLD: f64 = 0.75;
    BLOCK_THRESHOLD
}
/// Serde default for `TrajectoryRiskAccumulatorConfig::escalation_threshold`.
fn default_tra_escalation_threshold() -> f64 {
    const ESCALATION_THRESHOLD: f64 = 0.50;
    ESCALATION_THRESHOLD
}
/// Serde default for `TrajectoryRiskAccumulatorConfig::risk_halflife_turns`.
fn default_tra_risk_halflife_turns() -> u32 {
    const HALFLIFE_TURNS: u32 = 10;
    HALFLIFE_TURNS
}
/// Serde default for `TrajectoryRiskAccumulatorConfig::signal_history_cap`.
fn default_tra_signal_history_cap() -> usize {
    const SIGNAL_HISTORY_CAP: usize = 200;
    SIGNAL_HISTORY_CAP
}
4447
4448impl Default for TrajectoryRiskAccumulatorConfig {
4449    fn default() -> Self {
4450        Self {
4451            enabled: false,
4452            risk_threshold: default_tra_risk_threshold(),
4453            escalation_threshold: default_tra_escalation_threshold(),
4454            risk_halflife_turns: default_tra_risk_halflife_turns(),
4455            signal_history_cap: default_tra_signal_history_cap(),
4456            tui_show_risk_gauge: true,
4457            reset_on_compaction: false,
4458            signal_weights: TrajectorySignalWeights::default(),
4459            severity_multipliers: TrajectorySeverityMultipliers::default(),
4460        }
4461    }
4462}
4463
/// Unit tests for [`MemCotConfig`] defaults and TOML deserialization.
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// `Default` must leave the feature off and match every documented default value.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key supplied in the TOML must land in the matching field; the one key that
    /// is left out (`fast_tier_models`) must fall back to its default.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        assert!(cfg.fast_tier_models.is_empty());
    }
}
4505
/// Unit tests for `ApexMemConfig`, `WriteQualityGateConfig`, and a handful of
/// `MemoryConfig` keys (provider overrides and the five-signal weights).
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    #[test]
    fn apex_mem_config_serde_round_trip() {
        let toml = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
    }

    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert!((cfg.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert!((cfg.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(cfg.reference_check_lang_en);
    }

    /// Every key supplied in the TOML must land in the matching field.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let toml = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert!((cfg.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        assert!((cfg.information_value_weight - 0.5).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.50).abs() < f32::EPSILON);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert!((cfg.llm_weight - 0.3).abs() < f32::EPSILON);
        assert!(!cfg.reference_check_lang_en);
    }

    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }

    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    #[test]
    fn five_signal_config_default_is_disabled() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert!(!cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_recency - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_relevance - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_frequency).abs() < 1e-9);
        assert!((cfg.five_signal.w_causal).abs() < 1e-9);
        assert!((cfg.five_signal.w_novelty).abs() < 1e-9);
    }

    /// All five weights supplied in the TOML must land in the matching fields.
    #[test]
    fn five_signal_config_toml_roundtrip() {
        let toml = r"
            history_limit = 50
            [five_signal]
            enabled = true
            w_recency = 0.35
            w_relevance = 0.35
            w_frequency = 0.15
            w_causal = 0.10
            w_novelty = 0.05
        ";
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert!(cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_recency - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_relevance - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_frequency - 0.15).abs() < 1e-9);
        assert!((cfg.five_signal.w_causal - 0.10).abs() < 1e-9);
        assert!((cfg.five_signal.w_novelty - 0.05).abs() < 1e-9);
    }

    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            compaction_provider = "mid"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }
}
zeph_config/memory.rs

zeph_config/
memory.rs