Skip to main content

zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::defaults::{default_sqlite_path_field, default_true};
7use crate::providers::ProviderName;
8
// ---------------------------------------------------------------------------
// serde default helpers.
//
// Each `default_*` function backs a `#[serde(default = "...")]` attribute on
// the config structs below; the hand-written `Default` impls call the same
// functions so a missing TOML table and an omitted field yield identical
// values. These are functions rather than consts because serde's
// `default = "path"` attribute requires a callable path.
// ---------------------------------------------------------------------------

fn default_sqlite_pool_size() -> u32 {
    5
}

fn default_max_history() -> usize {
    100
}

fn default_title_max_chars() -> usize {
    60
}

fn default_document_collection() -> String {
    "zeph_documents".into()
}

fn default_document_chunk_size() -> usize {
    1000
}

fn default_document_chunk_overlap() -> usize {
    100
}

fn default_document_top_k() -> usize {
    3
}

fn default_autosave_min_length() -> usize {
    20
}

fn default_tool_call_cutoff() -> usize {
    6
}

// NOTE(review): presumably a multiplier where 1.0 means "no extra margin";
// confirm against the token-budget consumer.
fn default_token_safety_margin() -> f32 {
    1.0
}

fn default_redact_credentials() -> bool {
    true
}

// NOTE(review): 6334 is presumably Qdrant's gRPC port (REST is 6333) —
// confirm the client connects via gRPC.
fn default_qdrant_url() -> String {
    "http://localhost:6334".into()
}

fn default_summarization_threshold() -> usize {
    50
}

// NOTE(review): 0 presumably means "no fixed budget" / derive from the model
// (see `auto_budget` on `MemoryConfig`) — confirm against the consumer.
fn default_context_budget_tokens() -> usize {
    0
}

fn default_soft_compaction_threshold() -> f32 {
    0.60
}

fn default_hard_compaction_threshold() -> f32 {
    0.90
}

fn default_compaction_preserve_tail() -> usize {
    6
}

fn default_compaction_cooldown_turns() -> u8 {
    2
}

fn default_auto_budget() -> bool {
    true
}

fn default_prune_protect_tokens() -> usize {
    40_000
}

fn default_cross_session_score_threshold() -> f32 {
    0.35
}

fn default_temporal_decay_half_life_days() -> u32 {
    30
}

fn default_mmr_lambda() -> f32 {
    0.7
}

fn default_semantic_enabled() -> bool {
    true
}

fn default_recall_limit() -> usize {
    5
}

// Hybrid retrieval weights: vector + keyword defaults sum to 1.0.
fn default_vector_weight() -> f64 {
    0.7
}

fn default_keyword_weight() -> f64 {
    0.3
}

fn default_graph_max_entities_per_message() -> usize {
    10
}

fn default_graph_max_edges_per_message() -> usize {
    15
}

fn default_graph_community_refresh_interval() -> usize {
    100
}

fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192
}

fn default_graph_community_summary_concurrency() -> usize {
    4
}

fn default_lpa_edge_chunk_size() -> usize {
    10_000
}

fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}

fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70
}

fn default_graph_extraction_timeout_secs() -> u64 {
    15
}

fn default_graph_max_hops() -> u32 {
    2
}

fn default_graph_recall_limit() -> usize {
    10
}

fn default_graph_expired_edge_retention_days() -> u32 {
    90
}

// NOTE(review): 0.0 presumably disables graph temporal decay (the validator
// accepts [0.0, 10.0]) — confirm against the decay consumer.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}

fn default_graph_edge_history_limit() -> usize {
    100
}

fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}

fn default_spreading_activation_max_hops() -> u32 {
    3
}

fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}

fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}

fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}

fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1000
}

fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}

fn default_note_linking_top_k() -> usize {
    10
}

fn default_note_linking_timeout_secs() -> u64 {
    5
}

fn default_shutdown_summary() -> bool {
    true
}

fn default_shutdown_summary_min_messages() -> usize {
    4
}

fn default_shutdown_summary_max_messages() -> usize {
    20
}

fn default_shutdown_summary_timeout_secs() -> u64 {
    10
}
224
225fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
226where
227    D: serde::Deserializer<'de>,
228{
229    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
230    if value.is_nan() || value.is_infinite() {
231        return Err(serde::de::Error::custom(
232            "similarity_threshold must be a finite number",
233        ));
234    }
235    if !(0.5..=1.0).contains(&value) {
236        return Err(serde::de::Error::custom(
237            "similarity_threshold must be in [0.5, 1.0]",
238        ));
239    }
240    Ok(value)
241}
242
243fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
244where
245    D: serde::Deserializer<'de>,
246{
247    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
248    if value < 2 {
249        return Err(serde::de::Error::custom(
250            "promotion_min_sessions must be >= 2",
251        ));
252    }
253    Ok(value)
254}
255
256fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
257where
258    D: serde::Deserializer<'de>,
259{
260    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
261    if value == 0 {
262        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
263    }
264    Ok(value)
265}
266
// Defaults for `TierConfig` (`[memory.tiers]`); the per-field rationale lives
// on the struct's field docs below.

fn default_tier_promotion_min_sessions() -> u32 {
    3
}

fn default_tier_similarity_threshold() -> f32 {
    0.92
}

fn default_tier_sweep_interval_secs() -> u64 {
    3600
}

fn default_tier_sweep_batch_size() -> usize {
    100
}

fn default_scene_similarity_threshold() -> f32 {
    0.80
}

fn default_scene_batch_size() -> usize {
    50
}
290
291fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
292where
293    D: serde::Deserializer<'de>,
294{
295    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
296    if value.is_nan() || value.is_infinite() {
297        return Err(serde::de::Error::custom(
298            "scene_similarity_threshold must be a finite number",
299        ));
300    }
301    if !(0.5..=1.0).contains(&value) {
302        return Err(serde::de::Error::custom(
303            "scene_similarity_threshold must be in [0.5, 1.0]",
304        ));
305    }
306    Ok(value)
307}
308
309fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
310where
311    D: serde::Deserializer<'de>,
312{
313    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
314    if value == 0 {
315        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
316    }
317    Ok(value)
318}
319
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// # Validation
///
/// Constraints enforced at deserialization time:
/// - `similarity_threshold` in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// The interval fields are not range-checked; `0` is accepted as-is.
/// NOTE(review): the container-level `#[serde(default)]` means an absent field
/// bypasses its `deserialize_with` validator (the `Default` impl's value is
/// used directly) — by construction those defaults satisfy the constraints.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
366
// Scene consolidation sweeps half as often as tier promotion (7200 vs 3600 s).
fn default_scene_sweep_interval_secs() -> u64 {
    7200
}
370
371impl Default for TierConfig {
372    fn default() -> Self {
373        Self {
374            enabled: false,
375            promotion_min_sessions: default_tier_promotion_min_sessions(),
376            similarity_threshold: default_tier_similarity_threshold(),
377            sweep_interval_secs: default_tier_sweep_interval_secs(),
378            sweep_batch_size: default_tier_sweep_batch_size(),
379            scene_enabled: false,
380            scene_similarity_threshold: default_scene_similarity_threshold(),
381            scene_batch_size: default_scene_batch_size(),
382            scene_provider: ProviderName::default(),
383            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
384        }
385    }
386}
387
388fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
389where
390    D: serde::Deserializer<'de>,
391{
392    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
393    if value.is_nan() || value.is_infinite() {
394        return Err(serde::de::Error::custom(
395            "temporal_decay_rate must be a finite number",
396        ));
397    }
398    if !(0.0..=10.0).contains(&value) {
399        return Err(serde::de::Error::custom(
400            "temporal_decay_rate must be in [0.0, 10.0]",
401        ));
402    }
403    Ok(value)
404}
405
406fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
407where
408    D: serde::Deserializer<'de>,
409{
410    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
411    if value.is_nan() || value.is_infinite() {
412        return Err(serde::de::Error::custom(
413            "similarity_threshold must be a finite number",
414        ));
415    }
416    if !(0.0..=1.0).contains(&value) {
417        return Err(serde::de::Error::custom(
418            "similarity_threshold must be in [0.0, 1.0]",
419        ));
420    }
421    Ok(value)
422}
423
424fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
425where
426    D: serde::Deserializer<'de>,
427{
428    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
429    if value.is_nan() || value.is_infinite() {
430        return Err(serde::de::Error::custom(
431            "importance_weight must be a finite number",
432        ));
433    }
434    if value < 0.0 {
435        return Err(serde::de::Error::custom(
436            "importance_weight must be non-negative",
437        ));
438    }
439    if value > 1.0 {
440        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
441    }
442    Ok(value)
443}
444
// NOTE(review): no field in this chunk references this helper (or
// `validate_importance_weight`'s default) — presumably consumed by a config
// struct outside this view; confirm it is not dead code.
fn default_importance_weight() -> f64 {
    0.15
}
448
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Constraints enforced at deserialization time:
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Constraints enforced elsewhere:
/// - `activation_threshold < inhibition_threshold` — checked by
///   [`Self::validate`], which callers must invoke after deserialization
/// - `recall_timeout_ms` — values of 0 are clamped to 100ms at runtime
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    // Field-level default is redundant with the container `#[serde(default)]`
    // but harmless; kept for explicitness.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
491
492fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
493where
494    D: serde::Deserializer<'de>,
495{
496    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
497    if value.is_nan() || value.is_infinite() {
498        return Err(serde::de::Error::custom(
499            "decay_lambda must be a finite number",
500        ));
501    }
502    if !(value > 0.0 && value <= 1.0) {
503        return Err(serde::de::Error::custom(
504            "decay_lambda must be in (0.0, 1.0]",
505        ));
506    }
507    Ok(value)
508}
509
510fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
511where
512    D: serde::Deserializer<'de>,
513{
514    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
515    if value == 0 {
516        return Err(serde::de::Error::custom("max_hops must be >= 1"));
517    }
518    Ok(value)
519}
520
521impl SpreadingActivationConfig {
522    /// Validate cross-field constraints that cannot be expressed in per-field validators.
523    ///
524    /// # Errors
525    ///
526    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
527    pub fn validate(&self) -> Result<(), String> {
528        if self.activation_threshold >= self.inhibition_threshold {
529            return Err(format!(
530                "activation_threshold ({}) must be < inhibition_threshold ({})",
531                self.activation_threshold, self.inhibition_threshold
532            ));
533        }
534        Ok(())
535    }
536}
537
// Defaults for the hybrid seed-ranking knobs on `SpreadingActivationConfig`.

fn default_seed_structural_weight() -> f32 {
    0.4
}

fn default_seed_community_cap() -> usize {
    3
}
545
546impl Default for SpreadingActivationConfig {
547    fn default() -> Self {
548        Self {
549            enabled: false,
550            decay_lambda: default_spreading_activation_decay_lambda(),
551            max_hops: default_spreading_activation_max_hops(),
552            activation_threshold: default_spreading_activation_activation_threshold(),
553            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
554            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
555            seed_structural_weight: default_seed_structural_weight(),
556            seed_community_cap: default_seed_community_cap(),
557            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
558        }
559    }
560}
561
/// Kumiho belief revision configuration.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    /// Validated to be finite and in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}

fn default_belief_revision_similarity_threshold() -> f32 {
    0.85
}
577
578impl Default for BeliefRevisionConfig {
579    fn default() -> Self {
580        Self {
581            enabled: false,
582            similarity_threshold: default_belief_revision_similarity_threshold(),
583        }
584    }
585}
586
/// D-MEM RPE-based tiered graph extraction routing configuration.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    // NOTE(review): reuses the shared validator, so an out-of-range value here
    // reports "similarity_threshold must be ..." — misleading for a field
    // named `threshold`; consider a dedicated validator.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}

fn default_rpe_threshold() -> f32 {
    0.3
}

fn default_rpe_max_skip_turns() -> u32 {
    5
}
608
609impl Default for RpeConfig {
610    fn default() -> Self {
611        Self {
612            enabled: false,
613            threshold: default_rpe_threshold(),
614            max_skip_turns: default_rpe_max_skip_turns(),
615        }
616    }
617}
618
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    /// Validated to be finite and in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
637
638impl Default for NoteLinkingConfig {
639    fn default() -> Self {
640        Self {
641            enabled: false,
642            similarity_threshold: default_note_linking_similarity_threshold(),
643            top_k: default_note_linking_top_k(),
644            timeout_secs: default_note_linking_timeout_secs(),
645        }
646    }
647}
648
/// Vector backend selector for embedding storage.
///
/// Serialized as lowercase strings (`"qdrant"` / `"sqlite"`); `Sqlite` is the
/// default, so no external vector database is required out of the box.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    Qdrant,
    #[default]
    Sqlite,
}
657
658impl VectorBackend {
659    /// Return the lowercase identifier string for this backend.
660    ///
661    /// # Examples
662    ///
663    /// ```
664    /// use zeph_config::VectorBackend;
665    ///
666    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
667    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
668    /// ```
669    #[must_use]
670    pub fn as_str(&self) -> &'static str {
671        match self {
672            Self::Qdrant => "qdrant",
673            Self::Sqlite => "sqlite",
674        }
675    }
676}
677
/// Memory subsystem configuration, nested under `[memory]` in TOML.
///
/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
/// multi-tier promotion, and all memory-related background tasks.
///
/// `history_limit` is the only required field; everything else has a serde
/// default.
///
/// # Example (TOML)
///
/// ```toml
/// [memory]
/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
/// qdrant_url = "http://localhost:6334"
/// history_limit = 50
/// summarization_threshold = 50
/// auto_budget = true
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct MemoryConfig {
    /// Compression guidelines config from `zeph_memory`.
    #[serde(default)]
    pub compression_guidelines: zeph_memory::CompressionGuidelinesConfig,
    /// Path to the `SQLite` database file. Default: see `default_sqlite_path_field`.
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// Required — the only `[memory]` key with no serde default.
    // NOTE(review): exact semantics (messages? turns?) not visible in this
    // file — confirm against the consumer before documenting further.
    pub history_limit: u32,
    /// Qdrant endpoint URL. Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    /// `[memory.semantic]` vector recall settings.
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Default: `0` (presumably "derive from model limits"; see `auto_budget`).
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Soft compaction trigger as a fraction of budget. Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Hard compaction trigger as a fraction of budget. Default: `0.90`.
    /// Accepts the legacy TOML key `compaction_threshold` as an alias.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Messages preserved at the tail during compaction. Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Minimum turns between compactions. Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Tokens protected from pruning. Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Minimum score for cross-session recall hits. Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Embedding storage backend. Default: [`VectorBackend::Sqlite`].
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Default: `1.0` (presumably a neutral multiplier — confirm consumer).
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Minimum length for autosaved assistant messages. Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Default: `6`.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// `SQLite` connection pool size. Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    /// `[memory.sessions]` list/auto-title settings.
    #[serde(default)]
    pub sessions: SessionsConfig,
    /// `[memory.documents]` ingestion and RAG settings.
    #[serde(default)]
    pub documents: DocumentConfig,
    /// Eviction config from `zeph_memory`.
    #[serde(default)]
    pub eviction: zeph_memory::EvictionConfig,
    /// `[memory.compression]` settings (type defined elsewhere in this crate).
    #[serde(default)]
    pub compression: CompressionConfig,
    /// `[memory.sidequest]` settings (type defined elsewhere in this crate).
    #[serde(default)]
    pub sidequest: SidequestConfig,
    /// `[memory.graph]` entity-graph settings (type defined elsewhere in this crate).
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `10`.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
    /// A-MAC adaptive memory admission control.
    ///
    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
    /// if its composite admission score falls below the configured threshold.
    #[serde(default)]
    pub admission: AdmissionConfig,
    /// Session digest generation at session end. Default: disabled.
    #[serde(default)]
    pub digest: DigestConfig,
    /// Context assembly strategy. Default: `full_history` (current behavior).
    #[serde(default)]
    pub context_strategy: ContextStrategy,
    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
    #[serde(default = "default_crossover_turn_threshold")]
    pub crossover_turn_threshold: u32,
    /// All-Mem lifelong memory consolidation sweep.
    ///
    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
    /// messages and merges them into consolidated entries via LLM.
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// `SleepGate` forgetting sweep (#2397).
    ///
    /// When `forgetting.enabled = true`, a background loop periodically decays importance
    /// scores and prunes memories below the forgetting floor.
    #[serde(default)]
    pub forgetting: ForgettingConfig,
    /// `PostgreSQL` connection URL.
    ///
    /// Used when the binary is compiled with `--features postgres`.
    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
    /// Example: `postgres://user:pass@localhost:5432/zeph`
    /// Default: `None` (uses `sqlite_path` instead).
    #[serde(default)]
    pub database_url: Option<String>,
    /// Cost-sensitive store routing (#2444).
    ///
    /// When `store_routing.enabled = true`, query intent is classified and routed to
    /// the cheapest sufficient backend instead of querying all stores on every turn.
    #[serde(default)]
    pub store_routing: StoreRoutingConfig,
    /// Persona memory layer (#2461).
    ///
    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
    /// from conversation history and injected into context after the system prompt.
    #[serde(default)]
    pub persona: PersonaConfig,
    /// Trajectory-informed memory (#2498).
    #[serde(default)]
    pub trajectory: TrajectoryConfig,
    /// Category-aware memory (#2428).
    #[serde(default)]
    pub category: CategoryConfig,
    /// `TiMem` temporal-hierarchical memory tree (#2262).
    #[serde(default)]
    pub tree: TreeConfig,
    /// Time-based microcompact (#2699).
    ///
    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
    /// from context when the session has been idle longer than `gap_threshold_minutes`.
    #[serde(default)]
    pub microcompact: MicrocompactConfig,
    /// autoDream background memory consolidation (#2697).
    ///
    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
    #[serde(default)]
    pub autodream: AutoDreamConfig,
    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
    ///
    /// Before inserting a new key fact, its nearest neighbour is looked up in the
    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
    #[serde(default = "default_key_facts_dedup_threshold")]
    pub key_facts_dedup_threshold: f32,
}
861
// Turn count at which `ContextStrategy::Adaptive` flips to `MemoryFirst`.
fn default_crossover_turn_threshold() -> u32 {
    20
}

// Near-duplicate cutoff for key-fact dedup; > 1.0 disables dedup entirely.
fn default_key_facts_dedup_threshold() -> f32 {
    0.95
}
869
/// Session digest configuration (#2289).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    // NOTE(review): plain `String` here, while `TierConfig::scene_provider`
    // uses `ProviderName` — consider unifying the types.
    pub provider: String,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
884
885impl Default for DigestConfig {
886    fn default() -> Self {
887        Self {
888            enabled: false,
889            provider: String::new(),
890            max_tokens: 500,
891            max_input_messages: 50,
892        }
893    }
894}
895
/// Context assembly strategy (#2288).
///
/// Serialized in `snake_case` (`full_history`, `memory_first`, `adaptive`).
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
911
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
// Field-level defaults are redundant with the container `#[serde(default)]`
// but harmless; kept for explicitness.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
923
924impl Default for SessionsConfig {
925    fn default() -> Self {
926        Self {
927            max_history: default_max_history(),
928            title_max_chars: default_title_max_chars(),
929        }
930    }
931}
932
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Unlike sibling configs there is no struct-level `#[serde(default)]`;
/// instead every field carries its own default, so an empty table still deserializes.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Vector collection name for ingested document chunks.
    /// Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Size of each document chunk. Default: `1000`.
    /// NOTE(review): the unit (characters vs tokens) is not visible here —
    /// confirm against the chunking implementation.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, in the same unit as `chunk_size`.
    /// Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn.
    /// Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
949
950impl Default for DocumentConfig {
951    fn default() -> Self {
952        Self {
953            collection: default_document_collection(),
954            chunk_size: default_document_chunk_size(),
955            chunk_overlap: default_document_chunk_overlap(),
956            top_k: default_document_top_k(),
957            rag_enabled: false,
958        }
959    }
960}
961
962/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
963///
964/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
965/// re-ranking, and hybrid BM25+vector weighting.
966///
967/// # Example (TOML)
968///
969/// ```toml
970/// [memory.semantic]
971/// enabled = true
972/// recall_limit = 5
973/// vector_weight = 0.7
974/// keyword_weight = 0.3
975/// mmr_lambda = 0.7
976/// ```
977#[derive(Debug, Deserialize, Serialize)]
978#[allow(clippy::struct_excessive_bools)]
979pub struct SemanticConfig {
980    /// Enable vector-based semantic recall. Default: `true`.
981    #[serde(default = "default_semantic_enabled")]
982    pub enabled: bool,
983    #[serde(default = "default_recall_limit")]
984    pub recall_limit: usize,
985    #[serde(default = "default_vector_weight")]
986    pub vector_weight: f64,
987    #[serde(default = "default_keyword_weight")]
988    pub keyword_weight: f64,
989    #[serde(default = "default_true")]
990    pub temporal_decay_enabled: bool,
991    #[serde(default = "default_temporal_decay_half_life_days")]
992    pub temporal_decay_half_life_days: u32,
993    #[serde(default = "default_true")]
994    pub mmr_enabled: bool,
995    #[serde(default = "default_mmr_lambda")]
996    pub mmr_lambda: f32,
997    #[serde(default = "default_true")]
998    pub importance_enabled: bool,
999    #[serde(
1000        default = "default_importance_weight",
1001        deserialize_with = "validate_importance_weight"
1002    )]
1003    pub importance_weight: f64,
1004    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1005    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1006    /// from contending with the guardrail at the API server level (rate limits, Ollama
1007    /// single-model lock). When unset or empty, falls back to the main agent provider.
1008    #[serde(default)]
1009    pub embed_provider: Option<String>,
1010}
1011
1012impl Default for SemanticConfig {
1013    fn default() -> Self {
1014        Self {
1015            enabled: default_semantic_enabled(),
1016            recall_limit: default_recall_limit(),
1017            vector_weight: default_vector_weight(),
1018            keyword_weight: default_keyword_weight(),
1019            temporal_decay_enabled: true,
1020            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1021            mmr_enabled: true,
1022            mmr_lambda: default_mmr_lambda(),
1023            importance_enabled: true,
1024            importance_weight: default_importance_weight(),
1025            embed_provider: None,
1026        }
1027    }
1028}
1029
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged in TOML: the variant is selected via
/// `strategy = "reactive" | "proactive" | "autonomous" | "focus"`, and
/// `Proactive`'s fields live inline in the same table.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1053
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// `Deserialize` is intentionally NOT derived: deserialization is routed through the
/// manual impl + `FromStr` below so removed variants degrade gracefully.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1078
1079impl PruningStrategy {
1080    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1081    #[must_use]
1082    pub fn is_subgoal(self) -> bool {
1083        matches!(self, Self::Subgoal | Self::SubgoalMig)
1084    }
1085}
1086
1087// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1088// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1089impl<'de> serde::Deserialize<'de> for PruningStrategy {
1090    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1091        let s = String::deserialize(deserializer)?;
1092        s.parse().map_err(serde::de::Error::custom)
1093    }
1094}
1095
1096impl std::str::FromStr for PruningStrategy {
1097    type Err = String;
1098
1099    fn from_str(s: &str) -> Result<Self, Self::Err> {
1100        match s {
1101            "reactive" => Ok(Self::Reactive),
1102            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1103            "mig" => Ok(Self::Mig),
1104            // task_aware_mig was removed (dead code — was routed to scored path only).
1105            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1106            "task_aware_mig" | "task-aware-mig" => {
1107                tracing::warn!(
1108                    "pruning strategy `task_aware_mig` has been removed; \
1109                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1110                );
1111                Ok(Self::Reactive)
1112            }
1113            "subgoal" => Ok(Self::Subgoal),
1114            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1115            other => Err(format!(
1116                "unknown pruning strategy `{other}`, expected \
1117                 reactive|task_aware|mig|subgoal|subgoal_mig"
1118            )),
1119        }
1120    }
1121}
1122
/// High-density share of the density-aware compression token budget
/// (see `CompressionConfig::high_density_budget`). Default: `0.7`.
fn default_high_density_budget() -> f32 {
    0.7
}

/// Low-density share; complements `default_high_density_budget` to sum to 1.0.
/// Default: `0.3`.
fn default_low_density_budget() -> f32 {
    0.3
}
1130
/// Configuration for the performance-floor compression ratio predictor (#2460).
///
/// When `enabled = true`, before hard compaction the predictor selects the most aggressive
/// compression ratio that keeps the predicted probe score above `probe.hard_fail_threshold`.
/// Requires enough training data (`min_samples`) before activating — during cold start the
/// predictor returns `None` and default behavior applies.
///
/// Struct-level `#[serde(default)]` fills missing fields from the `Default` impl below.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionPredictorConfig {
    /// Enable the adaptive compression ratio predictor. Default: `false`.
    pub enabled: bool,
    /// Minimum training samples before the predictor activates. Default: `10`.
    pub min_samples: u64,
    /// Candidate compression ratios evaluated from most to least aggressive.
    /// Default: `[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]`.
    pub candidate_ratios: Vec<f32>,
    /// Retrain the model after this many new samples. Default: `5`.
    pub retrain_interval: u64,
    /// Maximum training samples to retain (sliding window). Default: `200`.
    pub max_training_samples: usize,
}
1152
1153impl Default for CompressionPredictorConfig {
1154    fn default() -> Self {
1155        Self {
1156            enabled: false,
1157            min_samples: 10,
1158            candidate_ratios: vec![0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
1159            retrain_interval: 5,
1160            max_training_samples: 200,
1161        }
1162    }
1163}
1164
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// Struct-level `#[serde(default)]` fills missing fields from the `Default` impl below.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200` (2 hours).
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
1192
1193impl Default for ForgettingConfig {
1194    fn default() -> Self {
1195        Self {
1196            enabled: false,
1197            decay_rate: 0.1,
1198            forgetting_floor: 0.05,
1199            sweep_interval_secs: 7200,
1200            sweep_batch_size: 500,
1201            replay_window_hours: 24,
1202            replay_min_access_count: 3,
1203            protect_recent_hours: 24,
1204            protect_min_access_count: 3,
1205        }
1206    }
1207}
1208
1209/// Configuration for active context compression (#1161).
1210#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1211#[serde(default)]
1212pub struct CompressionConfig {
1213    /// Compression strategy.
1214    #[serde(flatten)]
1215    pub strategy: CompressionStrategy,
1216    /// Tool-output pruning strategy (requires `context-compression` feature).
1217    pub pruning_strategy: PruningStrategy,
1218    /// Model to use for compression summaries.
1219    ///
1220    /// Currently unused — the primary summary provider is used regardless of this value.
1221    /// Reserved for future per-compression model selection. Setting this field has no effect.
1222    pub model: String,
1223    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1224    /// Falls back to the primary provider when empty. Default: `""`.
1225    pub compress_provider: ProviderName,
1226    /// Compaction probe: validates summary quality before committing it (#1609).
1227    #[serde(default)]
1228    pub probe: zeph_memory::CompactionProbeConfig,
1229    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1230    ///
1231    /// When enabled, tool output bodies in the compaction range are saved to
1232    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1233    /// The LLM summarizes placeholder messages; archived content is appended as
1234    /// a postfix after summarization so references survive compaction.
1235    /// Default: `false`.
1236    #[serde(default)]
1237    pub archive_tool_outputs: bool,
1238    /// Provider for Focus strategy segment scoring (#2510).
1239    /// Falls back to the primary provider when empty. Default: `""`.
1240    pub focus_scorer_provider: ProviderName,
1241    /// Token-budget fraction for high-density content in density-aware compression (#2481).
1242    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1243    #[serde(default = "default_high_density_budget")]
1244    pub high_density_budget: f32,
1245    /// Token-budget fraction for low-density content in density-aware compression (#2481).
1246    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1247    #[serde(default = "default_low_density_budget")]
1248    pub low_density_budget: f32,
1249    /// Performance-floor compression ratio predictor (#2460).
1250    #[serde(default)]
1251    pub predictor: CompressionPredictorConfig,
1252}
1253
/// Default user-turn interval between `SideQuest` eviction passes.
fn default_sidequest_interval_turns() -> u32 {
    4
}

/// Default cap on the fraction of tool outputs evicted per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5
}

/// Default cap on cursor entries included in the eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    30
}

/// Default minimum token count for a tool output to become an eviction candidate.
fn default_sidequest_min_cursor_tokens() -> usize {
    100
}
1269
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Nested under `[memory.sidequest]` — TODO confirm the exact TOML path against
/// the parent memory config (not visible in this chunk).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1290
1291impl Default for SidequestConfig {
1292    fn default() -> Self {
1293        Self {
1294            enabled: false,
1295            interval_turns: default_sidequest_interval_turns(),
1296            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1297            max_cursors: default_sidequest_max_cursors(),
1298            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1299        }
1300    }
1301}
1302
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable the knowledge graph memory subsystem. Default: `false`.
    pub enabled: bool,
    /// Model identifier for graph-extraction LLM calls. Default: `""`.
    /// NOTE(review): the fallback when empty is not visible here — presumably the
    /// primary provider, as with sibling `*_provider` fields; confirm at the call site.
    pub extract_model: String,
    /// Cap on entities extracted from a single message.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Cap on edges extracted from a single message.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Interval controlling community (re)detection. NOTE(review): unit (messages vs
    /// turns vs seconds) is not visible here — confirm against the refresh loop.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used for entity resolution/merging.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Timeout for graph-extraction LLM calls, in seconds.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Use embedding similarity (instead of name matching alone) for entity resolution.
    /// Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Similarity band below which entity resolution is treated as ambiguous.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum traversal depth for graph recall.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Maximum results returned by graph recall.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
}
1411
/// Default size of the `SQLite` pool dedicated to graph operations.
fn default_graph_pool_size() -> u32 {
    3
}
1415
impl Default for GraphConfig {
    /// Mirrors the per-field serde defaults; graph memory ships disabled with
    /// no entity cap (`max_entities = 0` = unlimited) and name-based resolution.
    fn default() -> Self {
        Self {
            enabled: false,
            extract_model: String::new(),
            max_entities_per_message: default_graph_max_entities_per_message(),
            max_edges_per_message: default_graph_max_edges_per_message(),
            community_refresh_interval: default_graph_community_refresh_interval(),
            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
            use_embedding_resolution: false,
            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
            max_hops: default_graph_max_hops(),
            recall_limit: default_graph_recall_limit(),
            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
            max_entities: 0,
            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
            community_summary_concurrency: default_graph_community_summary_concurrency(),
            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
            temporal_decay_rate: default_graph_temporal_decay_rate(),
            edge_history_limit: default_graph_edge_history_limit(),
            note_linking: NoteLinkingConfig::default(),
            spreading_activation: SpreadingActivationConfig::default(),
            link_weight_decay_lambda: default_link_weight_decay_lambda(),
            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
            belief_revision: BeliefRevisionConfig::default(),
            rpe: RpeConfig::default(),
            pool_size: default_graph_pool_size(),
        }
    }
}
1447
/// Default minimum LLM confidence for applying a consolidation topology op.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7
}

/// Default consolidation sweep interval in seconds (1 hour).
fn default_consolidation_sweep_interval_secs() -> u64 {
    3600
}

/// Default number of messages evaluated per consolidation sweep.
fn default_consolidation_sweep_batch_size() -> usize {
    50
}

/// Default cosine-similarity floor for consolidation candidates.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85
}
1463
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    // Field-level `#[serde(default)]` is redundant given the struct-level one, but harmless.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
}
1492
1493impl Default for ConsolidationConfig {
1494    fn default() -> Self {
1495        Self {
1496            enabled: false,
1497            consolidation_provider: ProviderName::default(),
1498            confidence_threshold: default_consolidation_confidence_threshold(),
1499            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
1500            sweep_batch_size: default_consolidation_sweep_batch_size(),
1501            similarity_threshold: default_consolidation_similarity_threshold(),
1502        }
1503    }
1504}
1505
/// Default multiplicative decay factor for un-retrieved edge link weights.
fn default_link_weight_decay_lambda() -> f64 {
    0.95
}

/// Default interval between link weight decay passes: 86400 s = 24 hours.
fn default_link_weight_decay_interval_secs() -> u64 {
    86400
}
1513
1514fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
1515where
1516    D: serde::Deserializer<'de>,
1517{
1518    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
1519    if value.is_nan() || value.is_infinite() {
1520        return Err(serde::de::Error::custom(
1521            "link_weight_decay_lambda must be a finite number",
1522        ));
1523    }
1524    if !(value > 0.0 && value <= 1.0) {
1525        return Err(serde::de::Error::custom(
1526            "link_weight_decay_lambda must be in (0.0, 1.0]",
1527        ));
1528    }
1529    Ok(value)
1530}
1531
1532fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
1533where
1534    D: serde::Deserializer<'de>,
1535{
1536    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1537    if value.is_nan() || value.is_infinite() {
1538        return Err(serde::de::Error::custom(
1539            "threshold must be a finite number",
1540        ));
1541    }
1542    if !(0.0..=1.0).contains(&value) {
1543        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
1544    }
1545    Ok(value)
1546}
1547
1548fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
1549where
1550    D: serde::Deserializer<'de>,
1551{
1552    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1553    if value.is_nan() || value.is_infinite() {
1554        return Err(serde::de::Error::custom(
1555            "fast_path_margin must be a finite number",
1556        ));
1557    }
1558    if !(0.0..=1.0).contains(&value) {
1559        return Err(serde::de::Error::custom(
1560            "fast_path_margin must be in [0.0, 1.0]",
1561        ));
1562    }
1563    Ok(value)
1564}
1565
/// Default composite-score admission threshold.
fn default_admission_threshold() -> f32 {
    0.40
}

/// Default margin above the threshold for the no-LLM fast path.
fn default_admission_fast_path_margin() -> f32 {
    0.15
}

/// Default minimum training samples before the RL admission model activates.
fn default_rl_min_samples() -> u32 {
    500
}

/// Default RL model retrain interval in seconds (1 hour).
fn default_rl_retrain_interval_secs() -> u64 {
    3600
}
1581
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in TOML as snake_case: `heuristic`, `rl`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
1596
1597fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
1598where
1599    D: serde::Deserializer<'de>,
1600{
1601    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1602    if value < 0.0 {
1603        return Err(serde::de::Error::custom(
1604            "admission weight must be non-negative (>= 0.0)",
1605        ));
1606    }
1607    Ok(value)
1608}
1609
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative (enforced by `validate_admission_weight` on deserialization).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
1638
1639impl Default for AdmissionWeights {
1640    fn default() -> Self {
1641        Self {
1642            future_utility: 0.30,
1643            factual_confidence: 0.15,
1644            semantic_novelty: 0.30,
1645            temporal_recency: 0.10,
1646            content_type_prior: 0.15,
1647            goal_utility: 0.0,
1648        }
1649    }
1650}
1651
1652impl AdmissionWeights {
1653    /// Return weights normalized so they sum to 1.0.
1654    ///
1655    /// All weights are non-negative; the sum is always > 0 when defaults are used.
1656    #[must_use]
1657    pub fn normalized(&self) -> Self {
1658        let sum = self.future_utility
1659            + self.factual_confidence
1660            + self.semantic_novelty
1661            + self.temporal_recency
1662            + self.content_type_prior
1663            + self.goal_utility;
1664        if sum <= f32::EPSILON {
1665            return Self::default();
1666        }
1667        Self {
1668            future_utility: self.future_utility / sum,
1669            factual_confidence: self.factual_confidence / sum,
1670            semantic_novelty: self.semantic_novelty / sum,
1671            temporal_recency: self.temporal_recency / sum,
1672            content_type_prior: self.content_type_prior / sum,
1673            goal_utility: self.goal_utility / sum,
1674        }
1675    }
1676}
1677
/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
///
/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
/// Messages below the composite admission threshold are rejected and not persisted.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionConfig {
    /// Enable A-MAC admission control. Default: `false`.
    pub enabled: bool,
    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
    /// Default: `0.40`.
    #[serde(deserialize_with = "validate_admission_threshold")]
    pub threshold: f32,
    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
    pub fast_path_margin: f32,
    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub admission_provider: ProviderName,
    /// Per-factor weights. Normalized at runtime.
    /// Default: `{0.30, 0.15, 0.30, 0.10, 0.15}` with `goal_utility = 0.0`.
    pub weights: AdmissionWeights,
    /// Admission decision strategy. Default: `heuristic`.
    #[serde(default)]
    pub admission_strategy: AdmissionStrategy,
    /// Minimum training samples before the RL model is activated.
    /// Below this count the system falls back to `Heuristic`. Default: `500`.
    #[serde(default = "default_rl_min_samples")]
    pub rl_min_samples: u32,
    /// Background RL model retraining interval in seconds. Default: `3600`.
    #[serde(default = "default_rl_retrain_interval_secs")]
    pub rl_retrain_interval_secs: u64,
    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
    /// Zero regression when `false`. Default: `false`.
    #[serde(default)]
    pub goal_conditioned_write: bool,
    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
    /// Used only for borderline cases (similarity within 0.1 of threshold).
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub goal_utility_provider: ProviderName,
    /// Minimum cosine similarity between goal embedding and candidate memory
    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
    #[serde(default = "default_goal_utility_threshold")]
    pub goal_utility_threshold: f32,
    /// Weight of the `goal_utility` factor in the composite admission score.
    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
    #[serde(default = "default_goal_utility_weight")]
    pub goal_utility_weight: f32,
}
1729
/// Serde default for `AdmissionConfig::goal_utility_threshold` (`0.4`).
fn default_goal_utility_threshold() -> f32 {
    0.4
}

/// Serde default for `AdmissionConfig::goal_utility_weight` (`0.25`).
fn default_goal_utility_weight() -> f32 {
    0.25
}
1737
1738impl Default for AdmissionConfig {
1739    fn default() -> Self {
1740        Self {
1741            enabled: false,
1742            threshold: default_admission_threshold(),
1743            fast_path_margin: default_admission_fast_path_margin(),
1744            admission_provider: ProviderName::default(),
1745            weights: AdmissionWeights::default(),
1746            admission_strategy: AdmissionStrategy::default(),
1747            rl_min_samples: default_rl_min_samples(),
1748            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
1749            goal_conditioned_write: false,
1750            goal_utility_provider: ProviderName::default(),
1751            goal_utility_threshold: default_goal_utility_threshold(),
1752            goal_utility_weight: default_goal_utility_weight(),
1753        }
1754    }
1755}
1756
/// Routing strategy for `[memory.store_routing]`.
///
/// Deserialized from snake_case TOML strings (`"heuristic"`, `"llm"`, `"hybrid"`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
1769
/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
///
/// Controls how each query is classified and routed to the appropriate memory
/// backend(s), avoiding unnecessary store queries for simple lookups.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct StoreRoutingConfig {
    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
    /// directly (existing behavior). Default: `false`.
    pub enabled: bool,
    /// Routing strategy. Default: `heuristic`.
    pub strategy: StoreRoutingStrategy,
    /// Provider name from `[[llm.providers]]` for LLM-based classification.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub routing_classifier_provider: ProviderName,
    /// Route to use when the classifier is uncertain (confidence < threshold).
    /// Default: `"hybrid"`.
    pub fallback_route: String,
    /// Confidence threshold below which `HybridRouter` escalates to LLM.
    /// Range: `[0.0, 1.0]`. Default: `0.7`.
    pub confidence_threshold: f32,
}
1792
1793impl Default for StoreRoutingConfig {
1794    fn default() -> Self {
1795        Self {
1796            enabled: false,
1797            strategy: StoreRoutingStrategy::Heuristic,
1798            routing_classifier_provider: ProviderName::default(),
1799            fallback_route: "hybrid".into(),
1800            confidence_threshold: 0.7,
1801        }
1802    }
1803}
1804
/// Persona memory layer configuration (#2461).
///
/// When `enabled = true`, user preferences and domain knowledge are extracted from
/// conversation history via a cheap LLM provider and injected after the system prompt.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PersonaConfig {
    /// Enable persona memory extraction and injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for persona extraction.
    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
    pub persona_provider: ProviderName,
    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
    pub min_confidence: f64,
    /// Minimum user messages before extraction runs in a session. Default: `3`.
    pub min_messages: usize,
    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Token budget allocated to persona context in assembly. Default: `500`.
    pub context_budget_tokens: usize,
}
1828
1829impl Default for PersonaConfig {
1830    fn default() -> Self {
1831        Self {
1832            enabled: false,
1833            persona_provider: ProviderName::default(),
1834            min_confidence: 0.6,
1835            min_messages: 3,
1836            max_messages: 10,
1837            extraction_timeout_secs: 10,
1838            context_budget_tokens: 500,
1839        }
1840    }
1841}
1842
/// Trajectory-informed memory configuration (#2498).
///
/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
/// Procedural entries are injected into context as "past experience" during assembly.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TrajectoryConfig {
    /// Enable trajectory extraction and context injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub trajectory_provider: ProviderName,
    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Number of procedural entries retrieved for context injection. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum confidence score for entries included in context. Default: `0.6`.
    pub min_confidence: f64,
}
1867
1868impl Default for TrajectoryConfig {
1869    fn default() -> Self {
1870        Self {
1871            enabled: false,
1872            trajectory_provider: ProviderName::default(),
1873            context_budget_tokens: 400,
1874            max_messages: 10,
1875            extraction_timeout_secs: 10,
1876            recall_top_k: 5,
1877            min_confidence: 0.6,
1878        }
1879    }
1880}
1881
/// Category-aware memory configuration (#2428).
///
/// When `enabled = true`, messages are auto-tagged with a category derived from the active
/// skill or tool context. The category is stored in the `messages.category` column and used
/// as a Qdrant payload filter during recall.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CategoryConfig {
    /// Enable category tagging and category-filtered recall. Default: `false`.
    pub enabled: bool,
    /// Automatically assign category from skill metadata or tool type. Default: `true`.
    /// Only takes effect when `enabled = true`.
    pub auto_tag: bool,
}
1895
1896impl Default for CategoryConfig {
1897    fn default() -> Self {
1898        Self {
1899            enabled: false,
1900            auto_tag: true,
1901        }
1902    }
1903}
1904
/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
///
/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
/// for complex queries.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TreeConfig {
    /// Enable the memory tree and background consolidation loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for node consolidation.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// Interval between consolidation sweeps in seconds. Default: `300`.
    pub sweep_interval_secs: u64,
    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
    pub batch_size: usize,
    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
    pub similarity_threshold: f32,
    /// Maximum tree depth (levels above leaves). Default: `3`.
    pub max_level: u32,
    /// Token budget allocated to tree memory in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Number of tree nodes retrieved for context. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
    pub min_cluster_size: usize,
}
1933
1934impl Default for TreeConfig {
1935    fn default() -> Self {
1936        Self {
1937            enabled: false,
1938            consolidation_provider: ProviderName::default(),
1939            sweep_interval_secs: 300,
1940            batch_size: 20,
1941            similarity_threshold: 0.8,
1942            max_level: 3,
1943            context_budget_tokens: 400,
1944            recall_top_k: 5,
1945            min_cluster_size: 2,
1946        }
1947    }
1948}
1949
1950/// Time-based microcompact configuration (#2699).
1951///
1952/// When `enabled = true`, low-value tool outputs are cleared from context
1953/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
1954/// The most recent `keep_recent` tool messages are preserved unconditionally.
1955#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
1956#[serde(default)]
1957pub struct MicrocompactConfig {
1958    /// Enable time-based microcompaction. Default: `false`.
1959    pub enabled: bool,
1960    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
1961    pub gap_threshold_minutes: u32,
1962    /// Number of most recent compactable tool messages to preserve. Default: `3`.
1963    pub keep_recent: usize,
1964}
1965
1966impl Default for MicrocompactConfig {
1967    fn default() -> Self {
1968        Self {
1969            enabled: false,
1970            gap_threshold_minutes: 60,
1971            keep_recent: 3,
1972        }
1973    }
1974}
1975
1976/// autoDream background memory consolidation configuration (#2697).
1977///
1978/// When `enabled = true`, a constrained consolidation subagent runs after
1979/// a session ends if both `min_sessions` and `min_hours` gates pass.
1980#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
1981#[serde(default)]
1982pub struct AutoDreamConfig {
1983    /// Enable autoDream consolidation. Default: `false`.
1984    pub enabled: bool,
1985    /// Minimum number of sessions between consolidations. Default: `3`.
1986    pub min_sessions: u32,
1987    /// Minimum hours between consolidations. Default: `24`.
1988    pub min_hours: u32,
1989    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
1990    /// Falls back to the primary provider when empty. Default: `""`.
1991    pub consolidation_provider: ProviderName,
1992    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
1993    pub max_iterations: u8,
1994}
1995
1996impl Default for AutoDreamConfig {
1997    fn default() -> Self {
1998        Self {
1999            enabled: false,
2000            min_sessions: 3,
2001            min_hours: 24,
2002            consolidation_provider: ProviderName::default(),
2003            max_iterations: 8,
2004        }
2005    }
2006}
2007
2008/// `MagicDocs` auto-maintained markdown configuration (#2702).
2009///
2010/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
2011/// are registered and periodically updated by a constrained subagent.
2012#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2013#[serde(default)]
2014pub struct MagicDocsConfig {
2015    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
2016    pub enabled: bool,
2017    /// Minimum turns between updates for a given doc path. Default: `5`.
2018    pub min_turns_between_updates: u32,
2019    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
2020    /// Falls back to the primary provider when empty. Default: `""`.
2021    pub update_provider: ProviderName,
2022    /// Maximum agent loop iterations per doc update. Default: `4`.
2023    pub max_iterations: u8,
2024}
2025
2026impl Default for MagicDocsConfig {
2027    fn default() -> Self {
2028        Self {
2029            enabled: false,
2030            min_turns_between_updates: 5,
2031            update_provider: ProviderName::default(),
2032            max_iterations: 4,
2033        }
2034    }
2035}
2036
#[cfg(test)]
mod tests {
    use super::*;

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every supported strategy string must map to its matching variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Unrecognized strategy names must hard-error; only the removed task_aware_mig
    // gets the legacy fallback tested above.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    // TierConfig documented defaults must match Default::default().
    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    // TierConfig field validators must reject out-of-range TOML values.
    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Helper: deserialize a SemanticConfig with only importance_weight set,
    // exercising its field-level validator.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    // Boundary values 0.0 and 1.0 are both inside the accepted range.
    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // TOML does not have a NaN literal, but we can test via a f64 that
        // the validator rejects out-of-range values. Test with negative here
        // and rely on validate_importance_weight rejecting non-finite via
        // a constructed deserializer call.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // goal_utility is 0.0 here, so summing the remaining five factors covers the total.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        assert!(
            (n.future_utility - default.future_utility).abs() < 0.001,
            "zero-sum weights must fall back to defaults"
        );
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    // The Focus scorer provider is empty (primary provider) by default and
    // round-trips through TOML.
    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}