zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::defaults::{default_sqlite_path_field, default_true};
7
/// Default for `MemoryConfig::sqlite_pool_size`: `5`.
fn default_sqlite_pool_size() -> u32 {
    5_u32
}

/// Default for `SessionsConfig::max_history`: `100`.
fn default_max_history() -> usize {
    100_usize
}

/// Default for `SessionsConfig::title_max_chars`: `60`.
fn default_title_max_chars() -> usize {
    60_usize
}

/// Default for `DocumentConfig::collection`: `"zeph_documents"`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}

/// Default for `DocumentConfig::chunk_size`: `1000`.
fn default_document_chunk_size() -> usize {
    1_000_usize
}

/// Default for `DocumentConfig::chunk_overlap`: `100`.
fn default_document_chunk_overlap() -> usize {
    100_usize
}

/// Default for `DocumentConfig::top_k`: `3`.
fn default_document_top_k() -> usize {
    3_usize
}

/// Default for `MemoryConfig::autosave_min_length`: `20`.
fn default_autosave_min_length() -> usize {
    20_usize
}

/// Default for `MemoryConfig::tool_call_cutoff`: `6`.
fn default_tool_call_cutoff() -> usize {
    6_usize
}
43
/// Default for `MemoryConfig::token_safety_margin`: `1.0`.
fn default_token_safety_margin() -> f32 {
    1.0_f32
}

/// Default for `MemoryConfig::redact_credentials`: `true`.
fn default_redact_credentials() -> bool {
    true
}

/// Default for `MemoryConfig::qdrant_url`: `"http://localhost:6334"`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}

/// Default for `MemoryConfig::summarization_threshold`: `50`.
fn default_summarization_threshold() -> usize {
    50_usize
}

/// Default for `MemoryConfig::context_budget_tokens`: `0`.
fn default_context_budget_tokens() -> usize {
    0_usize
}
63
/// Default for `MemoryConfig::soft_compaction_threshold`: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    0.60_f32
}

/// Default for `MemoryConfig::hard_compaction_threshold`: `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    0.90_f32
}

/// Default for `MemoryConfig::compaction_preserve_tail`: `6`.
fn default_compaction_preserve_tail() -> usize {
    6_usize
}

/// Default for `MemoryConfig::compaction_cooldown_turns`: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    2_u8
}

/// Default for `MemoryConfig::auto_budget`: `true`.
fn default_auto_budget() -> bool {
    true
}

/// Default for `MemoryConfig::prune_protect_tokens`: `40_000`.
fn default_prune_protect_tokens() -> usize {
    40_000_usize
}

/// Default for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    0.35_f32
}
91
/// Default for `SemanticConfig::temporal_decay_half_life_days`: `30`.
fn default_temporal_decay_half_life_days() -> u32 {
    30_u32
}

/// Default for `SemanticConfig::mmr_lambda`: `0.7`.
fn default_mmr_lambda() -> f32 {
    0.7_f32
}

/// Default for `SemanticConfig::enabled`: `true`.
fn default_semantic_enabled() -> bool {
    true
}

/// Default for `SemanticConfig::recall_limit`: `5`.
fn default_recall_limit() -> usize {
    5_usize
}

/// Default for `SemanticConfig::vector_weight`: `0.7`.
fn default_vector_weight() -> f64 {
    0.7_f64
}

/// Default for `SemanticConfig::keyword_weight`: `0.3`.
fn default_keyword_weight() -> f64 {
    0.3_f64
}
115
/// Default value `10` for the graph max-entities-per-message setting.
fn default_graph_max_entities_per_message() -> usize {
    10_usize
}

/// Default value `15` for the graph max-edges-per-message setting.
fn default_graph_max_edges_per_message() -> usize {
    15_usize
}

/// Default value `100` for the graph community refresh interval.
fn default_graph_community_refresh_interval() -> usize {
    100_usize
}

/// Default value `8192` for the community-summary prompt size cap, in bytes.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8_192_usize
}

/// Default value `4` for the community-summary concurrency setting.
fn default_graph_community_summary_concurrency() -> usize {
    4_usize
}

/// Default value `10_000` for the LPA edge chunk size.
fn default_lpa_edge_chunk_size() -> usize {
    10_000_usize
}
139
/// Default value `0.85` for the graph entity similarity threshold.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85_f32
}

/// Default value `0.70` for the graph entity ambiguous threshold.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70_f32
}

/// Default value `15` for the graph extraction timeout, in seconds.
fn default_graph_extraction_timeout_secs() -> u64 {
    15_u64
}

/// Default value `2` for the graph max-hops setting.
fn default_graph_max_hops() -> u32 {
    2_u32
}

/// Default value `10` for the graph recall limit.
fn default_graph_recall_limit() -> usize {
    10_usize
}

/// Default value `90` for expired-edge retention, in days.
fn default_graph_expired_edge_retention_days() -> u32 {
    90_u32
}

/// Default value `0.0` for the graph temporal decay rate.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0_f64
}

/// Default value `100` for the graph edge history limit.
fn default_graph_edge_history_limit() -> usize {
    100_usize
}
171
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85_f32
}

/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    3_u32
}

/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1_f32
}

/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8_f32
}

/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50_usize
}

/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000`.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1_000_u64
}
195
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85_f32
}

/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    10_usize
}

/// Default for `NoteLinkingConfig::timeout_secs`: `5`.
fn default_note_linking_timeout_secs() -> u64 {
    5_u64
}

/// Default for `MemoryConfig::shutdown_summary`: `true`.
fn default_shutdown_summary() -> bool {
    true
}

/// Default for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    4_usize
}

/// Default for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    20_usize
}

/// Default for `MemoryConfig::shutdown_summary_timeout_secs`: `10`.
fn default_shutdown_summary_timeout_secs() -> u64 {
    10_u64
}
223
224fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
225where
226    D: serde::Deserializer<'de>,
227{
228    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
229    if value.is_nan() || value.is_infinite() {
230        return Err(serde::de::Error::custom(
231            "similarity_threshold must be a finite number",
232        ));
233    }
234    if !(0.5..=1.0).contains(&value) {
235        return Err(serde::de::Error::custom(
236            "similarity_threshold must be in [0.5, 1.0]",
237        ));
238    }
239    Ok(value)
240}
241
242fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
243where
244    D: serde::Deserializer<'de>,
245{
246    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
247    if value < 2 {
248        return Err(serde::de::Error::custom(
249            "promotion_min_sessions must be >= 2",
250        ));
251    }
252    Ok(value)
253}
254
255fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
256where
257    D: serde::Deserializer<'de>,
258{
259    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
260    if value == 0 {
261        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
262    }
263    Ok(value)
264}
265
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    3_u32
}

/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    0.92_f32
}

/// Default for `TierConfig::sweep_interval_secs`: `3600`.
fn default_tier_sweep_interval_secs() -> u64 {
    3_600_u64
}

/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    100_usize
}

/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    0.80_f32
}

/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    50_usize
}
289
290fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
291where
292    D: serde::Deserializer<'de>,
293{
294    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
295    if value.is_nan() || value.is_infinite() {
296        return Err(serde::de::Error::custom(
297            "scene_similarity_threshold must be a finite number",
298        ));
299    }
300    if !(0.5..=1.0).contains(&value) {
301        return Err(serde::de::Error::custom(
302            "scene_similarity_threshold must be in [0.5, 1.0]",
303        ));
304    }
305    Ok(value)
306}
307
308fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
309where
310    D: serde::Deserializer<'de>,
311{
312    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
313    if value == 0 {
314        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
315    }
316    Ok(value)
317}
318
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// Because of `#[serde(default)]`, any key omitted from the input takes its value from
/// the `Default` implementation of this struct.
///
/// # Validation
///
/// Constraints enforced at deserialization time (by the per-field `deserialize_with`
/// validators, which run for keys present in the input):
/// - `similarity_threshold` in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    /// No validator is attached to this field.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep.
    /// Must be `>= 1`. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: String,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    /// No validator is attached to this field.
    pub scene_sweep_interval_secs: u64,
}
365
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200`.
fn default_scene_sweep_interval_secs() -> u64 {
    7_200_u64
}
369
370impl Default for TierConfig {
371    fn default() -> Self {
372        Self {
373            enabled: false,
374            promotion_min_sessions: default_tier_promotion_min_sessions(),
375            similarity_threshold: default_tier_similarity_threshold(),
376            sweep_interval_secs: default_tier_sweep_interval_secs(),
377            sweep_batch_size: default_tier_sweep_batch_size(),
378            scene_enabled: false,
379            scene_similarity_threshold: default_scene_similarity_threshold(),
380            scene_batch_size: default_scene_batch_size(),
381            scene_provider: String::new(),
382            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
383        }
384    }
385}
386
387fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
388where
389    D: serde::Deserializer<'de>,
390{
391    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
392    if value.is_nan() || value.is_infinite() {
393        return Err(serde::de::Error::custom(
394            "temporal_decay_rate must be a finite number",
395        ));
396    }
397    if !(0.0..=10.0).contains(&value) {
398        return Err(serde::de::Error::custom(
399            "temporal_decay_rate must be in [0.0, 10.0]",
400        ));
401    }
402    Ok(value)
403}
404
405fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
406where
407    D: serde::Deserializer<'de>,
408{
409    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
410    if value.is_nan() || value.is_infinite() {
411        return Err(serde::de::Error::custom(
412            "similarity_threshold must be a finite number",
413        ));
414    }
415    if !(0.0..=1.0).contains(&value) {
416        return Err(serde::de::Error::custom(
417            "similarity_threshold must be in [0.0, 1.0]",
418        ));
419    }
420    Ok(value)
421}
422
423fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
424where
425    D: serde::Deserializer<'de>,
426{
427    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
428    if value.is_nan() || value.is_infinite() {
429        return Err(serde::de::Error::custom(
430            "importance_weight must be a finite number",
431        ));
432    }
433    if value < 0.0 {
434        return Err(serde::de::Error::custom(
435            "importance_weight must be non-negative",
436        ));
437    }
438    if value > 1.0 {
439        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
440    }
441    Ok(value)
442}
443
/// Default for `SemanticConfig::importance_weight`: `0.15`.
fn default_importance_weight() -> f64 {
    0.15_f64
}
447
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Enforced at deserialization time (by `deserialize_with` validators, for keys present
/// in the input):
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Not enforced at deserialization time:
/// - `activation_threshold < inhibition_threshold` is a cross-field constraint checked by
///   [`SpreadingActivationConfig::validate`], which must be called explicitly.
/// - `recall_timeout_ms` has no validator attached here; the clamp of `0` to 100 ms is
///   described as runtime behavior and is not implemented by this struct.
/// - `seed_structural_weight` has no range validator attached here.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    /// The documented range is not checked during deserialization.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
490
491fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
492where
493    D: serde::Deserializer<'de>,
494{
495    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
496    if value.is_nan() || value.is_infinite() {
497        return Err(serde::de::Error::custom(
498            "decay_lambda must be a finite number",
499        ));
500    }
501    if !(value > 0.0 && value <= 1.0) {
502        return Err(serde::de::Error::custom(
503            "decay_lambda must be in (0.0, 1.0]",
504        ));
505    }
506    Ok(value)
507}
508
509fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
510where
511    D: serde::Deserializer<'de>,
512{
513    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
514    if value == 0 {
515        return Err(serde::de::Error::custom("max_hops must be >= 1"));
516    }
517    Ok(value)
518}
519
520impl SpreadingActivationConfig {
521    /// Validate cross-field constraints that cannot be expressed in per-field validators.
522    ///
523    /// # Errors
524    ///
525    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
526    pub fn validate(&self) -> Result<(), String> {
527        if self.activation_threshold >= self.inhibition_threshold {
528            return Err(format!(
529                "activation_threshold ({}) must be < inhibition_threshold ({})",
530                self.activation_threshold, self.inhibition_threshold
531            ));
532        }
533        Ok(())
534    }
535}
536
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    0.4_f32
}

/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    3_usize
}
544
545impl Default for SpreadingActivationConfig {
546    fn default() -> Self {
547        Self {
548            enabled: false,
549            decay_lambda: default_spreading_activation_decay_lambda(),
550            max_hops: default_spreading_activation_max_hops(),
551            activation_threshold: default_spreading_activation_activation_threshold(),
552            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
553            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
554            seed_structural_weight: default_seed_structural_weight(),
555            seed_community_cap: default_seed_community_cap(),
556            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
557        }
558    }
559}
560
/// Kumiho belief revision configuration.
///
/// Because of `#[serde(default)]`, keys omitted from the input take their value from the
/// `Default` implementation of this struct.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    /// Must be finite and in `[0.0, 1.0]`; checked by `validate_similarity_threshold`
    /// during deserialization.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
572
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85_f32
}
576
577impl Default for BeliefRevisionConfig {
578    fn default() -> Self {
579        Self {
580            enabled: false,
581            similarity_threshold: default_belief_revision_similarity_threshold(),
582        }
583    }
584}
585
586/// D-MEM RPE-based tiered graph extraction routing configuration.
587#[derive(Debug, Clone, Deserialize, Serialize)]
588#[serde(default)]
589pub struct RpeConfig {
590    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
591    pub enabled: bool,
592    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
593    /// Default: `0.3`.
594    #[serde(deserialize_with = "validate_similarity_threshold")]
595    pub threshold: f32,
596    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
597    pub max_skip_turns: u32,
598}
599
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    0.3_f32
}

/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    5_u32
}
607
608impl Default for RpeConfig {
609    fn default() -> Self {
610        Self {
611            enabled: false,
612            threshold: default_rpe_threshold(),
613            max_skip_turns: default_rpe_max_skip_turns(),
614        }
615    }
616}
617
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// Because of `#[serde(default)]`, keys omitted from the input take their value from the
/// `Default` implementation of this struct.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    /// Must be finite and in `[0.0, 1.0]`; checked by `validate_similarity_threshold`
    /// during deserialization.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
636
637impl Default for NoteLinkingConfig {
638    fn default() -> Self {
639        Self {
640            enabled: false,
641            similarity_threshold: default_note_linking_similarity_threshold(),
642            top_k: default_note_linking_top_k(),
643            timeout_secs: default_note_linking_timeout_secs(),
644        }
645    }
646}
647
/// Vector backend selector for embedding storage.
///
/// Serialized in lowercase (`"qdrant"` / `"sqlite"`) because of `rename_all = "lowercase"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Selects the Qdrant backend; serialized as `"qdrant"`.
    Qdrant,
    /// Selects the SQLite backend; serialized as `"sqlite"`. This is the default variant.
    #[default]
    Sqlite,
}
656
657impl VectorBackend {
658    #[must_use]
659    pub fn as_str(&self) -> &'static str {
660        match self {
661            Self::Qdrant => "qdrant",
662            Self::Sqlite => "sqlite",
663        }
664    }
665}
666
/// Memory subsystem configuration.
///
/// Every field except `history_limit` carries a serde default, so `history_limit` is the
/// only key that must be present in the input.
// NOTE(review): presumably deserialized from the `[memory]` table (the nested tier settings
// are documented as `[memory.tiers]`) — confirm against the loader.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct MemoryConfig {
    /// Compression guidelines settings (type defined in `zeph_memory`).
    #[serde(default)]
    pub compression_guidelines: zeph_memory::CompressionGuidelinesConfig,
    /// SQLite database path. Default comes from `crate::defaults::default_sqlite_path_field`.
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// History limit. Required: this field has no serde default.
    // NOTE(review): unit (messages vs. turns) is not visible here — confirm.
    pub history_limit: u32,
    /// Qdrant endpoint URL. Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    /// Semantic recall settings.
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Summarization threshold. Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Context budget in tokens. Default: `0`.
    // NOTE(review): `0` presumably means "no explicit budget" — confirm with the consumer.
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Soft compaction threshold. Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Hard compaction threshold. Default: `0.90`.
    /// Also accepted under the alias `compaction_threshold`.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Number of trailing items preserved by compaction. Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Cooldown between compactions, in turns. Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Automatic budget toggle. Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Token count protected from pruning. Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Score threshold for cross-session results. Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Vector backend used for embedding storage. Default: `sqlite`.
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Token safety margin. Default: `1.0`.
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Credential redaction toggle. Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    /// Autosave toggle for assistant messages. Default comes from
    /// `crate::defaults::default_true`.
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Minimum length for autosave. Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Tool call cutoff. Default: `6`.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// SQLite connection pool size. Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    /// Session listing and title settings.
    #[serde(default)]
    pub sessions: SessionsConfig,
    /// Document ingestion and RAG retrieval settings.
    #[serde(default)]
    pub documents: DocumentConfig,
    /// Eviction settings (type defined in `zeph_memory`).
    #[serde(default)]
    pub eviction: zeph_memory::EvictionConfig,
    /// Context compression settings.
    #[serde(default)]
    pub compression: CompressionConfig,
    /// Sidequest settings.
    #[serde(default)]
    pub sidequest: SidequestConfig,
    /// Entity graph settings.
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `10`.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
    /// A-MAC adaptive memory admission control.
    ///
    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
    /// if its composite admission score falls below the configured threshold.
    #[serde(default)]
    pub admission: AdmissionConfig,
    /// Session digest generation at session end. Default: disabled.
    #[serde(default)]
    pub digest: DigestConfig,
    /// Context assembly strategy. Default: `full_history` (current behavior).
    #[serde(default)]
    pub context_strategy: ContextStrategy,
    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
    #[serde(default = "default_crossover_turn_threshold")]
    pub crossover_turn_threshold: u32,
    /// All-Mem lifelong memory consolidation sweep.
    ///
    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
    /// messages and merges them into consolidated entries via LLM.
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// `PostgreSQL` connection URL.
    ///
    /// Used when the binary is compiled with `--features postgres`.
    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
    /// Example: `postgres://user:pass@localhost:5432/zeph`
    /// Default: `None` (uses `sqlite_path` instead).
    #[serde(default)]
    pub database_url: Option<String>,
    /// Cost-sensitive store routing (#2444).
    ///
    /// When `store_routing.enabled = true`, query intent is classified and routed to
    /// the cheapest sufficient backend instead of querying all stores on every turn.
    #[serde(default)]
    pub store_routing: StoreRoutingConfig,
}
794
/// Default for `MemoryConfig::crossover_turn_threshold`: `20`.
fn default_crossover_turn_threshold() -> u32 {
    20_u32
}
798
/// Session digest configuration (#2289).
///
/// Because of `#[serde(default)]`, keys omitted from the input take their value from the
/// `Default` implementation of this struct. No field has a deserialization validator.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub provider: String,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
813
814impl Default for DigestConfig {
815    fn default() -> Self {
816        Self {
817            enabled: false,
818            provider: String::new(),
819            max_tokens: 500,
820            max_input_messages: 50,
821        }
822    }
823}
824
/// Context assembly strategy (#2288).
///
/// Variant names are serialized in `snake_case`: `full_history`, `memory_first`,
/// `adaptive`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
840
/// Session listing and title settings.
///
/// Both fields fall back to their `default_*` helper when omitted from the input.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
851
852impl Default for SessionsConfig {
853    fn default() -> Self {
854        Self {
855            max_history: default_max_history(),
856            title_max_chars: default_title_max_chars(),
857        }
858    }
859}
860
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Every field has a serde default, so all keys are optional in the input. No field has a
/// deserialization validator; in particular `chunk_overlap` is not checked against
/// `chunk_size` here.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Name of the document collection. Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Chunk size used when splitting documents. Default: `1000`.
    // NOTE(review): unit (characters vs. tokens) is not visible here — confirm.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, in the same unit as `chunk_size`. Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
877
878impl Default for DocumentConfig {
879    fn default() -> Self {
880        Self {
881            collection: default_document_collection(),
882            chunk_size: default_document_chunk_size(),
883            chunk_overlap: default_document_chunk_overlap(),
884            top_k: default_document_top_k(),
885            rag_enabled: false,
886        }
887    }
888}
889
/// Semantic recall settings.
///
/// Every field has a serde default, so all keys are optional in the input. Only
/// `importance_weight` has a deserialization validator.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct SemanticConfig {
    /// Enable semantic recall. Default: `true`.
    #[serde(default = "default_semantic_enabled")]
    pub enabled: bool,
    /// Recall limit. Default: `5`.
    #[serde(default = "default_recall_limit")]
    pub recall_limit: usize,
    /// Weight of the vector score. Default: `0.7`. Not range-checked here.
    #[serde(default = "default_vector_weight")]
    pub vector_weight: f64,
    /// Weight of the keyword score. Default: `0.3`. Not range-checked here.
    #[serde(default = "default_keyword_weight")]
    pub keyword_weight: f64,
    /// Enable temporal decay. Default comes from `crate::defaults::default_true`.
    #[serde(default = "default_true")]
    pub temporal_decay_enabled: bool,
    /// Half-life of temporal decay, in days. Default: `30`.
    #[serde(default = "default_temporal_decay_half_life_days")]
    pub temporal_decay_half_life_days: u32,
    /// Enable MMR. Default comes from `crate::defaults::default_true`.
    #[serde(default = "default_true")]
    pub mmr_enabled: bool,
    /// MMR lambda. Default: `0.7`. Not range-checked here.
    #[serde(default = "default_mmr_lambda")]
    pub mmr_lambda: f32,
    /// Enable importance scoring. Default comes from `crate::defaults::default_true`.
    #[serde(default = "default_true")]
    pub importance_enabled: bool,
    /// Weight of the importance score. Must be finite and in `[0.0, 1.0]`; checked by
    /// `validate_importance_weight` during deserialization. Default: `0.15`.
    #[serde(
        default = "default_importance_weight",
        deserialize_with = "validate_importance_weight"
    )]
    pub importance_weight: f64,
}
917
918impl Default for SemanticConfig {
919    fn default() -> Self {
920        Self {
921            enabled: default_semantic_enabled(),
922            recall_limit: default_recall_limit(),
923            vector_weight: default_vector_weight(),
924            keyword_weight: default_keyword_weight(),
925            temporal_decay_enabled: true,
926            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
927            mmr_enabled: true,
928            mmr_lambda: default_mmr_lambda(),
929            importance_enabled: true,
930            importance_weight: default_importance_weight(),
931        }
932    }
933}
934
/// Compression strategy for active context compression (#1161).
///
/// Serialized as an internally tagged enum: the variant name is stored in `snake_case`
/// under the `strategy` key, next to the variant's own fields.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    /// Both fields are required when this variant is selected (neither has a serde default).
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
}
953
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// Only `Serialize` is derived (variant names are written in snake_case). `Deserialize` is
/// implemented by hand in this file and goes through `FromStr`, which also accepts hyphenated
/// spellings and maps the removed `task_aware_mig` value to `Reactive`.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
978
979impl PruningStrategy {
980    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
981    #[must_use]
982    pub fn is_subgoal(self) -> bool {
983        matches!(self, Self::Subgoal | Self::SubgoalMig)
984    }
985}
986
987// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
988// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
989impl<'de> serde::Deserialize<'de> for PruningStrategy {
990    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
991        let s = String::deserialize(deserializer)?;
992        s.parse().map_err(serde::de::Error::custom)
993    }
994}
995
996impl std::str::FromStr for PruningStrategy {
997    type Err = String;
998
999    fn from_str(s: &str) -> Result<Self, Self::Err> {
1000        match s {
1001            "reactive" => Ok(Self::Reactive),
1002            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1003            "mig" => Ok(Self::Mig),
1004            // task_aware_mig was removed (dead code — was routed to scored path only).
1005            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1006            "task_aware_mig" | "task-aware-mig" => {
1007                tracing::warn!(
1008                    "pruning strategy `task_aware_mig` has been removed; \
1009                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1010                );
1011                Ok(Self::Reactive)
1012            }
1013            "subgoal" => Ok(Self::Subgoal),
1014            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1015            other => Err(format!(
1016                "unknown pruning strategy `{other}`, expected \
1017                 reactive|task_aware|mig|subgoal|subgoal_mig"
1018            )),
1019        }
1020    }
1021}
1022
/// Configuration for active context compression (#1161).
///
/// The struct-level `#[serde(default)]` fills every omitted key from the derived `Default`
/// (both strategies `Reactive`, empty strings, `archive_tool_outputs = false`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionConfig {
    /// Compression strategy.
    ///
    /// Flattened: the `strategy` tag key and any variant fields (e.g. `threshold_tokens`)
    /// live at the same level as the other keys of this section, not in a nested table.
    #[serde(flatten)]
    pub strategy: CompressionStrategy,
    /// Tool-output pruning strategy (requires `context-compression` feature).
    pub pruning_strategy: PruningStrategy,
    /// Model to use for compression summaries.
    ///
    /// Currently unused — the primary summary provider is used regardless of this value.
    /// Reserved for future per-compression model selection. Setting this field has no effect.
    pub model: String,
    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub compress_provider: String,
    /// Compaction probe: validates summary quality before committing it (#1609).
    #[serde(default)]
    pub probe: zeph_memory::CompactionProbeConfig,
    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
    ///
    /// When enabled, tool output bodies in the compaction range are saved to
    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
    /// The LLM summarizes placeholder messages; archived content is appended as
    /// a postfix after summarization so references survive compaction.
    /// Default: `false`.
    #[serde(default)]
    pub archive_tool_outputs: bool,
}
1053
/// Serde default for `SidequestConfig::interval_turns`: eviction runs every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    const INTERVAL_TURNS: u32 = 4;
    INTERVAL_TURNS
}
1057
/// Serde default for `SidequestConfig::max_eviction_ratio`: at most half of the tool outputs
/// per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    const MAX_EVICTION_RATIO: f32 = 0.5;
    MAX_EVICTION_RATIO
}
1061
/// Serde default for `SidequestConfig::max_cursors`: 30 cursor entries in the eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    const MAX_CURSORS: usize = 30;
    MAX_CURSORS
}
1065
/// Serde default for `SidequestConfig::min_cursor_tokens`: outputs under 100 tokens are not
/// eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    const MIN_CURSOR_TOKENS: usize = 100;
    MIN_CURSOR_TOKENS
}
1069
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Every key is optional: omitted keys take the values produced by the `Default` impl and
/// the per-field `default_sidequest_*` helpers.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    ///
    /// NOTE(review): no range validation is attached to this field; values outside
    /// `[0.0, 1.0]` are accepted at load time — confirm the consumer clamps them.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1090
1091impl Default for SidequestConfig {
1092    fn default() -> Self {
1093        Self {
1094            enabled: false,
1095            interval_turns: default_sidequest_interval_turns(),
1096            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1097            max_cursors: default_sidequest_max_cursors(),
1098            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1099        }
1100    }
1101}
1102
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// All keys are optional; omitted keys take the values from the `Default` impl and the
/// per-field `default_*` helpers.
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Master switch for graph memory. Default: `false`.
    pub enabled: bool,
    /// Model identifier for extraction. Default: `""`.
    ///
    /// NOTE(review): how an empty value is resolved is not visible here — confirm with the
    /// consumer.
    pub extract_model: String,
    /// Cap on entities per message. Default: `default_graph_max_entities_per_message()`.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Cap on edges per message. Default: `default_graph_max_edges_per_message()`.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Interval between community refreshes. Default:
    /// `default_graph_community_refresh_interval()`.
    ///
    /// NOTE(review): the unit (messages, turns, …) is not stated in this file section — confirm.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold for entity matching. Default:
    /// `default_graph_entity_similarity_threshold()`. Not range-validated here.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Extraction timeout in seconds. Default: `default_graph_extraction_timeout_secs()`.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Toggle for embedding-based entity resolution. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Threshold for treating an entity match as ambiguous. Default:
    /// `default_graph_entity_ambiguous_threshold()`. Not range-validated here.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum hop count for graph traversal. Default: `default_graph_max_hops()`.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Graph recall result limit. Default: `default_graph_recall_limit()`.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    ///
    /// Values read from configuration are checked by `validate_temporal_decay_rate`.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    ///
    /// The range is enforced at load time by `validate_link_weight_decay_lambda`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
}
1204
1205impl Default for GraphConfig {
1206    fn default() -> Self {
1207        Self {
1208            enabled: false,
1209            extract_model: String::new(),
1210            max_entities_per_message: default_graph_max_entities_per_message(),
1211            max_edges_per_message: default_graph_max_edges_per_message(),
1212            community_refresh_interval: default_graph_community_refresh_interval(),
1213            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
1214            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
1215            use_embedding_resolution: false,
1216            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
1217            max_hops: default_graph_max_hops(),
1218            recall_limit: default_graph_recall_limit(),
1219            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
1220            max_entities: 0,
1221            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
1222            community_summary_concurrency: default_graph_community_summary_concurrency(),
1223            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
1224            temporal_decay_rate: default_graph_temporal_decay_rate(),
1225            edge_history_limit: default_graph_edge_history_limit(),
1226            note_linking: NoteLinkingConfig::default(),
1227            spreading_activation: SpreadingActivationConfig::default(),
1228            link_weight_decay_lambda: default_link_weight_decay_lambda(),
1229            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
1230            belief_revision: BeliefRevisionConfig::default(),
1231            rpe: RpeConfig::default(),
1232        }
1233    }
1234}
1235
/// Serde default for `ConsolidationConfig::confidence_threshold` (`0.7`).
fn default_consolidation_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.7;
    CONFIDENCE_THRESHOLD
}
1239
/// Serde default for `ConsolidationConfig::sweep_interval_secs`: 3600 seconds.
fn default_consolidation_sweep_interval_secs() -> u64 {
    const SWEEP_INTERVAL_SECS: u64 = 3_600;
    SWEEP_INTERVAL_SECS
}
1243
/// Serde default for `ConsolidationConfig::sweep_batch_size`: 50 messages per sweep cycle.
fn default_consolidation_sweep_batch_size() -> usize {
    const SWEEP_BATCH_SIZE: usize = 50;
    SWEEP_BATCH_SIZE
}
1247
/// Serde default for `ConsolidationConfig::similarity_threshold` (`0.85`).
fn default_consolidation_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
1251
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// All keys are optional. Neither threshold field has a `deserialize_with` validator, so
/// out-of-range values are accepted at load time.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: String,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
}
1280
1281impl Default for ConsolidationConfig {
1282    fn default() -> Self {
1283        Self {
1284            enabled: false,
1285            consolidation_provider: String::new(),
1286            confidence_threshold: default_consolidation_confidence_threshold(),
1287            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
1288            sweep_batch_size: default_consolidation_sweep_batch_size(),
1289            similarity_threshold: default_consolidation_similarity_threshold(),
1290        }
1291    }
1292}
1293
/// Serde default for `GraphConfig::link_weight_decay_lambda` (`0.95`).
fn default_link_weight_decay_lambda() -> f64 {
    const DECAY_LAMBDA: f64 = 0.95;
    DECAY_LAMBDA
}
1297
/// Serde default for `GraphConfig::link_weight_decay_interval_secs`: 86400 seconds (24 hours).
fn default_link_weight_decay_interval_secs() -> u64 {
    const DECAY_INTERVAL_SECS: u64 = 86_400;
    DECAY_INTERVAL_SECS
}
1301
1302fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
1303where
1304    D: serde::Deserializer<'de>,
1305{
1306    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
1307    if value.is_nan() || value.is_infinite() {
1308        return Err(serde::de::Error::custom(
1309            "link_weight_decay_lambda must be a finite number",
1310        ));
1311    }
1312    if !(value > 0.0 && value <= 1.0) {
1313        return Err(serde::de::Error::custom(
1314            "link_weight_decay_lambda must be in (0.0, 1.0]",
1315        ));
1316    }
1317    Ok(value)
1318}
1319
1320fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
1321where
1322    D: serde::Deserializer<'de>,
1323{
1324    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1325    if value.is_nan() || value.is_infinite() {
1326        return Err(serde::de::Error::custom(
1327            "threshold must be a finite number",
1328        ));
1329    }
1330    if !(0.0..=1.0).contains(&value) {
1331        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
1332    }
1333    Ok(value)
1334}
1335
1336fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
1337where
1338    D: serde::Deserializer<'de>,
1339{
1340    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1341    if value.is_nan() || value.is_infinite() {
1342        return Err(serde::de::Error::custom(
1343            "fast_path_margin must be a finite number",
1344        ));
1345    }
1346    if !(0.0..=1.0).contains(&value) {
1347        return Err(serde::de::Error::custom(
1348            "fast_path_margin must be in [0.0, 1.0]",
1349        ));
1350    }
1351    Ok(value)
1352}
1353
/// Default composite admission threshold used by `AdmissionConfig::default` (`0.40`).
fn default_admission_threshold() -> f32 {
    const ADMISSION_THRESHOLD: f32 = 0.40;
    ADMISSION_THRESHOLD
}
1357
/// Default fast-path margin used by `AdmissionConfig::default` (`0.15`).
fn default_admission_fast_path_margin() -> f32 {
    const FAST_PATH_MARGIN: f32 = 0.15;
    FAST_PATH_MARGIN
}
1361
/// Serde default for `AdmissionConfig::rl_min_samples`: 500 training samples.
fn default_rl_min_samples() -> u32 {
    const RL_MIN_SAMPLES: u32 = 500;
    RL_MIN_SAMPLES
}
1365
/// Serde default for `AdmissionConfig::rl_retrain_interval_secs`: 3600 seconds.
fn default_rl_retrain_interval_secs() -> u64 {
    const RL_RETRAIN_INTERVAL_SECS: u64 = 3_600;
    RL_RETRAIN_INTERVAL_SECS
}
1369
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized as a plain snake_case string: `heuristic` or `rl`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
1384
1385fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
1386where
1387    D: serde::Deserializer<'de>,
1388{
1389    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1390    if value < 0.0 {
1391        return Err(serde::de::Error::custom(
1392            "admission weight must be non-negative (>= 0.0)",
1393        ));
1394    }
1395    Ok(value)
1396}
1397
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative.
///
/// Any key may be omitted: the struct-level `#[serde(default)]` fills missing weights from
/// the `Default` impl. Each weight that is present is checked by `validate_admission_weight`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
1426
1427impl Default for AdmissionWeights {
1428    fn default() -> Self {
1429        Self {
1430            future_utility: 0.30,
1431            factual_confidence: 0.15,
1432            semantic_novelty: 0.30,
1433            temporal_recency: 0.10,
1434            content_type_prior: 0.15,
1435            goal_utility: 0.0,
1436        }
1437    }
1438}
1439
1440impl AdmissionWeights {
1441    /// Return weights normalized so they sum to 1.0.
1442    ///
1443    /// All weights are non-negative; the sum is always > 0 when defaults are used.
1444    #[must_use]
1445    pub fn normalized(&self) -> Self {
1446        let sum = self.future_utility
1447            + self.factual_confidence
1448            + self.semantic_novelty
1449            + self.temporal_recency
1450            + self.content_type_prior
1451            + self.goal_utility;
1452        if sum <= f32::EPSILON {
1453            return Self::default();
1454        }
1455        Self {
1456            future_utility: self.future_utility / sum,
1457            factual_confidence: self.factual_confidence / sum,
1458            semantic_novelty: self.semantic_novelty / sum,
1459            temporal_recency: self.temporal_recency / sum,
1460            content_type_prior: self.content_type_prior / sum,
1461            goal_utility: self.goal_utility / sum,
1462        }
1463    }
1464}
1465
/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
///
/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
/// Messages below the composite admission threshold are rejected and not persisted.
///
/// All keys are optional; omitted keys take the values from the `Default` impl and the
/// per-field `default_*` helpers.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionConfig {
    /// Enable A-MAC admission control. Default: `false`.
    pub enabled: bool,
    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
    /// Default: `0.40`.
    ///
    /// The range is enforced at load time by `validate_admission_threshold`.
    #[serde(deserialize_with = "validate_admission_threshold")]
    pub threshold: f32,
    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
    ///
    /// The range is enforced at load time by `validate_admission_fast_path_margin`.
    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
    pub fast_path_margin: f32,
    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub admission_provider: String,
    /// Per-factor weights. Normalized at runtime. Default (in field order, including the
    /// trailing `goal_utility`): `{0.30, 0.15, 0.30, 0.10, 0.15, 0.0}`.
    pub weights: AdmissionWeights,
    /// Admission decision strategy. Default: `heuristic`.
    #[serde(default)]
    pub admission_strategy: AdmissionStrategy,
    /// Minimum training samples before the RL model is activated.
    /// Below this count the system falls back to `Heuristic`. Default: `500`.
    #[serde(default = "default_rl_min_samples")]
    pub rl_min_samples: u32,
    /// Background RL model retraining interval in seconds. Default: `3600`.
    #[serde(default = "default_rl_retrain_interval_secs")]
    pub rl_retrain_interval_secs: u64,
    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
    /// Zero regression when `false`. Default: `false`.
    #[serde(default)]
    pub goal_conditioned_write: bool,
    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
    /// Used only for borderline cases (similarity within 0.1 of threshold).
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub goal_utility_provider: String,
    /// Minimum cosine similarity between goal embedding and candidate memory
    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
    #[serde(default = "default_goal_utility_threshold")]
    pub goal_utility_threshold: f32,
    /// Weight of the `goal_utility` factor in the composite admission score.
    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
    ///
    /// NOTE(review): `weights.goal_utility` (default `0.0`) also describes a goal-utility
    /// weight; which of the two the scorer reads is not visible here — confirm.
    #[serde(default = "default_goal_utility_weight")]
    pub goal_utility_weight: f32,
}
1517
/// Serde default for `AdmissionConfig::goal_utility_threshold` (`0.4`).
fn default_goal_utility_threshold() -> f32 {
    const GOAL_UTILITY_THRESHOLD: f32 = 0.4;
    GOAL_UTILITY_THRESHOLD
}
1521
/// Serde default for `AdmissionConfig::goal_utility_weight` (`0.25`).
fn default_goal_utility_weight() -> f32 {
    const GOAL_UTILITY_WEIGHT: f32 = 0.25;
    GOAL_UTILITY_WEIGHT
}
1525
1526impl Default for AdmissionConfig {
1527    fn default() -> Self {
1528        Self {
1529            enabled: false,
1530            threshold: default_admission_threshold(),
1531            fast_path_margin: default_admission_fast_path_margin(),
1532            admission_provider: String::new(),
1533            weights: AdmissionWeights::default(),
1534            admission_strategy: AdmissionStrategy::default(),
1535            rl_min_samples: default_rl_min_samples(),
1536            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
1537            goal_conditioned_write: false,
1538            goal_utility_provider: String::new(),
1539            goal_utility_threshold: default_goal_utility_threshold(),
1540            goal_utility_weight: default_goal_utility_weight(),
1541        }
1542    }
1543}
1544
/// Routing strategy for `[memory.store_routing]`.
///
/// Serialized as a plain snake_case string: `heuristic`, `llm`, or `hybrid`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
1557
/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
///
/// Controls how each query is classified and routed to the appropriate memory
/// backend(s), avoiding unnecessary store queries for simple lookups.
///
/// All keys are optional; the struct-level `#[serde(default)]` fills omitted keys from the
/// `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct StoreRoutingConfig {
    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
    /// directly (existing behavior). Default: `false`.
    pub enabled: bool,
    /// Routing strategy. Default: `heuristic`.
    pub strategy: StoreRoutingStrategy,
    /// Provider name from `[[llm.providers]]` for LLM-based classification.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub routing_classifier_provider: String,
    /// Route to use when the classifier is uncertain (confidence < threshold).
    /// Default: `"hybrid"`.
    ///
    /// NOTE(review): this is a free-form string; the set of accepted route names is not
    /// checked at load time — confirm against the router.
    pub fallback_route: String,
    /// Confidence threshold below which `HybridRouter` escalates to LLM.
    /// Range: `[0.0, 1.0]`. Default: `0.7`.
    ///
    /// NOTE(review): the documented range is not enforced by a validator on this field.
    pub confidence_threshold: f32,
}
1580
1581impl Default for StoreRoutingConfig {
1582    fn default() -> Self {
1583        Self {
1584            enabled: false,
1585            strategy: StoreRoutingStrategy::Heuristic,
1586            routing_classifier_provider: String::new(),
1587            fallback_route: "hybrid".into(),
1588            confidence_threshold: 0.7,
1589        }
1590    }
1591}
1592
1593#[cfg(test)]
1594mod tests {
1595    use super::*;
1596
1597    // Verify that serde deserialization routes through FromStr so that removed variants
1598    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
1599    #[test]
1600    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
1601        #[derive(serde::Deserialize)]
1602        struct Wrapper {
1603            #[allow(dead_code)]
1604            pruning_strategy: PruningStrategy,
1605        }
1606        let toml = r#"pruning_strategy = "task_aware_mig""#;
1607        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
1608        assert_eq!(
1609            w.pruning_strategy,
1610            PruningStrategy::Reactive,
1611            "task_aware_mig must fall back to Reactive"
1612        );
1613    }
1614
1615    #[test]
1616    fn pruning_strategy_toml_round_trip() {
1617        #[derive(serde::Deserialize)]
1618        struct Wrapper {
1619            #[allow(dead_code)]
1620            pruning_strategy: PruningStrategy,
1621        }
1622        for (input, expected) in [
1623            ("reactive", PruningStrategy::Reactive),
1624            ("task_aware", PruningStrategy::TaskAware),
1625            ("mig", PruningStrategy::Mig),
1626        ] {
1627            let toml = format!(r#"pruning_strategy = "{input}""#);
1628            let w: Wrapper = toml::from_str(&toml)
1629                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
1630            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
1631        }
1632    }
1633
1634    #[test]
1635    fn pruning_strategy_toml_unknown_value_errors() {
1636        #[derive(serde::Deserialize)]
1637        #[allow(dead_code)]
1638        struct Wrapper {
1639            pruning_strategy: PruningStrategy,
1640        }
1641        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
1642        assert!(
1643            toml::from_str::<Wrapper>(toml).is_err(),
1644            "unknown strategy must produce an error"
1645        );
1646    }
1647
1648    #[test]
1649    fn tier_config_defaults_are_correct() {
1650        let cfg = TierConfig::default();
1651        assert!(!cfg.enabled);
1652        assert_eq!(cfg.promotion_min_sessions, 3);
1653        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
1654        assert_eq!(cfg.sweep_interval_secs, 3600);
1655        assert_eq!(cfg.sweep_batch_size, 100);
1656    }
1657
1658    #[test]
1659    fn tier_config_rejects_min_sessions_below_2() {
1660        let toml = "promotion_min_sessions = 1";
1661        assert!(toml::from_str::<TierConfig>(toml).is_err());
1662    }
1663
1664    #[test]
1665    fn tier_config_rejects_similarity_threshold_below_0_5() {
1666        let toml = "similarity_threshold = 0.4";
1667        assert!(toml::from_str::<TierConfig>(toml).is_err());
1668    }
1669
1670    #[test]
1671    fn tier_config_rejects_zero_sweep_batch_size() {
1672        let toml = "sweep_batch_size = 0";
1673        assert!(toml::from_str::<TierConfig>(toml).is_err());
1674    }
1675
1676    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
1677        let input = format!("importance_weight = {toml_val}");
1678        toml::from_str::<SemanticConfig>(&input)
1679    }
1680
1681    #[test]
1682    fn importance_weight_default_is_0_15() {
1683        let cfg = SemanticConfig::default();
1684        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
1685    }
1686
1687    #[test]
1688    fn importance_weight_valid_zero() {
1689        let cfg = deserialize_importance_weight("0.0").unwrap();
1690        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
1691    }
1692
1693    #[test]
1694    fn importance_weight_valid_one() {
1695        let cfg = deserialize_importance_weight("1.0").unwrap();
1696        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
1697    }
1698
1699    #[test]
1700    fn importance_weight_rejects_near_zero_negative() {
1701        // TOML does not have a NaN literal, but we can test via a f64 that
1702        // the validator rejects out-of-range values. Test with negative here
1703        // and rely on validate_importance_weight rejecting non-finite via
1704        // a constructed deserializer call.
1705        let result = deserialize_importance_weight("-0.01");
1706        assert!(
1707            result.is_err(),
1708            "negative importance_weight must be rejected"
1709        );
1710    }
1711
1712    #[test]
1713    fn importance_weight_rejects_negative() {
1714        let result = deserialize_importance_weight("-1.0");
1715        assert!(result.is_err(), "negative value must be rejected");
1716    }
1717
1718    #[test]
1719    fn importance_weight_rejects_greater_than_one() {
1720        let result = deserialize_importance_weight("1.01");
1721        assert!(result.is_err(), "value > 1.0 must be rejected");
1722    }
1723
1724    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────
1725
1726    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
1727    #[test]
1728    fn admission_weights_normalized_sums_to_one() {
1729        let w = AdmissionWeights {
1730            future_utility: 2.0,
1731            factual_confidence: 1.0,
1732            semantic_novelty: 3.0,
1733            temporal_recency: 1.0,
1734            content_type_prior: 3.0,
1735            goal_utility: 0.0,
1736        };
1737        let n = w.normalized();
1738        let sum = n.future_utility
1739            + n.factual_confidence
1740            + n.semantic_novelty
1741            + n.temporal_recency
1742            + n.content_type_prior;
1743        assert!(
1744            (sum - 1.0).abs() < 0.001,
1745            "normalized weights must sum to 1.0, got {sum}"
1746        );
1747    }
1748
1749    // Test: already-normalized weights are preserved.
1750    #[test]
1751    fn admission_weights_normalized_preserves_already_unit_sum() {
1752        let w = AdmissionWeights::default();
1753        let n = w.normalized();
1754        let sum = n.future_utility
1755            + n.factual_confidence
1756            + n.semantic_novelty
1757            + n.temporal_recency
1758            + n.content_type_prior;
1759        assert!(
1760            (sum - 1.0).abs() < 0.001,
1761            "default weights sum to ~1.0 after normalization"
1762        );
1763    }
1764
1765    // Test: zero weights fall back to default (no divide-by-zero panic).
1766    #[test]
1767    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
1768        let w = AdmissionWeights {
1769            future_utility: 0.0,
1770            factual_confidence: 0.0,
1771            semantic_novelty: 0.0,
1772            temporal_recency: 0.0,
1773            content_type_prior: 0.0,
1774            goal_utility: 0.0,
1775        };
1776        let n = w.normalized();
1777        let default = AdmissionWeights::default();
1778        assert!(
1779            (n.future_utility - default.future_utility).abs() < 0.001,
1780            "zero-sum weights must fall back to defaults"
1781        );
1782    }
1783
1784    // Test: AdmissionConfig default values match documented defaults.
1785    #[test]
1786    fn admission_config_defaults() {
1787        let cfg = AdmissionConfig::default();
1788        assert!(!cfg.enabled);
1789        assert!((cfg.threshold - 0.40).abs() < 0.001);
1790        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
1791        assert!(cfg.admission_provider.is_empty());
1792    }
1793
1794    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────
1795
1796    #[test]
1797    fn spreading_activation_default_recall_timeout_ms_is_1000() {
1798        let cfg = SpreadingActivationConfig::default();
1799        assert_eq!(
1800            cfg.recall_timeout_ms, 1000,
1801            "default recall_timeout_ms must be 1000ms"
1802        );
1803    }
1804
1805    #[test]
1806    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
1807        #[derive(serde::Deserialize)]
1808        struct Wrapper {
1809            recall_timeout_ms: u64,
1810        }
1811        let toml = "recall_timeout_ms = 500";
1812        let w: Wrapper = toml::from_str(toml).unwrap();
1813        assert_eq!(w.recall_timeout_ms, 500);
1814    }
1815
1816    #[test]
1817    fn spreading_activation_validate_cross_field_constraints() {
1818        let mut cfg = SpreadingActivationConfig::default();
1819        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
1820        assert!(cfg.validate().is_ok());
1821
1822        // Equal thresholds must be rejected.
1823        cfg.activation_threshold = 0.5;
1824        cfg.inhibition_threshold = 0.5;
1825        assert!(cfg.validate().is_err());
1826    }
1827}
zeph_config/memory.rs

zeph_config/
memory.rs