// zeph_config/memory.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::defaults::{default_sqlite_path_field, default_true};
7use crate::providers::ProviderName;
8
// ---------------------------------------------------------------------------
// Serde default helpers.
//
// Each function below backs a `#[serde(default = "...")]` attribute on one of
// the config structs in this module (or on structs defined outside this
// chunk). Keeping them as named functions makes every default value greppable
// in one place and lets serde call them lazily.
// ---------------------------------------------------------------------------

/// `MemoryConfig::sqlite_pool_size` default: 5 connections.
fn default_sqlite_pool_size() -> u32 {
    5
}

/// `SessionsConfig::max_history` default (session list cap).
fn default_max_history() -> usize {
    100
}

/// `SessionsConfig::title_max_chars` default (auto-title length cap).
fn default_title_max_chars() -> usize {
    60
}

/// Vector collection name for ingested documents.
fn default_document_collection() -> String {
    "zeph_documents".into()
}

/// Document chunk size (unit presumably characters or tokens — confirm in consumer).
fn default_document_chunk_size() -> usize {
    1000
}

/// Overlap between consecutive document chunks (same unit as chunk size).
fn default_document_chunk_overlap() -> usize {
    100
}

/// Number of document chunks retrieved per query.
fn default_document_top_k() -> usize {
    3
}

/// `MemoryConfig::autosave_min_length` default.
fn default_autosave_min_length() -> usize {
    20
}

/// `MemoryConfig::tool_call_cutoff` default.
fn default_tool_call_cutoff() -> usize {
    6
}

/// `MemoryConfig::token_safety_margin` default; `1.0` applies no extra margin.
fn default_token_safety_margin() -> f32 {
    1.0
}

/// `MemoryConfig::redact_credentials` default: redaction on.
fn default_redact_credentials() -> bool {
    true
}

/// `MemoryConfig::qdrant_url` default (6334 is Qdrant's gRPC port).
fn default_qdrant_url() -> String {
    "http://localhost:6334".into()
}

/// `MemoryConfig::summarization_threshold` default.
fn default_summarization_threshold() -> usize {
    50
}

/// `MemoryConfig::context_budget_tokens` default; `0` presumably means
/// "no explicit budget" and defers to `auto_budget` — confirm in consumer.
fn default_context_budget_tokens() -> usize {
    0
}

/// Soft compaction trigger: 60% of the context budget.
fn default_soft_compaction_threshold() -> f32 {
    0.60
}

/// Hard compaction trigger: 90% of the context budget.
fn default_hard_compaction_threshold() -> f32 {
    0.90
}

/// `MemoryConfig::compaction_preserve_tail` default (messages kept verbatim).
fn default_compaction_preserve_tail() -> usize {
    6
}

/// `MemoryConfig::compaction_cooldown_turns` default.
fn default_compaction_cooldown_turns() -> u8 {
    2
}

/// `MemoryConfig::auto_budget` default: automatic budget derivation on.
fn default_auto_budget() -> bool {
    true
}

/// `MemoryConfig::prune_protect_tokens` default.
fn default_prune_protect_tokens() -> usize {
    40_000
}

/// `MemoryConfig::cross_session_score_threshold` default.
fn default_cross_session_score_threshold() -> f32 {
    0.35
}

/// Temporal decay half-life, in days (field declared outside this chunk).
fn default_temporal_decay_half_life_days() -> u32 {
    30
}

/// MMR lambda default; presumably the relevance/diversity trade-off — confirm.
fn default_mmr_lambda() -> f32 {
    0.7
}

/// Semantic recall is enabled by default.
fn default_semantic_enabled() -> bool {
    true
}

/// Semantic recall result cap.
fn default_recall_limit() -> usize {
    5
}

/// Hybrid search: weight of the vector-similarity score.
fn default_vector_weight() -> f64 {
    0.7
}

/// Hybrid search: weight of the keyword score.
fn default_keyword_weight() -> f64 {
    0.3
}

/// Cap on entities extracted from a single message.
fn default_graph_max_entities_per_message() -> usize {
    10
}

/// Cap on edges extracted from a single message.
fn default_graph_max_edges_per_message() -> usize {
    15
}

/// Community refresh cadence (unit presumably messages — confirm in consumer).
fn default_graph_community_refresh_interval() -> usize {
    100
}

/// Byte cap on community-summary prompts.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192
}

/// Concurrent community-summary generation tasks.
fn default_graph_community_summary_concurrency() -> usize {
    4
}

/// Edge batch size for LPA (label propagation) community detection.
fn default_lpa_edge_chunk_size() -> usize {
    10_000
}

/// Similarity at which two extracted entities are treated as the same.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}

/// Similarity below which an entity match is considered ambiguous.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70
}

/// Timeout for a graph extraction call, in seconds.
fn default_graph_extraction_timeout_secs() -> u64 {
    15
}

/// Traversal depth for graph recall.
fn default_graph_max_hops() -> u32 {
    2
}

/// Result cap for graph recall.
fn default_graph_recall_limit() -> usize {
    10
}

/// Days to retain expired edges before pruning.
fn default_graph_expired_edge_retention_days() -> u32 {
    90
}

/// `0.0` disables temporal decay; valid range is `[0.0, 10.0]`
/// (see `validate_temporal_decay_rate`).
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}

/// History entries kept per graph edge.
fn default_graph_edge_history_limit() -> usize {
    100
}

/// See `SpreadingActivationConfig::decay_lambda`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}

/// See `SpreadingActivationConfig::max_hops`.
fn default_spreading_activation_max_hops() -> u32 {
    3
}

/// See `SpreadingActivationConfig::activation_threshold`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}

/// See `SpreadingActivationConfig::inhibition_threshold`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}

/// See `SpreadingActivationConfig::max_activated_nodes`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}

/// See `SpreadingActivationConfig::recall_timeout_ms`.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1000
}

/// See `NoteLinkingConfig::similarity_threshold`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}

/// See `NoteLinkingConfig::top_k`.
fn default_note_linking_top_k() -> usize {
    10
}

/// See `NoteLinkingConfig::timeout_secs`.
fn default_note_linking_timeout_secs() -> u64 {
    5
}

/// See `MemoryConfig::shutdown_summary`.
fn default_shutdown_summary() -> bool {
    true
}

/// See `MemoryConfig::shutdown_summary_min_messages`.
fn default_shutdown_summary_min_messages() -> usize {
    4
}

/// See `MemoryConfig::shutdown_summary_max_messages`.
fn default_shutdown_summary_max_messages() -> usize {
    20
}

/// See `MemoryConfig::shutdown_summary_timeout_secs`.
fn default_shutdown_summary_timeout_secs() -> u64 {
    10
}
224
225fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
226where
227    D: serde::Deserializer<'de>,
228{
229    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
230    if value.is_nan() || value.is_infinite() {
231        return Err(serde::de::Error::custom(
232            "similarity_threshold must be a finite number",
233        ));
234    }
235    if !(0.5..=1.0).contains(&value) {
236        return Err(serde::de::Error::custom(
237            "similarity_threshold must be in [0.5, 1.0]",
238        ));
239    }
240    Ok(value)
241}
242
243fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
244where
245    D: serde::Deserializer<'de>,
246{
247    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
248    if value < 2 {
249        return Err(serde::de::Error::custom(
250            "promotion_min_sessions must be >= 2",
251        ));
252    }
253    Ok(value)
254}
255
256fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
257where
258    D: serde::Deserializer<'de>,
259{
260    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
261    if value == 0 {
262        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
263    }
264    Ok(value)
265}
266
/// Default for `TierConfig::promotion_min_sessions` (validator requires >= 2).
fn default_tier_promotion_min_sessions() -> u32 {
    3
}

/// Default for `TierConfig::similarity_threshold` (validator requires [0.5, 1.0]).
fn default_tier_similarity_threshold() -> f32 {
    0.92
}

/// Default for `TierConfig::sweep_interval_secs`: hourly.
fn default_tier_sweep_interval_secs() -> u64 {
    3600
}

/// Default for `TierConfig::sweep_batch_size` (validator requires >= 1).
fn default_tier_sweep_batch_size() -> usize {
    100
}

/// Default for `TierConfig::scene_similarity_threshold` (validator requires [0.5, 1.0]).
fn default_scene_similarity_threshold() -> f32 {
    0.80
}

/// Default for `TierConfig::scene_batch_size` (validator requires >= 1).
fn default_scene_batch_size() -> usize {
    50
}
290
291fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
292where
293    D: serde::Deserializer<'de>,
294{
295    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
296    if value.is_nan() || value.is_infinite() {
297        return Err(serde::de::Error::custom(
298            "scene_similarity_threshold must be a finite number",
299        ));
300    }
301    if !(0.5..=1.0).contains(&value) {
302        return Err(serde::de::Error::custom(
303            "scene_similarity_threshold must be in [0.5, 1.0]",
304        ));
305    }
306    Ok(value)
307}
308
309fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
310where
311    D: serde::Deserializer<'de>,
312{
313    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
314    if value == 0 {
315        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
316    }
317    Ok(value)
318}
319
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// # Validation
///
/// Constraints enforced at deserialization time:
/// - `similarity_threshold` in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// NOTE(review): a field-level `deserialize_with` validator runs only when the
/// field is present in the TOML; omitted fields take the `Default` values below,
/// all of which satisfy the constraints.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
366
/// Default for `TierConfig::scene_sweep_interval_secs`: 7200 s (2 h).
fn default_scene_sweep_interval_secs() -> u64 {
    7200
}
370
371impl Default for TierConfig {
372    fn default() -> Self {
373        Self {
374            enabled: false,
375            promotion_min_sessions: default_tier_promotion_min_sessions(),
376            similarity_threshold: default_tier_similarity_threshold(),
377            sweep_interval_secs: default_tier_sweep_interval_secs(),
378            sweep_batch_size: default_tier_sweep_batch_size(),
379            scene_enabled: false,
380            scene_similarity_threshold: default_scene_similarity_threshold(),
381            scene_batch_size: default_scene_batch_size(),
382            scene_provider: ProviderName::default(),
383            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
384        }
385    }
386}
387
388fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
389where
390    D: serde::Deserializer<'de>,
391{
392    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
393    if value.is_nan() || value.is_infinite() {
394        return Err(serde::de::Error::custom(
395            "temporal_decay_rate must be a finite number",
396        ));
397    }
398    if !(0.0..=10.0).contains(&value) {
399        return Err(serde::de::Error::custom(
400            "temporal_decay_rate must be in [0.0, 10.0]",
401        ));
402    }
403    Ok(value)
404}
405
406fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
407where
408    D: serde::Deserializer<'de>,
409{
410    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
411    if value.is_nan() || value.is_infinite() {
412        return Err(serde::de::Error::custom(
413            "similarity_threshold must be a finite number",
414        ));
415    }
416    if !(0.0..=1.0).contains(&value) {
417        return Err(serde::de::Error::custom(
418            "similarity_threshold must be in [0.0, 1.0]",
419        ));
420    }
421    Ok(value)
422}
423
424fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
425where
426    D: serde::Deserializer<'de>,
427{
428    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
429    if value.is_nan() || value.is_infinite() {
430        return Err(serde::de::Error::custom(
431            "importance_weight must be a finite number",
432        ));
433    }
434    if value < 0.0 {
435        return Err(serde::de::Error::custom(
436            "importance_weight must be non-negative",
437        ));
438    }
439    if value > 1.0 {
440        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
441    }
442    Ok(value)
443}
444
/// Serde default paired with `validate_importance_weight`; the field it backs
/// is declared outside this chunk. Value: `0.15`.
fn default_importance_weight() -> f64 {
    0.15
}
448
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Enforced per-field at deserialization time:
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Enforced cross-field by [`SpreadingActivationConfig::validate`] — serde
/// validators see one field at a time, so callers are expected to invoke it
/// after deserialization:
/// - `activation_threshold < inhibition_threshold`
///
/// Enforced at runtime (not by serde):
/// - `recall_timeout_ms` of 0 is clamped to 100 ms with a warning
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
491
492fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
493where
494    D: serde::Deserializer<'de>,
495{
496    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
497    if value.is_nan() || value.is_infinite() {
498        return Err(serde::de::Error::custom(
499            "decay_lambda must be a finite number",
500        ));
501    }
502    if !(value > 0.0 && value <= 1.0) {
503        return Err(serde::de::Error::custom(
504            "decay_lambda must be in (0.0, 1.0]",
505        ));
506    }
507    Ok(value)
508}
509
510fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
511where
512    D: serde::Deserializer<'de>,
513{
514    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
515    if value == 0 {
516        return Err(serde::de::Error::custom("max_hops must be >= 1"));
517    }
518    Ok(value)
519}
520
521impl SpreadingActivationConfig {
522    /// Validate cross-field constraints that cannot be expressed in per-field validators.
523    ///
524    /// # Errors
525    ///
526    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
527    pub fn validate(&self) -> Result<(), String> {
528        if self.activation_threshold >= self.inhibition_threshold {
529            return Err(format!(
530                "activation_threshold ({}) must be < inhibition_threshold ({})",
531                self.activation_threshold, self.inhibition_threshold
532            ));
533        }
534        Ok(())
535    }
536}
537
/// Default for `SpreadingActivationConfig::seed_structural_weight`.
fn default_seed_structural_weight() -> f32 {
    0.4
}

/// Default for `SpreadingActivationConfig::seed_community_cap`.
fn default_seed_community_cap() -> usize {
    3
}
545
546impl Default for SpreadingActivationConfig {
547    fn default() -> Self {
548        Self {
549            enabled: false,
550            decay_lambda: default_spreading_activation_decay_lambda(),
551            max_hops: default_spreading_activation_max_hops(),
552            activation_threshold: default_spreading_activation_activation_threshold(),
553            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
554            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
555            seed_structural_weight: default_seed_structural_weight(),
556            seed_community_cap: default_seed_community_cap(),
557            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
558        }
559    }
560}
561
/// Kumiho belief revision configuration.
///
/// Disabled by default; when enabled, graph edges are screened for semantic
/// contradictions using the threshold below.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}

/// Default for `BeliefRevisionConfig::similarity_threshold`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85
}

/// Defaults matching the documented per-field values.
impl Default for BeliefRevisionConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            similarity_threshold: default_belief_revision_similarity_threshold(),
        }
    }
}
586
587/// D-MEM RPE-based tiered graph extraction routing configuration.
588#[derive(Debug, Clone, Deserialize, Serialize)]
589#[serde(default)]
590pub struct RpeConfig {
591    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
592    pub enabled: bool,
593    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
594    /// Default: `0.3`.
595    #[serde(deserialize_with = "validate_similarity_threshold")]
596    pub threshold: f32,
597    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
598    pub max_skip_turns: u32,
599}
600
/// Default for `RpeConfig::threshold`.
fn default_rpe_threshold() -> f32 {
    0.3
}

/// Default for `RpeConfig::max_skip_turns`.
fn default_rpe_max_skip_turns() -> u32 {
    5
}

/// Defaults matching the documented per-field values.
impl Default for RpeConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            threshold: default_rpe_threshold(),
            max_skip_turns: default_rpe_max_skip_turns(),
        }
    }
}
618
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}

/// Defaults matching the documented per-field values.
impl Default for NoteLinkingConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            similarity_threshold: default_note_linking_similarity_threshold(),
            top_k: default_note_linking_top_k(),
            timeout_secs: default_note_linking_timeout_secs(),
        }
    }
}
648
/// Vector backend selector for embedding storage.
///
/// Serialized in lowercase (`"qdrant"` / `"sqlite"`) via `rename_all`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Qdrant server backend (endpoint configured via `MemoryConfig::qdrant_url`).
    Qdrant,
    /// Embeddings stored locally in `SQLite`. This is the default backend.
    #[default]
    Sqlite,
}
657
658impl VectorBackend {
659    /// Return the lowercase identifier string for this backend.
660    ///
661    /// # Examples
662    ///
663    /// ```
664    /// use zeph_config::VectorBackend;
665    ///
666    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
667    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
668    /// ```
669    #[must_use]
670    pub fn as_str(&self) -> &'static str {
671        match self {
672            Self::Qdrant => "qdrant",
673            Self::Sqlite => "sqlite",
674        }
675    }
676}
677
/// Memory subsystem configuration, nested under `[memory]` in TOML.
///
/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
/// multi-tier promotion, and all memory-related background tasks.
///
/// # Example (TOML)
///
/// ```toml
/// [memory]
/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
/// qdrant_url = "http://localhost:6334"
/// history_limit = 50
/// summarization_threshold = 50
/// auto_budget = true
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct MemoryConfig {
    /// Guidelines injected into compression prompts; semantics are defined by
    /// `zeph_memory::CompressionGuidelinesConfig`.
    #[serde(default)]
    pub compression_guidelines: zeph_memory::CompressionGuidelinesConfig,
    /// Filesystem path of the `SQLite` database file.
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// Conversation history limit. Required — the only field here with no serde
    /// default. NOTE(review): exact trimming semantics live in the consumer — confirm.
    pub history_limit: u32,
    /// Qdrant endpoint URL. Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    /// Semantic recall settings (`[memory.semantic]`).
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Summarization trigger threshold (presumably a message count — confirm).
    /// Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Explicit context token budget. Default: `0`, which presumably defers
    /// to `auto_budget` — confirm in consumer.
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Budget fraction at which soft compaction begins. Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Budget fraction at which hard compaction begins. Default: `0.90`.
    /// Accepts the legacy TOML key `compaction_threshold` as an alias.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Number of trailing messages preserved by compaction. Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Minimum turns between compaction passes. Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Derive the context budget automatically. Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Token span protected from pruning. Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Minimum score for cross-session recall hits. Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Embedding storage backend. Default: [`VectorBackend::Sqlite`].
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Multiplier applied to token estimates; `1.0` adds no margin. Default: `1.0`.
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Redact credentials before persisting messages. Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    /// Automatically save assistant messages. Default: `true`.
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Minimum message length for autosave (unit presumably characters — confirm).
    /// Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Tool-call cutoff. Default: `6`. NOTE(review): semantics defined by the
    /// consumer — confirm before documenting further.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// `SQLite` connection pool size. Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    /// Session list / auto-title settings (`[memory.sessions]`).
    #[serde(default)]
    pub sessions: SessionsConfig,
    /// Document ingestion settings (`[memory.documents]`).
    #[serde(default)]
    pub documents: DocumentConfig,
    /// Eviction policy; semantics defined by `zeph_memory::EvictionConfig`.
    #[serde(default)]
    pub eviction: zeph_memory::EvictionConfig,
    /// Compression settings (`[memory.compression]`).
    #[serde(default)]
    pub compression: CompressionConfig,
    /// Sidequest settings (`[memory.sidequest]`); semantics defined by `SidequestConfig`.
    #[serde(default)]
    pub sidequest: SidequestConfig,
    /// Entity-graph memory settings (`[memory.graph]`).
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `10`.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
    /// A-MAC adaptive memory admission control.
    ///
    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
    /// if its composite admission score falls below the configured threshold.
    #[serde(default)]
    pub admission: AdmissionConfig,
    /// Session digest generation at session end. Default: disabled.
    #[serde(default)]
    pub digest: DigestConfig,
    /// Context assembly strategy. Default: `full_history` (current behavior).
    #[serde(default)]
    pub context_strategy: ContextStrategy,
    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
    #[serde(default = "default_crossover_turn_threshold")]
    pub crossover_turn_threshold: u32,
    /// All-Mem lifelong memory consolidation sweep.
    ///
    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
    /// messages and merges them into consolidated entries via LLM.
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// `SleepGate` forgetting sweep (#2397).
    ///
    /// When `forgetting.enabled = true`, a background loop periodically decays importance
    /// scores and prunes memories below the forgetting floor.
    #[serde(default)]
    pub forgetting: ForgettingConfig,
    /// `PostgreSQL` connection URL.
    ///
    /// Used when the binary is compiled with `--features postgres`.
    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
    /// Example: `postgres://user:pass@localhost:5432/zeph`
    /// Default: `None` (uses `sqlite_path` instead).
    #[serde(default)]
    pub database_url: Option<String>,
    /// Cost-sensitive store routing (#2444).
    ///
    /// When `store_routing.enabled = true`, query intent is classified and routed to
    /// the cheapest sufficient backend instead of querying all stores on every turn.
    #[serde(default)]
    pub store_routing: StoreRoutingConfig,
    /// Persona memory layer (#2461).
    ///
    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
    /// from conversation history and injected into context after the system prompt.
    #[serde(default)]
    pub persona: PersonaConfig,
    /// Trajectory-informed memory (#2498).
    #[serde(default)]
    pub trajectory: TrajectoryConfig,
    /// Category-aware memory (#2428).
    #[serde(default)]
    pub category: CategoryConfig,
    /// `TiMem` temporal-hierarchical memory tree (#2262).
    #[serde(default)]
    pub tree: TreeConfig,
    /// Time-based microcompact (#2699).
    ///
    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
    /// from context when the session has been idle longer than `gap_threshold_minutes`.
    #[serde(default)]
    pub microcompact: MicrocompactConfig,
    /// autoDream background memory consolidation (#2697).
    ///
    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
    #[serde(default)]
    pub autodream: AutoDreamConfig,
    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
    ///
    /// Before inserting a new key fact, its nearest neighbour is looked up in the
    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
    #[serde(default = "default_key_facts_dedup_threshold")]
    pub key_facts_dedup_threshold: f32,
    /// Experience compression spectrum (#3305).
    ///
    /// Controls three-tier retrieval policy and background skill-promotion engine.
    #[serde(default)]
    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
    /// MemMachine-inspired retrieval-stage tuning (#3340).
    ///
    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
    /// uniformly across graph, hybrid, and vector-only recall paths.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.retrieval]
    /// depth = 40
    /// search_prompt_template = ""
    /// context_format = "structured"
    /// ```
    #[serde(default)]
    pub retrieval: RetrievalConfig,
    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
    ///
    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
    /// and injected before the LLM call.
    #[serde(default)]
    pub reasoning: ReasoningConfig,
    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
    ///
    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.hebbian]
    /// enabled = true
    /// hebbian_lr = 0.1
    /// ```
    #[serde(default)]
    pub hebbian: HebbianConfig,
}
905
/// Default for `MemoryConfig::crossover_turn_threshold`.
fn default_crossover_turn_threshold() -> u32 {
    20
}

/// Default for `MemoryConfig::key_facts_dedup_threshold`.
fn default_key_facts_dedup_threshold() -> f32 {
    0.95
}
913
/// Session digest configuration (#2289), nested under `[memory.digest]` in TOML.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub provider: String,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}

/// Defaults matching the documented per-field values.
impl Default for DigestConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            provider: String::new(),
            max_tokens: 500,
            max_input_messages: 50,
        }
    }
}
939
/// Context assembly strategy (#2288).
///
/// TOML values (via `rename_all = "snake_case"`): `full_history`,
/// `memory_first`, `adaptive`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
955
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
///
/// NOTE: the container-level `#[serde(default)]` and the per-field
/// `#[serde(default = "…")]` attributes overlap; for missing fields the
/// field-level function takes precedence, so both paths yield the same values.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
967
968impl Default for SessionsConfig {
969    fn default() -> Self {
970        Self {
971            max_history: default_max_history(),
972            title_max_chars: default_title_max_chars(),
973        }
974    }
975}
976
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Unlike most sibling configs there is no container-level `#[serde(default)]`;
/// every field carries its own default, which is equivalent for TOML parsing.
pub struct DocumentConfig {
    /// Vector-store collection name for ingested document chunks.
    /// Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Chunk size used when splitting documents. Default: `1000`.
    /// (Units — characters vs. tokens — are not visible here; confirm at the
    /// ingestion site.)
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, same units as `chunk_size`.
    /// Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn.
    /// Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
993
994impl Default for DocumentConfig {
995    fn default() -> Self {
996        Self {
997            collection: default_document_collection(),
998            chunk_size: default_document_chunk_size(),
999            chunk_overlap: default_document_chunk_overlap(),
1000            top_k: default_document_top_k(),
1001            rag_enabled: false,
1002        }
1003    }
1004}
1005
1006/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1007///
1008/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1009/// re-ranking, and hybrid BM25+vector weighting.
1010///
1011/// # Example (TOML)
1012///
1013/// ```toml
1014/// [memory.semantic]
1015/// enabled = true
1016/// recall_limit = 5
1017/// vector_weight = 0.7
1018/// keyword_weight = 0.3
1019/// mmr_lambda = 0.7
1020/// ```
1021#[derive(Debug, Deserialize, Serialize)]
1022#[allow(clippy::struct_excessive_bools)]
1023pub struct SemanticConfig {
1024    /// Enable vector-based semantic recall. Default: `true`.
1025    #[serde(default = "default_semantic_enabled")]
1026    pub enabled: bool,
1027    #[serde(default = "default_recall_limit")]
1028    pub recall_limit: usize,
1029    #[serde(default = "default_vector_weight")]
1030    pub vector_weight: f64,
1031    #[serde(default = "default_keyword_weight")]
1032    pub keyword_weight: f64,
1033    #[serde(default = "default_true")]
1034    pub temporal_decay_enabled: bool,
1035    #[serde(default = "default_temporal_decay_half_life_days")]
1036    pub temporal_decay_half_life_days: u32,
1037    #[serde(default = "default_true")]
1038    pub mmr_enabled: bool,
1039    #[serde(default = "default_mmr_lambda")]
1040    pub mmr_lambda: f32,
1041    #[serde(default = "default_true")]
1042    pub importance_enabled: bool,
1043    #[serde(
1044        default = "default_importance_weight",
1045        deserialize_with = "validate_importance_weight"
1046    )]
1047    pub importance_weight: f64,
1048    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1049    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1050    /// from contending with the guardrail at the API server level (rate limits, Ollama
1051    /// single-model lock). When unset or empty, falls back to the main agent provider.
1052    #[serde(default)]
1053    pub embed_provider: Option<String>,
1054}
1055
1056impl Default for SemanticConfig {
1057    fn default() -> Self {
1058        Self {
1059            enabled: default_semantic_enabled(),
1060            recall_limit: default_recall_limit(),
1061            vector_weight: default_vector_weight(),
1062            keyword_weight: default_keyword_weight(),
1063            temporal_decay_enabled: true,
1064            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1065            mmr_enabled: true,
1066            mmr_lambda: default_mmr_lambda(),
1067            importance_enabled: true,
1068            importance_weight: default_importance_weight(),
1069            embed_provider: None,
1070        }
1071    }
1072}
1073
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// TOML values (via `rename_all = "snake_case"`): `structured`, `plain`.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default. Gives the LLM structured provenance metadata for each recalled
    /// memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1101
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML.  All fields have defaults so existing
/// configs parse unchanged.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped.  Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    // Field-level default is redundant with the container-level `#[serde(default)]`
    // but harmless: serde prefers the field-level function for missing fields.
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1166
/// Serde default: query-bias correction is on (no-op while the persona table
/// is empty).
fn default_query_bias_correction() -> bool {
    true
}

/// Serde default: blend 25% towards the profile centroid.
fn default_query_bias_profile_weight() -> f32 {
    0.25
}

/// Serde default: cache the profile centroid for 5 minutes.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    300
}
1178
1179impl Default for RetrievalConfig {
1180    fn default() -> Self {
1181        Self {
1182            depth: 0,
1183            search_prompt_template: String::new(),
1184            context_format: ContextFormat::default(),
1185            query_bias_correction: default_query_bias_correction(),
1186            query_bias_profile_weight: default_query_bias_profile_weight(),
1187            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1188        }
1189    }
1190}
1191
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// All fields are optional in TOML: the container-level `#[serde(default)]`
/// fills missing fields from the manual [`Default`] impl below.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when empty or unresolvable. Default: `"fast"`.
    pub consolidate_provider: String,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<String>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
}
1253
1254impl Default for HebbianConfig {
1255    fn default() -> Self {
1256        Self {
1257            enabled: false,
1258            hebbian_lr: 0.1,
1259            consolidation_interval_secs: 3600,
1260            consolidation_threshold: 5.0,
1261            consolidate_provider: "fast".to_owned(),
1262            max_candidates_per_sweep: 10,
1263            consolidation_cooldown_secs: 86_400,
1264            consolidation_prompt_timeout_secs: 30,
1265            consolidation_max_neighbors: 20,
1266            spreading_activation: false,
1267            spread_depth: 2,
1268            spread_edge_types: Vec::new(),
1269            step_budget_ms: 8,
1270        }
1271    }
1272}
1273
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged in TOML via `tag = "strategy"`: the variant is selected
/// with `strategy = "reactive" | "proactive" | "autonomous" | "focus"`, and
/// `Proactive`'s fields sit alongside the tag in the same table.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1297
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// Serialization is derived (`snake_case`), but deserialization is hand-written
/// below, routing through `FromStr` so removed variants degrade gracefully.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1322
impl PruningStrategy {
    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
    ///
    /// Takes `self` by value — `PruningStrategy` is `Copy`, so no borrow is needed.
    #[must_use]
    pub fn is_subgoal(self) -> bool {
        matches!(self, Self::Subgoal | Self::SubgoalMig)
    }
}
1330
// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
impl<'de> serde::Deserialize<'de> for PruningStrategy {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        // Expect a plain string token, then delegate all spelling and fallback
        // logic to the single FromStr implementation so the two parse paths
        // cannot diverge.
        let s = String::deserialize(deserializer)?;
        s.parse().map_err(serde::de::Error::custom)
    }
}
1339
1340impl std::str::FromStr for PruningStrategy {
1341    type Err = String;
1342
1343    fn from_str(s: &str) -> Result<Self, Self::Err> {
1344        match s {
1345            "reactive" => Ok(Self::Reactive),
1346            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1347            "mig" => Ok(Self::Mig),
1348            // task_aware_mig was removed (dead code — was routed to scored path only).
1349            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1350            "task_aware_mig" | "task-aware-mig" => {
1351                tracing::warn!(
1352                    "pruning strategy `task_aware_mig` has been removed; \
1353                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1354                );
1355                Ok(Self::Reactive)
1356            }
1357            "subgoal" => Ok(Self::Subgoal),
1358            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1359            other => Err(format!(
1360                "unknown pruning strategy `{other}`, expected \
1361                 reactive|task_aware|mig|subgoal|subgoal_mig"
1362            )),
1363        }
1364    }
1365}
1366
/// Serde default: 70% of the token budget for high-density content (#2481).
fn default_high_density_budget() -> f32 {
    0.7
}

/// Serde default: 30% of the token budget for low-density content (#2481).
fn default_low_density_budget() -> f32 {
    0.3
}
1374
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// All fields are optional in TOML (`#[serde(default)]` + manual [`Default`]).
/// NOTE(review): the documented ranges for `decay_rate` and `forgetting_floor`
/// are not enforced at parse time here — confirm validation happens elsewhere.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
1402
1403impl Default for ForgettingConfig {
1404    fn default() -> Self {
1405        Self {
1406            enabled: false,
1407            decay_rate: 0.1,
1408            forgetting_floor: 0.05,
1409            sweep_interval_secs: 7200,
1410            sweep_batch_size: 500,
1411            replay_window_hours: 24,
1412            replay_min_access_count: 3,
1413            protect_recent_hours: 24,
1414            protect_min_access_count: 3,
1415        }
1416    }
1417}
1418
1419/// Configuration for active context compression (#1161).
1420#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1421#[serde(default)]
1422pub struct CompressionConfig {
1423    /// Compression strategy.
1424    #[serde(flatten)]
1425    pub strategy: CompressionStrategy,
1426    /// Tool-output pruning strategy (requires `context-compression` feature).
1427    pub pruning_strategy: PruningStrategy,
1428    /// Model to use for compression summaries.
1429    ///
1430    /// Currently unused — the primary summary provider is used regardless of this value.
1431    /// Reserved for future per-compression model selection. Setting this field has no effect.
1432    pub model: String,
1433    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1434    /// Falls back to the primary provider when empty. Default: `""`.
1435    pub compress_provider: ProviderName,
1436    /// Compaction probe: validates summary quality before committing it (#1609).
1437    #[serde(default)]
1438    pub probe: zeph_memory::CompactionProbeConfig,
1439    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1440    ///
1441    /// When enabled, tool output bodies in the compaction range are saved to
1442    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1443    /// The LLM summarizes placeholder messages; archived content is appended as
1444    /// a postfix after summarization so references survive compaction.
1445    /// Default: `false`.
1446    #[serde(default)]
1447    pub archive_tool_outputs: bool,
1448    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
1449    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
1450    /// Falls back to the primary provider when empty. Default: `""`.
1451    pub focus_scorer_provider: ProviderName,
1452    /// Token-budget fraction for high-density content in density-aware compression (#2481).
1453    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1454    #[serde(default = "default_high_density_budget")]
1455    pub high_density_budget: f32,
1456    /// Token-budget fraction for low-density content in density-aware compression (#2481).
1457    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1458    #[serde(default = "default_low_density_budget")]
1459    pub low_density_budget: f32,
1460}
1461
/// Serde default: run `SideQuest` eviction every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    4
}

/// Serde default: evict at most half of the tool outputs per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5
}

/// Serde default: cap the eviction prompt at 30 cursor entries.
fn default_sidequest_max_cursors() -> usize {
    30
}

/// Serde default: ignore tool outputs under 100 tokens as eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    100
}
1477
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// The container-level `#[serde(default)]` and the per-field defaults overlap;
/// field-level functions take precedence for missing fields, so both paths
/// yield the same values.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1498
1499impl Default for SidequestConfig {
1500    fn default() -> Self {
1501        Self {
1502            enabled: false,
1503            interval_turns: default_sidequest_interval_turns(),
1504            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1505            max_cursors: default_sidequest_max_cursors(),
1506            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1507        }
1508    }
1509}
1510
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
///
/// TOML values: `synapse`, `bfs`, `astar` (explicit rename), `water_circles`,
/// `beam_search`, `hybrid`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior).
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
    Bfs,
    /// A* shortest-path traversal via petgraph.
    /// Renamed explicitly — `snake_case` would otherwise produce `a_star`.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query.
    Hybrid,
}
1533
/// Serde default: keep the top 10 candidates per hop during beam search.
fn default_beam_width() -> usize {
    10
}
1537
/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
1548
1549impl Default for BeamSearchConfig {
1550    fn default() -> Self {
1551        Self {
1552            beam_width: default_beam_width(),
1553        }
1554    }
1555}
1556
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes.
/// `Default` is derived, so the struct-wide default is `ring_limit = 0` (auto).
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}
1566
/// Serde default: run the graph evolution sweep every 50 turns.
fn default_evolution_sweep_interval() -> usize {
    50
}

/// Serde default: prune zero-retrieval edges whose confidence is below 0.1.
fn default_confidence_prune_threshold() -> f32 {
    0.1
}
1574
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
/// There is no container-level `#[serde(default)]`, but every field carries its
/// own default, so the struct still parses from an empty table.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
1593
1594impl Default for ExperienceConfig {
1595    fn default() -> Self {
1596        Self {
1597            enabled: false,
1598            evolution_sweep_enabled: false,
1599            confidence_prune_threshold: default_confidence_prune_threshold(),
1600            evolution_sweep_interval: default_evolution_sweep_interval(),
1601        }
1602    }
1603}
1604
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable the knowledge graph memory subsystem. Default: `false`.
    pub enabled: bool,
    /// Model identifier used for LLM-based graph extraction. Default: `""` —
    /// presumably falls back to the primary provider's model; confirm against the consumer.
    pub extract_model: String,
    /// Upper bound on entities extracted from a single message.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Upper bound on edges extracted from a single message.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Interval between community detection refreshes
    /// (units not visible here — likely messages or turns; confirm in the caller).
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used during entity resolution (candidates at or above
    /// are treated as the same entity — confirm exact comparison semantics).
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Timeout in seconds for the graph extraction LLM call.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Use embedding-based entity resolution instead of the default resolution path.
    /// Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Threshold at which embedding-based entity resolution considers a match ambiguous.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum hop depth for hop-limited BFS graph recall.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Maximum number of graph results returned during recall.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider for hybrid strategy classification. Empty = use default provider.
    #[serde(default)]
    pub strategy_classifier_provider: String,
    /// Beam search configuration.
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration.
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration.
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
}
1731
/// Default size of the dedicated `SQLite` pool for graph operations.
fn default_graph_pool_size() -> u32 {
    3
}
1735
impl Default for GraphConfig {
    // Graph memory is disabled by default. Every tunable is sourced from the same
    // `default_*` helper that backs its serde attribute, so programmatic construction
    // and TOML deserialization of an empty `[memory.graph]` table agree exactly.
    fn default() -> Self {
        Self {
            enabled: false,
            extract_model: String::new(),
            max_entities_per_message: default_graph_max_entities_per_message(),
            max_edges_per_message: default_graph_max_edges_per_message(),
            community_refresh_interval: default_graph_community_refresh_interval(),
            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
            use_embedding_resolution: false,
            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
            max_hops: default_graph_max_hops(),
            recall_limit: default_graph_recall_limit(),
            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
            max_entities: 0,
            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
            community_summary_concurrency: default_graph_community_summary_concurrency(),
            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
            temporal_decay_rate: default_graph_temporal_decay_rate(),
            edge_history_limit: default_graph_edge_history_limit(),
            note_linking: NoteLinkingConfig::default(),
            spreading_activation: SpreadingActivationConfig::default(),
            retrieval_strategy: GraphRetrievalStrategy::default(),
            strategy_classifier_provider: String::new(),
            beam_search: BeamSearchConfig::default(),
            watercircles: WaterCirclesConfig::default(),
            experience: ExperienceConfig::default(),
            link_weight_decay_lambda: default_link_weight_decay_lambda(),
            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
            belief_revision: BeliefRevisionConfig::default(),
            rpe: RpeConfig::default(),
            pool_size: default_graph_pool_size(),
        }
    }
}
1772
/// Default minimum LLM-assigned confidence for applying a consolidation topology op.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7
}

/// Default interval between consolidation sweeps, in seconds (one hour).
fn default_consolidation_sweep_interval_secs() -> u64 {
    3600
}

/// Default maximum number of messages evaluated per sweep cycle.
fn default_consolidation_sweep_batch_size() -> usize {
    50
}

/// Default minimum cosine similarity for two messages to be consolidation candidates.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85
}
1788
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// Keys missing from TOML fall back to this type's `Default` impl via the
/// container-level `#[serde(default)]`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
}
1817
impl Default for ConsolidationConfig {
    // Consolidation is off by default; numeric defaults deliberately reuse the
    // `default_consolidation_*` serde helpers so the two default paths never diverge.
    fn default() -> Self {
        Self {
            enabled: false,
            consolidation_provider: ProviderName::default(),
            confidence_threshold: default_consolidation_confidence_threshold(),
            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
            sweep_batch_size: default_consolidation_sweep_batch_size(),
            similarity_threshold: default_consolidation_similarity_threshold(),
        }
    }
}
1830
/// Default multiplicative decay factor applied to un-retrieved link weights.
fn default_link_weight_decay_lambda() -> f64 {
    0.95
}

/// Default number of seconds between link weight decay passes (24 hours).
fn default_link_weight_decay_interval_secs() -> u64 {
    86400
}
1838
1839fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
1840where
1841    D: serde::Deserializer<'de>,
1842{
1843    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
1844    if value.is_nan() || value.is_infinite() {
1845        return Err(serde::de::Error::custom(
1846            "link_weight_decay_lambda must be a finite number",
1847        ));
1848    }
1849    if !(value > 0.0 && value <= 1.0) {
1850        return Err(serde::de::Error::custom(
1851            "link_weight_decay_lambda must be in (0.0, 1.0]",
1852        ));
1853    }
1854    Ok(value)
1855}
1856
1857fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
1858where
1859    D: serde::Deserializer<'de>,
1860{
1861    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1862    if value.is_nan() || value.is_infinite() {
1863        return Err(serde::de::Error::custom(
1864            "threshold must be a finite number",
1865        ));
1866    }
1867    if !(0.0..=1.0).contains(&value) {
1868        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
1869    }
1870    Ok(value)
1871}
1872
1873fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
1874where
1875    D: serde::Deserializer<'de>,
1876{
1877    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1878    if value.is_nan() || value.is_infinite() {
1879        return Err(serde::de::Error::custom(
1880            "fast_path_margin must be a finite number",
1881        ));
1882    }
1883    if !(0.0..=1.0).contains(&value) {
1884        return Err(serde::de::Error::custom(
1885            "fast_path_margin must be in [0.0, 1.0]",
1886        ));
1887    }
1888    Ok(value)
1889}
1890
/// Default composite admission threshold; messages scoring below it are rejected.
fn default_admission_threshold() -> f32 {
    0.40
}

/// Default margin above the threshold for the LLM-skipping fast path.
fn default_admission_fast_path_margin() -> f32 {
    0.15
}

/// Default minimum training samples before the RL admission model activates.
fn default_rl_min_samples() -> u32 {
    500
}

/// Default RL model retraining interval, in seconds (one hour).
fn default_rl_retrain_interval_secs() -> u64 {
    3600
}
1906
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in TOML as `"heuristic"` or `"rl"` (snake_case).
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
1921
1922fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
1923where
1924    D: serde::Deserializer<'de>,
1925{
1926    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1927    if value < 0.0 {
1928        return Err(serde::de::Error::custom(
1929            "admission weight must be non-negative (>= 0.0)",
1930        ));
1931    }
1932    Ok(value)
1933}
1934
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative; each field is checked by `validate_admission_weight`
/// during deserialization.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
1963
impl Default for AdmissionWeights {
    // Values match the per-field "Default:" docs on `AdmissionWeights`; the first five
    // sum to 1.0 while `goal_utility` is opt-in and starts at 0.0.
    fn default() -> Self {
        Self {
            future_utility: 0.30,
            factual_confidence: 0.15,
            semantic_novelty: 0.30,
            temporal_recency: 0.10,
            content_type_prior: 0.15,
            goal_utility: 0.0,
        }
    }
}
1976
1977impl AdmissionWeights {
1978    /// Return weights normalized so they sum to 1.0.
1979    ///
1980    /// All weights are non-negative; the sum is always > 0 when defaults are used.
1981    #[must_use]
1982    pub fn normalized(&self) -> Self {
1983        let sum = self.future_utility
1984            + self.factual_confidence
1985            + self.semantic_novelty
1986            + self.temporal_recency
1987            + self.content_type_prior
1988            + self.goal_utility;
1989        if sum <= f32::EPSILON {
1990            return Self::default();
1991        }
1992        Self {
1993            future_utility: self.future_utility / sum,
1994            factual_confidence: self.factual_confidence / sum,
1995            semantic_novelty: self.semantic_novelty / sum,
1996            temporal_recency: self.temporal_recency / sum,
1997            content_type_prior: self.content_type_prior / sum,
1998            goal_utility: self.goal_utility / sum,
1999        }
2000    }
2001}
2002
2003/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
2004///
2005/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
2006/// Messages below the composite admission threshold are rejected and not persisted.
2007#[derive(Debug, Clone, Deserialize, Serialize)]
2008#[serde(default)]
2009pub struct AdmissionConfig {
2010    /// Enable A-MAC admission control. Default: `false`.
2011    pub enabled: bool,
2012    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
2013    /// Default: `0.40`.
2014    #[serde(deserialize_with = "validate_admission_threshold")]
2015    pub threshold: f32,
2016    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
2017    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
2018    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
2019    pub fast_path_margin: f32,
2020    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
2021    /// Falls back to the primary provider when empty. Default: `""`.
2022    pub admission_provider: ProviderName,
2023    /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
2024    pub weights: AdmissionWeights,
2025    /// Admission decision strategy. Default: `heuristic`.
2026    #[serde(default)]
2027    pub admission_strategy: AdmissionStrategy,
2028    /// Minimum training samples before the RL model is activated.
2029    /// Below this count the system falls back to `Heuristic`. Default: `500`.
2030    #[serde(default = "default_rl_min_samples")]
2031    pub rl_min_samples: u32,
2032    /// Background RL model retraining interval in seconds. Default: `3600`.
2033    #[serde(default = "default_rl_retrain_interval_secs")]
2034    pub rl_retrain_interval_secs: u64,
2035    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
2036    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
2037    /// Zero regression when `false`. Default: `false`.
2038    #[serde(default)]
2039    pub goal_conditioned_write: bool,
2040    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
2041    /// Used only for borderline cases (similarity within 0.1 of threshold).
2042    /// Falls back to the primary provider when empty. Default: `""`.
2043    #[serde(default)]
2044    pub goal_utility_provider: ProviderName,
2045    /// Minimum cosine similarity between goal embedding and candidate memory
2046    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
2047    #[serde(default = "default_goal_utility_threshold")]
2048    pub goal_utility_threshold: f32,
2049    /// Weight of the `goal_utility` factor in the composite admission score.
2050    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
2051    #[serde(default = "default_goal_utility_weight")]
2052    pub goal_utility_weight: f32,
2053}
2054
/// Default minimum goal/memory cosine similarity for goal relevance.
fn default_goal_utility_threshold() -> f32 {
    0.4
}

/// Default weight of the `goal_utility` factor in the composite admission score.
fn default_goal_utility_weight() -> f32 {
    0.25
}
2062
impl Default for AdmissionConfig {
    // Admission control is off by default; every numeric field reuses the serde
    // default helper so TOML and programmatic defaults stay in lockstep.
    fn default() -> Self {
        Self {
            enabled: false,
            threshold: default_admission_threshold(),
            fast_path_margin: default_admission_fast_path_margin(),
            admission_provider: ProviderName::default(),
            weights: AdmissionWeights::default(),
            admission_strategy: AdmissionStrategy::default(),
            rl_min_samples: default_rl_min_samples(),
            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
            goal_conditioned_write: false,
            goal_utility_provider: ProviderName::default(),
            goal_utility_threshold: default_goal_utility_threshold(),
            goal_utility_weight: default_goal_utility_weight(),
        }
    }
}
2081
/// Routing strategy for `[memory.store_routing]`.
///
/// Serialized in TOML as `"heuristic"`, `"llm"`, or `"hybrid"` (snake_case).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
2094
2095/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
2096///
2097/// Controls how each query is classified and routed to the appropriate memory
2098/// backend(s), avoiding unnecessary store queries for simple lookups.
2099#[derive(Debug, Clone, Deserialize, Serialize)]
2100#[serde(default)]
2101pub struct StoreRoutingConfig {
2102    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
2103    /// directly (existing behavior). Default: `false`.
2104    pub enabled: bool,
2105    /// Routing strategy. Default: `heuristic`.
2106    pub strategy: StoreRoutingStrategy,
2107    /// Provider name from `[[llm.providers]]` for LLM-based classification.
2108    /// Falls back to the primary provider when empty. Default: `""`.
2109    pub routing_classifier_provider: ProviderName,
2110    /// Route to use when the classifier is uncertain (confidence < threshold).
2111    /// Default: `"hybrid"`.
2112    pub fallback_route: String,
2113    /// Confidence threshold below which `HybridRouter` escalates to LLM.
2114    /// Range: `[0.0, 1.0]`. Default: `0.7`.
2115    pub confidence_threshold: f32,
2116}
2117
impl Default for StoreRoutingConfig {
    // Routing is off by default and falls back to the heuristic router; the
    // literals mirror the "Default:" values documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            strategy: StoreRoutingStrategy::Heuristic,
            routing_classifier_provider: ProviderName::default(),
            fallback_route: "hybrid".into(),
            confidence_threshold: 0.7,
        }
    }
}
2129
/// Persona memory layer configuration (#2461).
///
/// When `enabled = true`, user preferences and domain knowledge are extracted from
/// conversation history via a cheap LLM provider and injected after the system prompt.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PersonaConfig {
    /// Enable persona memory extraction and injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for persona extraction.
    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
    pub persona_provider: ProviderName,
    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
    // NOTE(review): not range-validated at deserialization, unlike the
    // [memory.admission] thresholds — confirm whether [0.0, 1.0] should be enforced.
    pub min_confidence: f64,
    /// Minimum user messages before extraction runs in a session. Default: `3`.
    pub min_messages: usize,
    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Token budget allocated to persona context in assembly. Default: `500`.
    pub context_budget_tokens: usize,
}
2153
impl Default for PersonaConfig {
    // Persona memory is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            persona_provider: ProviderName::default(),
            min_confidence: 0.6,
            min_messages: 3,
            max_messages: 10,
            extraction_timeout_secs: 10,
            context_budget_tokens: 500,
        }
    }
}
2167
/// Trajectory-informed memory configuration (#2498).
///
/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
/// Procedural entries are injected into context as "past experience" during assembly.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TrajectoryConfig {
    /// Enable trajectory extraction and context injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub trajectory_provider: ProviderName,
    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Number of procedural entries retrieved for context injection. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum confidence score for entries included in context. Default: `0.6`.
    // NOTE(review): unvalidated at deserialization — confirm whether [0.0, 1.0]
    // should be enforced as for the admission thresholds.
    pub min_confidence: f64,
}
2192
impl Default for TrajectoryConfig {
    // Trajectory memory is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            trajectory_provider: ProviderName::default(),
            context_budget_tokens: 400,
            max_messages: 10,
            extraction_timeout_secs: 10,
            recall_top_k: 5,
            min_confidence: 0.6,
        }
    }
}
2206
/// Category-aware memory configuration (#2428).
///
/// When `enabled = true`, messages are auto-tagged with a category derived from the active
/// skill or tool context. The category is stored in the `messages.category` column and used
/// as a Qdrant payload filter during recall.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CategoryConfig {
    /// Enable category tagging and category-filtered recall. Default: `false`.
    pub enabled: bool,
    /// Automatically assign category from skill metadata or tool type. Default: `true`.
    // NOTE(review): presumably a no-op unless `enabled` is true — confirm in the
    // tagging code path.
    pub auto_tag: bool,
}
2220
impl Default for CategoryConfig {
    // Category memory is off by default, but auto-tagging is pre-enabled so that
    // simply flipping `enabled` activates the full feature.
    fn default() -> Self {
        Self {
            enabled: false,
            auto_tag: true,
        }
    }
}
2229
/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
///
/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
/// for complex queries.
///
/// Keys missing from TOML fall back to this type's `Default` impl via the
/// container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TreeConfig {
    /// Enable the memory tree and background consolidation loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for node consolidation.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// Interval between consolidation sweeps in seconds. Default: `300`.
    pub sweep_interval_secs: u64,
    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
    pub batch_size: usize,
    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
    pub similarity_threshold: f32,
    /// Maximum tree depth (levels above leaves). Default: `3`.
    pub max_level: u32,
    /// Token budget allocated to tree memory in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Number of tree nodes retrieved for context. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
    pub min_cluster_size: usize,
}
2258
impl Default for TreeConfig {
    // Tree memory is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            consolidation_provider: ProviderName::default(),
            sweep_interval_secs: 300,
            batch_size: 20,
            similarity_threshold: 0.8,
            max_level: 3,
            context_budget_tokens: 400,
            recall_top_k: 5,
            min_cluster_size: 2,
        }
    }
}
2274
/// Time-based microcompact configuration (#2699).
///
/// When `enabled = true`, low-value tool outputs are cleared from context
/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
/// The most recent `keep_recent` tool messages are preserved unconditionally.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MicrocompactConfig {
    /// Enable time-based microcompaction. Default: `false`.
    pub enabled: bool,
    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
    pub gap_threshold_minutes: u32,
    /// Number of most recent compactable tool messages to preserve. Default: `3`.
    pub keep_recent: usize,
}
2290
impl Default for MicrocompactConfig {
    // Microcompaction is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            gap_threshold_minutes: 60,
            keep_recent: 3,
        }
    }
}
2300
/// autoDream background memory consolidation configuration (#2697).
///
/// When `enabled = true`, a constrained consolidation subagent runs after
/// a session ends if both `min_sessions` and `min_hours` gates pass.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct AutoDreamConfig {
    /// Enable autoDream consolidation. Default: `false`.
    pub enabled: bool,
    /// Minimum number of sessions between consolidations. Default: `3`.
    /// Gates jointly with `min_hours` — both must pass (see struct docs).
    pub min_sessions: u32,
    /// Minimum hours between consolidations. Default: `24`.
    pub min_hours: u32,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub consolidation_provider: ProviderName,
    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
    pub max_iterations: u8,
}
2320
impl Default for AutoDreamConfig {
    // autoDream is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            min_sessions: 3,
            min_hours: 24,
            consolidation_provider: ProviderName::default(),
            max_iterations: 8,
        }
    }
}
2332
/// `MagicDocs` auto-maintained markdown configuration (#2702).
///
/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
/// are registered and periodically updated by a constrained subagent.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MagicDocsConfig {
    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
    pub enabled: bool,
    /// Minimum turns between updates for a given doc path. Default: `5`.
    pub min_turns_between_updates: u32,
    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub update_provider: ProviderName,
    /// Maximum agent loop iterations per doc update. Default: `4`.
    pub max_iterations: u8,
}
2350
impl Default for MagicDocsConfig {
    // MagicDocs is off by default; literals mirror the "Default:" values
    // documented on the struct fields.
    fn default() -> Self {
        Self {
            enabled: false,
            min_turns_between_updates: 5,
            update_provider: ProviderName::default(),
            max_iterations: 4,
        }
    }
}
2361
#[cfg(test)]
mod tests {
    use super::*;

    // ── PruningStrategy TOML deserialization ────────────────────────────────

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every currently-supported strategy string must map to its matching variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Only the removed `task_aware_mig` alias is forgiven; arbitrary unknown
    // strings must still surface a deserialization error.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    // ── TierConfig defaults and validation ──────────────────────────────────

    // Pin the documented defaults so accidental changes fail loudly.
    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    // Deserialization-time validation: promotion requires at least 2 sessions.
    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Deserialization-time validation: similarity threshold floor is 0.5.
    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Deserialization-time validation: a zero sweep batch would make sweeps no-ops.
    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // ── SemanticConfig::importance_weight validation ────────────────────────

    // Helper: deserialize a SemanticConfig with only `importance_weight` set,
    // so each test exercises the field validator in isolation.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    // Lower bound of the valid [0.0, 1.0] range is accepted.
    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    // Upper bound of the valid [0.0, 1.0] range is accepted.
    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // TOML does not have a NaN literal, but we can test via a f64 that
        // the validator rejects out-of-range values. Test with negative here
        // and rely on validate_importance_weight rejecting non-finite via
        // a constructed deserializer call.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    // A clearly out-of-range negative value, beyond the near-boundary case above.
    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // NOTE(review): the sum below omits `goal_utility` — presumably it is excluded
    // from normalization by design; confirm against AdmissionWeights::normalized().
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        assert!(
            (n.future_utility - default.future_utility).abs() < 0.001,
            "zero-sum weights must fall back to defaults"
        );
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    // NOTE(review): this deserializes into a local Wrapper, not
    // SpreadingActivationConfig itself, so it only pins the TOML field name/type —
    // consider deserializing the real config type for stronger coverage.
    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    // Cross-field invariant: activation threshold must stay strictly below the
    // inhibition threshold for validate() to pass.
    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    // Explicitly-set budget fields override the serde defaults.
    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    // Empty provider means "fall back to the primary provider" at runtime.
    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
2637
/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
///
/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
/// Successful and failed reasoning chains are compressed into short, generalizable strategy
/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
/// and injected into the prompt preamble.
///
/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
///
/// Any field omitted from the TOML falls back to its [`ReasoningConfig::default`]
/// value via `#[serde(default)]`.
///
/// # Example
///
/// ```toml
/// [memory.reasoning]
/// enabled = true
/// extract_provider = "fast"
/// distill_provider = "fast"
/// top_k = 3
/// store_limit = 1000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ReasoningConfig {
    /// Enable the reasoning-bank pipeline. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the self-judge step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub extract_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for the distillation step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Number of strategies retrieved per turn for context injection. Default: `3`.
    pub top_k: usize,
    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
    pub store_limit: usize,
    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
    ///
    /// NOTE(review): description overlaps with [`Self::self_judge_window`] (default `2`) —
    /// confirm which limit the self-judge path actually applies, and clarify the docs.
    pub max_messages: usize,
    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
    pub max_message_chars: usize,
    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
    pub context_budget_tokens: usize,
    /// Minimum number of messages required before self-judge fires. Default: `2`.
    pub min_messages: usize,
    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
    pub extraction_timeout_secs: u64,
    /// Timeout in seconds for the distillation LLM call. Default: `30`.
    pub distill_timeout_secs: u64,
    /// Maximum number of recent messages passed to the self-judge evaluator.
    /// Narrowing to the last user+assistant pair improves classification accuracy.
    /// Default: `2`.
    pub self_judge_window: usize,
    /// Minimum characters in the assistant response to trigger self-judge.
    /// Short or trivial responses are skipped. Default: `50`.
    pub min_assistant_chars: usize,
}
2692
2693impl Default for ReasoningConfig {
2694    fn default() -> Self {
2695        Self {
2696            enabled: false,
2697            extract_provider: ProviderName::default(),
2698            distill_provider: ProviderName::default(),
2699            top_k: 3,
2700            store_limit: 1000,
2701            max_messages: 6,
2702            max_message_chars: 2000,
2703            context_budget_tokens: 500,
2704            min_messages: 2,
2705            extraction_timeout_secs: 30,
2706            distill_timeout_secs: 30,
2707            self_judge_window: 2,
2708            min_assistant_chars: 50,
2709        }
2710    }
2711}