// zeph_config/memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::secret::Secret;
9
10use crate::defaults::{default_sqlite_path_field, default_true};
11use crate::providers::ProviderName;
12
// --- serde `default = "..."` helpers ---------------------------------------
// serde's `default` field attribute takes a path to a nullary function, so
// each compiled-in default below lives in its own small free function.

/// Default `sqlite_pool_size`: 5 connections.
fn default_sqlite_pool_size() -> u32 {
    5
}

/// Default `max_history`: 100 entries.
fn default_max_history() -> usize {
    100
}

/// Default `title_max_chars`: 60 characters.
fn default_title_max_chars() -> usize {
    60
}
24
/// Default Qdrant collection name used for ingested documents.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
28
/// Default document `chunk_size`: 1000.
fn default_document_chunk_size() -> usize {
    1000
}

/// Default `chunk_overlap` between adjacent document chunks: 100.
fn default_document_chunk_overlap() -> usize {
    100
}

/// Default `top_k` document chunks returned per retrieval: 3.
fn default_document_top_k() -> usize {
    3
}

/// Default `autosave_min_length`: 20 — shorter messages are not autosaved.
fn default_autosave_min_length() -> usize {
    20
}

/// Default `tool_call_cutoff`: 6.
fn default_tool_call_cutoff() -> usize {
    6
}

/// Default `token_safety_margin`: 1.0 (no extra margin on token estimates).
fn default_token_safety_margin() -> f32 {
    1.0
}

/// Default `redact_credentials`: enabled.
fn default_redact_credentials() -> bool {
    true
}
56
/// Default Qdrant endpoint: a local instance on port 6334.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
60
/// Default `summarization_threshold`: 50 messages.
fn default_summarization_threshold() -> usize {
    50
}

/// Default `context_budget_tokens`: 0 (see `auto_budget` on `MemoryConfig`).
fn default_context_budget_tokens() -> usize {
    0
}

// --- context compaction -----------------------------------------------------

/// Default `soft_compaction_threshold`: 60% of the context budget.
fn default_soft_compaction_threshold() -> f32 {
    0.60
}

/// Default `hard_compaction_threshold`: 90% of the context budget.
fn default_hard_compaction_threshold() -> f32 {
    0.90
}

/// Default `compaction_preserve_tail`: keep the last 6 messages verbatim.
fn default_compaction_preserve_tail() -> usize {
    6
}

/// Default `compaction_cooldown_turns`: 2 turns between compactions.
fn default_compaction_cooldown_turns() -> u8 {
    2
}

/// Default `auto_budget`: enabled.
fn default_auto_budget() -> bool {
    true
}

/// Default `prune_protect_tokens`: 40 000 tokens shielded from pruning.
fn default_prune_protect_tokens() -> usize {
    40_000
}

/// Default `cross_session_score_threshold`: 0.35.
fn default_cross_session_score_threshold() -> f32 {
    0.35
}

/// Default `temporal_decay_half_life_days`: 30 days.
fn default_temporal_decay_half_life_days() -> u32 {
    30
}

/// Default `mmr_lambda`: 0.7.
fn default_mmr_lambda() -> f32 {
    0.7
}

// --- semantic recall ----------------------------------------------------------

/// Default semantic recall enable flag: true.
fn default_semantic_enabled() -> bool {
    true
}

/// Default `recall_limit`: 5 hits.
fn default_recall_limit() -> usize {
    5
}

/// Default `vector_weight` in hybrid scoring: 0.7.
fn default_vector_weight() -> f64 {
    0.7
}

/// Default `keyword_weight` in hybrid scoring: 0.3.
fn default_keyword_weight() -> f64 {
    0.3
}

// --- entity graph -------------------------------------------------------------

/// Default `max_entities_per_message`: 10.
fn default_graph_max_entities_per_message() -> usize {
    10
}

/// Default `max_edges_per_message`: 15.
fn default_graph_max_edges_per_message() -> usize {
    15
}

/// Default `community_refresh_interval`: every 100 messages.
fn default_graph_community_refresh_interval() -> usize {
    100
}

/// Default `community_summary_max_prompt_bytes`: 8 KiB.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192
}

/// Default `community_summary_concurrency`: 4 concurrent summaries.
fn default_graph_community_summary_concurrency() -> usize {
    4
}

/// Default `lpa_edge_chunk_size`: 10 000 edges per chunk.
fn default_lpa_edge_chunk_size() -> usize {
    10_000
}

/// Default `entity_similarity_threshold`: 0.85.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}

/// Default `entity_ambiguous_threshold`: 0.70.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70
}

/// Default graph `extraction_timeout_secs`: 15 s.
fn default_graph_extraction_timeout_secs() -> u64 {
    15
}

/// Default `max_hops` for graph recall: 2.
fn default_graph_max_hops() -> u32 {
    2
}

/// Default graph `recall_limit`: 10.
fn default_graph_recall_limit() -> usize {
    10
}

/// Default `expired_edge_retention_days`: 90 days.
fn default_graph_expired_edge_retention_days() -> u32 {
    90
}

/// Default graph `temporal_decay_rate`: 0.0.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}

/// Default `edge_history_limit`: 100 entries.
fn default_graph_edge_history_limit() -> usize {
    100
}

// --- SYNAPSE spreading activation (see `SpreadingActivationConfig`) -----------

/// Default `decay_lambda`: 0.85 per hop.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}

/// Default spreading-activation `max_hops`: 3.
fn default_spreading_activation_max_hops() -> u32 {
    3
}

/// Default `activation_threshold`: 0.1.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}

/// Default `inhibition_threshold`: 0.8.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}

/// Default `max_activated_nodes`: 50.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}

/// Default `recall_timeout_ms`: 1000 ms.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1000
}

// --- A-MEM note linking (see `NoteLinkingConfig`) -----------------------------

/// Default note-linking `similarity_threshold`: 0.85.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}

/// Default note-linking `top_k`: 10 linked entities.
fn default_note_linking_top_k() -> usize {
    10
}

/// Default note-linking `timeout_secs`: 5 s per linking pass.
fn default_note_linking_timeout_secs() -> u64 {
    5
}

// --- shutdown summary ---------------------------------------------------------

/// Default `shutdown_summary`: enabled.
fn default_shutdown_summary() -> bool {
    true
}

/// Default `shutdown_summary_min_messages`: 4.
fn default_shutdown_summary_min_messages() -> usize {
    4
}

/// Default `shutdown_summary_max_messages`: 20.
fn default_shutdown_summary_max_messages() -> usize {
    20
}

/// Default `shutdown_summary_timeout_secs`: 30 s per LLM attempt.
fn default_shutdown_summary_timeout_secs() -> u64 {
    30
}
228
229fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
230where
231    D: serde::Deserializer<'de>,
232{
233    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
234    if value.is_nan() || value.is_infinite() {
235        return Err(serde::de::Error::custom(
236            "similarity_threshold must be a finite number",
237        ));
238    }
239    if !(0.5..=1.0).contains(&value) {
240        return Err(serde::de::Error::custom(
241            "similarity_threshold must be in [0.5, 1.0]",
242        ));
243    }
244    Ok(value)
245}
246
247fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
248where
249    D: serde::Deserializer<'de>,
250{
251    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
252    if value < 2 {
253        return Err(serde::de::Error::custom(
254            "promotion_min_sessions must be >= 2",
255        ));
256    }
257    Ok(value)
258}
259
260fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
261where
262    D: serde::Deserializer<'de>,
263{
264    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
265    if value == 0 {
266        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
267    }
268    Ok(value)
269}
270
/// Default `promotion_min_sessions`: 3 (validator requires >= 2).
fn default_tier_promotion_min_sessions() -> u32 {
    3
}

/// Default tier `similarity_threshold`: 0.92.
fn default_tier_similarity_threshold() -> f32 {
    0.92
}

/// Default `sweep_interval_secs`: 3600 (hourly promotion sweep).
fn default_tier_sweep_interval_secs() -> u64 {
    3600
}

/// Default `sweep_batch_size`: 100 messages per sweep.
fn default_tier_sweep_batch_size() -> usize {
    100
}

/// Default `scene_similarity_threshold`: 0.80.
fn default_scene_similarity_threshold() -> f32 {
    0.80
}

/// Default `scene_batch_size`: 50 messages per consolidation sweep.
fn default_scene_batch_size() -> usize {
    50
}
294
295fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
296where
297    D: serde::Deserializer<'de>,
298{
299    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
300    if value.is_nan() || value.is_infinite() {
301        return Err(serde::de::Error::custom(
302            "scene_similarity_threshold must be a finite number",
303        ));
304    }
305    if !(0.5..=1.0).contains(&value) {
306        return Err(serde::de::Error::custom(
307            "scene_similarity_threshold must be in [0.5, 1.0]",
308        ));
309    }
310    Ok(value)
311}
312
313fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
314where
315    D: serde::Deserializer<'de>,
316{
317    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
318    if value == 0 {
319        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
320    }
321    Ok(value)
322}
323
324/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
325///
326/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
327/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
328///
329/// # Validation
330///
331/// Constraints enforced at deserialization time:
332/// - `similarity_threshold` in `[0.5, 1.0]`
333/// - `promotion_min_sessions >= 2`
334/// - `sweep_batch_size >= 1`
335/// - `scene_similarity_threshold` in `[0.5, 1.0]`
336/// - `scene_batch_size >= 1`
337#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
338#[serde(default)]
339pub struct TierConfig {
340    /// Enable the tier promotion system. When `false`, all messages remain episodic.
341    /// Default: `false`.
342    pub enabled: bool,
343    /// Minimum number of distinct sessions a fact must appear in before promotion.
344    /// Must be `>= 2`. Default: `3`.
345    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
346    pub promotion_min_sessions: u32,
347    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
348    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
349    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
350    pub similarity_threshold: f32,
351    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
352    pub sweep_interval_secs: u64,
353    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
354    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
355    pub sweep_batch_size: usize,
356    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
357    pub scene_enabled: bool,
358    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
359    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
360    pub scene_similarity_threshold: f32,
361    /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
362    #[serde(deserialize_with = "validate_scene_batch_size")]
363    pub scene_batch_size: usize,
364    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
365    /// Falls back to the primary provider when empty. Default: `""`.
366    pub scene_provider: ProviderName,
367    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
368    pub scene_sweep_interval_secs: u64,
369}
370
/// Default `scene_sweep_interval_secs`: 7200 (every 2 hours).
fn default_scene_sweep_interval_secs() -> u64 {
    7200
}

// Manual `Default` (rather than derived) so the struct-level `#[serde(default)]`
// and the per-field `default_*` helpers agree on the same values.
impl Default for TierConfig {
    fn default() -> Self {
        Self {
            // Both the promotion sweep and scene consolidation are opt-in.
            enabled: false,
            promotion_min_sessions: default_tier_promotion_min_sessions(),
            similarity_threshold: default_tier_similarity_threshold(),
            sweep_interval_secs: default_tier_sweep_interval_secs(),
            sweep_batch_size: default_tier_sweep_batch_size(),
            scene_enabled: false,
            scene_similarity_threshold: default_scene_similarity_threshold(),
            scene_batch_size: default_scene_batch_size(),
            scene_provider: ProviderName::default(),
            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
        }
    }
}
391
392fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
393where
394    D: serde::Deserializer<'de>,
395{
396    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
397    if value.is_nan() || value.is_infinite() {
398        return Err(serde::de::Error::custom(
399            "temporal_decay_rate must be a finite number",
400        ));
401    }
402    if !(0.0..=10.0).contains(&value) {
403        return Err(serde::de::Error::custom(
404            "temporal_decay_rate must be in [0.0, 10.0]",
405        ));
406    }
407    Ok(value)
408}
409
410fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
411where
412    D: serde::Deserializer<'de>,
413{
414    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
415    if value.is_nan() || value.is_infinite() {
416        return Err(serde::de::Error::custom(
417            "similarity_threshold must be a finite number",
418        ));
419    }
420    if !(0.0..=1.0).contains(&value) {
421        return Err(serde::de::Error::custom(
422            "similarity_threshold must be in [0.0, 1.0]",
423        ));
424    }
425    Ok(value)
426}
427
428fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
429where
430    D: serde::Deserializer<'de>,
431{
432    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
433    if value.is_nan() || value.is_infinite() {
434        return Err(serde::de::Error::custom(
435            "importance_weight must be a finite number",
436        ));
437    }
438    if value < 0.0 {
439        return Err(serde::de::Error::custom(
440            "importance_weight must be non-negative",
441        ));
442    }
443    if value > 1.0 {
444        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
445    }
446    Ok(value)
447}
448
/// Default `importance_weight`: 0.15.
fn default_importance_weight() -> f64 {
    0.15
}
452
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Enforced at deserialization time:
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Enforced elsewhere (NOT at deserialization time):
/// - `activation_threshold < inhibition_threshold` — checked by
///   [`SpreadingActivationConfig::validate`], which callers must invoke after
///   deserialization
/// - `recall_timeout_ms == 0` is clamped to 100 ms at runtime (see field doc)
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    // NOTE(review): the documented [0.0, 1.0] range is not validated anywhere
    // visible in this file — confirm whether out-of-range values are handled.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
495
496fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
497where
498    D: serde::Deserializer<'de>,
499{
500    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
501    if value.is_nan() || value.is_infinite() {
502        return Err(serde::de::Error::custom(
503            "decay_lambda must be a finite number",
504        ));
505    }
506    if !(value > 0.0 && value <= 1.0) {
507        return Err(serde::de::Error::custom(
508            "decay_lambda must be in (0.0, 1.0]",
509        ));
510    }
511    Ok(value)
512}
513
514fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
515where
516    D: serde::Deserializer<'de>,
517{
518    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
519    if value == 0 {
520        return Err(serde::de::Error::custom("max_hops must be >= 1"));
521    }
522    Ok(value)
523}
524
525impl SpreadingActivationConfig {
526    /// Validate cross-field constraints that cannot be expressed in per-field validators.
527    ///
528    /// # Errors
529    ///
530    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
531    pub fn validate(&self) -> Result<(), String> {
532        if self.activation_threshold >= self.inhibition_threshold {
533            return Err(format!(
534                "activation_threshold ({}) must be < inhibition_threshold ({})",
535                self.activation_threshold, self.inhibition_threshold
536            ));
537        }
538        Ok(())
539    }
540}
541
/// Default `seed_structural_weight`: 0.4.
fn default_seed_structural_weight() -> f32 {
    0.4
}

/// Default `seed_community_cap`: 3 seeds per community.
fn default_seed_community_cap() -> usize {
    3
}

// Manual `Default` keeps the struct-level `#[serde(default)]` and the
// per-field `default_*` helpers in agreement.
impl Default for SpreadingActivationConfig {
    fn default() -> Self {
        Self {
            // Opt-in: BFS recall remains the default path.
            enabled: false,
            decay_lambda: default_spreading_activation_decay_lambda(),
            max_hops: default_spreading_activation_max_hops(),
            activation_threshold: default_spreading_activation_activation_threshold(),
            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
            seed_structural_weight: default_seed_structural_weight(),
            seed_community_cap: default_seed_community_cap(),
            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
        }
    }
}
565
/// Kumiho belief revision configuration.
///
/// Every field is optional in TOML thanks to the struct-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}

/// Default belief-revision `similarity_threshold`: 0.85.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85
}

// Manual `Default` keeps the serde defaults and programmatic defaults aligned.
impl Default for BeliefRevisionConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            similarity_threshold: default_belief_revision_similarity_threshold(),
        }
    }
}
590
/// D-MEM RPE-based tiered graph extraction routing configuration.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    // NOTE(review): reuses `validate_similarity_threshold` for its [0.0, 1.0]
    // range check, so a rejected value reports "similarity_threshold must be
    // in [0.0, 1.0]" rather than naming this `threshold` field — consider a
    // dedicated validator if the error text matters.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}

/// Default RPE `threshold`: 0.3.
fn default_rpe_threshold() -> f32 {
    0.3
}

/// Default `max_skip_turns`: 5.
fn default_rpe_max_skip_turns() -> u32 {
    5
}

// Manual `Default` keeps the serde defaults and programmatic defaults aligned.
impl Default for RpeConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            threshold: default_rpe_threshold(),
            max_skip_turns: default_rpe_max_skip_turns(),
        }
    }
}
622
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}

// Manual `Default` keeps the serde defaults and programmatic defaults aligned.
impl Default for NoteLinkingConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            similarity_threshold: default_note_linking_similarity_threshold(),
            top_k: default_note_linking_top_k(),
            timeout_secs: default_note_linking_timeout_secs(),
        }
    }
}
652
/// Vector backend selector for embedding storage.
///
/// Serialized in lowercase (`"qdrant"` / `"sqlite"`) via `rename_all`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// External Qdrant server (see `qdrant_url` / `qdrant_api_key` on `MemoryConfig`).
    Qdrant,
    /// `SQLite`-backed vector storage; the default backend.
    #[default]
    Sqlite,
}
661
662impl VectorBackend {
663    /// Return the lowercase identifier string for this backend.
664    ///
665    /// # Examples
666    ///
667    /// ```
668    /// use zeph_config::VectorBackend;
669    ///
670    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
671    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
672    /// ```
673    #[must_use]
674    pub fn as_str(&self) -> &'static str {
675        match self {
676            Self::Qdrant => "qdrant",
677            Self::Sqlite => "sqlite",
678        }
679    }
680}
681
/// Memory subsystem configuration, nested under `[memory]` in TOML.
///
/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
/// multi-tier promotion, and all memory-related background tasks.
///
/// # Example (TOML)
///
/// ```toml
/// [memory]
/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
/// qdrant_url = "http://localhost:6334"
/// history_limit = 50
/// summarization_threshold = 50
/// auto_budget = true
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct MemoryConfig {
    /// Guidelines injected into compression prompts; see [`CompressionGuidelinesConfig`].
    #[serde(default)]
    pub compression_guidelines: CompressionGuidelinesConfig,
    /// Filesystem path of the `SQLite` database file.
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// Required field (no serde default) — deserialization fails if it is
    /// absent from `[memory]`.
    // NOTE(review): presumably the number of recent messages loaded per
    // session — confirm against the store implementation.
    pub history_limit: u32,
    /// Qdrant endpoint URL. Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    /// Optional API key for authenticating to a remote or managed Qdrant cluster.
    ///
    /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
    /// Leave `None` for local dev instances. The actual key is resolved from the vault:
    /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
    ///
    /// The value is wrapped in [`Secret`] to prevent accidental logging.
    /// `skip_serializing` prevents the key from being written back to TOML on config save.
    #[serde(default, skip_serializing)]
    pub qdrant_api_key: Option<Secret>,
    /// Nested `[memory.semantic]` recall settings; see [`SemanticConfig`].
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Message count at which summarization kicks in. Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Explicit context token budget. Default: `0`.
    // NOTE(review): `0` together with `auto_budget = true` appears to mean
    // "derive the budget from the model" — confirm at the consumer.
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Budget fraction that triggers soft compaction. Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Budget fraction that triggers hard compaction. Default: `0.90`.
    /// The legacy TOML key `compaction_threshold` is accepted as an alias.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Number of trailing messages kept out of compaction. Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Minimum turns between consecutive compactions. Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Derive the context budget automatically. Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Token span protected from pruning. Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Minimum score for cross-session recall hits. Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Embedding storage backend. Default: [`VectorBackend::Sqlite`].
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Multiplier applied to token estimates. Default: `1.0` (no margin).
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Redact credential-like content before persisting. Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    /// Automatically save assistant messages. Default: `true`.
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Minimum message length eligible for autosave. Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Tool-call cutoff. Default: `6`.
    // NOTE(review): exact semantics not visible in this chunk — confirm
    // whether this caps tool calls per turn or trims tool output history.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// `SQLite` connection pool size. Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    /// Nested `[memory.sessions]` settings; see [`SessionsConfig`].
    #[serde(default)]
    pub sessions: SessionsConfig,
    /// Nested `[memory.documents]` settings; see [`DocumentConfig`].
    #[serde(default)]
    pub documents: DocumentConfig,
    /// Nested `[memory.eviction]` settings; see [`EvictionConfig`].
    #[serde(default)]
    pub eviction: EvictionConfig,
    /// Nested `[memory.compression]` settings; see [`CompressionConfig`].
    #[serde(default)]
    pub compression: CompressionConfig,
    /// Nested `[memory.sidequest]` settings; see [`SidequestConfig`].
    #[serde(default)]
    pub sidequest: SidequestConfig,
    /// Nested `[memory.graph]` settings; see [`GraphConfig`].
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `30`.
    // Doc fix: previously documented as 10, but the serde default helper
    // `default_shutdown_summary_timeout_secs` returns 30.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
    /// A-MAC adaptive memory admission control.
    ///
    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
    /// if its composite admission score falls below the configured threshold.
    #[serde(default)]
    pub admission: AdmissionConfig,
    /// Session digest generation at session end. Default: disabled.
    #[serde(default)]
    pub digest: DigestConfig,
    /// Context assembly strategy. Default: `full_history` (current behavior).
    #[serde(default)]
    pub context_strategy: ContextStrategy,
    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
    #[serde(default = "default_crossover_turn_threshold")]
    pub crossover_turn_threshold: u32,
    /// All-Mem lifelong memory consolidation sweep.
    ///
    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
    /// messages and merges them into consolidated entries via LLM.
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// `SleepGate` forgetting sweep (#2397).
    ///
    /// When `forgetting.enabled = true`, a background loop periodically decays importance
    /// scores and prunes memories below the forgetting floor.
    #[serde(default)]
    pub forgetting: ForgettingConfig,
    /// `PostgreSQL` connection URL.
    ///
    /// Used when the binary is compiled with `--features postgres`.
    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
    /// Example: `postgres://user:pass@localhost:5432/zeph`
    /// Default: `None` (uses `sqlite_path` instead).
    #[serde(default)]
    pub database_url: Option<String>,
    /// Cost-sensitive store routing (#2444).
    ///
    /// When `store_routing.enabled = true`, query intent is classified and routed to
    /// the cheapest sufficient backend instead of querying all stores on every turn.
    #[serde(default)]
    pub store_routing: StoreRoutingConfig,
    /// Persona memory layer (#2461).
    ///
    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
    /// from conversation history and injected into context after the system prompt.
    #[serde(default)]
    pub persona: PersonaConfig,
    /// Trajectory-informed memory (#2498).
    #[serde(default)]
    pub trajectory: TrajectoryConfig,
    /// Category-aware memory (#2428).
    #[serde(default)]
    pub category: CategoryConfig,
    /// `TiMem` temporal-hierarchical memory tree (#2262).
    #[serde(default)]
    pub tree: TreeConfig,
    /// Time-based microcompact (#2699).
    ///
    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
    /// from context when the session has been idle longer than `gap_threshold_minutes`.
    #[serde(default)]
    pub microcompact: MicrocompactConfig,
    /// autoDream background memory consolidation (#2697).
    ///
    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
    #[serde(default)]
    pub autodream: AutoDreamConfig,
    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
    ///
    /// Before inserting a new key fact, its nearest neighbour is looked up in the
    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
    #[serde(default = "default_key_facts_dedup_threshold")]
    pub key_facts_dedup_threshold: f32,
    /// Experience compression spectrum (#3305).
    ///
    /// Controls three-tier retrieval policy and background skill-promotion engine.
    #[serde(default)]
    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
    /// MemMachine-inspired retrieval-stage tuning (#3340).
    ///
    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
    /// uniformly across graph, hybrid, and vector-only recall paths.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.retrieval]
    /// depth = 40
    /// search_prompt_template = ""
    /// context_format = "structured"
    /// ```
    #[serde(default)]
    pub retrieval: RetrievalConfig,
    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
    ///
    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
    /// and injected before the LLM call.
    #[serde(default)]
    pub reasoning: ReasoningConfig,
    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
    ///
    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.hebbian]
    /// enabled = true
    /// hebbian_lr = 0.1
    /// ```
    #[serde(default)]
    pub hebbian: HebbianConfig,
}
919
/// Serde default for the adaptive-strategy crossover: turn count at which
/// `ContextStrategy::Adaptive` switches from full-history to memory-first assembly.
fn default_crossover_turn_threshold() -> u32 {
    20
}

/// Serde default for `key_facts_dedup_threshold` (#2717): cosine score at or above
/// which a new key fact is treated as a near-duplicate and skipped.
fn default_key_facts_dedup_threshold() -> f32 {
    0.95
}
927
/// Session digest configuration (#2289).
///
/// Container-level `#[serde(default)]`: any field absent from TOML falls back to
/// the corresponding value in [`DigestConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when `None`.
    #[serde(default)]
    pub provider: Option<ProviderName>,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
943
944impl Default for DigestConfig {
945    fn default() -> Self {
946        Self {
947            enabled: false,
948            provider: None,
949            max_tokens: 500,
950            max_input_messages: 50,
951        }
952    }
953}
954
/// Context assembly strategy (#2288).
///
/// Serialized as `full_history` / `memory_first` / `adaptive` in TOML
/// (`rename_all = "snake_case"`).
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold` (default `20` via `default_crossover_turn_threshold`).
    Adaptive,
}
970
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
982
983impl Default for SessionsConfig {
984    fn default() -> Self {
985        Self {
986            max_history: default_max_history(),
987            title_max_chars: default_title_max_chars(),
988        }
989    }
990}
991
/// Configuration for the document ingestion and RAG retrieval pipeline.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Vector-store collection name for ingested document chunks.
    /// Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Chunk size used when splitting documents for embedding. Default: `1000`.
    /// (Units — characters vs tokens — are not visible here; confirm at the chunker.)
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, in the same units as `chunk_size`.
    /// Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
1008
1009impl Default for DocumentConfig {
1010    fn default() -> Self {
1011        Self {
1012            collection: default_document_collection(),
1013            chunk_size: default_document_chunk_size(),
1014            chunk_overlap: default_document_chunk_overlap(),
1015            top_k: default_document_top_k(),
1016            rag_enabled: false,
1017        }
1018    }
1019}
1020
/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
///
/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
/// re-ranking, and hybrid BM25+vector weighting.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.semantic]
/// enabled = true
/// recall_limit = 5
/// vector_weight = 0.7
/// keyword_weight = 0.3
/// mmr_lambda = 0.7
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct SemanticConfig {
    /// Enable vector-based semantic recall. Default: `true`.
    #[serde(default = "default_semantic_enabled")]
    pub enabled: bool,
    /// Maximum number of memories returned per recall. Default: `5`.
    #[serde(default = "default_recall_limit")]
    pub recall_limit: usize,
    /// Weight of the vector-similarity score in hybrid ranking.
    /// Default comes from `default_vector_weight`.
    #[serde(default = "default_vector_weight")]
    pub vector_weight: f64,
    /// Weight of the keyword (BM25) score in hybrid ranking.
    /// Default comes from `default_keyword_weight`.
    #[serde(default = "default_keyword_weight")]
    pub keyword_weight: f64,
    /// Apply recency-based score decay during ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub temporal_decay_enabled: bool,
    /// Half-life of the temporal decay, in days. Default: `30`.
    #[serde(default = "default_temporal_decay_half_life_days")]
    pub temporal_decay_half_life_days: u32,
    /// Enable MMR (maximal marginal relevance) diversity re-ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub mmr_enabled: bool,
    /// MMR relevance-vs-diversity trade-off. Default: `0.7`.
    #[serde(default = "default_mmr_lambda")]
    pub mmr_lambda: f32,
    /// Factor per-memory importance scores into ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub importance_enabled: bool,
    /// Weight of the importance score in ranking. Values are checked by
    /// `validate_importance_weight` at deserialization time (see that function
    /// for the accepted range).
    #[serde(
        default = "default_importance_weight",
        deserialize_with = "validate_importance_weight"
    )]
    pub importance_weight: f64,
    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
    /// from contending with the guardrail at the API server level (rate limits, Ollama
    /// single-model lock). Falls back to the main agent provider when `None`.
    #[serde(default)]
    pub embed_provider: Option<ProviderName>,
}
1070
1071impl Default for SemanticConfig {
1072    fn default() -> Self {
1073        Self {
1074            enabled: default_semantic_enabled(),
1075            recall_limit: default_recall_limit(),
1076            vector_weight: default_vector_weight(),
1077            keyword_weight: default_keyword_weight(),
1078            temporal_decay_enabled: true,
1079            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1080            mmr_enabled: true,
1081            mmr_lambda: default_mmr_lambda(),
1082            importance_enabled: true,
1083            importance_weight: default_importance_weight(),
1084            embed_provider: None,
1085        }
1086    }
1087}
1088
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Serialized as `structured` / `plain` in TOML (`rename_all = "snake_case"`).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default. Gives the LLM structured provenance metadata for each recalled
    /// memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1116
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML.  All fields have defaults so existing
/// configs parse unchanged: the container-level `#[serde(default)]` falls back to
/// [`RetrievalConfig::default`] for any field absent from TOML.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped.  Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1181
/// Serde default for `RetrievalConfig::query_bias_correction` (MM-F3, #3341).
fn default_query_bias_correction() -> bool {
    true
}

/// Serde default for `RetrievalConfig::query_bias_profile_weight` (MM-F3, #3341).
fn default_query_bias_profile_weight() -> f32 {
    0.25
}

/// Serde default for `RetrievalConfig::query_bias_centroid_ttl_secs`: 5 minutes.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    300
}
1193
1194impl Default for RetrievalConfig {
1195    fn default() -> Self {
1196        Self {
1197            depth: 0,
1198            search_prompt_template: String::new(),
1199            context_format: ContextFormat::default(),
1200            query_bias_correction: default_query_bias_correction(),
1201            query_bias_profile_weight: default_query_bias_profile_weight(),
1202            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1203        }
1204    }
1205}
1206
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// Container-level `#[serde(default)]`: fields absent from TOML fall back to
/// [`HebbianConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when `None` or unresolvable.
    #[serde(default)]
    pub consolidate_provider: Option<ProviderName>,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<String>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
}
1269
1270impl Default for HebbianConfig {
1271    fn default() -> Self {
1272        Self {
1273            enabled: false,
1274            hebbian_lr: 0.1,
1275            consolidation_interval_secs: 3600,
1276            consolidation_threshold: 5.0,
1277            consolidate_provider: None,
1278            max_candidates_per_sweep: 10,
1279            consolidation_cooldown_secs: 86_400,
1280            consolidation_prompt_timeout_secs: 30,
1281            consolidation_max_neighbors: 20,
1282            spreading_activation: false,
1283            spread_depth: 2,
1284            spread_edge_types: Vec::new(),
1285            step_budget_ms: 8,
1286        }
1287    }
1288}
1289
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged: TOML selects a variant via `strategy = "reactive"` /
/// `"proactive"` / `"autonomous"` / `"focus"`, with the `Proactive` fields given
/// as sibling keys.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1313
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
// `Deserialize` is intentionally NOT derived: a manual impl below routes parsing
// through `FromStr` so removed variant names degrade gracefully instead of erroring.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1338
impl PruningStrategy {
    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
    ///
    /// NOTE(review): presumably used by callers to decide whether subgoal tracking
    /// must run at all — confirm at the call sites.
    #[must_use]
    pub fn is_subgoal(self) -> bool {
        matches!(self, Self::Subgoal | Self::SubgoalMig)
    }
}
1346
1347// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1348// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1349impl<'de> serde::Deserialize<'de> for PruningStrategy {
1350    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1351        let s = String::deserialize(deserializer)?;
1352        s.parse().map_err(serde::de::Error::custom)
1353    }
1354}
1355
1356impl std::str::FromStr for PruningStrategy {
1357    type Err = String;
1358
1359    fn from_str(s: &str) -> Result<Self, Self::Err> {
1360        match s {
1361            "reactive" => Ok(Self::Reactive),
1362            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1363            "mig" => Ok(Self::Mig),
1364            // task_aware_mig was removed (dead code — was routed to scored path only).
1365            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1366            "task_aware_mig" | "task-aware-mig" => {
1367                tracing::warn!(
1368                    "pruning strategy `task_aware_mig` has been removed; \
1369                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1370                );
1371                Ok(Self::Reactive)
1372            }
1373            "subgoal" => Ok(Self::Subgoal),
1374            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1375            other => Err(format!(
1376                "unknown pruning strategy `{other}`, expected \
1377                 reactive|task_aware|mig|subgoal|subgoal_mig"
1378            )),
1379        }
1380    }
1381}
1382
/// Serde default for `CompressionConfig::high_density_budget` (#2481).
/// Together with `default_low_density_budget` the two fractions sum to 1.0.
fn default_high_density_budget() -> f32 {
    0.7
}

/// Serde default for `CompressionConfig::low_density_budget` (#2481).
fn default_low_density_budget() -> f32 {
    0.3
}
1390
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// Container-level `#[serde(default)]`: fields absent from TOML fall back to
/// [`ForgettingConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
1418
1419impl Default for ForgettingConfig {
1420    fn default() -> Self {
1421        Self {
1422            enabled: false,
1423            decay_rate: 0.1,
1424            forgetting_floor: 0.05,
1425            sweep_interval_secs: 7200,
1426            sweep_batch_size: 500,
1427            replay_window_hours: 24,
1428            replay_min_access_count: 3,
1429            protect_recent_hours: 24,
1430            protect_min_access_count: 3,
1431        }
1432    }
1433}
1434
1435/// Configuration for active context compression (#1161).
1436#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1437#[serde(default)]
1438pub struct CompressionConfig {
1439    /// Compression strategy.
1440    #[serde(flatten)]
1441    pub strategy: CompressionStrategy,
1442    /// Tool-output pruning strategy (requires `context-compression` feature).
1443    pub pruning_strategy: PruningStrategy,
1444    /// Model to use for compression summaries.
1445    ///
1446    /// Currently unused — the primary summary provider is used regardless of this value.
1447    /// Reserved for future per-compression model selection. Setting this field has no effect.
1448    pub model: String,
1449    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1450    /// Falls back to the primary provider when empty. Default: `""`.
1451    pub compress_provider: ProviderName,
1452    /// Compaction probe: validates summary quality before committing it (#1609).
1453    #[serde(default)]
1454    pub probe: CompactionProbeConfig,
1455    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1456    ///
1457    /// When enabled, tool output bodies in the compaction range are saved to
1458    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1459    /// The LLM summarizes placeholder messages; archived content is appended as
1460    /// a postfix after summarization so references survive compaction.
1461    /// Default: `false`.
1462    #[serde(default)]
1463    pub archive_tool_outputs: bool,
1464    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
1465    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
1466    /// Falls back to the primary provider when empty. Default: `""`.
1467    pub focus_scorer_provider: ProviderName,
1468    /// Token-budget fraction for high-density content in density-aware compression (#2481).
1469    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1470    #[serde(default = "default_high_density_budget")]
1471    pub high_density_budget: f32,
1472    /// Token-budget fraction for low-density content in density-aware compression (#2481).
1473    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1474    #[serde(default = "default_low_density_budget")]
1475    pub low_density_budget: f32,
1476}
1477
/// Serde default for `SidequestConfig::interval_turns`: run eviction every 4 user turns.
fn default_sidequest_interval_turns() -> u32 {
    4
}

/// Serde default for `SidequestConfig::max_eviction_ratio`: at most half of the
/// tool outputs may be evicted per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5
}

/// Serde default for `SidequestConfig::max_cursors`: cursor-entry cap in the
/// eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    30
}

/// Serde default for `SidequestConfig::min_cursor_tokens`: outputs below this
/// token count are never eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    100
}
1493
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Container-level `#[serde(default)]`: fields absent from TOML fall back to
/// [`SidequestConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1514
1515impl Default for SidequestConfig {
1516    fn default() -> Self {
1517        Self {
1518            enabled: false,
1519            interval_turns: default_sidequest_interval_turns(),
1520            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1521            max_cursors: default_sidequest_max_cursors(),
1522            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1523        }
1524    }
1525}
1526
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
/// TOML values are `snake_case` variant names, except `AStar` which is spelled `astar`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior).
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
    Bfs,
    /// A* shortest-path traversal via petgraph.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query.
    Hybrid,
}
1549
/// Serde default for `BeamSearchConfig::beam_width`: candidates kept per hop.
fn default_beam_width() -> usize {
    10
}
1553
/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop. Used when `GraphRetrievalStrategy::BeamSearch` is selected.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
1564
1565impl Default for BeamSearchConfig {
1566    fn default() -> Self {
1567        Self {
1568            beam_width: default_beam_width(),
1569        }
1570    }
1571}
1572
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes. Used when
/// `GraphRetrievalStrategy::WaterCircles` is selected.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}
1582
/// Serde default for `ExperienceConfig::evolution_sweep_interval`: turns between sweeps.
fn default_evolution_sweep_interval() -> usize {
    50
}

/// Serde default for `ExperienceConfig::confidence_prune_threshold`: edges with
/// zero retrievals below this confidence are pruned.
fn default_confidence_prune_threshold() -> f32 {
    0.1
}
1590
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
/// Every field carries its own serde default, so an empty TOML table is valid.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
1609
1610impl Default for ExperienceConfig {
1611    fn default() -> Self {
1612        Self {
1613            enabled: false,
1614            evolution_sweep_enabled: false,
1615            confidence_prune_threshold: default_confidence_prune_threshold(),
1616            evolution_sweep_interval: default_evolution_sweep_interval(),
1617        }
1618    }
1619}
1620
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
///
/// All fields are optional in TOML: the container-level `#[serde(default)]` fills missing
/// fields from `GraphConfig::default()`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable the graph memory subsystem. Default: `false` (see the `Default` impl).
    pub enabled: bool,
    /// Model identifier used for graph-extraction LLM calls. Default: `""` (empty).
    /// NOTE(review): presumably an empty value falls back to the primary model — confirm at call site.
    pub extract_model: String,
    /// Upper bound on entities taken from a single message.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Upper bound on edges taken from a single message.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Interval between community-detection refreshes.
    /// NOTE(review): units (turns vs. messages) are not visible here — confirm against the sweep loop.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used during entity resolution.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Timeout for a single extraction LLM call, in seconds.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Use embedding-based entity resolution. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Similarity band within which entity resolution is treated as ambiguous.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum hop count for hop-limited BFS graph recall.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Maximum number of graph results returned by recall.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider for hybrid strategy classification.
    /// Falls back to the default provider when `None`.
    #[serde(default)]
    pub strategy_classifier_provider: Option<ProviderName>,
    /// Beam search configuration.
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration.
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration.
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
}
1748
/// Default size of the `SQLite` connection pool dedicated to graph operations.
fn default_graph_pool_size() -> u32 {
    3
}
1752
1753impl Default for GraphConfig {
1754    fn default() -> Self {
1755        Self {
1756            enabled: false,
1757            extract_model: String::new(),
1758            max_entities_per_message: default_graph_max_entities_per_message(),
1759            max_edges_per_message: default_graph_max_edges_per_message(),
1760            community_refresh_interval: default_graph_community_refresh_interval(),
1761            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
1762            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
1763            use_embedding_resolution: false,
1764            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
1765            max_hops: default_graph_max_hops(),
1766            recall_limit: default_graph_recall_limit(),
1767            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
1768            max_entities: 0,
1769            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
1770            community_summary_concurrency: default_graph_community_summary_concurrency(),
1771            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
1772            temporal_decay_rate: default_graph_temporal_decay_rate(),
1773            edge_history_limit: default_graph_edge_history_limit(),
1774            note_linking: NoteLinkingConfig::default(),
1775            spreading_activation: SpreadingActivationConfig::default(),
1776            retrieval_strategy: GraphRetrievalStrategy::default(),
1777            strategy_classifier_provider: None,
1778            beam_search: BeamSearchConfig::default(),
1779            watercircles: WaterCirclesConfig::default(),
1780            experience: ExperienceConfig::default(),
1781            link_weight_decay_lambda: default_link_weight_decay_lambda(),
1782            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
1783            belief_revision: BeliefRevisionConfig::default(),
1784            rpe: RpeConfig::default(),
1785            pool_size: default_graph_pool_size(),
1786        }
1787    }
1788}
1789
/// Default minimum LLM-assigned confidence for applying a topology op.
fn default_consolidation_confidence_threshold() -> f32 {
    0.70
}

/// Default seconds between background consolidation sweeps (one hour).
fn default_consolidation_sweep_interval_secs() -> u64 {
    3_600
}

/// Default maximum messages evaluated per consolidation sweep cycle.
fn default_consolidation_sweep_batch_size() -> usize {
    50
}

/// Default minimum cosine similarity for consolidation candidates.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85
}
1805
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// Every field is optional in TOML: the container-level `#[serde(default)]` fills
/// missing fields from `ConsolidationConfig::default()`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    /// NOTE(review): unlike the `[memory.admission]` thresholds, this value is not
    /// range-validated at deserialization — confirm out-of-range handling downstream.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
}
1834
1835impl Default for ConsolidationConfig {
1836    fn default() -> Self {
1837        Self {
1838            enabled: false,
1839            consolidation_provider: ProviderName::default(),
1840            confidence_threshold: default_consolidation_confidence_threshold(),
1841            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
1842            sweep_batch_size: default_consolidation_sweep_batch_size(),
1843            similarity_threshold: default_consolidation_similarity_threshold(),
1844        }
1845    }
1846}
1847
/// Default multiplicative decay factor for A-MEM link weights.
fn default_link_weight_decay_lambda() -> f64 {
    0.95
}

/// Default seconds between link weight decay passes (24 hours).
fn default_link_weight_decay_interval_secs() -> u64 {
    86_400
}
1855
1856fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
1857where
1858    D: serde::Deserializer<'de>,
1859{
1860    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
1861    if value.is_nan() || value.is_infinite() {
1862        return Err(serde::de::Error::custom(
1863            "link_weight_decay_lambda must be a finite number",
1864        ));
1865    }
1866    if !(value > 0.0 && value <= 1.0) {
1867        return Err(serde::de::Error::custom(
1868            "link_weight_decay_lambda must be in (0.0, 1.0]",
1869        ));
1870    }
1871    Ok(value)
1872}
1873
1874fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
1875where
1876    D: serde::Deserializer<'de>,
1877{
1878    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1879    if value.is_nan() || value.is_infinite() {
1880        return Err(serde::de::Error::custom(
1881            "threshold must be a finite number",
1882        ));
1883    }
1884    if !(0.0..=1.0).contains(&value) {
1885        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
1886    }
1887    Ok(value)
1888}
1889
1890fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
1891where
1892    D: serde::Deserializer<'de>,
1893{
1894    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1895    if value.is_nan() || value.is_infinite() {
1896        return Err(serde::de::Error::custom(
1897            "fast_path_margin must be a finite number",
1898        ));
1899    }
1900    if !(0.0..=1.0).contains(&value) {
1901        return Err(serde::de::Error::custom(
1902            "fast_path_margin must be in [0.0, 1.0]",
1903        ));
1904    }
1905    Ok(value)
1906}
1907
/// Default composite admission score cutoff.
fn default_admission_threshold() -> f32 {
    0.40
}

/// Default margin above the threshold for the LLM-free fast path.
fn default_admission_fast_path_margin() -> f32 {
    0.15
}

/// Default minimum training samples before the RL admission model activates.
fn default_rl_min_samples() -> u32 {
    500
}

/// Default seconds between RL model retraining passes (one hour).
fn default_rl_retrain_interval_secs() -> u64 {
    3_600
}
1923
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in TOML as `"heuristic"` / `"rl"` via `rename_all = "snake_case"`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
1938
1939fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
1940where
1941    D: serde::Deserializer<'de>,
1942{
1943    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
1944    if value < 0.0 {
1945        return Err(serde::de::Error::custom(
1946            "admission weight must be non-negative (>= 0.0)",
1947        ));
1948    }
1949    Ok(value)
1950}
1951
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative.
///
/// Fields missing from TOML fall back via the container-level `#[serde(default)]`
/// to the values in `AdmissionWeights::default()`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
1980
1981impl Default for AdmissionWeights {
1982    fn default() -> Self {
1983        Self {
1984            future_utility: 0.30,
1985            factual_confidence: 0.15,
1986            semantic_novelty: 0.30,
1987            temporal_recency: 0.10,
1988            content_type_prior: 0.15,
1989            goal_utility: 0.0,
1990        }
1991    }
1992}
1993
1994impl AdmissionWeights {
1995    /// Return weights normalized so they sum to 1.0.
1996    ///
1997    /// All weights are non-negative; the sum is always > 0 when defaults are used.
1998    #[must_use]
1999    pub fn normalized(&self) -> Self {
2000        let sum = self.future_utility
2001            + self.factual_confidence
2002            + self.semantic_novelty
2003            + self.temporal_recency
2004            + self.content_type_prior
2005            + self.goal_utility;
2006        if sum <= f32::EPSILON {
2007            return Self::default();
2008        }
2009        Self {
2010            future_utility: self.future_utility / sum,
2011            factual_confidence: self.factual_confidence / sum,
2012            semantic_novelty: self.semantic_novelty / sum,
2013            temporal_recency: self.temporal_recency / sum,
2014            content_type_prior: self.content_type_prior / sum,
2015            goal_utility: self.goal_utility / sum,
2016        }
2017    }
2018}
2019
/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
///
/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
/// Messages below the composite admission threshold are rejected and not persisted.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionConfig {
    /// Enable A-MAC admission control. Default: `false`.
    pub enabled: bool,
    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
    /// Default: `0.40`.
    #[serde(deserialize_with = "validate_admission_threshold")]
    pub threshold: f32,
    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
    pub fast_path_margin: f32,
    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub admission_provider: ProviderName,
    /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
    pub weights: AdmissionWeights,
    /// Admission decision strategy. Default: `heuristic`.
    #[serde(default)]
    pub admission_strategy: AdmissionStrategy,
    /// Minimum training samples before the RL model is activated.
    /// Below this count the system falls back to `Heuristic`. Default: `500`.
    #[serde(default = "default_rl_min_samples")]
    pub rl_min_samples: u32,
    /// Background RL model retraining interval in seconds. Default: `3600`.
    #[serde(default = "default_rl_retrain_interval_secs")]
    pub rl_retrain_interval_secs: u64,
    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
    /// Zero regression when `false`. Default: `false`.
    #[serde(default)]
    pub goal_conditioned_write: bool,
    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
    /// Used only for borderline cases (similarity within 0.1 of threshold).
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub goal_utility_provider: ProviderName,
    /// Minimum cosine similarity between goal embedding and candidate memory
    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
    #[serde(default = "default_goal_utility_threshold")]
    pub goal_utility_threshold: f32,
    /// Weight of the `goal_utility` factor in the composite admission score.
    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
    /// NOTE(review): `weights.goal_utility` also exists — confirm which of the two
    /// the scorer actually reads, and whether they are meant to be kept in sync.
    #[serde(default = "default_goal_utility_weight")]
    pub goal_utility_weight: f32,
}
2071
/// Default minimum goal-embedding cosine similarity for goal relevance.
fn default_goal_utility_threshold() -> f32 {
    0.40
}

/// Default weight of the `goal_utility` factor in the composite score.
fn default_goal_utility_weight() -> f32 {
    0.25
}
2079
2080impl Default for AdmissionConfig {
2081    fn default() -> Self {
2082        Self {
2083            enabled: false,
2084            threshold: default_admission_threshold(),
2085            fast_path_margin: default_admission_fast_path_margin(),
2086            admission_provider: ProviderName::default(),
2087            weights: AdmissionWeights::default(),
2088            admission_strategy: AdmissionStrategy::default(),
2089            rl_min_samples: default_rl_min_samples(),
2090            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
2091            goal_conditioned_write: false,
2092            goal_utility_provider: ProviderName::default(),
2093            goal_utility_threshold: default_goal_utility_threshold(),
2094            goal_utility_weight: default_goal_utility_weight(),
2095        }
2096    }
2097}
2098
/// Routing strategy for `[memory.store_routing]`.
///
/// Serialized in TOML as `"heuristic"` / `"llm"` / `"hybrid"` via `rename_all = "snake_case"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
2111
/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
///
/// Controls how each query is classified and routed to the appropriate memory
/// backend(s), avoiding unnecessary store queries for simple lookups.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct StoreRoutingConfig {
    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
    /// directly (existing behavior). Default: `false`.
    pub enabled: bool,
    /// Routing strategy. Default: `heuristic`.
    pub strategy: StoreRoutingStrategy,
    /// Provider name from `[[llm.providers]]` for LLM-based classification.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub routing_classifier_provider: ProviderName,
    /// Route to use when the classifier is uncertain (confidence < threshold).
    /// Default: `"hybrid"`.
    /// NOTE(review): free-form string — confirm the accepted route names at the consumer.
    pub fallback_route: String,
    /// Confidence threshold below which `HybridRouter` escalates to LLM.
    /// Range: `[0.0, 1.0]`. Default: `0.7`.
    /// NOTE(review): unlike `[memory.admission]` thresholds, this is not range-validated
    /// at deserialization.
    pub confidence_threshold: f32,
}
2134
2135impl Default for StoreRoutingConfig {
2136    fn default() -> Self {
2137        Self {
2138            enabled: false,
2139            strategy: StoreRoutingStrategy::Heuristic,
2140            routing_classifier_provider: ProviderName::default(),
2141            fallback_route: "hybrid".into(),
2142            confidence_threshold: 0.7,
2143        }
2144    }
2145}
2146
/// Persona memory layer configuration (#2461).
///
/// When `enabled = true`, user preferences and domain knowledge are extracted from
/// conversation history via a cheap LLM provider and injected after the system prompt.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PersonaConfig {
    /// Enable persona memory extraction and injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for persona extraction.
    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
    pub persona_provider: ProviderName,
    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
    /// NOTE(review): this is `f64` while sibling confidence thresholds are `f32` — confirm intent.
    pub min_confidence: f64,
    /// Minimum user messages before extraction runs in a session. Default: `3`.
    pub min_messages: usize,
    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Token budget allocated to persona context in assembly. Default: `500`.
    pub context_budget_tokens: usize,
}
2170
2171impl Default for PersonaConfig {
2172    fn default() -> Self {
2173        Self {
2174            enabled: false,
2175            persona_provider: ProviderName::default(),
2176            min_confidence: 0.6,
2177            min_messages: 3,
2178            max_messages: 10,
2179            extraction_timeout_secs: 10,
2180            context_budget_tokens: 500,
2181        }
2182    }
2183}
2184
/// Trajectory-informed memory configuration (#2498).
///
/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
/// Procedural entries are injected into context as "past experience" during assembly.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TrajectoryConfig {
    /// Enable trajectory extraction and context injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub trajectory_provider: ProviderName,
    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Number of procedural entries retrieved for context injection. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum confidence score for entries included in context. Default: `0.6`.
    /// NOTE(review): `f64` here vs. `f32` for other confidence thresholds — confirm intent.
    pub min_confidence: f64,
}
2209
2210impl Default for TrajectoryConfig {
2211    fn default() -> Self {
2212        Self {
2213            enabled: false,
2214            trajectory_provider: ProviderName::default(),
2215            context_budget_tokens: 400,
2216            max_messages: 10,
2217            extraction_timeout_secs: 10,
2218            recall_top_k: 5,
2219            min_confidence: 0.6,
2220        }
2221    }
2222}
2223
/// Category-aware memory configuration (#2428).
///
/// When `enabled = true`, messages are auto-tagged with a category derived from the active
/// skill or tool context. The category is stored in the `messages.category` column and used
/// as a Qdrant payload filter during recall.
///
/// Both fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CategoryConfig {
    /// Enable category tagging and category-filtered recall. Default: `false`.
    pub enabled: bool,
    /// Automatically assign category from skill metadata or tool type. Default: `true`.
    pub auto_tag: bool,
}
2237
2238impl Default for CategoryConfig {
2239    fn default() -> Self {
2240        Self {
2241            enabled: false,
2242            auto_tag: true,
2243        }
2244    }
2245}
2246
/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
///
/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
/// for complex queries.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TreeConfig {
    /// Enable the memory tree and background consolidation loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for node consolidation.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// Interval between consolidation sweeps in seconds. Default: `300`.
    pub sweep_interval_secs: u64,
    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
    pub batch_size: usize,
    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
    /// NOTE(review): not range-validated at deserialization, unlike admission thresholds.
    pub similarity_threshold: f32,
    /// Maximum tree depth (levels above leaves). Default: `3`.
    pub max_level: u32,
    /// Token budget allocated to tree memory in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Number of tree nodes retrieved for context. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
    pub min_cluster_size: usize,
}
2275
2276impl Default for TreeConfig {
2277    fn default() -> Self {
2278        Self {
2279            enabled: false,
2280            consolidation_provider: ProviderName::default(),
2281            sweep_interval_secs: 300,
2282            batch_size: 20,
2283            similarity_threshold: 0.8,
2284            max_level: 3,
2285            context_budget_tokens: 400,
2286            recall_top_k: 5,
2287            min_cluster_size: 2,
2288        }
2289    }
2290}
2291
/// Time-based microcompact configuration (#2699).
///
/// When `enabled = true`, low-value tool outputs are cleared from context
/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
/// The most recent `keep_recent` tool messages are preserved unconditionally.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MicrocompactConfig {
    /// Enable time-based microcompaction. Default: `false`.
    pub enabled: bool,
    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
    pub gap_threshold_minutes: u32,
    /// Number of most recent compactable tool messages to preserve. Default: `3`.
    pub keep_recent: usize,
}
2307
2308impl Default for MicrocompactConfig {
2309    fn default() -> Self {
2310        Self {
2311            enabled: false,
2312            gap_threshold_minutes: 60,
2313            keep_recent: 3,
2314        }
2315    }
2316}
2317
/// autoDream background memory consolidation configuration (#2697).
///
/// When `enabled = true`, a constrained consolidation subagent runs after
/// a session ends if both `min_sessions` and `min_hours` gates pass.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct AutoDreamConfig {
    /// Enable autoDream consolidation. Default: `false`.
    pub enabled: bool,
    /// Minimum number of sessions between consolidations. Default: `3`.
    pub min_sessions: u32,
    /// Minimum hours between consolidations. Default: `24`.
    pub min_hours: u32,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub consolidation_provider: ProviderName,
    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
    pub max_iterations: u8,
}
2337
2338impl Default for AutoDreamConfig {
2339    fn default() -> Self {
2340        Self {
2341            enabled: false,
2342            min_sessions: 3,
2343            min_hours: 24,
2344            consolidation_provider: ProviderName::default(),
2345            max_iterations: 8,
2346        }
2347    }
2348}
2349
/// `MagicDocs` auto-maintained markdown configuration (#2702).
///
/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
/// are registered and periodically updated by a constrained subagent.
///
/// All fields are optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MagicDocsConfig {
    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
    pub enabled: bool,
    /// Minimum turns between updates for a given doc path. Default: `5`.
    pub min_turns_between_updates: u32,
    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub update_provider: ProviderName,
    /// Maximum agent loop iterations per doc update. Default: `4`.
    pub max_iterations: u8,
}
2367
2368impl Default for MagicDocsConfig {
2369    fn default() -> Self {
2370        Self {
2371            enabled: false,
2372            min_turns_between_updates: 5,
2373            update_provider: ProviderName::default(),
2374            max_iterations: 4,
2375        }
2376    }
2377}
2378
#[cfg(test)]
mod tests {
    use super::*;

    // ── PruningStrategy TOML deserialization ─────────────────────────────────

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every currently-supported strategy string maps to its matching variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Only the removed `task_aware_mig` alias gets fallback treatment; any
    // other unknown name must be a hard deserialization error.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    // ── TierConfig defaults and validation bounds ────────────────────────────

    // Defaults must match the values documented on the struct.
    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    // Promotion requires at least two sessions; `1` is rejected at parse time.
    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Values below the 0.5 similarity floor are rejected at parse time.
    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // A zero batch size would make the sweep a no-op; it must be rejected.
    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // ── SemanticConfig::importance_weight range validation ───────────────────

    // Helper: deserialize a SemanticConfig with only `importance_weight` set.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    // Both inclusive endpoints of the valid [0.0, 1.0] range are accepted.
    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // TOML has no NaN literal, so non-finite rejection cannot be exercised
        // from TOML input here. Instead, check that a value just below the
        // valid [0.0, 1.0] range is rejected by the validator.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // NOTE(review): `goal_utility` is excluded from the checked sum in these
    // tests — confirm normalized() intentionally treats it separately.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        assert!(
            (n.future_utility - default.future_utility).abs() < 0.001,
            "zero-sum weights must fall back to defaults"
        );
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    // Explicit timeout values survive TOML deserialization unchanged.
    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    // validate() enforces activation_threshold < inhibition_threshold.
    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    // Explicitly-set budget values survive deserialization unchanged.
    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    // The scorer provider name defaults to empty.
    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
2654
/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
///
/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
/// Successful and failed reasoning chains are compressed into short, generalizable strategy
/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
/// and injected into the prompt preamble.
///
/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
///
/// # Example
///
/// ```toml
/// [memory.reasoning]
/// enabled = true
/// extract_provider = "fast"
/// distill_provider = "fast"
/// top_k = 3
/// store_limit = 1000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ReasoningConfig {
    /// Enable the reasoning-bank pipeline. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the self-judge step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub extract_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for the distillation step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Number of strategies retrieved per turn for context injection. Default: `3`.
    pub top_k: usize,
    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
    pub store_limit: usize,
    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
    // NOTE(review): description overlaps with `self_judge_window` below, which
    // also caps messages fed to the self-judge (default 2) — confirm which
    // limit the pipeline applies and document the distinction here.
    pub max_messages: usize,
    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
    pub max_message_chars: usize,
    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
    pub context_budget_tokens: usize,
    /// Minimum number of messages required before self-judge fires. Default: `2`.
    pub min_messages: usize,
    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
    pub extraction_timeout_secs: u64,
    /// Timeout in seconds for the distillation LLM call. Default: `30`.
    pub distill_timeout_secs: u64,
    /// Maximum number of recent messages passed to the self-judge evaluator.
    /// Narrowing to the last user+assistant pair improves classification accuracy.
    /// Default: `2`.
    pub self_judge_window: usize,
    /// Minimum characters in the assistant response to trigger self-judge.
    /// Short or trivial responses are skipped. Default: `50`.
    pub min_assistant_chars: usize,
}
2709
2710impl Default for ReasoningConfig {
2711    fn default() -> Self {
2712        Self {
2713            enabled: false,
2714            extract_provider: ProviderName::default(),
2715            distill_provider: ProviderName::default(),
2716            top_k: 3,
2717            store_limit: 1000,
2718            max_messages: 6,
2719            max_message_chars: 2000,
2720            context_budget_tokens: 500,
2721            min_messages: 2,
2722            extraction_timeout_secs: 30,
2723            distill_timeout_secs: 30,
2724            self_judge_window: 2,
2725            min_assistant_chars: 50,
2726        }
2727    }
2728}
2729
2730// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
2731
2732/// Configuration for the memory eviction policy.
2733///
2734/// Controls which policy runs during the periodic sweep and how many entries
2735/// are retained. `zeph-memory` re-exports this type from here.
2736#[derive(Debug, Clone, Deserialize, Serialize)]
2737pub struct EvictionConfig {
2738    /// Policy name. Currently only `"ebbinghaus"` is supported.
2739    pub policy: String,
2740    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
2741    pub max_entries: usize,
2742    /// How often to run the eviction sweep, in seconds.
2743    pub sweep_interval_secs: u64,
2744}
2745
2746impl Default for EvictionConfig {
2747    fn default() -> Self {
2748        Self {
2749            policy: "ebbinghaus".to_owned(),
2750            max_entries: 0,
2751            sweep_interval_secs: 3600,
2752        }
2753    }
2754}
2755
2756// ── Compression guidelines config (moved from zeph-memory) ───────────────────
2757
/// Configuration for ACON failure-driven compression guidelines.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionGuidelinesConfig {
    /// Enable the feature. Default: `false`.
    pub enabled: bool,
    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
    pub update_threshold: u16,
    /// Maximum token budget for the guidelines document. Default: `500`.
    pub max_guidelines_tokens: usize,
    /// Maximum failure pairs consumed per update cycle. Default: `10`.
    pub max_pairs_per_update: usize,
    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
    pub detection_window_turns: u64,
    /// Interval in seconds between background updater checks. Default: `300`.
    pub update_interval_secs: u64,
    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
    pub max_stored_pairs: usize,
    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
    /// `None` (or `Some("")`) falls back to the primary provider.
    // `skip_serializing_if` keeps the field out of serialized output when unset;
    // the field-level `default` is redundant under the container-level
    // `#[serde(default)]` but harmless.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub guidelines_provider: Option<ProviderName>,
    /// Maintain separate guideline documents per content category.
    // NOTE(review): field-level `default` is redundant given the container-level
    // `#[serde(default)]`; could be dropped with no behavior change.
    #[serde(default)]
    pub categorized_guidelines: bool,
}
2786
2787impl Default for CompressionGuidelinesConfig {
2788    fn default() -> Self {
2789        Self {
2790            enabled: false,
2791            update_threshold: 5,
2792            max_guidelines_tokens: 500,
2793            max_pairs_per_update: 10,
2794            detection_window_turns: 10,
2795            update_interval_secs: 300,
2796            max_stored_pairs: 100,
2797            guidelines_provider: None,
2798            categorized_guidelines: false,
2799        }
2800    }
2801}
2802
2803// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
2804
/// Functional category of a compaction probe question.
///
/// Serialized in lowercase (`"recall"`, `"artifact"`, `"continuation"`,
/// `"decision"`) via `#[serde(rename_all = "lowercase")]`.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ProbeCategory {
    /// Did specific facts survive? (file paths, function names, values, decisions)
    Recall,
    /// Does the agent know which files/tools/URLs it used?
    Artifact,
    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
    Continuation,
    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
    Decision,
}
2820
/// Configuration for the compaction probe.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionProbeConfig {
    /// Enable compaction probe validation. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for probe LLM calls.
    /// `None` (or `Some("")`) uses the summary provider.
    // `skip_serializing_if` keeps the field out of serialized output when unset;
    // the field-level `default` is redundant under the container-level
    // `#[serde(default)]` but harmless.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub probe_provider: Option<ProviderName>,
    /// Minimum score to pass without warnings. Default: `0.6`.
    pub threshold: f32,
    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
    pub hard_fail_threshold: f32,
    /// Maximum number of probe questions to generate. Default: `5`.
    pub max_questions: usize,
    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
    pub timeout_secs: u64,
    /// Optional per-category weight multipliers for the overall score.
    /// Map keys use the lowercase serde names of [`ProbeCategory`].
    #[serde(default)]
    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
}
2845
2846impl Default for CompactionProbeConfig {
2847    fn default() -> Self {
2848        Self {
2849            enabled: false,
2850            probe_provider: None,
2851            threshold: 0.6,
2852            hard_fail_threshold: 0.35,
2853            max_questions: 5,
2854            timeout_secs: 15,
2855            category_weights: None,
2856        }
2857    }
2858}