
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::secret::Secret;
9
10use crate::defaults::{default_sqlite_path_field, default_true};
11use crate::providers::ProviderName;
12
// ---------------------------------------------------------------------------
// Serde default-value providers.
//
// Each `default_*` fn below backs a `#[serde(default = "…")]` attribute on a
// config struct in this module, so TOML files only need to spell out fields
// that differ from these values. Keep each fn in sync with the `Default`
// impl of the struct that consumes it.
// ---------------------------------------------------------------------------

// --- Storage / history defaults ---

fn default_sqlite_pool_size() -> u32 {
    5
}

fn default_max_history() -> usize {
    100
}

fn default_title_max_chars() -> usize {
    60
}

// --- Document store defaults ---

fn default_document_collection() -> String {
    "zeph_documents".into()
}

fn default_document_chunk_size() -> usize {
    1000
}

fn default_document_chunk_overlap() -> usize {
    100
}

fn default_document_top_k() -> usize {
    3
}

// --- Autosave / tool-call defaults ---

fn default_autosave_min_length() -> usize {
    20
}

fn default_tool_call_cutoff() -> usize {
    6
}

// 1.0 = no extra margin applied on top of the token estimate.
fn default_token_safety_margin() -> f32 {
    1.0
}

fn default_redact_credentials() -> bool {
    true
}

fn default_qdrant_url() -> String {
    "http://localhost:6334".into()
}

// --- Compaction / context-budget defaults ---

fn default_summarization_threshold() -> usize {
    50
}

// NOTE(review): `0` looks like a "no explicit budget" sentinel (auto_budget
// defaults to true) — confirm against the consumer of this field.
fn default_context_budget_tokens() -> usize {
    0
}

fn default_soft_compaction_threshold() -> f32 {
    0.60
}

fn default_hard_compaction_threshold() -> f32 {
    0.90
}

fn default_compaction_preserve_tail() -> usize {
    6
}

fn default_compaction_cooldown_turns() -> u8 {
    2
}

fn default_auto_budget() -> bool {
    true
}

fn default_prune_protect_tokens() -> usize {
    40_000
}

// --- Semantic recall defaults ---

fn default_cross_session_score_threshold() -> f32 {
    0.35
}

fn default_temporal_decay_half_life_days() -> u32 {
    30
}

fn default_mmr_lambda() -> f32 {
    0.7
}

fn default_semantic_enabled() -> bool {
    true
}

fn default_recall_limit() -> usize {
    5
}

// Vector/keyword weights sum to 1.0 by default (hybrid recall mix).
fn default_vector_weight() -> f64 {
    0.7
}

fn default_keyword_weight() -> f64 {
    0.3
}

// --- Graph memory defaults ---

fn default_graph_max_entities_per_message() -> usize {
    10
}

fn default_graph_max_edges_per_message() -> usize {
    15
}

fn default_graph_community_refresh_interval() -> usize {
    100
}

fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192
}

fn default_graph_community_summary_concurrency() -> usize {
    4
}

fn default_lpa_edge_chunk_size() -> usize {
    10_000
}

fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}

fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70
}

fn default_graph_extraction_timeout_secs() -> u64 {
    15
}

fn default_graph_max_hops() -> u32 {
    2
}

fn default_graph_recall_limit() -> usize {
    10
}

fn default_graph_expired_edge_retention_days() -> u32 {
    90
}

// 0.0 disables decay (see `validate_temporal_decay_rate` for the accepted range).
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}

fn default_graph_edge_history_limit() -> usize {
    100
}

// --- Spreading activation defaults (see `SpreadingActivationConfig`) ---

fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}

fn default_spreading_activation_max_hops() -> u32 {
    3
}

fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}

fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}

fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}

fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1000
}

// --- Note linking defaults (see `NoteLinkingConfig`) ---

fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}

fn default_note_linking_top_k() -> usize {
    10
}

fn default_note_linking_timeout_secs() -> u64 {
    5
}

// --- Shutdown summary defaults ---

fn default_shutdown_summary() -> bool {
    true
}

fn default_shutdown_summary_min_messages() -> usize {
    4
}

fn default_shutdown_summary_max_messages() -> usize {
    20
}

fn default_shutdown_summary_timeout_secs() -> u64 {
    30
}
228
229fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
230where
231    D: serde::Deserializer<'de>,
232{
233    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
234    if value.is_nan() || value.is_infinite() {
235        return Err(serde::de::Error::custom(
236            "similarity_threshold must be a finite number",
237        ));
238    }
239    if !(0.5..=1.0).contains(&value) {
240        return Err(serde::de::Error::custom(
241            "similarity_threshold must be in [0.5, 1.0]",
242        ));
243    }
244    Ok(value)
245}
246
247fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
248where
249    D: serde::Deserializer<'de>,
250{
251    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
252    if value < 2 {
253        return Err(serde::de::Error::custom(
254            "promotion_min_sessions must be >= 2",
255        ));
256    }
257    Ok(value)
258}
259
260fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
261where
262    D: serde::Deserializer<'de>,
263{
264    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
265    if value == 0 {
266        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
267    }
268    Ok(value)
269}
270
// Serde default providers for `TierConfig` (tier promotion + scene
// consolidation). Keep in sync with `impl Default for TierConfig`.

fn default_tier_promotion_min_sessions() -> u32 {
    3
}

fn default_tier_similarity_threshold() -> f32 {
    0.92
}

// Sweep interval in seconds (1 hour).
fn default_tier_sweep_interval_secs() -> u64 {
    3600
}

fn default_tier_sweep_batch_size() -> usize {
    100
}

fn default_scene_similarity_threshold() -> f32 {
    0.80
}

fn default_scene_batch_size() -> usize {
    50
}
294
295fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
296where
297    D: serde::Deserializer<'de>,
298{
299    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
300    if value.is_nan() || value.is_infinite() {
301        return Err(serde::de::Error::custom(
302            "scene_similarity_threshold must be a finite number",
303        ));
304    }
305    if !(0.5..=1.0).contains(&value) {
306        return Err(serde::de::Error::custom(
307            "scene_similarity_threshold must be in [0.5, 1.0]",
308        ));
309    }
310    Ok(value)
311}
312
313fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
314where
315    D: serde::Deserializer<'de>,
316{
317    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
318    if value == 0 {
319        return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
320    }
321    Ok(value)
322}
323
324/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
325///
326/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
327/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
328///
329/// # Validation
330///
331/// Constraints enforced at deserialization time:
332/// - `similarity_threshold` in `[0.5, 1.0]`
333/// - `promotion_min_sessions >= 2`
334/// - `sweep_batch_size >= 1`
335/// - `scene_similarity_threshold` in `[0.5, 1.0]`
336/// - `scene_batch_size >= 1`
337#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
338#[serde(default)]
339pub struct TierConfig {
340    /// Enable the tier promotion system. When `false`, all messages remain episodic.
341    /// Default: `false`.
342    pub enabled: bool,
343    /// Minimum number of distinct sessions a fact must appear in before promotion.
344    /// Must be `>= 2`. Default: `3`.
345    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
346    pub promotion_min_sessions: u32,
347    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
348    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
349    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
350    pub similarity_threshold: f32,
351    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
352    pub sweep_interval_secs: u64,
353    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
354    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
355    pub sweep_batch_size: usize,
356    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
357    pub scene_enabled: bool,
358    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
359    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
360    pub scene_similarity_threshold: f32,
361    /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
362    #[serde(deserialize_with = "validate_scene_batch_size")]
363    pub scene_batch_size: usize,
364    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
365    /// Falls back to the primary provider when empty. Default: `""`.
366    pub scene_provider: ProviderName,
367    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
368    pub scene_sweep_interval_secs: u64,
369}
370
// Scene consolidation sweep interval in seconds (2 hours).
fn default_scene_sweep_interval_secs() -> u64 {
    7200
}
374
375impl Default for TierConfig {
376    fn default() -> Self {
377        Self {
378            enabled: false,
379            promotion_min_sessions: default_tier_promotion_min_sessions(),
380            similarity_threshold: default_tier_similarity_threshold(),
381            sweep_interval_secs: default_tier_sweep_interval_secs(),
382            sweep_batch_size: default_tier_sweep_batch_size(),
383            scene_enabled: false,
384            scene_similarity_threshold: default_scene_similarity_threshold(),
385            scene_batch_size: default_scene_batch_size(),
386            scene_provider: ProviderName::default(),
387            scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
388        }
389    }
390}
391
392fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
393where
394    D: serde::Deserializer<'de>,
395{
396    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
397    if value.is_nan() || value.is_infinite() {
398        return Err(serde::de::Error::custom(
399            "temporal_decay_rate must be a finite number",
400        ));
401    }
402    if !(0.0..=10.0).contains(&value) {
403        return Err(serde::de::Error::custom(
404            "temporal_decay_rate must be in [0.0, 10.0]",
405        ));
406    }
407    Ok(value)
408}
409
410fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
411where
412    D: serde::Deserializer<'de>,
413{
414    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
415    if value.is_nan() || value.is_infinite() {
416        return Err(serde::de::Error::custom(
417            "similarity_threshold must be a finite number",
418        ));
419    }
420    if !(0.0..=1.0).contains(&value) {
421        return Err(serde::de::Error::custom(
422            "similarity_threshold must be in [0.0, 1.0]",
423        ));
424    }
425    Ok(value)
426}
427
428fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
429where
430    D: serde::Deserializer<'de>,
431{
432    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
433    if value.is_nan() || value.is_infinite() {
434        return Err(serde::de::Error::custom(
435            "importance_weight must be a finite number",
436        ));
437    }
438    if value < 0.0 {
439        return Err(serde::de::Error::custom(
440            "importance_weight must be non-negative",
441        ));
442    }
443    if value > 1.0 {
444        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
445    }
446    Ok(value)
447}
448
// Serde default for importance weighting (see `validate_importance_weight`).
fn default_importance_weight() -> f64 {
    0.15
}
452
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Constraints enforced at deserialization time:
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
/// - `activation_threshold < inhibition_threshold`
/// - `recall_timeout_ms >= 1` (clamped to 100 with a warning if set to 0)
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    /// NOTE(review): not validated at deserialization; the `activation_threshold <
    /// inhibition_threshold` constraint is only enforced via [`Self::validate`].
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    /// NOTE(review): the documented range is not enforced by any validator — confirm
    /// downstream clamping or add a `deserialize_with` check.
    // Field-level default is redundant with the container-level `#[serde(default)]`
    // (both resolve to the same value) but harmless.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
495
496fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
497where
498    D: serde::Deserializer<'de>,
499{
500    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
501    if value.is_nan() || value.is_infinite() {
502        return Err(serde::de::Error::custom(
503            "decay_lambda must be a finite number",
504        ));
505    }
506    if !(value > 0.0 && value <= 1.0) {
507        return Err(serde::de::Error::custom(
508            "decay_lambda must be in (0.0, 1.0]",
509        ));
510    }
511    Ok(value)
512}
513
514fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
515where
516    D: serde::Deserializer<'de>,
517{
518    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
519    if value == 0 {
520        return Err(serde::de::Error::custom("max_hops must be >= 1"));
521    }
522    Ok(value)
523}
524
525impl SpreadingActivationConfig {
526    /// Validate cross-field constraints that cannot be expressed in per-field validators.
527    ///
528    /// # Errors
529    ///
530    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
531    pub fn validate(&self) -> Result<(), String> {
532        if self.activation_threshold >= self.inhibition_threshold {
533            return Err(format!(
534                "activation_threshold ({}) must be < inhibition_threshold ({})",
535                self.activation_threshold, self.inhibition_threshold
536            ));
537        }
538        Ok(())
539    }
540}
541
// Serde defaults for spreading-activation seed ranking.

fn default_seed_structural_weight() -> f32 {
    0.4
}

fn default_seed_community_cap() -> usize {
    3
}
549
550impl Default for SpreadingActivationConfig {
551    fn default() -> Self {
552        Self {
553            enabled: false,
554            decay_lambda: default_spreading_activation_decay_lambda(),
555            max_hops: default_spreading_activation_max_hops(),
556            activation_threshold: default_spreading_activation_activation_threshold(),
557            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
558            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
559            seed_structural_weight: default_seed_structural_weight(),
560            seed_community_cap: default_seed_community_cap(),
561            recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
562        }
563    }
564}
565
/// Kumiho belief revision configuration.
///
/// `similarity_threshold` is validated at deserialization time to be finite
/// and within `[0.0, 1.0]` (see `validate_similarity_threshold`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
577
// Serde default for `BeliefRevisionConfig::similarity_threshold`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85
}
581
582impl Default for BeliefRevisionConfig {
583    fn default() -> Self {
584        Self {
585            enabled: false,
586            similarity_threshold: default_belief_revision_similarity_threshold(),
587        }
588    }
589}
590
591/// D-MEM RPE-based tiered graph extraction routing configuration.
592#[derive(Debug, Clone, Deserialize, Serialize)]
593#[serde(default)]
594pub struct RpeConfig {
595    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
596    pub enabled: bool,
597    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
598    /// Default: `0.3`.
599    #[serde(deserialize_with = "validate_similarity_threshold")]
600    pub threshold: f32,
601    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
602    pub max_skip_turns: u32,
603}
604
// Serde defaults for `RpeConfig`.

fn default_rpe_threshold() -> f32 {
    0.3
}

fn default_rpe_max_skip_turns() -> u32 {
    5
}
612
613impl Default for RpeConfig {
614    fn default() -> Self {
615        Self {
616            enabled: false,
617            threshold: default_rpe_threshold(),
618            max_skip_turns: default_rpe_max_skip_turns(),
619        }
620    }
621}
622
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    /// Validated at deserialization: finite and in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    // NOTE(review): no lower-bound validation — `top_k = 0` deserializes fine
    // and presumably disables linking; confirm intended.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
641
642impl Default for NoteLinkingConfig {
643    fn default() -> Self {
644        Self {
645            enabled: false,
646            similarity_threshold: default_note_linking_similarity_threshold(),
647            top_k: default_note_linking_top_k(),
648            timeout_secs: default_note_linking_timeout_secs(),
649        }
650    }
651}
652
/// Vector backend selector for embedding storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Qdrant vector store; serialized as `"qdrant"` in TOML.
    Qdrant,
    /// `SQLite`-backed vector store; serialized as `"sqlite"`. This is the default.
    #[default]
    Sqlite,
}
661
662impl VectorBackend {
663    /// Return the lowercase identifier string for this backend.
664    ///
665    /// # Examples
666    ///
667    /// ```
668    /// use zeph_config::VectorBackend;
669    ///
670    /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
671    /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
672    /// ```
673    #[must_use]
674    pub fn as_str(&self) -> &'static str {
675        match self {
676            Self::Qdrant => "qdrant",
677            Self::Sqlite => "sqlite",
678        }
679    }
680}
681
/// Memory subsystem configuration, nested under `[memory]` in TOML.
///
/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
/// multi-tier promotion, and all memory-related background tasks.
///
/// # Example (TOML)
///
/// ```toml
/// [memory]
/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
/// qdrant_url = "http://localhost:6334"
/// history_limit = 50
/// summarization_threshold = 50
/// auto_budget = true
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct MemoryConfig {
    /// Compression guidelines sub-config (`[memory.compression_guidelines]`).
    #[serde(default)]
    pub compression_guidelines: CompressionGuidelinesConfig,
    /// Path to the `SQLite` database file.
    /// Default: `default_sqlite_path_field()` from `crate::defaults`.
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// Message history limit.
    // NOTE(review): this is the only field here without a serde default, so a
    // TOML file omitting `history_limit` fails to deserialize — confirm
    // intentional (the struct-level docs show it as a required example key).
    pub history_limit: u32,
    /// Qdrant endpoint URL. Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    /// Optional API key for authenticating to a remote or managed Qdrant cluster.
    ///
    /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
    /// Leave `None` for local dev instances. The actual key is resolved from the vault:
    /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
    ///
    /// The value is wrapped in [`Secret`] to prevent accidental logging.
    /// `skip_serializing` prevents the key from being written back to TOML on config save.
    #[serde(default, skip_serializing)]
    pub qdrant_api_key: Option<Secret>,
    /// Semantic recall sub-config (`[memory.semantic]`).
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Summarization threshold. Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Context token budget. Default: `0`.
    // NOTE(review): `0` is presumably a "no explicit budget" sentinel given
    // `auto_budget` defaults to true — confirm against the consumer.
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Soft compaction threshold. Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Hard compaction threshold. Also accepted under the legacy TOML key
    /// `compaction_threshold`. Default: `0.90`.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Number of trailing messages preserved across compaction. Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Cooldown between compactions, in turns. Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Automatic budget toggle. Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Token count protected from pruning. Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Score threshold for cross-session recall. Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Vector storage backend. Default: [`VectorBackend::Sqlite`].
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Token estimate safety margin. Default: `1.0` (no extra margin).
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Redact credentials from stored content. Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    /// Autosave assistant messages. Default: `true`.
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Minimum length for autosave. Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Tool-call cutoff. Default: `6`.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// `SQLite` connection pool size. Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    /// Sessions sub-config (`[memory.sessions]`).
    #[serde(default)]
    pub sessions: SessionsConfig,
    /// Documents sub-config (`[memory.documents]`).
    #[serde(default)]
    pub documents: DocumentConfig,
    /// Eviction sub-config (`[memory.eviction]`).
    #[serde(default)]
    pub eviction: EvictionConfig,
    /// Compression sub-config (`[memory.compression]`).
    #[serde(default)]
    pub compression: CompressionConfig,
    /// Sidequest sub-config (`[memory.sidequest]`).
    #[serde(default)]
    pub sidequest: SidequestConfig,
    /// Graph memory sub-config (`[memory.graph]`).
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `10`.
    // NOTE(review): doc says `10` but `default_shutdown_summary_timeout_secs()`
    // returns `30` — the code is authoritative; confirm and fix the stale doc.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
    /// A-MAC adaptive memory admission control.
    ///
    /// When `admission.enabled = true`, each message is evaluated before saving and rejected
    /// if its composite admission score falls below the configured threshold.
    #[serde(default)]
    pub admission: AdmissionConfig,
    /// Session digest generation at session end. Default: disabled.
    #[serde(default)]
    pub digest: DigestConfig,
    /// Context assembly strategy. Default: `full_history` (current behavior).
    #[serde(default)]
    pub context_strategy: ContextStrategy,
    /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
    #[serde(default = "default_crossover_turn_threshold")]
    pub crossover_turn_threshold: u32,
    /// All-Mem lifelong memory consolidation sweep.
    ///
    /// When `consolidation.enabled = true`, a background loop clusters semantically similar
    /// messages and merges them into consolidated entries via LLM.
    #[serde(default)]
    pub consolidation: ConsolidationConfig,
    /// `SleepGate` forgetting sweep (#2397).
    ///
    /// When `forgetting.enabled = true`, a background loop periodically decays importance
    /// scores and prunes memories below the forgetting floor.
    #[serde(default)]
    pub forgetting: ForgettingConfig,
    /// `PostgreSQL` connection URL.
    ///
    /// Used when the binary is compiled with `--features postgres`.
    /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
    /// Example: `postgres://user:pass@localhost:5432/zeph`
    /// Default: `None` (uses `sqlite_path` instead).
    #[serde(default)]
    pub database_url: Option<String>,
    /// Cost-sensitive store routing (#2444).
    ///
    /// When `store_routing.enabled = true`, query intent is classified and routed to
    /// the cheapest sufficient backend instead of querying all stores on every turn.
    #[serde(default)]
    pub store_routing: StoreRoutingConfig,
    /// Persona memory layer (#2461).
    ///
    /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
    /// from conversation history and injected into context after the system prompt.
    #[serde(default)]
    pub persona: PersonaConfig,
    /// Trajectory-informed memory (#2498).
    #[serde(default)]
    pub trajectory: TrajectoryConfig,
    /// Category-aware memory (#2428).
    #[serde(default)]
    pub category: CategoryConfig,
    /// `TiMem` temporal-hierarchical memory tree (#2262).
    #[serde(default)]
    pub tree: TreeConfig,
    /// Time-based microcompact (#2699).
    ///
    /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
    /// from context when the session has been idle longer than `gap_threshold_minutes`.
    #[serde(default)]
    pub microcompact: MicrocompactConfig,
    /// autoDream background memory consolidation (#2697).
    ///
    /// When `autodream.enabled = true`, a constrained consolidation subagent runs
    /// after a session ends if both `min_sessions` and `min_hours` gates pass.
    #[serde(default)]
    pub autodream: AutoDreamConfig,
    /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
    ///
    /// Before inserting a new key fact, its nearest neighbour is looked up in the
    /// `zeph_key_facts` collection.  If the best score is ≥ this threshold the fact is
    /// considered a near-duplicate and skipped.  Set to a value greater than `1.0` (e.g.
    /// `2.0`) to disable dedup entirely.  Default: `0.95`.
    #[serde(default = "default_key_facts_dedup_threshold")]
    pub key_facts_dedup_threshold: f32,
    /// Experience compression spectrum (#3305).
    ///
    /// Controls three-tier retrieval policy and background skill-promotion engine.
    #[serde(default)]
    pub compression_spectrum: crate::features::CompressionSpectrumConfig,
    /// MemMachine-inspired retrieval-stage tuning (#3340).
    ///
    /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
    /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
    /// uniformly across graph, hybrid, and vector-only recall paths.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.retrieval]
    /// depth = 40
    /// search_prompt_template = ""
    /// context_format = "structured"
    /// ```
    #[serde(default)]
    pub retrieval: RetrievalConfig,
    /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
    ///
    /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
    /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
    /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
    /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
    /// and injected before the LLM call.
    #[serde(default)]
    pub reasoning: ReasoningConfig,
    /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
    ///
    /// When `enabled = true`, the weight of each `graph_edges` row is incremented
    /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.hebbian]
    /// enabled = true
    /// hebbian_lr = 0.1
    /// ```
    #[serde(default)]
    pub hebbian: HebbianConfig,
    /// `MemCoT` rolling semantic state configuration (#3574).
    ///
    /// When `enabled = true`, each completed assistant turn spawns a background distillation
    /// task that compresses the response into a short semantic state buffer. The buffer is
    /// prepended to graph recall queries so retrieval stays contextually relevant across long
    /// multi-turn sessions.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.memcot]
    /// enabled = true
    /// distill_provider = "fast"
    /// min_assistant_chars = 200
    /// max_distills_per_session = 50
    /// ```
    #[serde(default)]
    pub memcot: MemCotConfig,
    /// `OmniMem` retrieval failure tracking (issue #3576).
    ///
    /// When `enabled = true`, no-hit and low-confidence recall events are logged
    /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [memory.retrieval_failures]
    /// enabled = true
    /// low_confidence_threshold = 0.3
    /// retention_days = 90
    /// ```
    #[serde(default)]
    pub retrieval_failures: RetrievalFailuresConfig,
    /// Write quality gate (#3629).
    ///
    /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
    /// writes are rejected before persistence. Evaluated after A-MAC admission control.
    #[serde(default)]
    pub quality_gate: WriteQualityGateConfig,
}
958
/// Serde default for `retrieval_failures.low_confidence_threshold`.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    0.3_f32
}

/// Serde default for `retrieval_failures.retention_days`.
fn default_retrieval_failures_retention_days() -> u32 {
    90_u32
}

/// Serde default for `retrieval_failures.channel_capacity`.
fn default_retrieval_failures_channel_capacity() -> usize {
    256_usize
}

/// Serde default for `retrieval_failures.batch_size`.
fn default_retrieval_failures_batch_size() -> usize {
    16_usize
}

/// Serde default for `retrieval_failures.flush_interval_ms`.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    100_u64
}

/// Serde default: turn count at which `ContextStrategy::Adaptive` switches
/// from `FullHistory` to `MemoryFirst`.
fn default_crossover_turn_threshold() -> u32 {
    20_u32
}

/// Serde default: cosine-similarity threshold for key-fact dedup (#2717).
fn default_key_facts_dedup_threshold() -> f32 {
    0.95_f32
}
986
/// Session digest configuration (#2289).
///
/// Container-level `#[serde(default)]`: any field missing from the TOML falls back
/// to the value produced by [`DigestConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when `None`.
    // NOTE(review): this field-level `#[serde(default)]` is redundant with the
    // container-level one above (both yield `None`); harmless, kept as-is.
    #[serde(default)]
    pub provider: Option<ProviderName>,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
1002
1003impl Default for DigestConfig {
1004    fn default() -> Self {
1005        Self {
1006            enabled: false,
1007            provider: None,
1008            max_tokens: 500,
1009            max_input_messages: 50,
1010        }
1011    }
1012}
1013
/// Context assembly strategy (#2288).
///
/// Serialized in `snake_case`: `full_history`, `memory_first`, `adaptive`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold` (serde default: `20`).
    Adaptive,
}
1029
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
///
/// Container-level `#[serde(default)]` plus per-field default fns: partially
/// specified (or absent) sections deserialize to the documented defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
1041
1042impl Default for SessionsConfig {
1043    fn default() -> Self {
1044        Self {
1045            max_history: default_max_history(),
1046            title_max_chars: default_title_max_chars(),
1047        }
1048    }
1049}
1050
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Every field carries its own serde default, so a partially specified TOML
/// section deserializes cleanly.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Vector collection name for document chunks. Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Chunk size used when splitting documents. Default: `1000`.
    // NOTE(review): unit (characters vs. tokens) is not visible here — confirm
    // against the ingestion pipeline before documenting further.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, same unit as `chunk_size`. Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
1067
1068impl Default for DocumentConfig {
1069    fn default() -> Self {
1070        Self {
1071            collection: default_document_collection(),
1072            chunk_size: default_document_chunk_size(),
1073            chunk_overlap: default_document_chunk_overlap(),
1074            top_k: default_document_top_k(),
1075            rag_enabled: false,
1076        }
1077    }
1078}
1079
/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
///
/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
/// re-ranking, and hybrid BM25+vector weighting.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.semantic]
/// enabled = true
/// recall_limit = 5
/// vector_weight = 0.7
/// keyword_weight = 0.3
/// mmr_lambda = 0.7
/// ```
// NOTE(review): unlike the sibling config structs this one does not derive `Clone`;
// looks intentional (avoid cheap copies of a large config?) — confirm before adding.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct SemanticConfig {
    /// Enable vector-based semantic recall. Default: `true`.
    #[serde(default = "default_semantic_enabled")]
    pub enabled: bool,
    /// Maximum number of memories returned by semantic recall. Default: `5`.
    #[serde(default = "default_recall_limit")]
    pub recall_limit: usize,
    /// Relative weight of the vector-similarity score in hybrid ranking
    /// (see `default_vector_weight` for the default value).
    #[serde(default = "default_vector_weight")]
    pub vector_weight: f64,
    /// Relative weight of the keyword (BM25) score in hybrid ranking
    /// (see `default_keyword_weight` for the default value).
    #[serde(default = "default_keyword_weight")]
    pub keyword_weight: f64,
    /// Apply temporal decay to recall scores. Default: `true`.
    #[serde(default = "default_true")]
    pub temporal_decay_enabled: bool,
    /// Half-life of the temporal decay, in days. Default: `30`.
    #[serde(default = "default_temporal_decay_half_life_days")]
    pub temporal_decay_half_life_days: u32,
    /// Enable MMR diversity re-ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub mmr_enabled: bool,
    /// MMR lambda. Default: `0.7`.
    // NOTE(review): presumably the relevance-vs-diversity trade-off with higher
    // values favoring relevance — confirm against the recall implementation.
    #[serde(default = "default_mmr_lambda")]
    pub mmr_lambda: f32,
    /// Enable importance weighting of recall scores. Default: `true`.
    #[serde(default = "default_true")]
    pub importance_enabled: bool,
    /// Importance weight; validated at deserialization time by
    /// `validate_importance_weight`.
    #[serde(
        default = "default_importance_weight",
        deserialize_with = "validate_importance_weight"
    )]
    pub importance_weight: f64,
    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
    /// from contending with the guardrail at the API server level (rate limits, Ollama
    /// single-model lock). Falls back to the main agent provider when `None`.
    #[serde(default)]
    pub embed_provider: Option<ProviderName>,
}
1129
1130impl Default for SemanticConfig {
1131    fn default() -> Self {
1132        Self {
1133            enabled: default_semantic_enabled(),
1134            recall_limit: default_recall_limit(),
1135            vector_weight: default_vector_weight(),
1136            keyword_weight: default_keyword_weight(),
1137            temporal_decay_enabled: true,
1138            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1139            mmr_enabled: true,
1140            mmr_lambda: default_mmr_lambda(),
1141            importance_enabled: true,
1142            importance_weight: default_importance_weight(),
1143            embed_provider: None,
1144        }
1145    }
1146}
1147
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// Serialized in `snake_case`: `structured`, `plain`.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default. Gives the LLM structured provenance metadata for each recalled
    /// memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1175
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML.  All fields have defaults so existing
/// configs parse unchanged (container-level `#[serde(default)]`; the query-bias fields
/// additionally carry field-level default fns).
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0          # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped.  Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1240
/// Serde default for `RetrievalConfig::query_bias_correction` (MM-F3, #3341).
fn default_query_bias_correction() -> bool {
    true
}

/// Serde default for `RetrievalConfig::query_bias_profile_weight`.
fn default_query_bias_profile_weight() -> f32 {
    0.25_f32
}

/// Serde default for `RetrievalConfig::query_bias_centroid_ttl_secs` (5 minutes).
fn default_query_bias_centroid_ttl_secs() -> u64 {
    300_u64
}
1252
1253impl Default for RetrievalConfig {
1254    fn default() -> Self {
1255        Self {
1256            depth: 0,
1257            search_prompt_template: String::new(),
1258            context_format: ContextFormat::default(),
1259            query_bias_correction: default_query_bias_correction(),
1260            query_bias_profile_weight: default_query_bias_profile_weight(),
1261            query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1262        }
1263    }
1264}
1265
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// Container-level `#[serde(default)]`: any field missing from the TOML falls back
/// to [`HebbianConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when `None` or unresolvable.
    #[serde(default)]
    pub consolidate_provider: Option<ProviderName>,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<String>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
}
1328
1329impl Default for HebbianConfig {
1330    fn default() -> Self {
1331        Self {
1332            enabled: false,
1333            hebbian_lr: 0.1,
1334            consolidation_interval_secs: 3600,
1335            consolidation_threshold: 5.0,
1336            consolidate_provider: None,
1337            max_candidates_per_sweep: 10,
1338            consolidation_cooldown_secs: 86_400,
1339            consolidation_prompt_timeout_secs: 30,
1340            consolidation_max_neighbors: 20,
1341            spreading_activation: false,
1342            spread_depth: 2,
1343            spread_edge_types: Vec::new(),
1344            step_budget_ms: 8,
1345        }
1346    }
1347}
1348
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged for serde (`tag = "strategy"`, `snake_case`): the TOML selects the
/// variant via a `strategy = "…"` key, with any variant fields alongside it. At the use
/// site this enum is `#[serde(flatten)]`-ed into [`CompressionConfig`].
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1372
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// Only `Serialize` is derived here; `Deserialize` is hand-written below to route through
/// `FromStr`, so removed variant names degrade gracefully instead of hard-erroring.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1397
1398impl PruningStrategy {
1399    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1400    #[must_use]
1401    pub fn is_subgoal(self) -> bool {
1402        matches!(self, Self::Subgoal | Self::SubgoalMig)
1403    }
1404}
1405
1406// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1407// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1408impl<'de> serde::Deserialize<'de> for PruningStrategy {
1409    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1410        let s = String::deserialize(deserializer)?;
1411        s.parse().map_err(serde::de::Error::custom)
1412    }
1413}
1414
1415impl std::str::FromStr for PruningStrategy {
1416    type Err = String;
1417
1418    fn from_str(s: &str) -> Result<Self, Self::Err> {
1419        match s {
1420            "reactive" => Ok(Self::Reactive),
1421            "task_aware" | "task-aware" => Ok(Self::TaskAware),
1422            "mig" => Ok(Self::Mig),
1423            // task_aware_mig was removed (dead code — was routed to scored path only).
1424            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1425            "task_aware_mig" | "task-aware-mig" => {
1426                tracing::warn!(
1427                    "pruning strategy `task_aware_mig` has been removed; \
1428                     falling back to `reactive`. Use `task_aware` or `mig` instead."
1429                );
1430                Ok(Self::Reactive)
1431            }
1432            "subgoal" => Ok(Self::Subgoal),
1433            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1434            other => Err(format!(
1435                "unknown pruning strategy `{other}`, expected \
1436                 reactive|task_aware|mig|subgoal|subgoal_mig"
1437            )),
1438        }
1439    }
1440}
1441
/// Serde default for `CompressionConfig::high_density_budget` (#2481).
fn default_high_density_budget() -> f32 {
    0.7_f32
}

/// Serde default for `CompressionConfig::low_density_budget` (#2481).
fn default_low_density_budget() -> f32 {
    0.3_f32
}
1449
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// Container-level `#[serde(default)]`: any field missing from the TOML falls back
/// to [`ForgettingConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
1477
1478impl Default for ForgettingConfig {
1479    fn default() -> Self {
1480        Self {
1481            enabled: false,
1482            decay_rate: 0.1,
1483            forgetting_floor: 0.05,
1484            sweep_interval_secs: 7200,
1485            sweep_batch_size: 500,
1486            replay_window_hours: 24,
1487            replay_min_access_count: 3,
1488            protect_recent_hours: 24,
1489            protect_min_access_count: 3,
1490        }
1491    }
1492}
1493
1494/// Configuration for active context compression (#1161).
1495#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1496#[serde(default)]
1497pub struct CompressionConfig {
1498    /// Compression strategy.
1499    #[serde(flatten)]
1500    pub strategy: CompressionStrategy,
1501    /// Tool-output pruning strategy (requires `context-compression` feature).
1502    pub pruning_strategy: PruningStrategy,
1503    /// Model to use for compression summaries.
1504    ///
1505    /// Currently unused — the primary summary provider is used regardless of this value.
1506    /// Reserved for future per-compression model selection. Setting this field has no effect.
1507    pub model: String,
1508    /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1509    /// Falls back to the primary provider when empty. Default: `""`.
1510    pub compress_provider: ProviderName,
1511    /// Compaction probe: validates summary quality before committing it (#1609).
1512    #[serde(default)]
1513    pub probe: CompactionProbeConfig,
1514    /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1515    ///
1516    /// When enabled, tool output bodies in the compaction range are saved to
1517    /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1518    /// The LLM summarizes placeholder messages; archived content is appended as
1519    /// a postfix after summarization so references survive compaction.
1520    /// Default: `false`.
1521    #[serde(default)]
1522    pub archive_tool_outputs: bool,
1523    /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
1524    /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
1525    /// Falls back to the primary provider when empty. Default: `""`.
1526    pub focus_scorer_provider: ProviderName,
1527    /// Token-budget fraction for high-density content in density-aware compression (#2481).
1528    /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1529    #[serde(default = "default_high_density_budget")]
1530    pub high_density_budget: f32,
1531    /// Token-budget fraction for low-density content in density-aware compression (#2481).
1532    /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1533    #[serde(default = "default_low_density_budget")]
1534    pub low_density_budget: f32,
1535    /// Typed-page classification and batch-level assertion checking (#3630).
1536    #[serde(default)]
1537    pub typed_pages: TypedPagesConfig,
1538}
1539
/// Configuration for typed-page compaction invariants (#3630).
///
/// Controls classification, batch-level assertion checking, and audit logging.
/// All behavior is disabled by default; set `enabled = true` to activate.
///
/// Container-level `#[serde(default)]`: any field missing from the TOML falls back
/// to [`TypedPagesConfig::default`].
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.typed_pages]
/// enabled = true
/// enforcement = "active"
/// audit_path = ""
/// audit_channel_capacity = 256
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(default)]
pub struct TypedPagesConfig {
    /// Enable typed-page classification and batch-level assertion checking.
    /// Default: `false`.
    pub enabled: bool,
    /// Enforcement mode:
    ///
    /// - `observe`: classify and emit audit records only; no behavioral change.
    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
    ///
    /// Default: `"observe"`.
    pub enforcement: TypedPagesEnforcement,
    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
    /// Default: `""`.
    ///
    /// # Security
    ///
    /// This field is **operator-only trusted input** read from the agent's configuration file.
    /// Write access to the config file implies file-system write access, so no additional
    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
    /// configuration sources.
    pub audit_path: String,
    /// Bounded channel capacity for the async audit writer. Default: `256`.
    pub audit_channel_capacity: usize,
}
1580
1581impl Default for TypedPagesConfig {
1582    fn default() -> Self {
1583        Self {
1584            enabled: false,
1585            enforcement: TypedPagesEnforcement::Observe,
1586            audit_path: String::new(),
1587            audit_channel_capacity: 256,
1588        }
1589    }
1590}
1591
/// Enforcement mode for typed-page compaction (#3630).
///
/// Serialized in `snake_case`: `observe`, `active`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum TypedPagesEnforcement {
    /// Classify and audit only. Zero behavioral change relative to the untyped path.
    #[default]
    Observe,
    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
    Active,
}
1602
/// Serde default for `SidequestConfig::interval_turns`.
fn default_sidequest_interval_turns() -> u32 {
    4_u32
}

/// Serde default for `SidequestConfig::max_eviction_ratio`.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5_f32
}

/// Serde default for `SidequestConfig::max_cursors`.
fn default_sidequest_max_cursors() -> usize {
    30_usize
}

/// Serde default for `SidequestConfig::min_cursor_tokens`.
fn default_sidequest_min_cursor_tokens() -> usize {
    100_usize
}
1618
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Container-level `#[serde(default)]` plus per-field default fns: partially
/// specified (or absent) sections deserialize to the documented defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1639
1640impl Default for SidequestConfig {
1641    fn default() -> Self {
1642        Self {
1643            enabled: false,
1644            interval_turns: default_sidequest_interval_turns(),
1645            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1646            max_cursors: default_sidequest_max_cursors(),
1647            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1648        }
1649    }
1650}
1651
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
///
/// Variants deserialize from their `snake_case` names (e.g. `water_circles`,
/// `beam_search`); `AStar` is explicitly renamed to `astar` below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior).
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
    Bfs,
    /// A* shortest-path traversal via petgraph.
    // Explicit rename: `rename_all = "snake_case"` would otherwise yield `a_star`.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query.
    Hybrid,
}

/// Default beam width for [`BeamSearchConfig`].
fn default_beam_width() -> usize {
    10
}

/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop.
// NOTE(review): no container-level `#[serde(default)]` here; the single
// field-level default alone makes the struct fully defaultable.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
1689
1690impl Default for BeamSearchConfig {
1691    fn default() -> Self {
1692        Self {
1693            beam_width: default_beam_width(),
1694        }
1695    }
1696}
1697
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}

/// Default number of turns between graph evolution sweeps.
fn default_evolution_sweep_interval() -> usize {
    50
}

/// Default confidence below which zero-retrieval edges are pruned.
fn default_confidence_prune_threshold() -> f32 {
    0.1
}

/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
1734
1735impl Default for ExperienceConfig {
1736    fn default() -> Self {
1737        Self {
1738            enabled: false,
1739            evolution_sweep_enabled: false,
1740            confidence_prune_threshold: default_confidence_prune_threshold(),
1741            evolution_sweep_interval: default_evolution_sweep_interval(),
1742        }
1743    }
1744}
1745
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable graph memory. Default: `false` (see the `Default` impl below).
    pub enabled: bool,
    /// Model identifier for LLM entity/relation extraction. Default: `""`.
    // NOTE(review): empty presumably means "use the provider's default model" — confirm.
    pub extract_model: String,
    /// Cap on entities extracted from a single message.
    /// Default supplied by `default_graph_max_entities_per_message`.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Cap on edges extracted from a single message.
    /// Default supplied by `default_graph_max_edges_per_message`.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Interval between community-detection refreshes.
    // NOTE(review): units (turns vs. messages) inferred from sibling fields — confirm.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used during entity resolution.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Timeout for an LLM extraction call, in seconds.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Use embedding-based entity resolution. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Similarity threshold at which an entity match is treated as ambiguous.
    // NOTE(review): comparison direction not visible here — confirm against resolver.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum traversal depth (hops) during graph recall.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Maximum facts returned by graph recall.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
    ///
    /// When non-empty, graph extraction (and downstream note linking and community
    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
    /// which causes systematic quality gate rejections. A named provider built via
    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
    /// has no quality gate attached.
    ///
    /// Falls back to the primary provider when empty. Default: `""` (use primary).
    #[serde(default)]
    pub extract_provider: ProviderName,
    /// Named LLM provider for hybrid strategy classification.
    /// Falls back to the default provider when `None`.
    #[serde(default)]
    pub strategy_classifier_provider: Option<ProviderName>,
    /// Beam search configuration.
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration.
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration.
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
    /// APEX-MEM append-only write path (#3631).
    ///
    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
    /// supersession chains instead of the legacy destructive-update path.
    #[serde(default)]
    pub apex_mem: ApexMemConfig,
}
1892
/// Default size of the `SQLite` connection pool dedicated to graph operations.
fn default_graph_pool_size() -> u32 {
    3
}

/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
///
/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
/// the full supersession chain and enables conflict resolution.
///
/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
// Derived `Default` yields `enabled: false`, so the feature is opt-in.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ApexMemConfig {
    /// Enable the APEX-MEM append-only write path. Default: `false`.
    pub enabled: bool,
}
1910
/// Default combined-score rejection threshold (`threshold`).
fn default_quality_gate_threshold() -> f32 {
    0.55
}

/// Default number of recent writes compared for information-value scoring.
fn default_quality_gate_recent_window() -> usize {
    32
}

/// Default age (seconds) after which an edge is stable for contradiction detection.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    300
}

/// Default weight of the `information_value` sub-score.
fn default_quality_gate_information_value_weight() -> f32 {
    0.4
}

/// Default weight of the `reference_completeness` sub-score.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    0.3
}

/// Default weight of the `contradiction` sub-score.
fn default_quality_gate_contradiction_weight() -> f32 {
    0.3
}

/// Default rolling rejection-rate alarm ratio.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    0.35
}

/// Default LLM scoring timeout, in milliseconds.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    500
}

/// Default LLM blend weight into the final score.
fn default_quality_gate_llm_weight() -> f32 {
    0.5
}

/// Default for enabling English pronoun/deictic reference checks.
fn default_quality_gate_reference_check_lang_en() -> bool {
    true
}

/// Write quality gate configuration (`[memory.quality_gate]`).
///
/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
/// scoring is opt-in via `quality_gate_provider`.
///
/// The three sub-score weights default to `0.4 / 0.3 / 0.3`, which sum to `1.0`.
///
/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct WriteQualityGateConfig {
    /// Enable the write quality gate. Default: `false`.
    pub enabled: bool,
    /// Combined score threshold below which writes are rejected. Default: `0.55`.
    #[serde(default = "default_quality_gate_threshold")]
    pub threshold: f32,
    /// Number of recent writes compared for information-value scoring. Default: `32`.
    #[serde(default = "default_quality_gate_recent_window")]
    pub recent_window: usize,
    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
    pub contradiction_grace_seconds: u64,
    /// Weight of `information_value` sub-score. Default: `0.4`.
    #[serde(default = "default_quality_gate_information_value_weight")]
    pub information_value_weight: f32,
    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_reference_completeness_weight")]
    pub reference_completeness_weight: f32,
    /// Weight of `contradiction` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_contradiction_weight")]
    pub contradiction_weight: f32,
    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
    pub rejection_rate_alarm_ratio: f32,
    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
    #[serde(default)]
    pub quality_gate_provider: ProviderName,
    /// LLM timeout in milliseconds. Default: `500`.
    #[serde(default = "default_quality_gate_llm_timeout_ms")]
    pub llm_timeout_ms: u64,
    /// LLM blend weight into final score. Default: `0.5`.
    #[serde(default = "default_quality_gate_llm_weight")]
    pub llm_weight: f32,
    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
    #[serde(default = "default_quality_gate_reference_check_lang_en")]
    pub reference_check_lang_en: bool,
}
1997
1998impl Default for WriteQualityGateConfig {
1999    fn default() -> Self {
2000        Self {
2001            enabled: false,
2002            threshold: default_quality_gate_threshold(),
2003            recent_window: default_quality_gate_recent_window(),
2004            contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
2005            information_value_weight: default_quality_gate_information_value_weight(),
2006            reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
2007            contradiction_weight: default_quality_gate_contradiction_weight(),
2008            rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
2009            quality_gate_provider: ProviderName::default(),
2010            llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
2011            llm_weight: default_quality_gate_llm_weight(),
2012            reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
2013        }
2014    }
2015}
2016
/// Programmatic defaults for `GraphConfig`.
///
/// Keep each field in sync with its `#[serde(default = "...")]` helper so a
/// missing `[memory.graph]` section and a missing individual key yield the
/// same value.
impl Default for GraphConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            extract_model: String::new(),
            max_entities_per_message: default_graph_max_entities_per_message(),
            max_edges_per_message: default_graph_max_edges_per_message(),
            community_refresh_interval: default_graph_community_refresh_interval(),
            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
            use_embedding_resolution: false,
            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
            max_hops: default_graph_max_hops(),
            recall_limit: default_graph_recall_limit(),
            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
            max_entities: 0,
            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
            community_summary_concurrency: default_graph_community_summary_concurrency(),
            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
            temporal_decay_rate: default_graph_temporal_decay_rate(),
            edge_history_limit: default_graph_edge_history_limit(),
            note_linking: NoteLinkingConfig::default(),
            spreading_activation: SpreadingActivationConfig::default(),
            retrieval_strategy: GraphRetrievalStrategy::default(),
            extract_provider: ProviderName::default(),
            strategy_classifier_provider: None,
            beam_search: BeamSearchConfig::default(),
            watercircles: WaterCirclesConfig::default(),
            experience: ExperienceConfig::default(),
            link_weight_decay_lambda: default_link_weight_decay_lambda(),
            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
            belief_revision: BeliefRevisionConfig::default(),
            rpe: RpeConfig::default(),
            pool_size: default_graph_pool_size(),
            apex_mem: ApexMemConfig::default(),
        }
    }
}
2055
/// Default minimum LLM confidence for applying a topology op.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7
}

/// Default sweep cadence: one hour, in seconds.
fn default_consolidation_sweep_interval_secs() -> u64 {
    3600
}

/// Default maximum messages evaluated per sweep cycle.
fn default_consolidation_sweep_batch_size() -> usize {
    50
}

/// Default minimum cosine similarity for consolidation candidates.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85
}

/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
}
2100
2101impl Default for ConsolidationConfig {
2102    fn default() -> Self {
2103        Self {
2104            enabled: false,
2105            consolidation_provider: ProviderName::default(),
2106            confidence_threshold: default_consolidation_confidence_threshold(),
2107            sweep_interval_secs: default_consolidation_sweep_interval_secs(),
2108            sweep_batch_size: default_consolidation_sweep_batch_size(),
2109            similarity_threshold: default_consolidation_similarity_threshold(),
2110        }
2111    }
2112}
2113
/// Default multiplicative decay factor for un-retrieved edge link weights.
fn default_link_weight_decay_lambda() -> f64 {
    0.95
}

/// Default interval between link weight decay passes: 24 hours, in seconds.
fn default_link_weight_decay_interval_secs() -> u64 {
    86400
}
2121
2122fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
2123where
2124    D: serde::Deserializer<'de>,
2125{
2126    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
2127    if value.is_nan() || value.is_infinite() {
2128        return Err(serde::de::Error::custom(
2129            "link_weight_decay_lambda must be a finite number",
2130        ));
2131    }
2132    if !(value > 0.0 && value <= 1.0) {
2133        return Err(serde::de::Error::custom(
2134            "link_weight_decay_lambda must be in (0.0, 1.0]",
2135        ));
2136    }
2137    Ok(value)
2138}
2139
2140fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
2141where
2142    D: serde::Deserializer<'de>,
2143{
2144    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2145    if value.is_nan() || value.is_infinite() {
2146        return Err(serde::de::Error::custom(
2147            "threshold must be a finite number",
2148        ));
2149    }
2150    if !(0.0..=1.0).contains(&value) {
2151        return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
2152    }
2153    Ok(value)
2154}
2155
2156fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
2157where
2158    D: serde::Deserializer<'de>,
2159{
2160    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2161    if value.is_nan() || value.is_infinite() {
2162        return Err(serde::de::Error::custom(
2163            "fast_path_margin must be a finite number",
2164        ));
2165    }
2166    if !(0.0..=1.0).contains(&value) {
2167        return Err(serde::de::Error::custom(
2168            "fast_path_margin must be in [0.0, 1.0]",
2169        ));
2170    }
2171    Ok(value)
2172}
2173
/// Default composite admission threshold (`threshold`).
fn default_admission_threshold() -> f32 {
    0.40
}

/// Default fast-path margin above the threshold (`fast_path_margin`).
fn default_admission_fast_path_margin() -> f32 {
    0.15
}

/// Default minimum training samples before the RL admission model activates.
fn default_rl_min_samples() -> u32 {
    500
}

/// Default RL model retraining interval: one hour, in seconds.
fn default_rl_retrain_interval_secs() -> u64 {
    3600
}

/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Deserializes from `heuristic` / `rl` (via `rename_all = "snake_case"`).
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
2204
2205fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
2206where
2207    D: serde::Deserializer<'de>,
2208{
2209    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2210    if value < 0.0 {
2211        return Err(serde::de::Error::custom(
2212            "admission weight must be non-negative (>= 0.0)",
2213        ));
2214    }
2215    Ok(value)
2216}
2217
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative; `validate_admission_weight` enforces this at
/// deserialization time.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
2246
2247impl Default for AdmissionWeights {
2248    fn default() -> Self {
2249        Self {
2250            future_utility: 0.30,
2251            factual_confidence: 0.15,
2252            semantic_novelty: 0.30,
2253            temporal_recency: 0.10,
2254            content_type_prior: 0.15,
2255            goal_utility: 0.0,
2256        }
2257    }
2258}
2259
2260impl AdmissionWeights {
2261    /// Return weights normalized so they sum to 1.0.
2262    ///
2263    /// All weights are non-negative; the sum is always > 0 when defaults are used.
2264    #[must_use]
2265    pub fn normalized(&self) -> Self {
2266        let sum = self.future_utility
2267            + self.factual_confidence
2268            + self.semantic_novelty
2269            + self.temporal_recency
2270            + self.content_type_prior
2271            + self.goal_utility;
2272        if sum <= f32::EPSILON {
2273            return Self::default();
2274        }
2275        Self {
2276            future_utility: self.future_utility / sum,
2277            factual_confidence: self.factual_confidence / sum,
2278            semantic_novelty: self.semantic_novelty / sum,
2279            temporal_recency: self.temporal_recency / sum,
2280            content_type_prior: self.content_type_prior / sum,
2281            goal_utility: self.goal_utility / sum,
2282        }
2283    }
2284}
2285
/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
///
/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
/// Messages below the composite admission threshold are rejected and not persisted.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionConfig {
    /// Enable A-MAC admission control. Default: `false`.
    pub enabled: bool,
    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
    /// Default: `0.40`.
    #[serde(deserialize_with = "validate_admission_threshold")]
    pub threshold: f32,
    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
    /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
    pub fast_path_margin: f32,
    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub admission_provider: ProviderName,
    /// Per-factor weights. Normalized at runtime.
    /// Default: `{0.30, 0.15, 0.30, 0.10, 0.15, 0.0}` (`goal_utility` last).
    pub weights: AdmissionWeights,
    /// Admission decision strategy. Default: `heuristic`.
    #[serde(default)]
    pub admission_strategy: AdmissionStrategy,
    /// Minimum training samples before the RL model is activated.
    /// Below this count the system falls back to `Heuristic`. Default: `500`.
    #[serde(default = "default_rl_min_samples")]
    pub rl_min_samples: u32,
    /// Background RL model retraining interval in seconds. Default: `3600`.
    #[serde(default = "default_rl_retrain_interval_secs")]
    pub rl_retrain_interval_secs: u64,
    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
    /// Zero regression when `false`. Default: `false`.
    #[serde(default)]
    pub goal_conditioned_write: bool,
    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
    /// Used only for borderline cases (similarity within 0.1 of threshold).
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub goal_utility_provider: ProviderName,
    /// Minimum cosine similarity between goal embedding and candidate memory
    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
    #[serde(default = "default_goal_utility_threshold")]
    pub goal_utility_threshold: f32,
    /// Weight of the `goal_utility` factor in the composite admission score.
    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
    #[serde(default = "default_goal_utility_weight")]
    pub goal_utility_weight: f32,
}

/// Default minimum goal/memory cosine similarity for goal relevance.
fn default_goal_utility_threshold() -> f32 {
    0.4
}

/// Default weight of the `goal_utility` factor in the composite score.
fn default_goal_utility_weight() -> f32 {
    0.25
}
2345
2346impl Default for AdmissionConfig {
2347    fn default() -> Self {
2348        Self {
2349            enabled: false,
2350            threshold: default_admission_threshold(),
2351            fast_path_margin: default_admission_fast_path_margin(),
2352            admission_provider: ProviderName::default(),
2353            weights: AdmissionWeights::default(),
2354            admission_strategy: AdmissionStrategy::default(),
2355            rl_min_samples: default_rl_min_samples(),
2356            rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
2357            goal_conditioned_write: false,
2358            goal_utility_provider: ProviderName::default(),
2359            goal_utility_threshold: default_goal_utility_threshold(),
2360            goal_utility_weight: default_goal_utility_weight(),
2361        }
2362    }
2363}
2364
/// Routing strategy for `[memory.store_routing]`.
///
/// Serialized in `snake_case` (`"heuristic"`, `"llm"`, `"hybrid"`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
2377
/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
///
/// Controls how each query is classified and routed to the appropriate memory
/// backend(s), avoiding unnecessary store queries for simple lookups.
///
/// Every field is optional in TOML via the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct StoreRoutingConfig {
    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
    /// directly (existing behavior). Default: `false`.
    pub enabled: bool,
    /// Routing strategy. Default: `heuristic`.
    pub strategy: StoreRoutingStrategy,
    /// Provider name from `[[llm.providers]]` for LLM-based classification.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub routing_classifier_provider: ProviderName,
    /// Route to use when the classifier is uncertain (confidence < threshold).
    /// Default: `"hybrid"`.
    // NOTE(review): stringly-typed; the accepted set of values is not visible
    // here — confirm against the router implementation.
    pub fallback_route: String,
    /// Confidence threshold below which `HybridRouter` escalates to LLM.
    /// Range: `[0.0, 1.0]`. Default: `0.7`.
    pub confidence_threshold: f32,
}
2400
2401impl Default for StoreRoutingConfig {
2402    fn default() -> Self {
2403        Self {
2404            enabled: false,
2405            strategy: StoreRoutingStrategy::Heuristic,
2406            routing_classifier_provider: ProviderName::default(),
2407            fallback_route: "hybrid".into(),
2408            confidence_threshold: 0.7,
2409        }
2410    }
2411}
2412
/// Persona memory layer configuration (#2461).
///
/// When `enabled = true`, user preferences and domain knowledge are extracted from
/// conversation history via a cheap LLM provider and injected after the system prompt.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PersonaConfig {
    /// Enable persona memory extraction and injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for persona extraction.
    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
    /// Default: `""`.
    pub persona_provider: ProviderName,
    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
    pub min_confidence: f64,
    /// Minimum user messages before extraction runs in a session. Default: `3`.
    pub min_messages: usize,
    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Token budget allocated to persona context in assembly. Default: `500`.
    pub context_budget_tokens: usize,
}
2436
2437impl Default for PersonaConfig {
2438    fn default() -> Self {
2439        Self {
2440            enabled: false,
2441            persona_provider: ProviderName::default(),
2442            min_confidence: 0.6,
2443            min_messages: 3,
2444            max_messages: 10,
2445            extraction_timeout_secs: 10,
2446            context_budget_tokens: 500,
2447        }
2448    }
2449}
2450
/// Trajectory-informed memory configuration (#2498).
///
/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
/// Procedural entries are injected into context as "past experience" during assembly.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TrajectoryConfig {
    /// Enable trajectory extraction and context injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    /// Default: `""`.
    pub trajectory_provider: ProviderName,
    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Number of procedural entries retrieved for context injection. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum confidence score for entries included in context. Default: `0.6`.
    pub min_confidence: f64,
}
2475
2476impl Default for TrajectoryConfig {
2477    fn default() -> Self {
2478        Self {
2479            enabled: false,
2480            trajectory_provider: ProviderName::default(),
2481            context_budget_tokens: 400,
2482            max_messages: 10,
2483            extraction_timeout_secs: 10,
2484            recall_top_k: 5,
2485            min_confidence: 0.6,
2486        }
2487    }
2488}
2489
/// Category-aware memory configuration (#2428).
///
/// When `enabled = true`, messages are auto-tagged with a category derived from the active
/// skill or tool context. The category is stored in the `messages.category` column and used
/// as a Qdrant payload filter during recall.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CategoryConfig {
    /// Enable category tagging and category-filtered recall. Default: `false`.
    pub enabled: bool,
    /// Automatically assign category from skill metadata or tool type. Default: `true`.
    // NOTE(review): presumably only consulted when `enabled = true` — confirm
    // in the tagging code path.
    pub auto_tag: bool,
}
2503
2504impl Default for CategoryConfig {
2505    fn default() -> Self {
2506        Self {
2507            enabled: false,
2508            auto_tag: true,
2509        }
2510    }
2511}
2512
/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
///
/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
/// for complex queries.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TreeConfig {
    /// Enable the memory tree and background consolidation loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for node consolidation.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    /// Default: `""`.
    pub consolidation_provider: ProviderName,
    /// Interval between consolidation sweeps in seconds. Default: `300`.
    pub sweep_interval_secs: u64,
    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
    pub batch_size: usize,
    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
    pub similarity_threshold: f32,
    /// Maximum tree depth (levels above leaves). Default: `3`.
    pub max_level: u32,
    /// Token budget allocated to tree memory in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Number of tree nodes retrieved for context. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
    pub min_cluster_size: usize,
}
2541
2542impl Default for TreeConfig {
2543    fn default() -> Self {
2544        Self {
2545            enabled: false,
2546            consolidation_provider: ProviderName::default(),
2547            sweep_interval_secs: 300,
2548            batch_size: 20,
2549            similarity_threshold: 0.8,
2550            max_level: 3,
2551            context_budget_tokens: 400,
2552            recall_top_k: 5,
2553            min_cluster_size: 2,
2554        }
2555    }
2556}
2557
2558/// Time-based microcompact configuration (#2699).
2559///
2560/// When `enabled = true`, low-value tool outputs are cleared from context
2561/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
2562/// The most recent `keep_recent` tool messages are preserved unconditionally.
2563#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2564#[serde(default)]
2565pub struct MicrocompactConfig {
2566    /// Enable time-based microcompaction. Default: `false`.
2567    pub enabled: bool,
2568    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
2569    pub gap_threshold_minutes: u32,
2570    /// Number of most recent compactable tool messages to preserve. Default: `3`.
2571    pub keep_recent: usize,
2572}
2573
2574impl Default for MicrocompactConfig {
2575    fn default() -> Self {
2576        Self {
2577            enabled: false,
2578            gap_threshold_minutes: 60,
2579            keep_recent: 3,
2580        }
2581    }
2582}
2583
2584/// autoDream background memory consolidation configuration (#2697).
2585///
2586/// When `enabled = true`, a constrained consolidation subagent runs after
2587/// a session ends if both `min_sessions` and `min_hours` gates pass.
2588#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2589#[serde(default)]
2590pub struct AutoDreamConfig {
2591    /// Enable autoDream consolidation. Default: `false`.
2592    pub enabled: bool,
2593    /// Minimum number of sessions between consolidations. Default: `3`.
2594    pub min_sessions: u32,
2595    /// Minimum hours between consolidations. Default: `24`.
2596    pub min_hours: u32,
2597    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
2598    /// Falls back to the primary provider when empty. Default: `""`.
2599    pub consolidation_provider: ProviderName,
2600    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
2601    pub max_iterations: u8,
2602}
2603
2604impl Default for AutoDreamConfig {
2605    fn default() -> Self {
2606        Self {
2607            enabled: false,
2608            min_sessions: 3,
2609            min_hours: 24,
2610            consolidation_provider: ProviderName::default(),
2611            max_iterations: 8,
2612        }
2613    }
2614}
2615
2616/// `MagicDocs` auto-maintained markdown configuration (#2702).
2617///
2618/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
2619/// are registered and periodically updated by a constrained subagent.
2620#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2621#[serde(default)]
2622pub struct MagicDocsConfig {
2623    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
2624    pub enabled: bool,
2625    /// Minimum turns between updates for a given doc path. Default: `5`.
2626    pub min_turns_between_updates: u32,
2627    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
2628    /// Falls back to the primary provider when empty. Default: `""`.
2629    pub update_provider: ProviderName,
2630    /// Maximum agent loop iterations per doc update. Default: `4`.
2631    pub max_iterations: u8,
2632}
2633
2634impl Default for MagicDocsConfig {
2635    fn default() -> Self {
2636        Self {
2637            enabled: false,
2638            min_turns_between_updates: 5,
2639            update_provider: ProviderName::default(),
2640            max_iterations: 4,
2641        }
2642    }
2643}
2644
#[cfg(test)]
mod tests {
    use super::*;

    // ── PruningStrategy TOML parsing ─────────────────────────────────────────

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every currently-supported strategy string must map to its variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Unknown strings (unlike the legacy task_aware_mig alias) must hard-error.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    // ── TierConfig defaults and deserialization-time validation ──────────────

    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // ── SemanticConfig::importance_weight bounds ─────────────────────────────

    // Helper: deserialize a SemanticConfig from a single importance_weight line.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    // Boundary: 0.0 is inclusive-valid.
    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    // Boundary: 1.0 is inclusive-valid.
    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // TOML does not have a NaN literal, but we can test via a f64 that
        // the validator rejects out-of-range values. Test with negative here
        // and rely on validate_importance_weight rejecting non-finite via
        // a constructed deserializer call.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        assert!(
            (n.future_utility - default.future_utility).abs() < 0.001,
            "zero-sum weights must fall back to defaults"
        );
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
2920
/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
///
/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
/// Successful and failed reasoning chains are compressed into short, generalizable strategy
/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
/// and injected into the prompt preamble.
///
/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
///
/// # Example
///
/// ```toml
/// [memory.reasoning]
/// enabled = true
/// extract_provider = "fast"
/// distill_provider = "fast"
/// top_k = 3
/// store_limit = 1000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ReasoningConfig {
    /// Enable the reasoning-bank pipeline. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the self-judge step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub extract_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for the distillation step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Number of strategies retrieved per turn for context injection. Default: `3`.
    pub top_k: usize,
    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
    pub store_limit: usize,
    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
    // NOTE(review): overlaps with `self_judge_window` below — confirm which cap
    // the judge path actually honors.
    pub max_messages: usize,
    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
    pub max_message_chars: usize,
    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
    pub context_budget_tokens: usize,
    /// Minimum number of messages required before self-judge fires. Default: `2`.
    pub min_messages: usize,
    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
    pub extraction_timeout_secs: u64,
    /// Timeout in seconds for the distillation LLM call. Default: `30`.
    pub distill_timeout_secs: u64,
    /// Maximum number of recent messages passed to the self-judge evaluator.
    /// Narrowing to the last user+assistant pair improves classification accuracy.
    /// Default: `2`.
    pub self_judge_window: usize,
    /// Minimum characters in the assistant response to trigger self-judge.
    /// Short or trivial responses are skipped. Default: `50`.
    pub min_assistant_chars: usize,
}
2975
2976impl Default for ReasoningConfig {
2977    fn default() -> Self {
2978        Self {
2979            enabled: false,
2980            extract_provider: ProviderName::default(),
2981            distill_provider: ProviderName::default(),
2982            top_k: 3,
2983            store_limit: 1000,
2984            max_messages: 6,
2985            max_message_chars: 2000,
2986            context_budget_tokens: 500,
2987            min_messages: 2,
2988            extraction_timeout_secs: 30,
2989            distill_timeout_secs: 30,
2990            self_judge_window: 2,
2991            min_assistant_chars: 50,
2992        }
2993    }
2994}
2995
2996// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
2997
/// Configuration for the memory eviction policy.
///
/// Controls which policy runs during the periodic sweep and how many entries
/// are retained. `zeph-memory` re-exports this type from here.
///
/// NOTE(review): unlike the other config sections in this file there is no
/// container-level `#[serde(default)]`, so all fields appear to be required
/// whenever this section is present in TOML — confirm this is intentional.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct EvictionConfig {
    /// Policy name. Currently only `"ebbinghaus"` is supported.
    pub policy: String,
    /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
    pub max_entries: usize,
    /// How often to run the eviction sweep, in seconds.
    pub sweep_interval_secs: u64,
}
3011
3012impl Default for EvictionConfig {
3013    fn default() -> Self {
3014        Self {
3015            policy: "ebbinghaus".to_owned(),
3016            max_entries: 0,
3017            sweep_interval_secs: 3600,
3018        }
3019    }
3020}
3021
3022// ── Compression guidelines config (moved from zeph-memory) ───────────────────
3023
/// Configuration for ACON failure-driven compression guidelines.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionGuidelinesConfig {
    /// Enable the feature. Default: `false`.
    pub enabled: bool,
    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
    pub update_threshold: u16,
    /// Maximum token budget for the guidelines document. Default: `500`.
    pub max_guidelines_tokens: usize,
    /// Maximum failure pairs consumed per update cycle. Default: `10`.
    pub max_pairs_per_update: usize,
    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
    pub detection_window_turns: u64,
    /// Interval in seconds between background updater checks. Default: `300`.
    pub update_interval_secs: u64,
    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
    pub max_stored_pairs: usize,
    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
    /// `None` (or `Some("")`) falls back to the primary provider.
    // The field-level `default` is redundant with the container-level
    // `#[serde(default)]`; kept for explicitness alongside `skip_serializing_if`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub guidelines_provider: Option<ProviderName>,
    /// Maintain separate guideline documents per content category.
    #[serde(default)]
    pub categorized_guidelines: bool,
}
3052
3053impl Default for CompressionGuidelinesConfig {
3054    fn default() -> Self {
3055        Self {
3056            enabled: false,
3057            update_threshold: 5,
3058            max_guidelines_tokens: 500,
3059            max_pairs_per_update: 10,
3060            detection_window_turns: 10,
3061            update_interval_secs: 300,
3062            max_stored_pairs: 100,
3063            guidelines_provider: None,
3064            categorized_guidelines: false,
3065        }
3066    }
3067}
3068
3069// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
3070
/// Functional category of a compaction probe question.
///
/// Derives `Eq`/`Hash` so it can key the `category_weights` map in
/// `CompactionProbeConfig`; serialized in lowercase.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ProbeCategory {
    /// Did specific facts survive? (file paths, function names, values, decisions)
    Recall,
    /// Does the agent know which files/tools/URLs it used?
    Artifact,
    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
    Continuation,
    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
    Decision,
}
3086
/// Configuration for the compaction probe.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionProbeConfig {
    /// Enable compaction probe validation. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for probe LLM calls.
    /// `None` (or `Some("")`) uses the summary provider.
    // Field-level `default` is redundant with the container-level
    // `#[serde(default)]`; kept for explicitness alongside `skip_serializing_if`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub probe_provider: Option<ProviderName>,
    /// Minimum score to pass without warnings. Default: `0.6`.
    pub threshold: f32,
    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
    pub hard_fail_threshold: f32,
    /// Maximum number of probe questions to generate. Default: `5`.
    pub max_questions: usize,
    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
    pub timeout_secs: u64,
    /// Optional per-category weight multipliers for the overall score.
    /// `None` means all categories weigh equally — TODO confirm against the scorer.
    #[serde(default)]
    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
}
3111
3112impl Default for CompactionProbeConfig {
3113    fn default() -> Self {
3114        Self {
3115            enabled: false,
3116            probe_provider: None,
3117            threshold: 0.6,
3118            hard_fail_threshold: 0.35,
3119            max_questions: 5,
3120            timeout_secs: 15,
3121            category_weights: None,
3122        }
3123    }
3124}
3125
3126// ── MemCoT semantic state config ─────────────────────────────────────────────
3127
/// `MemCoT` semantic-state distillation configuration.
///
/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
/// summarizing conceptual progress across turns. This buffer is injected into graph
/// recall queries to improve retrieval relevance.
///
/// All LLM work (distillation) runs asynchronously — never on the turn thread.
/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
///
/// Container-level `#[serde(default)]`: any key omitted from the config file
/// falls back to the corresponding [`MemCotConfig::default`] value.
///
/// # Config example
///
/// ```toml
/// [memory.memcot]
/// enabled = true
/// distill_provider = "fast"
/// distill_timeout_secs = 5
/// min_assistant_chars = 200
/// min_distill_interval_secs = 30
/// max_distills_per_session = 50
/// max_state_chars = 800
/// recall_view = "head"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MemCotConfig {
    /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
    ///
    /// When `false`, the accumulator is never allocated and no LLM calls are made.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for distillation.
    ///
    /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
    /// A startup warning is emitted when the resolved model does not look fast-tier.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Timeout in seconds for each distillation LLM call. Default: `5`.
    pub distill_timeout_secs: u64,
    /// Minimum characters in the assistant response to trigger distillation.
    /// Short or trivial replies are skipped. Default: `200`.
    pub min_assistant_chars: usize,
    /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
    ///
    /// Prevents runaway costs on long sessions with rapid turns.
    /// Clearing `/new` resets this counter.
    pub min_distill_interval_secs: u64,
    /// Maximum distillation spawns per conversation session. Default: `50`.
    ///
    /// Once this cap is reached the accumulator stops distilling for the rest of the
    /// session. Counter is reset when the user sends `/new`.
    pub max_distills_per_session: u64,
    /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
    /// Default: `800`.
    pub max_state_chars: usize,
    /// Recall view applied when `MemCoT` is active. Default: `Head`.
    ///
    /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
    /// - `zoom_in`: adds source-message provenance to each returned fact.
    /// - `zoom_out`: expands 1-hop neighbors per returned fact.
    ///
    /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
    pub recall_view: RecallViewConfig,
    /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
    pub zoom_out_neighbor_cap: usize,
    /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
    /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
    ///
    /// Omitted from serialized output when empty (`skip_serializing_if`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub fast_tier_models: Vec<String>,
}
3196
/// Recall view variant exposed in config.
///
/// Maps 1-to-1 to `zeph_memory::RecallView`.
///
/// Config-file values are the snake_case variant names: `"head"`, `"zoom_in"`,
/// `"zoom_out"` (via `rename_all = "snake_case"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecallViewConfig {
    /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
    #[default]
    Head,
    /// Adds source-message provenance to each returned fact.
    ZoomIn,
    /// Expands 1-hop neighbor facts per returned fact.
    ZoomOut,
}
3211
3212impl Default for MemCotConfig {
3213    fn default() -> Self {
3214        Self {
3215            enabled: false,
3216            distill_provider: ProviderName::default(),
3217            distill_timeout_secs: 5,
3218            min_assistant_chars: 200,
3219            min_distill_interval_secs: 30,
3220            max_distills_per_session: 50,
3221            max_state_chars: 800,
3222            recall_view: RecallViewConfig::Head,
3223            zoom_out_neighbor_cap: 3,
3224            fast_tier_models: Vec::new(),
3225        }
3226    }
3227}
3228
3229/// `OmniMem` retrieval failure tracking configuration (issue #3576).
3230///
3231/// Controls the async logger that records no-hit and low-confidence recall events
3232/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
3233#[derive(Debug, Clone, Deserialize, Serialize)]
3234#[serde(default)]
3235pub struct RetrievalFailuresConfig {
3236    /// Enable retrieval failure logging. Default: `false`.
3237    pub enabled: bool,
3238    /// Composite recall score below which a result is classified as low-confidence.
3239    ///
3240    /// The threshold applies to the post-reranking composite score (which incorporates
3241    /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
3242    /// the scoring pipeline in use. Default: `0.3`.
3243    #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
3244    pub low_confidence_threshold: f32,
3245    /// Days to retain failure records before automatic cleanup. Default: `90`.
3246    #[serde(default = "default_retrieval_failures_retention_days")]
3247    pub retention_days: u32,
3248    /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
3249    #[serde(default = "default_retrieval_failures_channel_capacity")]
3250    pub channel_capacity: usize,
3251    /// Maximum records collected before flushing a batch INSERT. Default: `16`.
3252    #[serde(default = "default_retrieval_failures_batch_size")]
3253    pub batch_size: usize,
3254    /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
3255    #[serde(default = "default_retrieval_failures_flush_interval_ms")]
3256    pub flush_interval_ms: u64,
3257}
3258
3259impl Default for RetrievalFailuresConfig {
3260    fn default() -> Self {
3261        Self {
3262            enabled: false,
3263            low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
3264            retention_days: default_retrieval_failures_retention_days(),
3265            channel_capacity: default_retrieval_failures_channel_capacity(),
3266            batch_size: default_retrieval_failures_batch_size(),
3267            flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
3268        }
3269    }
3270}
3271
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// Defaults must keep the pipeline fully disabled and match the values
    /// documented on each `MemCotConfig` field.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        // Empty allowlist → falls back to the built-in FAST_TIER_MODEL_HINTS.
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key set in the TOML must survive deserialization.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        // Previously unasserted keys from the TOML above.
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
    }
}
3313
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    #[test]
    fn apex_mem_config_serde_round_trip() {
        let toml = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
    }

    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert!((cfg.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert!((cfg.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(cfg.reference_check_lang_en);
    }

    /// Every key set in the TOML must survive deserialization.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let toml = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert!((cfg.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        // Previously unasserted weight keys from the TOML above.
        assert!((cfg.information_value_weight - 0.5).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.50).abs() < f32::EPSILON);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert!((cfg.llm_weight - 0.3).abs() < f32::EPSILON);
        assert!(!cfg.reference_check_lang_en);
    }

    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }
}