zeph_config/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::defaults::{default_sqlite_path_field, default_true};
7
/// Serde default for `MemoryConfig::sqlite_pool_size`.
fn default_sqlite_pool_size() -> u32 {
    5
}
11
/// Serde default for `SessionsConfig::max_history` (that field treats `0` as unlimited).
fn default_max_history() -> usize {
    100
}
15
/// Serde default for `SessionsConfig::title_max_chars`.
fn default_title_max_chars() -> usize {
    60
}
19
/// Serde default for `DocumentConfig::collection`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
23
/// Serde default for `DocumentConfig::chunk_size`.
fn default_document_chunk_size() -> usize {
    1000
}
27
/// Serde default for `DocumentConfig::chunk_overlap`.
fn default_document_chunk_overlap() -> usize {
    100
}
31
/// Serde default for `DocumentConfig::top_k` (document chunks injected per turn).
fn default_document_top_k() -> usize {
    3
}
35
/// Serde default for `MemoryConfig::autosave_min_length`.
fn default_autosave_min_length() -> usize {
    20
}
39
/// Serde default for `MemoryConfig::tool_call_cutoff`.
fn default_tool_call_cutoff() -> usize {
    6
}
43
/// Serde default for `MemoryConfig::token_safety_margin`.
fn default_token_safety_margin() -> f32 {
    1.0
}
47
/// Serde default for `MemoryConfig::redact_credentials` (on unless configured otherwise).
fn default_redact_credentials() -> bool {
    true
}
51
/// Serde default for `MemoryConfig::qdrant_url`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
55
/// Serde default for `MemoryConfig::summarization_threshold`.
fn default_summarization_threshold() -> usize {
    50
}
59
/// Serde default for `MemoryConfig::context_budget_tokens`.
// NOTE(review): `0` presumably means "no explicit budget" — confirm against the consumer.
fn default_context_budget_tokens() -> usize {
    0
}
63
/// Serde default for `MemoryConfig::soft_compaction_threshold`.
fn default_soft_compaction_threshold() -> f32 {
    0.60
}
67
/// Serde default for `MemoryConfig::hard_compaction_threshold`
/// (also reachable through the `compaction_threshold` alias).
fn default_hard_compaction_threshold() -> f32 {
    0.90
}
71
/// Serde default for `MemoryConfig::compaction_preserve_tail`.
fn default_compaction_preserve_tail() -> usize {
    6
}
75
/// Serde default for `MemoryConfig::compaction_cooldown_turns`.
fn default_compaction_cooldown_turns() -> u8 {
    2
}
79
/// Serde default for `MemoryConfig::auto_budget`.
fn default_auto_budget() -> bool {
    true
}
83
/// Serde default for `MemoryConfig::prune_protect_tokens`.
fn default_prune_protect_tokens() -> usize {
    40_000
}
87
/// Serde default for `MemoryConfig::cross_session_score_threshold`.
fn default_cross_session_score_threshold() -> f32 {
    0.35
}
91
/// Serde default for `SemanticConfig::temporal_decay_half_life_days`.
fn default_temporal_decay_half_life_days() -> u32 {
    30
}
95
/// Serde default for `SemanticConfig::mmr_lambda`.
fn default_mmr_lambda() -> f32 {
    0.7
}
99
/// Serde default for `SemanticConfig::enabled`.
fn default_semantic_enabled() -> bool {
    true
}
103
/// Serde default for `SemanticConfig::recall_limit`.
fn default_recall_limit() -> usize {
    5
}
107
/// Serde default for `SemanticConfig::vector_weight`.
fn default_vector_weight() -> f64 {
    0.7
}
111
/// Serde default for `SemanticConfig::keyword_weight`.
fn default_keyword_weight() -> f64 {
    0.3
}
115
/// Serde default for `GraphConfig::max_entities_per_message`.
fn default_graph_max_entities_per_message() -> usize {
    10
}
119
/// Serde default for `GraphConfig::max_edges_per_message`.
fn default_graph_max_edges_per_message() -> usize {
    15
}
123
/// Serde default for `GraphConfig::community_refresh_interval`.
fn default_graph_community_refresh_interval() -> usize {
    100
}
127
/// Serde default for `GraphConfig::community_summary_max_prompt_bytes` (bytes).
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8192
}
131
/// Serde default for `GraphConfig::community_summary_concurrency`.
fn default_graph_community_summary_concurrency() -> usize {
    4
}
135
/// Serde default for `GraphConfig::lpa_edge_chunk_size` (edges fetched per chunk).
fn default_lpa_edge_chunk_size() -> usize {
    10_000
}
139
/// Serde default for `GraphConfig::entity_similarity_threshold`.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}
143
/// Serde default for `GraphConfig::entity_ambiguous_threshold`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.70
}
147
/// Serde default for `GraphConfig::extraction_timeout_secs` (seconds).
fn default_graph_extraction_timeout_secs() -> u64 {
    15
}
151
/// Serde default for `GraphConfig::max_hops`.
fn default_graph_max_hops() -> u32 {
    2
}
155
/// Serde default for `GraphConfig::recall_limit`.
fn default_graph_recall_limit() -> usize {
    10
}
159
/// Serde default for `GraphConfig::expired_edge_retention_days` (days).
fn default_graph_expired_edge_retention_days() -> u32 {
    90
}
163
/// Serde default for `GraphConfig::temporal_decay_rate`.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}
167
/// Serde default for `GraphConfig::edge_history_limit`.
fn default_graph_edge_history_limit() -> usize {
    100
}
171
/// Value of `decay_lambda` used by `SpreadingActivationConfig::default()`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}
175
/// Value of `max_hops` used by `SpreadingActivationConfig::default()`.
fn default_spreading_activation_max_hops() -> u32 {
    3
}
179
/// Value of `activation_threshold` used by `SpreadingActivationConfig::default()`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}
183
/// Value of `inhibition_threshold` used by `SpreadingActivationConfig::default()`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}
187
/// Value of `max_activated_nodes` used by `SpreadingActivationConfig::default()`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}
191
/// Value of `similarity_threshold` used by `NoteLinkingConfig::default()`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}
195
/// Value of `top_k` used by `NoteLinkingConfig::default()`.
fn default_note_linking_top_k() -> usize {
    10
}
199
/// Value of `timeout_secs` (seconds) used by `NoteLinkingConfig::default()`.
fn default_note_linking_timeout_secs() -> u64 {
    5
}
203
/// Serde default for `MemoryConfig::shutdown_summary`.
fn default_shutdown_summary() -> bool {
    true
}
207
/// Serde default for `MemoryConfig::shutdown_summary_min_messages`.
fn default_shutdown_summary_min_messages() -> usize {
    4
}
211
/// Serde default for `MemoryConfig::shutdown_summary_max_messages`.
fn default_shutdown_summary_max_messages() -> usize {
    20
}
215
/// Serde default for `MemoryConfig::shutdown_summary_timeout_secs` (seconds).
fn default_shutdown_summary_timeout_secs() -> u64 {
    10
}
219
220fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
221where
222    D: serde::Deserializer<'de>,
223{
224    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
225    if value.is_nan() || value.is_infinite() {
226        return Err(serde::de::Error::custom(
227            "similarity_threshold must be a finite number",
228        ));
229    }
230    if !(0.5..=1.0).contains(&value) {
231        return Err(serde::de::Error::custom(
232            "similarity_threshold must be in [0.5, 1.0]",
233        ));
234    }
235    Ok(value)
236}
237
238fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
239where
240    D: serde::Deserializer<'de>,
241{
242    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
243    if value < 2 {
244        return Err(serde::de::Error::custom(
245            "promotion_min_sessions must be >= 2",
246        ));
247    }
248    Ok(value)
249}
250
251fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
252where
253    D: serde::Deserializer<'de>,
254{
255    let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
256    if value == 0 {
257        return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
258    }
259    Ok(value)
260}
261
/// Value of `promotion_min_sessions` used by `TierConfig::default()`.
fn default_tier_promotion_min_sessions() -> u32 {
    3
}
265
/// Value of `similarity_threshold` used by `TierConfig::default()`.
fn default_tier_similarity_threshold() -> f32 {
    0.92
}
269
/// Value of `sweep_interval_secs` (seconds) used by `TierConfig::default()`.
fn default_tier_sweep_interval_secs() -> u64 {
    3600
}
273
/// Value of `sweep_batch_size` used by `TierConfig::default()`.
fn default_tier_sweep_batch_size() -> usize {
    100
}
277
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// Because of the container-level `#[serde(default)]`, any key omitted from the input takes
/// its value from [`TierConfig::default`].
///
/// # Validation
///
/// Constraints enforced at deserialization time, for keys that are present in the input
/// (omitted keys fall back to the defaults and do not go through the validators):
/// - `similarity_threshold` in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    /// No validator is attached to this field.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
}
309
310impl Default for TierConfig {
311    fn default() -> Self {
312        Self {
313            enabled: false,
314            promotion_min_sessions: default_tier_promotion_min_sessions(),
315            similarity_threshold: default_tier_similarity_threshold(),
316            sweep_interval_secs: default_tier_sweep_interval_secs(),
317            sweep_batch_size: default_tier_sweep_batch_size(),
318        }
319    }
320}
321
322fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
323where
324    D: serde::Deserializer<'de>,
325{
326    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
327    if value.is_nan() || value.is_infinite() {
328        return Err(serde::de::Error::custom(
329            "temporal_decay_rate must be a finite number",
330        ));
331    }
332    if !(0.0..=10.0).contains(&value) {
333        return Err(serde::de::Error::custom(
334            "temporal_decay_rate must be in [0.0, 10.0]",
335        ));
336    }
337    Ok(value)
338}
339
340fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
341where
342    D: serde::Deserializer<'de>,
343{
344    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
345    if value.is_nan() || value.is_infinite() {
346        return Err(serde::de::Error::custom(
347            "similarity_threshold must be a finite number",
348        ));
349    }
350    if !(0.0..=1.0).contains(&value) {
351        return Err(serde::de::Error::custom(
352            "similarity_threshold must be in [0.0, 1.0]",
353        ));
354    }
355    Ok(value)
356}
357
358fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
359where
360    D: serde::Deserializer<'de>,
361{
362    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
363    if value.is_nan() || value.is_infinite() {
364        return Err(serde::de::Error::custom(
365            "importance_weight must be a finite number",
366        ));
367    }
368    if value < 0.0 {
369        return Err(serde::de::Error::custom(
370            "importance_weight must be non-negative",
371        ));
372    }
373    if value > 1.0 {
374        return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
375    }
376    Ok(value)
377}
378
/// Serde default for `SemanticConfig::importance_weight`.
fn default_importance_weight() -> f64 {
    0.15
}
382
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Enforced per field at deserialization time, for keys that are present in the input
/// (omitted keys fall back to the defaults and do not go through the validators):
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// NOT enforced at deserialization time — checked only when the caller invokes
/// [`SpreadingActivationConfig::validate`]:
/// - `activation_threshold < inhibition_threshold`
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    // NOTE(review): the documented range is not enforced here — no `deserialize_with`
    // validator is attached to this field.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
}
419
420fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
421where
422    D: serde::Deserializer<'de>,
423{
424    let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
425    if value.is_nan() || value.is_infinite() {
426        return Err(serde::de::Error::custom(
427            "decay_lambda must be a finite number",
428        ));
429    }
430    if !(value > 0.0 && value <= 1.0) {
431        return Err(serde::de::Error::custom(
432            "decay_lambda must be in (0.0, 1.0]",
433        ));
434    }
435    Ok(value)
436}
437
438fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
439where
440    D: serde::Deserializer<'de>,
441{
442    let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
443    if value == 0 {
444        return Err(serde::de::Error::custom("max_hops must be >= 1"));
445    }
446    Ok(value)
447}
448
449impl SpreadingActivationConfig {
450    /// Validate cross-field constraints that cannot be expressed in per-field validators.
451    ///
452    /// # Errors
453    ///
454    /// Returns an error string if `activation_threshold >= inhibition_threshold`.
455    pub fn validate(&self) -> Result<(), String> {
456        if self.activation_threshold >= self.inhibition_threshold {
457            return Err(format!(
458                "activation_threshold ({}) must be < inhibition_threshold ({})",
459                self.activation_threshold, self.inhibition_threshold
460            ));
461        }
462        Ok(())
463    }
464}
465
/// Serde default for `SpreadingActivationConfig::seed_structural_weight`.
fn default_seed_structural_weight() -> f32 {
    0.4
}
469
/// Serde default for `SpreadingActivationConfig::seed_community_cap`
/// (that field treats `0` as unlimited).
fn default_seed_community_cap() -> usize {
    3
}
473
474impl Default for SpreadingActivationConfig {
475    fn default() -> Self {
476        Self {
477            enabled: false,
478            decay_lambda: default_spreading_activation_decay_lambda(),
479            max_hops: default_spreading_activation_max_hops(),
480            activation_threshold: default_spreading_activation_activation_threshold(),
481            inhibition_threshold: default_spreading_activation_inhibition_threshold(),
482            max_activated_nodes: default_spreading_activation_max_activated_nodes(),
483            seed_structural_weight: default_seed_structural_weight(),
484            seed_community_cap: default_seed_community_cap(),
485        }
486    }
487}
488
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// Because of the container-level `#[serde(default)]`, any key omitted from the input takes
/// its value from [`NoteLinkingConfig::default`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    ///
    /// When present in the input, the value must be finite and in `[0.0, 1.0]`;
    /// deserialization fails otherwise.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
507
508impl Default for NoteLinkingConfig {
509    fn default() -> Self {
510        Self {
511            enabled: false,
512            similarity_threshold: default_note_linking_similarity_threshold(),
513            top_k: default_note_linking_top_k(),
514            timeout_secs: default_note_linking_timeout_secs(),
515        }
516    }
517}
518
/// Vector backend selector for embedding storage.
///
/// Serialized in lowercase (`"qdrant"` / `"sqlite"`); `Sqlite` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Selected by the value `"qdrant"`.
    Qdrant,
    /// Selected by the value `"sqlite"`; used when the setting is omitted.
    #[default]
    Sqlite,
}
527
528impl VectorBackend {
529    #[must_use]
530    pub fn as_str(&self) -> &'static str {
531        match self {
532            Self::Qdrant => "qdrant",
533            Self::Sqlite => "sqlite",
534        }
535    }
536}
537
/// Top-level configuration for the memory subsystem.
///
/// Nested sections documented elsewhere in this file use the `[memory.*]` TOML prefix
/// (e.g. `[memory.tiers]`, `[memory.graph]`).
///
/// Every field except `history_limit` carries a serde default, so `history_limit` is the
/// only key that must be present in the input.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct MemoryConfig {
    #[serde(default)]
    pub compression_guidelines: zeph_memory::CompressionGuidelinesConfig,
    #[serde(default = "default_sqlite_path_field")]
    pub sqlite_path: String,
    /// Required: this field has no serde default.
    pub history_limit: u32,
    /// Default: `http://localhost:6334`.
    #[serde(default = "default_qdrant_url")]
    pub qdrant_url: String,
    #[serde(default)]
    pub semantic: SemanticConfig,
    /// Default: `50`.
    #[serde(default = "default_summarization_threshold")]
    pub summarization_threshold: usize,
    /// Default: `0`.
    #[serde(default = "default_context_budget_tokens")]
    pub context_budget_tokens: usize,
    /// Default: `0.60`.
    #[serde(default = "default_soft_compaction_threshold")]
    pub soft_compaction_threshold: f32,
    /// Default: `0.90`. The key `compaction_threshold` is accepted as an alias.
    #[serde(
        default = "default_hard_compaction_threshold",
        alias = "compaction_threshold"
    )]
    pub hard_compaction_threshold: f32,
    /// Default: `6`.
    #[serde(default = "default_compaction_preserve_tail")]
    pub compaction_preserve_tail: usize,
    /// Default: `2`.
    #[serde(default = "default_compaction_cooldown_turns")]
    pub compaction_cooldown_turns: u8,
    /// Default: `true`.
    #[serde(default = "default_auto_budget")]
    pub auto_budget: bool,
    /// Default: `40_000`.
    #[serde(default = "default_prune_protect_tokens")]
    pub prune_protect_tokens: usize,
    /// Default: `0.35`.
    #[serde(default = "default_cross_session_score_threshold")]
    pub cross_session_score_threshold: f32,
    /// Default: `sqlite`.
    #[serde(default)]
    pub vector_backend: VectorBackend,
    /// Default: `1.0`.
    #[serde(default = "default_token_safety_margin")]
    pub token_safety_margin: f32,
    /// Default: `true`.
    #[serde(default = "default_redact_credentials")]
    pub redact_credentials: bool,
    #[serde(default = "default_true")]
    pub autosave_assistant: bool,
    /// Default: `20`.
    #[serde(default = "default_autosave_min_length")]
    pub autosave_min_length: usize,
    /// Default: `6`.
    #[serde(default = "default_tool_call_cutoff")]
    pub tool_call_cutoff: usize,
    /// Default: `5`.
    #[serde(default = "default_sqlite_pool_size")]
    pub sqlite_pool_size: u32,
    #[serde(default)]
    pub sessions: SessionsConfig,
    #[serde(default)]
    pub documents: DocumentConfig,
    #[serde(default)]
    pub eviction: zeph_memory::EvictionConfig,
    #[serde(default)]
    pub compression: CompressionConfig,
    #[serde(default)]
    pub sidequest: SidequestConfig,
    #[serde(default)]
    pub routing: RoutingConfig,
    #[serde(default)]
    pub graph: GraphConfig,
    /// Store a lightweight session summary to the vector store on shutdown when no session
    /// summary exists yet for this conversation. Enables cross-session recall for short or
    /// interrupted sessions that never triggered hard compaction. Default: `true`.
    #[serde(default = "default_shutdown_summary")]
    pub shutdown_summary: bool,
    /// Minimum number of user-turn messages required before a shutdown summary is generated.
    /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
    #[serde(default = "default_shutdown_summary_min_messages")]
    pub shutdown_summary_min_messages: usize,
    /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
    /// summarization. Caps token cost for long sessions that never triggered hard compaction.
    /// Default: `20`.
    #[serde(default = "default_shutdown_summary_max_messages")]
    pub shutdown_summary_max_messages: usize,
    /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
    /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `10`.
    #[serde(default = "default_shutdown_summary_timeout_secs")]
    pub shutdown_summary_timeout_secs: u64,
    /// Use structured anchored summaries for context compaction.
    ///
    /// When enabled, hard compaction requests a JSON schema from the LLM
    /// instead of free-form prose. Falls back to prose if the LLM fails
    /// to produce valid JSON. Default: `false`.
    #[serde(default)]
    pub structured_summaries: bool,
    /// AOI three-layer memory tier promotion system.
    ///
    /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
    /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
    #[serde(default)]
    pub tiers: TierConfig,
}
632
/// Session listing and titling settings (the `sessions` field of `MemoryConfig`).
///
/// Omitted keys fall back to `100` for `max_history` and `60` for `title_max_chars`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
643
644impl Default for SessionsConfig {
645    fn default() -> Self {
646        Self {
647            max_history: default_max_history(),
648            title_max_chars: default_title_max_chars(),
649        }
650    }
651}
652
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Every field has its own serde default, so all keys are optional.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Collection name. Default: `zeph_documents`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Default: `1000`.
    // NOTE(review): unit (characters vs. tokens) is not visible from this file — confirm.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Default: `100`.
    // NOTE(review): presumably expressed in the same unit as `chunk_size` — confirm.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
669
670impl Default for DocumentConfig {
671    fn default() -> Self {
672        Self {
673            collection: default_document_collection(),
674            chunk_size: default_document_chunk_size(),
675            chunk_overlap: default_document_chunk_overlap(),
676            top_k: default_document_top_k(),
677            rag_enabled: false,
678        }
679    }
680}
681
/// Semantic recall settings (the `semantic` field of `MemoryConfig`).
///
/// Every field has its own serde default, so all keys are optional.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct SemanticConfig {
    /// Default: `true`.
    #[serde(default = "default_semantic_enabled")]
    pub enabled: bool,
    /// Default: `5`.
    #[serde(default = "default_recall_limit")]
    pub recall_limit: usize,
    /// Default: `0.7`.
    #[serde(default = "default_vector_weight")]
    pub vector_weight: f64,
    /// Default: `0.3`.
    #[serde(default = "default_keyword_weight")]
    pub keyword_weight: f64,
    #[serde(default = "default_true")]
    pub temporal_decay_enabled: bool,
    /// Default: `30`.
    #[serde(default = "default_temporal_decay_half_life_days")]
    pub temporal_decay_half_life_days: u32,
    #[serde(default = "default_true")]
    pub mmr_enabled: bool,
    /// Default: `0.7`.
    #[serde(default = "default_mmr_lambda")]
    pub mmr_lambda: f32,
    #[serde(default = "default_true")]
    pub importance_enabled: bool,
    /// Default: `0.15`. When present in the input, the value must be finite and in
    /// `[0.0, 1.0]`; deserialization fails otherwise.
    #[serde(
        default = "default_importance_weight",
        deserialize_with = "validate_importance_weight"
    )]
    pub importance_weight: f64,
}
709
710impl Default for SemanticConfig {
711    fn default() -> Self {
712        Self {
713            enabled: default_semantic_enabled(),
714            recall_limit: default_recall_limit(),
715            vector_weight: default_vector_weight(),
716            keyword_weight: default_keyword_weight(),
717            temporal_decay_enabled: true,
718            temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
719            mmr_enabled: true,
720            mmr_lambda: default_mmr_lambda(),
721            importance_enabled: true,
722            importance_weight: default_importance_weight(),
723        }
724    }
725}
726
/// Routing strategy for memory backend selection.
///
/// Variant names are (de)serialized in `snake_case`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutingStrategy {
    /// Heuristic-based routing using query characteristics.
    /// Written as `heuristic` in configuration; this is the default.
    #[default]
    Heuristic,
}
735
/// Configuration for query-aware memory routing (#1162).
///
/// Omitted keys take their values from the derived `Default` implementation.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default)]
pub struct RoutingConfig {
    /// Routing strategy. Currently only `heuristic` is supported.
    pub strategy: RoutingStrategy,
}
743
/// Compression strategy for active context compression (#1161).
///
/// Internally tagged: the variant is chosen by a `strategy` key whose value is the
/// `snake_case` variant name (`reactive` or `proactive`); variant fields sit next to it.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
}
759
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// `Serialize` is derived (variant names in `snake_case`), but `Deserialize` is deliberately
/// not derived: it is implemented by hand in this file on top of `FromStr`.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
784
785impl PruningStrategy {
786    /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
787    #[must_use]
788    pub fn is_subgoal(self) -> bool {
789        matches!(self, Self::Subgoal | Self::SubgoalMig)
790    }
791}
792
793// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
794// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
795impl<'de> serde::Deserialize<'de> for PruningStrategy {
796    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
797        let s = String::deserialize(deserializer)?;
798        s.parse().map_err(serde::de::Error::custom)
799    }
800}
801
802impl std::str::FromStr for PruningStrategy {
803    type Err = String;
804
805    fn from_str(s: &str) -> Result<Self, Self::Err> {
806        match s {
807            "reactive" => Ok(Self::Reactive),
808            "task_aware" | "task-aware" => Ok(Self::TaskAware),
809            "mig" => Ok(Self::Mig),
810            // task_aware_mig was removed (dead code — was routed to scored path only).
811            // Fall back to Reactive so existing TOML configs do not hard-error on startup.
812            "task_aware_mig" | "task-aware-mig" => {
813                tracing::warn!(
814                    "pruning strategy `task_aware_mig` has been removed; \
815                     falling back to `reactive`. Use `task_aware` or `mig` instead."
816                );
817                Ok(Self::Reactive)
818            }
819            "subgoal" => Ok(Self::Subgoal),
820            "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
821            other => Err(format!(
822                "unknown pruning strategy `{other}`, expected \
823                 reactive|task_aware|mig|subgoal|subgoal_mig"
824            )),
825        }
826    }
827}
828
/// Configuration for active context compression (#1161).
///
/// Omitted keys take their values from the derived `Default` implementation.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionConfig {
    /// Compression strategy.
    ///
    /// Flattened: the `strategy` tag and any variant fields are read from this struct's own
    /// keys rather than from a nested table.
    #[serde(flatten)]
    pub strategy: CompressionStrategy,
    /// Tool-output pruning strategy (requires `context-compression` feature).
    pub pruning_strategy: PruningStrategy,
    /// Model to use for compression summaries.
    ///
    /// Currently unused — the primary summary provider is used regardless of this value.
    /// Reserved for future per-compression model selection. Setting this field has no effect.
    pub model: String,
    /// Compaction probe: validates summary quality before committing it (#1609).
    #[serde(default)]
    pub probe: zeph_memory::CompactionProbeConfig,
}
847
/// Serde default for `SidequestConfig::interval_turns`.
fn default_sidequest_interval_turns() -> u32 {
    4
}
851
/// Serde default for `SidequestConfig::max_eviction_ratio`.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5
}
855
/// Serde default for `SidequestConfig::max_cursors`.
fn default_sidequest_max_cursors() -> usize {
    30
}
859
/// Serde default for `SidequestConfig::min_cursor_tokens`.
fn default_sidequest_min_cursor_tokens() -> usize {
    100
}
863
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// All keys are optional; omitted keys fall back to the defaults listed on each field.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    // NOTE(review): no validator is attached, so values outside `[0.0, 1.0]` are accepted
    // by deserialization.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
884
885impl Default for SidequestConfig {
886    fn default() -> Self {
887        Self {
888            enabled: false,
889            interval_turns: default_sidequest_interval_turns(),
890            max_eviction_ratio: default_sidequest_max_eviction_ratio(),
891            max_cursors: default_sidequest_max_cursors(),
892            min_cursor_tokens: default_sidequest_min_cursor_tokens(),
893        }
894    }
895}
896
897/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
898///
899/// # Security
900///
901/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
902/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
903/// when processing conversations that may contain personal, medical, or sensitive data until
904/// a redaction pass is implemented on the write path.
905#[derive(Debug, Clone, Deserialize, Serialize)]
906#[serde(default)]
907pub struct GraphConfig {
908    pub enabled: bool,
909    pub extract_model: String,
910    #[serde(default = "default_graph_max_entities_per_message")]
911    pub max_entities_per_message: usize,
912    #[serde(default = "default_graph_max_edges_per_message")]
913    pub max_edges_per_message: usize,
914    #[serde(default = "default_graph_community_refresh_interval")]
915    pub community_refresh_interval: usize,
916    #[serde(default = "default_graph_entity_similarity_threshold")]
917    pub entity_similarity_threshold: f32,
918    #[serde(default = "default_graph_extraction_timeout_secs")]
919    pub extraction_timeout_secs: u64,
920    #[serde(default)]
921    pub use_embedding_resolution: bool,
922    #[serde(default = "default_graph_entity_ambiguous_threshold")]
923    pub entity_ambiguous_threshold: f32,
924    #[serde(default = "default_graph_max_hops")]
925    pub max_hops: u32,
926    #[serde(default = "default_graph_recall_limit")]
927    pub recall_limit: usize,
928    /// Days to retain expired (superseded) edges before deletion. Default: 90.
929    #[serde(default = "default_graph_expired_edge_retention_days")]
930    pub expired_edge_retention_days: u32,
931    /// Maximum entities to retain in the graph. 0 = unlimited.
932    #[serde(default)]
933    pub max_entities: usize,
934    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
935    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
936    pub community_summary_max_prompt_bytes: usize,
937    /// Maximum concurrent LLM calls during community summarization. Default: 4.
938    #[serde(default = "default_graph_community_summary_concurrency")]
939    pub community_summary_concurrency: usize,
940    /// Number of edges fetched per chunk during community detection. Default: 10000.
941    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
942    #[serde(default = "default_lpa_edge_chunk_size")]
943    pub lpa_edge_chunk_size: usize,
944    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
945    ///
946    /// When > 0, recent edges receive a small additive score boost over older edges.
947    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
948    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
949    #[serde(
950        default = "default_graph_temporal_decay_rate",
951        deserialize_with = "validate_temporal_decay_rate"
952    )]
953    pub temporal_decay_rate: f64,
954    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
955    ///
956    /// Caps the result set returned for a given source entity + predicate pair. Prevents
957    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
958    /// or API endpoints.
959    #[serde(default = "default_graph_edge_history_limit")]
960    pub edge_history_limit: usize,
961    /// A-MEM dynamic note linking configuration.
962    ///
963    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
964    /// semantically similar entities via `similar_to` edges. Requires an embedding store
965    /// (`qdrant` or `sqlite` vector backend) to be configured.
966    #[serde(default)]
967    pub note_linking: NoteLinkingConfig,
968    /// SYNAPSE spreading activation retrieval configuration.
969    ///
970    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
971    /// with lateral inhibition and temporal decay instead of BFS.
972    #[serde(default)]
973    pub spreading_activation: SpreadingActivationConfig,
974    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
975    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
976    #[serde(
977        default = "default_link_weight_decay_lambda",
978        deserialize_with = "validate_link_weight_decay_lambda"
979    )]
980    pub link_weight_decay_lambda: f64,
981    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
982    #[serde(default = "default_link_weight_decay_interval_secs")]
983    pub link_weight_decay_interval_secs: u64,
984}
985
986impl Default for GraphConfig {
987    fn default() -> Self {
988        Self {
989            enabled: false,
990            extract_model: String::new(),
991            max_entities_per_message: default_graph_max_entities_per_message(),
992            max_edges_per_message: default_graph_max_edges_per_message(),
993            community_refresh_interval: default_graph_community_refresh_interval(),
994            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
995            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
996            use_embedding_resolution: false,
997            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
998            max_hops: default_graph_max_hops(),
999            recall_limit: default_graph_recall_limit(),
1000            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
1001            max_entities: 0,
1002            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
1003            community_summary_concurrency: default_graph_community_summary_concurrency(),
1004            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
1005            temporal_decay_rate: default_graph_temporal_decay_rate(),
1006            edge_history_limit: default_graph_edge_history_limit(),
1007            note_linking: NoteLinkingConfig::default(),
1008            spreading_activation: SpreadingActivationConfig::default(),
1009            link_weight_decay_lambda: default_link_weight_decay_lambda(),
1010            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
1011        }
1012    }
1013}
1014
/// Default value for `GraphConfig::link_weight_decay_lambda` (`0.95`).
fn default_link_weight_decay_lambda() -> f64 {
    const DEFAULT_LAMBDA: f64 = 0.95;
    DEFAULT_LAMBDA
}
1018
/// Default value for `GraphConfig::link_weight_decay_interval_secs`: one day, in seconds.
fn default_link_weight_decay_interval_secs() -> u64 {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;
    SECS_PER_DAY
}
1022
1023fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
1024where
1025    D: serde::Deserializer<'de>,
1026{
1027    let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
1028    if value.is_nan() || value.is_infinite() {
1029        return Err(serde::de::Error::custom(
1030            "link_weight_decay_lambda must be a finite number",
1031        ));
1032    }
1033    if !(value > 0.0 && value <= 1.0) {
1034        return Err(serde::de::Error::custom(
1035            "link_weight_decay_lambda must be in (0.0, 1.0]",
1036        ));
1037    }
1038    Ok(value)
1039}
1040
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal TOML document shape holding a single `pruning_strategy` key.
    #[derive(serde::Deserialize)]
    struct PruningWrapper {
        pruning_strategy: PruningStrategy,
    }

    /// Parses `pruning_strategy = "<value>"` from TOML and returns the decoded strategy.
    fn parse_pruning_strategy(value: &str) -> Result<PruningStrategy, toml::de::Error> {
        let doc = format!(r#"pruning_strategy = "{value}""#);
        toml::from_str::<PruningWrapper>(&doc).map(|wrapper| wrapper.pruning_strategy)
    }

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        let strategy =
            parse_pruning_strategy("task_aware_mig").expect("should deserialize without error");
        assert_eq!(
            strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    #[test]
    fn pruning_strategy_toml_round_trip() {
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let parsed = parse_pruning_strategy(input)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(parsed, expected, "mismatch for `{input}`");
        }
    }

    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        assert!(
            parse_pruning_strategy("nonexistent_strategy").is_err(),
            "unknown strategy must produce an error"
        );
    }

    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    /// Parses a `SemanticConfig` from a TOML document that sets only `importance_weight`.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // Boundary probe: a value just below the lower bound of 0.0 must be rejected.
        // Non-finite input is not exercised by this test. TOML 1.0 does define `nan` and
        // `inf` float literals, so that coverage could be added through the same helper.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    /// Parses a `GraphConfig` from a TOML document that sets only
    /// `link_weight_decay_lambda`; every other key falls back to its default.
    fn parse_decay_lambda(toml_val: &str) -> Result<GraphConfig, toml::de::Error> {
        let input = format!("link_weight_decay_lambda = {toml_val}");
        toml::from_str::<GraphConfig>(&input)
    }

    #[test]
    fn link_weight_decay_defaults_are_correct() {
        let cfg = GraphConfig::default();
        assert!((cfg.link_weight_decay_lambda - 0.95).abs() < f64::EPSILON);
        assert_eq!(cfg.link_weight_decay_interval_secs, 86400);
    }

    #[test]
    fn link_weight_decay_lambda_missing_key_uses_default() {
        let cfg: GraphConfig = toml::from_str("").expect("empty table must deserialize");
        assert!((cfg.link_weight_decay_lambda - 0.95).abs() < f64::EPSILON);
    }

    #[test]
    fn link_weight_decay_lambda_accepts_values_in_range() {
        // The range is `(0.0, 1.0]`, so the upper bound itself is valid.
        for (input, expected) in [("0.5", 0.5_f64), ("1.0", 1.0_f64)] {
            let cfg = parse_decay_lambda(input)
                .unwrap_or_else(|e| panic!("`{input}` must be accepted: {e}"));
            assert!(
                (cfg.link_weight_decay_lambda - expected).abs() < f64::EPSILON,
                "mismatch for `{input}`"
            );
        }
    }

    #[test]
    fn link_weight_decay_lambda_rejects_out_of_range() {
        // The lower bound is exclusive, so 0.0 is rejected along with negatives.
        for input in ["0.0", "-0.5", "1.01"] {
            assert!(
                parse_decay_lambda(input).is_err(),
                "`{input}` is outside (0.0, 1.0] and must be rejected"
            );
        }
    }

    #[test]
    fn link_weight_decay_lambda_rejects_non_finite() {
        for input in ["nan", "inf", "-inf"] {
            assert!(
                parse_decay_lambda(input).is_err(),
                "non-finite `{input}` must be rejected"
            );
        }
    }
}
zeph_config/memory.rs

zeph_config/
memory.rs