zeph_config/memory.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Serde default for `MemoryConfig::sqlite_pool_size`: `5`.
fn default_sqlite_pool_size() -> u32 {
    const POOL_SIZE: u32 = 5;
    POOL_SIZE
}
17
/// Default for a `max_history` setting: `100`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_max_history() -> usize {
    const MAX_HISTORY: usize = 100;
    MAX_HISTORY
}
21
/// Default for a `title_max_chars` setting: `60`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_title_max_chars() -> usize {
    const TITLE_MAX_CHARS: usize = 60;
    TITLE_MAX_CHARS
}
25
/// Default document collection name: `"zeph_documents"`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
29
/// Default for a document `chunk_size` setting: `1000`.
///
/// Presumably a serde default hook; the consuming field (and its unit) is outside this excerpt.
fn default_document_chunk_size() -> usize {
    const CHUNK_SIZE: usize = 1000;
    CHUNK_SIZE
}
33
/// Default for a document `chunk_overlap` setting: `100`.
///
/// Presumably a serde default hook; the consuming field (and its unit) is outside this excerpt.
fn default_document_chunk_overlap() -> usize {
    const CHUNK_OVERLAP: usize = 100;
    CHUNK_OVERLAP
}
37
/// Default for a document `top_k` setting: `3`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_document_top_k() -> usize {
    const TOP_K: usize = 3;
    TOP_K
}
41
/// Serde default for `MemoryConfig::autosave_min_length`: `20`.
fn default_autosave_min_length() -> usize {
    const MIN_LENGTH: usize = 20;
    MIN_LENGTH
}
45
/// Serde default for `MemoryConfig::tool_call_cutoff`: `6`.
fn default_tool_call_cutoff() -> usize {
    const CUTOFF: usize = 6;
    CUTOFF
}
49
/// Serde default for `MemoryConfig::token_safety_margin`: `1.0`.
fn default_token_safety_margin() -> f32 {
    const SAFETY_MARGIN: f32 = 1.0;
    SAFETY_MARGIN
}
53
/// Serde default for `MemoryConfig::redact_credentials`: `true`.
fn default_redact_credentials() -> bool {
    const REDACT: bool = true;
    REDACT
}
57
/// Serde default for `MemoryConfig::qdrant_url`: `"http://localhost:6334"`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
61
/// Serde default for `MemoryConfig::summarization_threshold`: `50`.
fn default_summarization_threshold() -> usize {
    const THRESHOLD: usize = 50;
    THRESHOLD
}
65
/// Serde default for `MemoryConfig::summarization_llm_timeout_secs`: `60` seconds.
fn default_summarization_llm_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 60;
    TIMEOUT_SECS
}
69
/// Serde default for `MemoryConfig::context_budget_tokens`: `0`.
fn default_context_budget_tokens() -> usize {
    const BUDGET_TOKENS: usize = 0;
    BUDGET_TOKENS
}
73
/// Serde default for `MemoryConfig::soft_compaction_threshold`: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    const SOFT_THRESHOLD: f32 = 0.60;
    SOFT_THRESHOLD
}
77
/// Serde default for `MemoryConfig::hard_compaction_threshold`: `0.90`.
///
/// The same field is also accepted under the serde alias `compaction_threshold`.
fn default_hard_compaction_threshold() -> f32 {
    const HARD_THRESHOLD: f32 = 0.90;
    HARD_THRESHOLD
}
81
/// Serde default for `MemoryConfig::compaction_preserve_tail`: `6`.
fn default_compaction_preserve_tail() -> usize {
    const PRESERVE_TAIL: usize = 6;
    PRESERVE_TAIL
}
85
/// Serde default for `MemoryConfig::compaction_cooldown_turns`: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    const COOLDOWN_TURNS: u8 = 2;
    COOLDOWN_TURNS
}
89
/// Serde default for `MemoryConfig::auto_budget`: `true`.
fn default_auto_budget() -> bool {
    const AUTO_BUDGET: bool = true;
    AUTO_BUDGET
}
93
/// Serde default for `MemoryConfig::prune_protect_tokens`: `40_000`.
fn default_prune_protect_tokens() -> usize {
    const PROTECT_TOKENS: usize = 40_000;
    PROTECT_TOKENS
}
97
/// Serde default for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    const SCORE_THRESHOLD: f32 = 0.35;
    SCORE_THRESHOLD
}
101
/// Default for a `temporal_decay_half_life_days` setting: `30`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_temporal_decay_half_life_days() -> u32 {
    const HALF_LIFE_DAYS: u32 = 30;
    HALF_LIFE_DAYS
}
105
/// Default for an `mmr_lambda` setting: `0.7`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_mmr_lambda() -> f32 {
    const MMR_LAMBDA: f32 = 0.7;
    MMR_LAMBDA
}
109
/// Default for a semantic `enabled` flag: `true`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_semantic_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
113
/// Default for a `recall_limit` setting: `5`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_recall_limit() -> usize {
    const RECALL_LIMIT: usize = 5;
    RECALL_LIMIT
}
117
/// Default for a `vector_weight` setting: `0.7`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_vector_weight() -> f64 {
    const VECTOR_WEIGHT: f64 = 0.7;
    VECTOR_WEIGHT
}
121
/// Default for a `keyword_weight` setting: `0.3`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_keyword_weight() -> f64 {
    const KEYWORD_WEIGHT: f64 = 0.3;
    KEYWORD_WEIGHT
}
125
/// Default for a graph `max_entities_per_message` setting: `10`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_max_entities_per_message() -> usize {
    const MAX_ENTITIES: usize = 10;
    MAX_ENTITIES
}
129
/// Default for a graph `max_edges_per_message` setting: `15`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_max_edges_per_message() -> usize {
    const MAX_EDGES: usize = 15;
    MAX_EDGES
}
133
/// Default for a graph `community_refresh_interval` setting: `100`.
///
/// Presumably a serde default hook; the consuming field (and the unit of the
/// interval) is outside this excerpt.
fn default_graph_community_refresh_interval() -> usize {
    const REFRESH_INTERVAL: usize = 100;
    REFRESH_INTERVAL
}
137
/// Default for a graph `community_summary_max_prompt_bytes` setting: `8192`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    const MAX_PROMPT_BYTES: usize = 8192;
    MAX_PROMPT_BYTES
}
141
/// Default for a graph `community_summary_concurrency` setting: `4`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_community_summary_concurrency() -> usize {
    const CONCURRENCY: usize = 4;
    CONCURRENCY
}
145
/// Default for an `lpa_edge_chunk_size` setting: `10_000`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_lpa_edge_chunk_size() -> usize {
    const EDGE_CHUNK_SIZE: usize = 10_000;
    EDGE_CHUNK_SIZE
}
149
/// Default for a graph `entity_similarity_threshold` setting: `0.85`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_entity_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
153
/// Default for a graph `entity_ambiguous_threshold` setting: `0.70`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    const AMBIGUOUS_THRESHOLD: f32 = 0.70;
    AMBIGUOUS_THRESHOLD
}
157
/// Default for a graph `extraction_timeout_secs` setting: `15` seconds.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_extraction_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 15;
    TIMEOUT_SECS
}
161
/// Default for a graph `max_hops` setting: `2`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_max_hops() -> u32 {
    const MAX_HOPS: u32 = 2;
    MAX_HOPS
}
165
/// Default for a graph `recall_limit` setting: `10`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_recall_limit() -> usize {
    const RECALL_LIMIT: usize = 10;
    RECALL_LIMIT
}
169
/// Default for a graph `expired_edge_retention_days` setting: `90`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_expired_edge_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
173
/// Default for a graph `temporal_decay_rate` setting: `0.0`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_temporal_decay_rate() -> f64 {
    const DECAY_RATE: f64 = 0.0;
    DECAY_RATE
}
177
/// Default for a graph `edge_history_limit` setting: `100`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_graph_edge_history_limit() -> usize {
    const HISTORY_LIMIT: usize = 100;
    HISTORY_LIMIT
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    const DECAY_LAMBDA: f32 = 0.85;
    DECAY_LAMBDA
}
185
/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    const MAX_HOPS: u32 = 3;
    MAX_HOPS
}
189
/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    const ACTIVATION_THRESHOLD: f32 = 0.1;
    ACTIVATION_THRESHOLD
}
193
/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    const INHIBITION_THRESHOLD: f32 = 0.8;
    INHIBITION_THRESHOLD
}
197
/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    const MAX_ACTIVATED_NODES: usize = 50;
    MAX_ACTIVATED_NODES
}
201
/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000` milliseconds.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    const RECALL_TIMEOUT_MS: u64 = 1000;
    RECALL_TIMEOUT_MS
}
205
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
209
/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    const TOP_K: usize = 10;
    TOP_K
}
213
/// Default for `NoteLinkingConfig::timeout_secs`: `5` seconds.
fn default_note_linking_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 5;
    TIMEOUT_SECS
}
217
/// Serde default for `MemoryConfig::shutdown_summary`: `true`.
fn default_shutdown_summary() -> bool {
    const SHUTDOWN_SUMMARY: bool = true;
    SHUTDOWN_SUMMARY
}
221
/// Serde default for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    const MIN_MESSAGES: usize = 4;
    MIN_MESSAGES
}
225
/// Serde default for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    const MAX_MESSAGES: usize = 20;
    MAX_MESSAGES
}
229
/// Serde default for `MemoryConfig::shutdown_summary_timeout_secs`: `30` seconds.
///
/// NOTE(review): the doc comment on the `MemoryConfig` field states `Default: 10`,
/// while this helper yields `30` — confirm which value is intended.
fn default_shutdown_summary_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}
233
234fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
235where
236 D: serde::Deserializer<'de>,
237{
238 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
239 if value.is_nan() || value.is_infinite() {
240 return Err(serde::de::Error::custom(
241 "similarity_threshold must be a finite number",
242 ));
243 }
244 if !(0.5..=1.0).contains(&value) {
245 return Err(serde::de::Error::custom(
246 "similarity_threshold must be in [0.5, 1.0]",
247 ));
248 }
249 Ok(value)
250}
251
252fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
253where
254 D: serde::Deserializer<'de>,
255{
256 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
257 if value < 2 {
258 return Err(serde::de::Error::custom(
259 "promotion_min_sessions must be >= 2",
260 ));
261 }
262 Ok(value)
263}
264
265fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
266where
267 D: serde::Deserializer<'de>,
268{
269 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
270 if value == 0 {
271 return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
272 }
273 Ok(value)
274}
275
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    const MIN_SESSIONS: u32 = 3;
    MIN_SESSIONS
}
279
/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.92;
    SIMILARITY_THRESHOLD
}
283
/// Default for `TierConfig::sweep_interval_secs`: `3600` seconds.
fn default_tier_sweep_interval_secs() -> u64 {
    const SWEEP_INTERVAL_SECS: u64 = 3600;
    SWEEP_INTERVAL_SECS
}
287
/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    const SWEEP_BATCH_SIZE: usize = 100;
    SWEEP_BATCH_SIZE
}
291
/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    const SCENE_SIMILARITY: f32 = 0.80;
    SCENE_SIMILARITY
}
295
/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    const SCENE_BATCH_SIZE: usize = 50;
    SCENE_BATCH_SIZE
}
299
300fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
301where
302 D: serde::Deserializer<'de>,
303{
304 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
305 if value.is_nan() || value.is_infinite() {
306 return Err(serde::de::Error::custom(
307 "scene_similarity_threshold must be a finite number",
308 ));
309 }
310 if !(0.5..=1.0).contains(&value) {
311 return Err(serde::de::Error::custom(
312 "scene_similarity_threshold must be in [0.5, 1.0]",
313 ));
314 }
315 Ok(value)
316}
317
318fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
319where
320 D: serde::Deserializer<'de>,
321{
322 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
323 if value == 0 {
324 return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
325 }
326 Ok(value)
327}
328
329/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
330///
331/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
332/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
333///
334/// # Validation
335///
336/// Constraints enforced at deserialization time:
337/// - `similarity_threshold` in `[0.5, 1.0]`
338/// - `promotion_min_sessions >= 2`
339/// - `sweep_batch_size >= 1`
340/// - `scene_similarity_threshold` in `[0.5, 1.0]`
341/// - `scene_batch_size >= 1`
342#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
343#[serde(default)]
344pub struct TierConfig {
345 /// Enable the tier promotion system. When `false`, all messages remain episodic.
346 /// Default: `false`.
347 pub enabled: bool,
348 /// Minimum number of distinct sessions a fact must appear in before promotion.
349 /// Must be `>= 2`. Default: `3`.
350 #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
351 pub promotion_min_sessions: u32,
352 /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
353 /// Must be in `[0.5, 1.0]`. Default: `0.92`.
354 #[serde(deserialize_with = "validate_tier_similarity_threshold")]
355 pub similarity_threshold: f32,
356 /// How often the background promotion sweep runs, in seconds. Default: `3600`.
357 pub sweep_interval_secs: u64,
358 /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
359 #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
360 pub sweep_batch_size: usize,
361 /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
362 pub scene_enabled: bool,
363 /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
364 #[serde(deserialize_with = "validate_scene_similarity_threshold")]
365 pub scene_similarity_threshold: f32,
366 /// Maximum unassigned semantic messages processed per scene consolidation sweep. Default: `50`.
367 #[serde(deserialize_with = "validate_scene_batch_size")]
368 pub scene_batch_size: usize,
369 /// Provider name from `[[llm.providers]]` for scene label/profile generation.
370 /// Falls back to the primary provider when empty. Default: `""`.
371 pub scene_provider: ProviderName,
372 /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
373 pub scene_sweep_interval_secs: u64,
374}
375
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200` seconds.
fn default_scene_sweep_interval_secs() -> u64 {
    const SCENE_SWEEP_INTERVAL_SECS: u64 = 7200;
    SCENE_SWEEP_INTERVAL_SECS
}
379
380impl Default for TierConfig {
381 fn default() -> Self {
382 Self {
383 enabled: false,
384 promotion_min_sessions: default_tier_promotion_min_sessions(),
385 similarity_threshold: default_tier_similarity_threshold(),
386 sweep_interval_secs: default_tier_sweep_interval_secs(),
387 sweep_batch_size: default_tier_sweep_batch_size(),
388 scene_enabled: false,
389 scene_similarity_threshold: default_scene_similarity_threshold(),
390 scene_batch_size: default_scene_batch_size(),
391 scene_provider: ProviderName::default(),
392 scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
393 }
394 }
395}
396
397fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
398where
399 D: serde::Deserializer<'de>,
400{
401 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
402 if value.is_nan() || value.is_infinite() {
403 return Err(serde::de::Error::custom(
404 "temporal_decay_rate must be a finite number",
405 ));
406 }
407 if !(0.0..=10.0).contains(&value) {
408 return Err(serde::de::Error::custom(
409 "temporal_decay_rate must be in [0.0, 10.0]",
410 ));
411 }
412 Ok(value)
413}
414
415fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
416where
417 D: serde::Deserializer<'de>,
418{
419 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
420 if value.is_nan() || value.is_infinite() {
421 return Err(serde::de::Error::custom(
422 "similarity_threshold must be a finite number",
423 ));
424 }
425 if !(0.0..=1.0).contains(&value) {
426 return Err(serde::de::Error::custom(
427 "similarity_threshold must be in [0.0, 1.0]",
428 ));
429 }
430 Ok(value)
431}
432
433fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
434where
435 D: serde::Deserializer<'de>,
436{
437 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
438 if value.is_nan() || value.is_infinite() {
439 return Err(serde::de::Error::custom(
440 "importance_weight must be a finite number",
441 ));
442 }
443 if value < 0.0 {
444 return Err(serde::de::Error::custom(
445 "importance_weight must be non-negative",
446 ));
447 }
448 if value > 1.0 {
449 return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
450 }
451 Ok(value)
452}
453
/// Default for an `importance_weight` setting: `0.15`.
///
/// Presumably a serde default hook; the consuming field is outside this excerpt.
fn default_importance_weight() -> f64 {
    const IMPORTANCE_WEIGHT: f64 = 0.15;
    IMPORTANCE_WEIGHT
}
457
458/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
459///
460/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
461/// Seeds are initialized from fuzzy entity matches, then activation propagates
462/// hop-by-hop with exponential decay and lateral inhibition.
463///
464/// # Validation
465///
466/// Constraints enforced at deserialization time:
467/// - `0.0 < decay_lambda <= 1.0`
468/// - `max_hops >= 1`
469/// - `activation_threshold < inhibition_threshold`
470/// - `recall_timeout_ms >= 1` (clamped to 100 with a warning if set to 0)
471#[derive(Debug, Clone, Deserialize, Serialize)]
472#[serde(default)]
473pub struct SpreadingActivationConfig {
474 /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
475 pub enabled: bool,
476 /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
477 #[serde(deserialize_with = "validate_decay_lambda")]
478 pub decay_lambda: f32,
479 /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
480 #[serde(deserialize_with = "validate_max_hops")]
481 pub max_hops: u32,
482 /// Minimum activation score to include a node in results. Default: `0.1`.
483 pub activation_threshold: f32,
484 /// Activation level at which a node stops receiving more activation. Default: `0.8`.
485 pub inhibition_threshold: f32,
486 /// Cap on total activated nodes per spread pass. Default: `50`.
487 pub max_activated_nodes: usize,
488 /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
489 #[serde(default = "default_seed_structural_weight")]
490 pub seed_structural_weight: f32,
491 /// Maximum seeds per community. `0` = unlimited. Default: `3`.
492 #[serde(default = "default_seed_community_cap")]
493 pub seed_community_cap: usize,
494 /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
495 /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
496 /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
497 #[serde(default = "default_spreading_activation_recall_timeout_ms")]
498 pub recall_timeout_ms: u64,
499}
500
501fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
502where
503 D: serde::Deserializer<'de>,
504{
505 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
506 if value.is_nan() || value.is_infinite() {
507 return Err(serde::de::Error::custom(
508 "decay_lambda must be a finite number",
509 ));
510 }
511 if !(value > 0.0 && value <= 1.0) {
512 return Err(serde::de::Error::custom(
513 "decay_lambda must be in (0.0, 1.0]",
514 ));
515 }
516 Ok(value)
517}
518
519fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
520where
521 D: serde::Deserializer<'de>,
522{
523 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
524 if value == 0 {
525 return Err(serde::de::Error::custom("max_hops must be >= 1"));
526 }
527 Ok(value)
528}
529
530impl SpreadingActivationConfig {
531 /// Validate cross-field constraints that cannot be expressed in per-field validators.
532 ///
533 /// # Errors
534 ///
535 /// Returns an error string if `activation_threshold >= inhibition_threshold`.
536 pub fn validate(&self) -> Result<(), String> {
537 if self.activation_threshold >= self.inhibition_threshold {
538 return Err(format!(
539 "activation_threshold ({}) must be < inhibition_threshold ({})",
540 self.activation_threshold, self.inhibition_threshold
541 ));
542 }
543 Ok(())
544 }
545}
546
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    const STRUCTURAL_WEIGHT: f32 = 0.4;
    STRUCTURAL_WEIGHT
}
550
/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    const COMMUNITY_CAP: usize = 3;
    COMMUNITY_CAP
}
554
555impl Default for SpreadingActivationConfig {
556 fn default() -> Self {
557 Self {
558 enabled: false,
559 decay_lambda: default_spreading_activation_decay_lambda(),
560 max_hops: default_spreading_activation_max_hops(),
561 activation_threshold: default_spreading_activation_activation_threshold(),
562 inhibition_threshold: default_spreading_activation_inhibition_threshold(),
563 max_activated_nodes: default_spreading_activation_max_activated_nodes(),
564 seed_structural_weight: default_seed_structural_weight(),
565 seed_community_cap: default_seed_community_cap(),
566 recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
567 }
568 }
569}
570
/// Kumiho belief revision configuration.
///
/// Fields missing from the input are taken from [`BeliefRevisionConfig::default`]
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    ///
    /// Checked at deserialization time by `validate_similarity_threshold`: must be a
    /// finite number in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
582
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
586
587impl Default for BeliefRevisionConfig {
588 fn default() -> Self {
589 Self {
590 enabled: false,
591 similarity_threshold: default_belief_revision_similarity_threshold(),
592 }
593 }
594}
595
/// D-MEM RPE-based tiered graph extraction routing configuration.
///
/// Fields missing from the input are taken from [`RpeConfig::default`]
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    ///
    /// NOTE(review): the range is enforced by the shared `validate_similarity_threshold`
    /// hook, so a rejected value produces an error message that names
    /// `similarity_threshold` rather than `threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}
609
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    const RPE_THRESHOLD: f32 = 0.3;
    RPE_THRESHOLD
}
613
/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    const MAX_SKIP_TURNS: u32 = 5;
    MAX_SKIP_TURNS
}
617
618impl Default for RpeConfig {
619 fn default() -> Self {
620 Self {
621 enabled: false,
622 threshold: default_rpe_threshold(),
623 max_skip_turns: default_rpe_max_skip_turns(),
624 }
625 }
626}
627
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// Fields missing from the input are taken from [`NoteLinkingConfig::default`]
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    ///
    /// Checked at deserialization time by `validate_similarity_threshold`: must be a
    /// finite number in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
646
647impl Default for NoteLinkingConfig {
648 fn default() -> Self {
649 Self {
650 enabled: false,
651 similarity_threshold: default_note_linking_similarity_threshold(),
652 top_k: default_note_linking_top_k(),
653 timeout_secs: default_note_linking_timeout_secs(),
654 }
655 }
656}
657
/// Vector backend selector for embedding storage.
///
/// Variants are (de)serialized as lowercase strings (`"qdrant"`, `"sqlite"`) because of
/// `#[serde(rename_all = "lowercase")]`. [`Default`] yields [`VectorBackend::Sqlite`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum VectorBackend {
    /// Qdrant backend; serialized as `"qdrant"`.
    Qdrant,
    /// SQLite backend; serialized as `"sqlite"`. This is the default variant.
    #[default]
    Sqlite,
}
666
667impl VectorBackend {
668 /// Return the lowercase identifier string for this backend.
669 ///
670 /// # Examples
671 ///
672 /// ```
673 /// use zeph_config::VectorBackend;
674 ///
675 /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
676 /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
677 /// ```
678 #[must_use]
679 pub fn as_str(&self) -> &'static str {
680 match self {
681 Self::Qdrant => "qdrant",
682 Self::Sqlite => "sqlite",
683 }
684 }
685}
686
687/// Memory subsystem configuration, nested under `[memory]` in TOML.
688///
689/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
690/// multi-tier promotion, and all memory-related background tasks.
691///
692/// # Example (TOML)
693///
694/// ```toml
695/// [memory]
696/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
697/// qdrant_url = "http://localhost:6334"
698/// history_limit = 50
699/// summarization_threshold = 50
700/// auto_budget = true
701/// ```
702#[derive(Debug, Deserialize, Serialize)]
703#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
704pub struct MemoryConfig {
705 #[serde(default)]
706 pub compression_guidelines: CompressionGuidelinesConfig,
707 #[serde(default = "default_sqlite_path_field")]
708 pub sqlite_path: String,
709 pub history_limit: u32,
710 #[serde(default = "default_qdrant_url")]
711 pub qdrant_url: String,
712 /// Optional API key for authenticating to a remote or managed Qdrant cluster.
713 ///
714 /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
715 /// Leave `None` for local dev instances. The actual key is resolved from the vault:
716 /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
717 ///
718 /// The value is wrapped in [`Secret`] to prevent accidental logging.
719 /// `skip_serializing` prevents the key from being written back to TOML on config save.
720 #[serde(default, skip_serializing)]
721 pub qdrant_api_key: Option<Secret>,
722 #[serde(default)]
723 pub semantic: SemanticConfig,
724 #[serde(default = "default_summarization_threshold")]
725 pub summarization_threshold: usize,
726 /// LLM call timeout for summarization, in seconds. Default: `60`.
727 #[serde(default = "default_summarization_llm_timeout_secs")]
728 pub summarization_llm_timeout_secs: u64,
729 #[serde(default = "default_context_budget_tokens")]
730 pub context_budget_tokens: usize,
731 #[serde(default = "default_soft_compaction_threshold")]
732 pub soft_compaction_threshold: f32,
733 #[serde(
734 default = "default_hard_compaction_threshold",
735 alias = "compaction_threshold"
736 )]
737 pub hard_compaction_threshold: f32,
738 #[serde(default = "default_compaction_preserve_tail")]
739 pub compaction_preserve_tail: usize,
740 #[serde(default = "default_compaction_cooldown_turns")]
741 pub compaction_cooldown_turns: u8,
742 #[serde(default = "default_auto_budget")]
743 pub auto_budget: bool,
744 #[serde(default = "default_prune_protect_tokens")]
745 pub prune_protect_tokens: usize,
746 #[serde(default = "default_cross_session_score_threshold")]
747 pub cross_session_score_threshold: f32,
748 #[serde(default)]
749 pub vector_backend: VectorBackend,
750 #[serde(default = "default_token_safety_margin")]
751 pub token_safety_margin: f32,
752 #[serde(default = "default_redact_credentials")]
753 pub redact_credentials: bool,
754 #[serde(default = "default_true")]
755 pub autosave_assistant: bool,
756 #[serde(default = "default_autosave_min_length")]
757 pub autosave_min_length: usize,
758 #[serde(default = "default_tool_call_cutoff")]
759 pub tool_call_cutoff: usize,
760 #[serde(default = "default_sqlite_pool_size")]
761 pub sqlite_pool_size: u32,
762 #[serde(default)]
763 pub sessions: SessionsConfig,
764 #[serde(default)]
765 pub documents: DocumentConfig,
766 #[serde(default)]
767 pub eviction: EvictionConfig,
768 #[serde(default)]
769 pub compression: CompressionConfig,
770 #[serde(default)]
771 pub sidequest: SidequestConfig,
772 #[serde(default)]
773 pub graph: GraphConfig,
774 /// Store a lightweight session summary to the vector store on shutdown when no session
775 /// summary exists yet for this conversation. Enables cross-session recall for short or
776 /// interrupted sessions that never triggered hard compaction. Default: `true`.
777 #[serde(default = "default_shutdown_summary")]
778 pub shutdown_summary: bool,
779 /// Minimum number of user-turn messages required before a shutdown summary is generated.
780 /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
781 #[serde(default = "default_shutdown_summary_min_messages")]
782 pub shutdown_summary_min_messages: usize,
783 /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
784 /// summarization. Caps token cost for long sessions that never triggered hard compaction.
785 /// Default: `20`.
786 #[serde(default = "default_shutdown_summary_max_messages")]
787 pub shutdown_summary_max_messages: usize,
788 /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
789 /// Applies independently to the structured call and to the plain-text fallback.
790 /// Default: `10`.
791 #[serde(default = "default_shutdown_summary_timeout_secs")]
792 pub shutdown_summary_timeout_secs: u64,
793 /// LLM provider used for shutdown summarization calls.
794 ///
795 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
796 /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
797 ///
798 /// Example:
799 /// ```toml
800 /// [memory]
801 /// shutdown_summary_provider = "fast"
802 /// ```
803 #[serde(default)]
804 pub shutdown_summary_provider: ProviderName,
805 /// LLM provider used for deferred tool-pair summarization (context compaction).
806 ///
807 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
808 /// provider. A mid-tier model is usually sufficient for compaction summaries.
809 ///
810 /// Example:
811 /// ```toml
812 /// [memory]
813 /// compaction_provider = "fast"
814 /// ```
815 #[serde(default)]
816 pub compaction_provider: ProviderName,
817 /// Use structured anchored summaries for context compaction.
818 ///
819 /// When enabled, hard compaction requests a JSON schema from the LLM
820 /// instead of free-form prose. Falls back to prose if the LLM fails
821 /// to produce valid JSON. Default: `false`.
822 #[serde(default)]
823 pub structured_summaries: bool,
824 /// AOI three-layer memory tier promotion system.
825 ///
826 /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
827 /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
828 #[serde(default)]
829 pub tiers: TierConfig,
830 /// A-MAC adaptive memory admission control.
831 ///
832 /// When `admission.enabled = true`, each message is evaluated before saving and rejected
833 /// if its composite admission score falls below the configured threshold.
834 #[serde(default)]
835 pub admission: AdmissionConfig,
836 /// Session digest generation at session end. Default: disabled.
837 #[serde(default)]
838 pub digest: DigestConfig,
839 /// Context assembly strategy. Default: `full_history` (current behavior).
840 #[serde(default)]
841 pub context_strategy: ContextStrategy,
842 /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
843 #[serde(default = "default_crossover_turn_threshold")]
844 pub crossover_turn_threshold: u32,
845 /// All-Mem lifelong memory consolidation sweep.
846 ///
847 /// When `consolidation.enabled = true`, a background loop clusters semantically similar
848 /// messages and merges them into consolidated entries via LLM.
849 #[serde(default)]
850 pub consolidation: ConsolidationConfig,
851 /// `SleepGate` forgetting sweep (#2397).
852 ///
853 /// When `forgetting.enabled = true`, a background loop periodically decays importance
854 /// scores and prunes memories below the forgetting floor.
855 #[serde(default)]
856 pub forgetting: ForgettingConfig,
857 /// `PostgreSQL` connection URL.
858 ///
859 /// Used when the binary is compiled with `--features postgres`.
860 /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
861 /// Example: `postgres://user:pass@localhost:5432/zeph`
862 /// Default: `None` (uses `sqlite_path` instead).
863 #[serde(default)]
864 pub database_url: Option<String>,
865 /// Cost-sensitive store routing (#2444).
866 ///
867 /// When `store_routing.enabled = true`, query intent is classified and routed to
868 /// the cheapest sufficient backend instead of querying all stores on every turn.
869 #[serde(default)]
870 pub store_routing: StoreRoutingConfig,
871 /// Persona memory layer (#2461).
872 ///
873 /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
874 /// from conversation history and injected into context after the system prompt.
875 #[serde(default)]
876 pub persona: PersonaConfig,
877 /// Trajectory-informed memory (#2498).
878 #[serde(default)]
879 pub trajectory: TrajectoryConfig,
880 /// Category-aware memory (#2428).
881 #[serde(default)]
882 pub category: CategoryConfig,
883 /// `TiMem` temporal-hierarchical memory tree (#2262).
884 #[serde(default)]
885 pub tree: TreeConfig,
886 /// Time-based microcompact (#2699).
887 ///
888 /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
889 /// from context when the session has been idle longer than `gap_threshold_minutes`.
890 #[serde(default)]
891 pub microcompact: MicrocompactConfig,
892 /// autoDream background memory consolidation (#2697).
893 ///
894 /// When `autodream.enabled = true`, a constrained consolidation subagent runs
895 /// after a session ends if both `min_sessions` and `min_hours` gates pass.
896 #[serde(default)]
897 pub autodream: AutoDreamConfig,
898 /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
899 ///
900 /// Before inserting a new key fact, its nearest neighbour is looked up in the
901 /// `zeph_key_facts` collection. If the best score is ≥ this threshold the fact is
902 /// considered a near-duplicate and skipped. Set to a value greater than `1.0` (e.g.
903 /// `2.0`) to disable dedup entirely. Default: `0.95`.
904 #[serde(default = "default_key_facts_dedup_threshold")]
905 pub key_facts_dedup_threshold: f32,
906 /// Experience compression spectrum (#3305).
907 ///
908 /// Controls three-tier retrieval policy and background skill-promotion engine.
909 #[serde(default)]
910 pub compression_spectrum: crate::features::CompressionSpectrumConfig,
911 /// MemMachine-inspired retrieval-stage tuning (#3340).
912 ///
913 /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
914 /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
915 /// uniformly across graph, hybrid, and vector-only recall paths.
916 ///
917 /// # Example (TOML)
918 ///
919 /// ```toml
920 /// [memory.retrieval]
921 /// depth = 40
922 /// search_prompt_template = ""
923 /// context_format = "structured"
924 /// ```
925 #[serde(default)]
926 pub retrieval: RetrievalConfig,
927 /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
928 ///
929 /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
930 /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
931 /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
932 /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
933 /// and injected before the LLM call.
934 #[serde(default)]
935 pub reasoning: ReasoningConfig,
936 /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
937 ///
938 /// When `enabled = true`, the weight of each `graph_edges` row is incremented
939 /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
940 ///
941 /// # Example (TOML)
942 ///
943 /// ```toml
944 /// [memory.hebbian]
945 /// enabled = true
946 /// hebbian_lr = 0.1
947 /// ```
948 #[serde(default)]
949 pub hebbian: HebbianConfig,
950 /// `MemCoT` rolling semantic state configuration (#3574).
951 ///
952 /// When `enabled = true`, each completed assistant turn spawns a background distillation
953 /// task that compresses the response into a short semantic state buffer. The buffer is
954 /// prepended to graph recall queries so retrieval stays contextually relevant across long
955 /// multi-turn sessions.
956 ///
957 /// # Example (TOML)
958 ///
959 /// ```toml
960 /// [memory.memcot]
961 /// enabled = true
962 /// distill_provider = "fast"
963 /// min_assistant_chars = 200
964 /// max_distills_per_session = 50
965 /// ```
966 #[serde(default)]
967 pub memcot: MemCotConfig,
968 /// `OmniMem` retrieval failure tracking (issue #3576).
969 ///
970 /// When `enabled = true`, no-hit and low-confidence recall events are logged
971 /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
972 ///
973 /// # Example (TOML)
974 ///
975 /// ```toml
976 /// [memory.retrieval_failures]
977 /// enabled = true
978 /// low_confidence_threshold = 0.3
979 /// retention_days = 90
980 /// ```
981 #[serde(default)]
982 pub retrieval_failures: RetrievalFailuresConfig,
983 /// Write quality gate (#3629).
984 ///
985 /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
986 /// writes are rejected before persistence. Evaluated after A-MAC admission control.
987 #[serde(default)]
988 pub quality_gate: WriteQualityGateConfig,
989 /// `MemFlow` tiered intent-driven retrieval (issue #3712).
990 ///
991 /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
992 /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
993 /// `DeepReasoning`) with optional validation and tier escalation.
994 #[serde(default)]
995 pub tiered_retrieval: TieredRetrievalConfig,
996 /// `ScrapMem` optical forgetting (issue #3713).
997 ///
998 /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
999 /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1000 #[serde(default)]
1001 pub optical_forgetting: OpticalForgettingConfig,
1002 /// EM-Graph episodic event extraction and causal linking (issue #3713).
1003 ///
1004 /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1005 /// and linked via causal relationships, enabling causal-chain retrieval.
1006 #[serde(default)]
1007 pub em_graph: EmGraphConfig,
1008 /// Episodic-to-semantic consolidation daemon (issue #3799).
1009 ///
1010 /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1011 /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1012 /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1013 #[serde(default)]
1014 pub episodic_consolidation: EpisodicConsolidationConfig,
1015}
1016
1017// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1018
1019/// `MemFlow` tiered intent-driven retrieval configuration.
1020///
1021/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
1022/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
1023/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
1024///
1025/// # Example (TOML)
1026///
1027/// ```toml
1028/// [memory.tiered_retrieval]
1029/// enabled = false
1030/// classifier_provider = ""
1031/// validator_provider = ""
1032/// token_budget = 4096
1033/// validation_enabled = false
1034/// validation_threshold = 0.6
1035/// max_escalations = 1
1036/// classifier_timeout_secs = 5
1037/// validator_timeout_secs = 5
1038/// ```
1039#[derive(Debug, Clone, Deserialize, Serialize)]
1040#[serde(default)]
1041pub struct TieredRetrievalConfig {
1042 /// Enable `MemFlow` tiered retrieval. Default: `false`.
1043 pub enabled: bool,
1044 /// Provider name from `[[llm.providers]]` for intent classification.
1045 ///
1046 /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
1047 /// is set but the call fails, falls back to the heuristic (fail-open).
1048 pub classifier_provider: ProviderName,
1049 /// Provider name from `[[llm.providers]]` for evidence validation.
1050 ///
1051 /// When empty or when `validation_enabled = false`, no validation call is made.
1052 pub validator_provider: ProviderName,
1053 /// Maximum tokens to gather for evidence per query. Default: `4096`.
1054 pub token_budget: usize,
1055 /// Enable evidence validation and tier escalation. Default: `false`.
1056 pub validation_enabled: bool,
1057 /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
1058 pub validation_threshold: f32,
1059 /// Maximum tier escalations per query. Default: `1`.
1060 pub max_escalations: u8,
1061 /// Timeout in seconds for the classifier LLM call. Default: `5`.
1062 ///
1063 /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
1064 pub classifier_timeout_secs: u64,
1065 /// Timeout in seconds for the validator LLM call. Default: `5`.
1066 ///
1067 /// On timeout the validator is treated as sufficient (fail-open).
1068 pub validator_timeout_secs: u64,
1069}
1070
1071impl Default for TieredRetrievalConfig {
1072 fn default() -> Self {
1073 Self {
1074 enabled: false,
1075 classifier_provider: ProviderName::default(),
1076 validator_provider: ProviderName::default(),
1077 token_budget: 4096,
1078 validation_enabled: false,
1079 validation_threshold: 0.6,
1080 max_escalations: 1,
1081 classifier_timeout_secs: 5,
1082 validator_timeout_secs: 5,
1083 }
1084 }
1085}
1086
1087// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1088
1089/// `ScrapMem` optical forgetting configuration.
1090///
1091/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
1092/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
1093/// forgetting compresses content in place based on age.
1094///
1095/// # Example (TOML)
1096///
1097/// ```toml
1098/// [memory.optical_forgetting]
1099/// enabled = false
1100/// compress_provider = ""
1101/// compress_after_turns = 100
1102/// summarize_after_turns = 500
1103/// sweep_interval_secs = 3600
1104/// sweep_batch_size = 50
1105/// ```
1106#[derive(Debug, Clone, Deserialize, Serialize)]
1107#[serde(default)]
1108pub struct OpticalForgettingConfig {
1109 /// Enable optical forgetting sweep. Default: `false`.
1110 pub enabled: bool,
1111 /// Provider name from `[[llm.providers]]` for LLM-based content compression.
1112 /// Falls back to the primary provider when empty.
1113 pub compress_provider: ProviderName,
1114 /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
1115 pub compress_after_turns: u32,
1116 /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
1117 pub summarize_after_turns: u32,
1118 /// How often the sweep runs, in seconds. Default: `3600`.
1119 pub sweep_interval_secs: u64,
1120 /// Maximum messages to compress per sweep iteration. Default: `50`.
1121 pub sweep_batch_size: usize,
1122}
1123
1124impl Default for OpticalForgettingConfig {
1125 fn default() -> Self {
1126 Self {
1127 enabled: false,
1128 compress_provider: ProviderName::default(),
1129 compress_after_turns: 100,
1130 summarize_after_turns: 500,
1131 sweep_interval_secs: 3600,
1132 sweep_batch_size: 50,
1133 }
1134 }
1135}
1136
1137// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1138
1139/// EM-Graph episodic event extraction and causal linking configuration.
1140///
1141/// When enabled, episodic events are extracted from conversation turns and linked
1142/// via causal relationships stored in `episodic_events` and `causal_links` tables.
1143///
1144/// # Example (TOML)
1145///
1146/// ```toml
1147/// [memory.em_graph]
1148/// enabled = false
1149/// extract_provider = ""
1150/// max_chain_depth = 3
1151/// ```
1152#[derive(Debug, Clone, Deserialize, Serialize)]
1153#[serde(default)]
1154pub struct EmGraphConfig {
1155 /// Enable EM-Graph event extraction and causal linking. Default: `false`.
1156 pub enabled: bool,
1157 /// Provider name from `[[llm.providers]]` for event extraction.
1158 /// Falls back to the primary provider when empty.
1159 pub extract_provider: ProviderName,
1160 /// Maximum hops when traversing causal chains during recall. Default: `3`.
1161 pub max_chain_depth: u32,
1162}
1163
1164impl Default for EmGraphConfig {
1165 fn default() -> Self {
1166 Self {
1167 enabled: false,
1168 extract_provider: ProviderName::default(),
1169 max_chain_depth: 3,
1170 }
1171 }
1172}
1173
1174// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1175
/// Default interval between episodic consolidation sweeps: 30 minutes, in seconds.
fn default_episodic_consolidation_interval_secs() -> u64 {
    30 * 60
}
1179
/// Default cap on episodic events processed in one consolidation sweep.
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1183
/// Default minimum age before an episodic event may be consolidated: 5 minutes, in seconds.
fn default_episodic_consolidation_min_age_secs() -> u64 {
    5 * 60
}
1187
/// Default Jaccard similarity at or above which an extracted fact counts as a duplicate.
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const DUPLICATE_JACCARD: f32 = 0.6;
    DUPLICATE_JACCARD
}
1191
1192/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
1193///
1194/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
1195/// extracts durable factual statements via LLM, deduplicates them against existing
1196/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
1197/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
1198///
1199/// # Example (TOML)
1200///
1201/// ```toml
1202/// [memory.episodic_consolidation]
1203/// enabled = false
1204/// consolidation_provider = ""
1205/// interval_secs = 1800
1206/// batch_size = 30
1207/// min_age_secs = 300
1208/// dedup_jaccard_threshold = 0.6
1209/// ```
1210#[derive(Debug, Clone, Deserialize, Serialize)]
1211#[serde(default)]
1212pub struct EpisodicConsolidationConfig {
1213 /// Enable the episodic consolidation daemon. Default: `false`.
1214 pub enabled: bool,
1215 /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
1216 /// Falls back to the primary provider when empty.
1217 pub consolidation_provider: ProviderName,
1218 /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
1219 #[serde(default = "default_episodic_consolidation_interval_secs")]
1220 pub interval_secs: u64,
1221 /// Maximum number of episodic events to process per sweep. Default: `30`.
1222 #[serde(default = "default_episodic_consolidation_batch_size")]
1223 pub batch_size: usize,
1224 /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
1225 /// Prevents consolidating events from the active conversation.
1226 #[serde(default = "default_episodic_consolidation_min_age_secs")]
1227 pub min_age_secs: u64,
1228 /// Jaccard similarity threshold for deduplication against existing key facts.
1229 /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
1230 #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
1231 pub dedup_jaccard_threshold: f32,
1232}
1233
1234impl Default for EpisodicConsolidationConfig {
1235 fn default() -> Self {
1236 Self {
1237 enabled: false,
1238 consolidation_provider: ProviderName::default(),
1239 interval_secs: default_episodic_consolidation_interval_secs(),
1240 batch_size: default_episodic_consolidation_batch_size(),
1241 min_age_secs: default_episodic_consolidation_min_age_secs(),
1242 dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1243 }
1244 }
1245}
1246
/// Default low-confidence threshold for retrieval-failure tracking.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const LOW_CONFIDENCE: f32 = 0.3;
    LOW_CONFIDENCE
}
1250
/// Default retention period, in days, for retrieval-failure tracking.
fn default_retrieval_failures_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
1254
/// Default channel capacity for retrieval-failure tracking.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CHANNEL_CAPACITY: usize = 256;
    CHANNEL_CAPACITY
}
1258
/// Default batch size for retrieval-failure tracking.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH_SIZE: usize = 16;
    BATCH_SIZE
}
1262
/// Default flush interval, in milliseconds, for retrieval-failure tracking.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const FLUSH_INTERVAL_MS: u64 = 100;
    FLUSH_INTERVAL_MS
}
1266
/// Default value for `crossover_turn_threshold`, counted in turns.
fn default_crossover_turn_threshold() -> u32 {
    const CROSSOVER_TURNS: u32 = 20;
    CROSSOVER_TURNS
}
1270
/// Default deduplication threshold for key facts.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_key_facts_dedup_threshold() -> f32 {
    const DEDUP_THRESHOLD: f32 = 0.95;
    DEDUP_THRESHOLD
}
1274
1275/// Session digest configuration (#2289).
1276#[derive(Debug, Clone, Deserialize, Serialize)]
1277#[serde(default)]
1278pub struct DigestConfig {
1279 /// Enable session digest generation at session end. Default: `false`.
1280 pub enabled: bool,
1281 /// Provider name from `[[llm.providers]]` for digest generation.
1282 /// Falls back to the primary provider when `None`.
1283 #[serde(default)]
1284 pub provider: Option<ProviderName>,
1285 /// Maximum tokens for the digest text. Default: `500`.
1286 pub max_tokens: usize,
1287 /// Maximum messages to feed into the digest prompt. Default: `50`.
1288 pub max_input_messages: usize,
1289}
1290
1291impl Default for DigestConfig {
1292 fn default() -> Self {
1293 Self {
1294 enabled: false,
1295 provider: None,
1296 max_tokens: 500,
1297 max_input_messages: 50,
1298 }
1299 }
1300}
1301
1302/// Context assembly strategy (#2288).
1303#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
1304#[serde(rename_all = "snake_case")]
1305pub enum ContextStrategy {
1306 /// Full conversation history trimmed to budget, with memory augmentation.
1307 /// This is the default and existing behavior.
1308 #[default]
1309 FullHistory,
1310 /// Drop conversation history; assemble context from summaries, semantic recall,
1311 /// cross-session memory, and session digest only.
1312 MemoryFirst,
1313 /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
1314 /// `crossover_turn_threshold`.
1315 Adaptive,
1316}
1317
1318/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
1319#[derive(Debug, Clone, Deserialize, Serialize)]
1320#[serde(default)]
1321pub struct SessionsConfig {
1322 /// Maximum number of sessions returned by list operations (0 = unlimited).
1323 #[serde(default = "default_max_history")]
1324 pub max_history: usize,
1325 /// Maximum characters for auto-generated session titles.
1326 #[serde(default = "default_title_max_chars")]
1327 pub title_max_chars: usize,
1328}
1329
1330impl Default for SessionsConfig {
1331 fn default() -> Self {
1332 Self {
1333 max_history: default_max_history(),
1334 title_max_chars: default_title_max_chars(),
1335 }
1336 }
1337}
1338
1339/// Configuration for the document ingestion and RAG retrieval pipeline.
1340#[derive(Debug, Clone, Deserialize, Serialize)]
1341pub struct DocumentConfig {
1342 #[serde(default = "default_document_collection")]
1343 pub collection: String,
1344 #[serde(default = "default_document_chunk_size")]
1345 pub chunk_size: usize,
1346 #[serde(default = "default_document_chunk_overlap")]
1347 pub chunk_overlap: usize,
1348 /// Number of document chunks to inject into agent context per turn.
1349 #[serde(default = "default_document_top_k")]
1350 pub top_k: usize,
1351 /// Enable document RAG injection into agent context.
1352 #[serde(default)]
1353 pub rag_enabled: bool,
1354}
1355
1356impl Default for DocumentConfig {
1357 fn default() -> Self {
1358 Self {
1359 collection: default_document_collection(),
1360 chunk_size: default_document_chunk_size(),
1361 chunk_overlap: default_document_chunk_overlap(),
1362 top_k: default_document_top_k(),
1363 rag_enabled: false,
1364 }
1365 }
1366}
1367
1368/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1369///
1370/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1371/// re-ranking, and hybrid BM25+vector weighting.
1372///
1373/// # Example (TOML)
1374///
1375/// ```toml
1376/// [memory.semantic]
1377/// enabled = true
1378/// recall_limit = 5
1379/// vector_weight = 0.7
1380/// keyword_weight = 0.3
1381/// mmr_lambda = 0.7
1382/// ```
1383#[derive(Debug, Deserialize, Serialize)]
1384#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1385pub struct SemanticConfig {
1386 /// Enable vector-based semantic recall. Default: `true`.
1387 #[serde(default = "default_semantic_enabled")]
1388 pub enabled: bool,
1389 #[serde(default = "default_recall_limit")]
1390 pub recall_limit: usize,
1391 #[serde(default = "default_vector_weight")]
1392 pub vector_weight: f64,
1393 #[serde(default = "default_keyword_weight")]
1394 pub keyword_weight: f64,
1395 #[serde(default = "default_true")]
1396 pub temporal_decay_enabled: bool,
1397 #[serde(default = "default_temporal_decay_half_life_days")]
1398 pub temporal_decay_half_life_days: u32,
1399 #[serde(default = "default_true")]
1400 pub mmr_enabled: bool,
1401 #[serde(default = "default_mmr_lambda")]
1402 pub mmr_lambda: f32,
1403 #[serde(default = "default_true")]
1404 pub importance_enabled: bool,
1405 #[serde(
1406 default = "default_importance_weight",
1407 deserialize_with = "validate_importance_weight"
1408 )]
1409 pub importance_weight: f64,
1410 /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1411 /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1412 /// from contending with the guardrail at the API server level (rate limits, Ollama
1413 /// single-model lock). Falls back to the main agent provider when `None`.
1414 #[serde(default)]
1415 pub embed_provider: Option<ProviderName>,
1416}
1417
1418impl Default for SemanticConfig {
1419 fn default() -> Self {
1420 Self {
1421 enabled: default_semantic_enabled(),
1422 recall_limit: default_recall_limit(),
1423 vector_weight: default_vector_weight(),
1424 keyword_weight: default_keyword_weight(),
1425 temporal_decay_enabled: true,
1426 temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1427 mmr_enabled: true,
1428 mmr_lambda: default_mmr_lambda(),
1429 importance_enabled: true,
1430 importance_weight: default_importance_weight(),
1431 embed_provider: None,
1432 }
1433 }
1434}
1435
1436/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
1437///
1438/// Controls how each recalled memory entry is presented in the assembled prompt.
1439/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
1440/// always contain the raw message text. The format is applied exclusively during
1441/// context assembly and is never persisted.
1442///
1443/// # Token cost
1444///
1445/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
1446/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
1447#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
1448#[serde(rename_all = "snake_case")]
1449pub enum ContextFormat {
1450 /// Emit a labeled header per snippet:
1451 /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
1452 ///
1453 /// This is the default. Gives the LLM structured provenance metadata for each recalled
1454 /// memory without re-parsing the recall body.
1455 #[default]
1456 Structured,
1457 /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
1458 ///
1459 /// Use `Plain` when downstream consumers rely on the old format or when token budget
1460 /// is tight and provenance headers are not needed.
1461 Plain,
1462}
1463
1464/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
1465///
1466/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
1467/// Nested under `[memory.retrieval]` in TOML. All fields have defaults so existing
1468/// configs parse unchanged.
1469///
1470/// # Example (TOML)
1471///
1472/// ```toml
1473/// [memory.retrieval]
1474/// # depth = 0 # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
1475/// # search_prompt_template = ""
1476/// # context_format = "structured"
1477/// ```
1478#[derive(Debug, Clone, Deserialize, Serialize)]
1479#[serde(default)]
1480pub struct RetrievalConfig {
1481 /// Number of ANN candidates fetched from the vector store before keyword merge,
1482 /// temporal decay, and MMR re-ranking.
1483 ///
1484 /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
1485 /// to pre-#3340 deployments.
1486 /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
1487 /// `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
1488 /// size, or higher for better MMR diversity.
1489 ///
1490 /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
1491 /// cannot saturate the requested top-k.
1492 pub depth: u32,
1493 /// Template applied to the raw user query before embedding.
1494 ///
1495 /// Supports a single `{query}` placeholder which is replaced with the raw query string.
1496 /// Empty string (default) = identity: the query is embedded as-is.
1497 ///
1498 /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
1499 /// is never wrapped. Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
1500 pub search_prompt_template: String,
1501 /// Shape of memory snippets injected into agent context.
1502 ///
1503 /// See [`ContextFormat`] for the exact rendering and token-cost implications.
1504 /// Default: `Structured`.
1505 pub context_format: ContextFormat,
1506 /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
1507 ///
1508 /// When `true` and the query is classified as first-person, the query embedding is
1509 /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
1510 /// towards persona-relevant content for self-referential queries.
1511 ///
1512 /// Default: `true` (low blast-radius: no-op when the persona table is empty).
1513 #[serde(default = "default_query_bias_correction")]
1514 pub query_bias_correction: bool,
1515 /// Blend weight for query-bias correction (MM-F3, #3341).
1516 ///
1517 /// Controls how much the query embedding shifts towards the profile centroid.
1518 /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
1519 #[serde(default = "default_query_bias_profile_weight")]
1520 pub query_bias_profile_weight: f32,
1521 /// Centroid TTL in seconds (MM-F3, #3341).
1522 ///
1523 /// The profile centroid computed from persona facts is cached for this many seconds.
1524 /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
1525 #[serde(default = "default_query_bias_centroid_ttl_secs")]
1526 pub query_bias_centroid_ttl_secs: u64,
1527}
1528
/// Query-bias correction is enabled unless the config says otherwise.
fn default_query_bias_correction() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1532
/// Default blend weight towards the profile centroid for query-bias correction.
fn default_query_bias_profile_weight() -> f32 {
    const PROFILE_WEIGHT: f32 = 0.25;
    PROFILE_WEIGHT
}
1536
/// Default lifetime of the cached profile centroid: 5 minutes, in seconds.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    5 * 60
}
1540
1541impl Default for RetrievalConfig {
1542 fn default() -> Self {
1543 Self {
1544 depth: 0,
1545 search_prompt_template: String::new(),
1546 context_format: ContextFormat::default(),
1547 query_bias_correction: default_query_bias_correction(),
1548 query_bias_profile_weight: default_query_bias_profile_weight(),
1549 query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1550 }
1551 }
1552}
1553
1554/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
1555///
1556/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
1557/// recall traversal increments the `weight` column of the traversed edges, building
1558/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
1559/// runs a background sweep that identifies high-traffic entity clusters and distills
1560/// them into `graph_rules` entries via an LLM.
1561#[derive(Debug, Clone, Deserialize, Serialize)]
1562#[serde(default)]
1563pub struct HebbianConfig {
1564 /// Master switch. When `false`, no `weight` updates are written to the database
1565 /// and the consolidation loop does not start. Default: `false`.
1566 pub enabled: bool,
1567 /// Weight increment per co-activation (HL-F2, #3344).
1568 ///
1569 /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
1570 /// startup when `enabled = true`. Default: `0.1`.
1571 pub hebbian_lr: f32,
1572 /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
1573 ///
1574 /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
1575 /// Default: `3600` (one hour).
1576 pub consolidation_interval_secs: u64,
1577 /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
1578 /// candidate (HL-F3, #3345). Default: `5.0`.
1579 pub consolidation_threshold: f64,
1580 /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
1581 ///
1582 /// Falls back to the main provider when `None` or unresolvable.
1583 #[serde(default)]
1584 pub consolidate_provider: Option<ProviderName>,
1585 /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
1586 pub max_candidates_per_sweep: usize,
1587 /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
1588 ///
1589 /// An entity is skipped if its `consolidated_at` timestamp is within this window.
1590 /// Default: `86400` (24 hours).
1591 pub consolidation_cooldown_secs: u64,
1592 /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
1593 /// Default: `30`.
1594 pub consolidation_prompt_timeout_secs: u64,
1595 /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
1596 /// (HL-F4, #3345). Default: `20`.
1597 pub consolidation_max_neighbors: usize,
1598 /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
1599 ///
1600 /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
1601 /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
1602 pub spreading_activation: bool,
1603 /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
1604 pub spread_depth: u32,
1605 /// MAGMA edge-type filter for HL-F5 spreading activation.
1606 ///
1607 /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
1608 /// Empty = traverse all edge types. Default: `[]`.
1609 pub spread_edge_types: Vec<EdgeType>,
1610 /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
1611 ///
1612 /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
1613 /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
1614 pub step_budget_ms: u64,
1615 /// Timeout for the initial query embedding call in HL-F5, in seconds.
1616 ///
1617 /// `0` disables the timeout. Default: `5`.
1618 pub embed_timeout_secs: u64,
1619}
1620
1621impl Default for HebbianConfig {
1622 fn default() -> Self {
1623 Self {
1624 enabled: false,
1625 hebbian_lr: 0.1,
1626 consolidation_interval_secs: 3600,
1627 consolidation_threshold: 5.0,
1628 consolidate_provider: None,
1629 max_candidates_per_sweep: 10,
1630 consolidation_cooldown_secs: 86_400,
1631 consolidation_prompt_timeout_secs: 30,
1632 consolidation_max_neighbors: 20,
1633 spreading_activation: false,
1634 spread_depth: 2,
1635 spread_edge_types: Vec::new(),
1636 step_budget_ms: 8,
1637 embed_timeout_secs: 5,
1638 }
1639 }
1640}
1641
1642/// Compression strategy for active context compression (#1161).
1643#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
1644#[serde(tag = "strategy", rename_all = "snake_case")]
1645pub enum CompressionStrategy {
1646 /// Compress only when reactive compaction fires (current behavior).
1647 #[default]
1648 Reactive,
1649 /// Compress proactively when context exceeds `threshold_tokens`.
1650 Proactive {
1651 /// Token count that triggers proactive compression.
1652 threshold_tokens: usize,
1653 /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
1654 max_summary_tokens: usize,
1655 },
1656 /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
1657 /// safety net. The `compress_context` tool is also available in all other strategies.
1658 Autonomous,
1659 /// Knowledge-block-aware compression strategy (#2510).
1660 ///
1661 /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
1662 /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
1663 Focus,
1664}
1665
1666/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
1667///
1668/// When `context-compression` feature is enabled, this replaces the default oldest-first
1669/// heuristic with scored eviction.
1670#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
1671#[serde(rename_all = "snake_case")]
1672pub enum PruningStrategy {
1673 /// Oldest-first eviction — current default behavior.
1674 #[default]
1675 Reactive,
1676 /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
1677 /// lowest-first. Requires `context-compression` feature.
1678 TaskAware,
1679 /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
1680 /// Requires `context-compression` feature.
1681 Mig,
1682 /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
1683 /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
1684 /// Requires `context-compression` feature.
1685 Subgoal,
1686 /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
1687 /// Requires `context-compression` feature.
1688 SubgoalMig,
1689}
1690
1691impl PruningStrategy {
1692 /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1693 #[must_use]
1694 pub fn is_subgoal(self) -> bool {
1695 matches!(self, Self::Subgoal | Self::SubgoalMig)
1696 }
1697}
1698
1699// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1700// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1701impl<'de> serde::Deserialize<'de> for PruningStrategy {
1702 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1703 let s = String::deserialize(deserializer)?;
1704 s.parse().map_err(serde::de::Error::custom)
1705 }
1706}
1707
1708impl std::str::FromStr for PruningStrategy {
1709 type Err = String;
1710
1711 fn from_str(s: &str) -> Result<Self, Self::Err> {
1712 match s {
1713 "reactive" => Ok(Self::Reactive),
1714 "task_aware" | "task-aware" => Ok(Self::TaskAware),
1715 "mig" => Ok(Self::Mig),
1716 // task_aware_mig was removed (dead code — was routed to scored path only).
1717 // Fall back to Reactive so existing TOML configs do not hard-error on startup.
1718 "task_aware_mig" | "task-aware-mig" => {
1719 tracing::warn!(
1720 "pruning strategy `task_aware_mig` has been removed; \
1721 falling back to `reactive`. Use `task_aware` or `mig` instead."
1722 );
1723 Ok(Self::Reactive)
1724 }
1725 "subgoal" => Ok(Self::Subgoal),
1726 "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
1727 other => Err(format!(
1728 "unknown pruning strategy `{other}`, expected \
1729 reactive|task_aware|mig|subgoal|subgoal_mig"
1730 )),
1731 }
1732 }
1733}
1734
/// Default value for the high-density budget setting.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_high_density_budget() -> f32 {
    const HIGH_DENSITY_BUDGET: f32 = 0.7;
    HIGH_DENSITY_BUDGET
}
1738
/// Default value for the low-density budget setting.
// NOTE(review): the field that consumes this default is outside this chunk.
fn default_low_density_budget() -> f32 {
    const LOW_DENSITY_BUDGET: f32 = 0.3;
    LOW_DENSITY_BUDGET
}
1742
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// The container-level `#[serde(default)]` fills any key omitted from the config with
/// the corresponding value from this type's `Default` implementation.
///
/// NOTE(review): the ranges documented on `decay_rate` and `forgetting_floor` are not
/// enforced by this type (no `deserialize_with` validator is attached) — confirm they are
/// checked wherever the sweep consumes them.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
1770
1771impl Default for ForgettingConfig {
1772 fn default() -> Self {
1773 Self {
1774 enabled: false,
1775 decay_rate: 0.1,
1776 forgetting_floor: 0.05,
1777 sweep_interval_secs: 7200,
1778 sweep_batch_size: 500,
1779 replay_window_hours: 24,
1780 replay_min_access_count: 3,
1781 protect_recent_hours: 24,
1782 protect_min_access_count: 3,
1783 }
1784 }
1785}
1786
1787/// Configuration for active context compression (#1161).
1788#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1789#[serde(default)]
1790pub struct CompressionConfig {
1791 /// Compression strategy.
1792 #[serde(flatten)]
1793 pub strategy: CompressionStrategy,
1794 /// Tool-output pruning strategy (requires `context-compression` feature).
1795 pub pruning_strategy: PruningStrategy,
1796 /// Model to use for compression summaries.
1797 ///
1798 /// Currently unused — the primary summary provider is used regardless of this value.
1799 /// Reserved for future per-compression model selection. Setting this field has no effect.
1800 pub model: String,
1801 /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
1802 /// Falls back to the primary provider when empty. Default: `""`.
1803 pub compress_provider: ProviderName,
1804 /// Compaction probe: validates summary quality before committing it (#1609).
1805 #[serde(default)]
1806 pub probe: CompactionProbeConfig,
1807 /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
1808 ///
1809 /// When enabled, tool output bodies in the compaction range are saved to
1810 /// `tool_overflow` with `archive_type = 'archive'` before summarization.
1811 /// The LLM summarizes placeholder messages; archived content is appended as
1812 /// a postfix after summarization so references survive compaction.
1813 /// Default: `false`.
1814 #[serde(default)]
1815 pub archive_tool_outputs: bool,
1816 /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
1817 /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
1818 /// Falls back to the primary provider when empty. Default: `""`.
1819 pub focus_scorer_provider: ProviderName,
1820 /// Token-budget fraction for high-density content in density-aware compression (#2481).
1821 /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
1822 #[serde(default = "default_high_density_budget")]
1823 pub high_density_budget: f32,
1824 /// Token-budget fraction for low-density content in density-aware compression (#2481).
1825 /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
1826 #[serde(default = "default_low_density_budget")]
1827 pub low_density_budget: f32,
1828 /// Typed-page classification and batch-level assertion checking (#3630).
1829 #[serde(default)]
1830 pub typed_pages: TypedPagesConfig,
1831}
1832
/// Configuration for typed-page compaction invariants (#3630).
///
/// Controls classification, batch-level assertion checking, and audit logging.
/// All behavior is disabled by default; set `enabled = true` to activate.
///
/// Keys omitted from the TOML section are filled from this type's `Default`
/// implementation (container-level `#[serde(default)]`).
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.typed_pages]
/// enabled = true
/// enforcement = "active"
/// audit_path = ""
/// audit_channel_capacity = 256
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(default)]
pub struct TypedPagesConfig {
    /// Enable typed-page classification and batch-level assertion checking.
    /// Default: `false`.
    pub enabled: bool,
    /// Enforcement mode:
    ///
    /// - `observe`: classify and emit audit records only; no behavioral change.
    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
    ///
    /// Default: `"observe"`.
    pub enforcement: TypedPagesEnforcement,
    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
    /// Default: `""`.
    ///
    /// # Security
    ///
    /// This field is **operator-only trusted input** read from the agent's configuration file.
    /// Write access to the config file implies file-system write access, so no additional
    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
    /// configuration sources.
    pub audit_path: String,
    /// Bounded channel capacity for the async audit writer. Default: `256`.
    ///
    /// NOTE(review): `0` is not rejected by this type — confirm how the audit writer
    /// handles a zero-capacity channel.
    pub audit_channel_capacity: usize,
}
1873
1874impl Default for TypedPagesConfig {
1875 fn default() -> Self {
1876 Self {
1877 enabled: false,
1878 enforcement: TypedPagesEnforcement::Observe,
1879 audit_path: String::new(),
1880 audit_channel_capacity: 256,
1881 }
1882 }
1883}
1884
/// Enforcement mode for typed-page compaction (#3630).
///
/// Serialized in `snake_case`, so the accepted config values are `"observe"` and
/// `"active"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum TypedPagesEnforcement {
    /// Classify and audit only. Zero behavioral change relative to the untyped path.
    /// This is the `Default` variant.
    #[default]
    Observe,
    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
    Active,
}
1895
/// Serde default for `SidequestConfig::interval_turns`.
fn default_sidequest_interval_turns() -> u32 {
    const INTERVAL_TURNS: u32 = 4;
    INTERVAL_TURNS
}
1899
/// Serde default for `SidequestConfig::max_eviction_ratio`.
fn default_sidequest_max_eviction_ratio() -> f32 {
    const MAX_EVICTION_RATIO: f32 = 0.5;
    MAX_EVICTION_RATIO
}
1903
/// Serde default for `SidequestConfig::max_cursors`.
fn default_sidequest_max_cursors() -> usize {
    const MAX_CURSORS: usize = 30;
    MAX_CURSORS
}
1907
/// Serde default for `SidequestConfig::min_cursor_tokens`.
fn default_sidequest_min_cursor_tokens() -> usize {
    const MIN_CURSOR_TOKENS: usize = 100;
    MIN_CURSOR_TOKENS
}
1911
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Every field is optional in the config: `enabled` falls back to this type's `Default`
/// implementation, and the remaining fields use their dedicated `default_sidequest_*`
/// helper functions.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    ///
    /// NOTE(review): no range validation is attached here — confirm the consumer clamps
    /// or rejects values outside `[0.0, 1.0]`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
1932
1933impl Default for SidequestConfig {
1934 fn default() -> Self {
1935 Self {
1936 enabled: false,
1937 interval_turns: default_sidequest_interval_turns(),
1938 max_eviction_ratio: default_sidequest_max_eviction_ratio(),
1939 max_cursors: default_sidequest_max_cursors(),
1940 min_cursor_tokens: default_sidequest_min_cursor_tokens(),
1941 }
1942 }
1943}
1944
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
///
/// Variants are (de)serialized in `snake_case`; the config spelling of each variant is
/// noted on the variant itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior). Config value: `synapse`.
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior). Config value: `bfs`.
    Bfs,
    /// A* shortest-path traversal via petgraph. Config value: `astar`.
    ///
    /// Renamed explicitly because the `snake_case` rule would otherwise produce `a_star`.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes. Config value: `water_circles`.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop. Config value: `beam_search`.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query. Config value: `hybrid`.
    Hybrid,
}
1967
/// Serde default for `BeamSearchConfig::beam_width`.
fn default_beam_width() -> usize {
    const BEAM_WIDTH: usize = 10;
    BEAM_WIDTH
}
1971
/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    ///
    /// NOTE(review): `0` is not rejected at deserialization — confirm how the traversal
    /// treats an empty beam.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
1982
1983impl Default for BeamSearchConfig {
1984 fn default() -> Self {
1985 Self {
1986 beam_width: default_beam_width(),
1987 }
1988 }
1989}
1990
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes.
///
/// `Default` is derived, so the default `ring_limit` is `0` (auto).
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}
2000
/// Serde default for `ExperienceConfig::evolution_sweep_interval` (turns between sweeps).
fn default_evolution_sweep_interval() -> usize {
    const SWEEP_INTERVAL_TURNS: usize = 50;
    SWEEP_INTERVAL_TURNS
}
2004
/// Serde default for `ExperienceConfig::confidence_prune_threshold`.
fn default_confidence_prune_threshold() -> f32 {
    const PRUNE_THRESHOLD: f32 = 0.1;
    PRUNE_THRESHOLD
}
2008
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
///
/// Every field carries its own serde default, so an empty section deserializes to the
/// same values as `ExperienceConfig::default()`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
2027
2028impl Default for ExperienceConfig {
2029 fn default() -> Self {
2030 Self {
2031 enabled: false,
2032 evolution_sweep_enabled: false,
2033 confidence_prune_threshold: default_confidence_prune_threshold(),
2034 evolution_sweep_interval: default_evolution_sweep_interval(),
2035 }
2036 }
2037}
2038
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// Keys omitted from the section are filled from this type's `Default` implementation or
/// from the per-field serde default named on the field.
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable the knowledge graph memory subsystem. Default: `false`.
    pub enabled: bool,
    /// Model name for graph extraction. Default: `""`.
    ///
    /// NOTE(review): how this interacts with `extract_provider` is not visible here —
    /// confirm which of the two the extractor actually honors.
    pub extract_model: String,
    /// Per-message cap on extracted entities (as the name suggests — confirm against the
    /// extractor). Default supplied by `default_graph_max_entities_per_message`.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Per-message cap on extracted edges (as the name suggests — confirm against the
    /// extractor). Default supplied by `default_graph_max_edges_per_message`.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Community refresh interval. Default supplied by
    /// `default_graph_community_refresh_interval`.
    ///
    /// NOTE(review): the unit (messages? turns?) is not stated in this file — confirm.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Similarity threshold used for entity resolution. Default supplied by
    /// `default_graph_entity_similarity_threshold`.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Extraction timeout, in seconds. Default supplied by
    /// `default_graph_extraction_timeout_secs`.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Toggle for embedding-based entity resolution. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Threshold for treating an entity match as ambiguous. Default supplied by
    /// `default_graph_entity_ambiguous_threshold`.
    ///
    /// NOTE(review): presumably sits below `entity_similarity_threshold` — confirm.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum traversal hops during graph recall. Default supplied by
    /// `default_graph_max_hops`.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Maximum number of results returned by graph recall. Default supplied by
    /// `default_graph_recall_limit`.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    ///
    /// Deserialized through `validate_temporal_decay_rate`.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
    ///
    /// When non-empty, graph extraction (and downstream note linking and community
    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
    /// which causes systematic quality gate rejections. A named provider built via
    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
    /// has no quality gate attached.
    ///
    /// Falls back to the primary provider when empty. Default: `""` (use primary).
    #[serde(default)]
    pub extract_provider: ProviderName,
    /// Named LLM provider for hybrid strategy classification.
    /// Falls back to the default provider when `None`. Default: `None`.
    #[serde(default)]
    pub strategy_classifier_provider: Option<ProviderName>,
    /// Beam search configuration.
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration.
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration.
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    ///
    /// The range is enforced at deserialization by `validate_link_weight_decay_lambda`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
    /// APEX-MEM append-only write path (#3631).
    ///
    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
    /// supersession chains instead of the legacy destructive-update path.
    #[serde(default)]
    pub apex_mem: ApexMemConfig,
    /// LLM call timeout per extraction request, in seconds. Default: `30`.
    #[serde(default = "default_graph_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
    /// PRISM query-sensitive edge costing in A* graph recall.
    ///
    /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
    /// between the query embedding and the target entity embedding:
    /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
    /// Edges toward semantically relevant entities receive lower cost and are therefore
    /// preferred by A*, producing query-aligned recall paths.
    ///
    /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
    /// store is unavailable or a target entity has no stored embedding, falls back to the
    /// baseline cost `1.0 - confidence`.
    ///
    /// Default: `false` (preserves existing A* behaviour).
    #[serde(default)]
    pub query_sensitive_cost: bool,
}
2203
/// Serde default for `GraphConfig::pool_size`.
fn default_graph_pool_size() -> u32 {
    const GRAPH_POOL_SIZE: u32 = 3;
    GRAPH_POOL_SIZE
}
2207
/// Serde default for `GraphConfig::llm_timeout_secs`.
fn default_graph_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
2211
/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
///
/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
/// the full supersession chain and enables conflict resolution.
///
/// `Default` is derived, so the feature is off unless the config turns it on.
///
/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ApexMemConfig {
    /// Enable the APEX-MEM append-only write path. Default: `false`.
    pub enabled: bool,
}
2225
/// Serde default for `WriteQualityGateConfig::threshold`.
fn default_quality_gate_threshold() -> f32 {
    const THRESHOLD: f32 = 0.55;
    THRESHOLD
}
2229
/// Serde default for `WriteQualityGateConfig::recent_window`.
fn default_quality_gate_recent_window() -> usize {
    const RECENT_WINDOW: usize = 32;
    RECENT_WINDOW
}
2233
/// Serde default for `WriteQualityGateConfig::contradiction_grace_seconds`.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    const GRACE_SECONDS: u64 = 300;
    GRACE_SECONDS
}
2237
/// Serde default for `WriteQualityGateConfig::information_value_weight`.
fn default_quality_gate_information_value_weight() -> f32 {
    const INFORMATION_VALUE_WEIGHT: f32 = 0.4;
    INFORMATION_VALUE_WEIGHT
}
2241
/// Serde default for `WriteQualityGateConfig::reference_completeness_weight`.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    const REFERENCE_COMPLETENESS_WEIGHT: f32 = 0.3;
    REFERENCE_COMPLETENESS_WEIGHT
}
2245
/// Serde default for `WriteQualityGateConfig::contradiction_weight`.
fn default_quality_gate_contradiction_weight() -> f32 {
    const CONTRADICTION_WEIGHT: f32 = 0.3;
    CONTRADICTION_WEIGHT
}
2249
/// Serde default for `WriteQualityGateConfig::rejection_rate_alarm_ratio`.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    const ALARM_RATIO: f32 = 0.35;
    ALARM_RATIO
}
2253
/// Serde default for `WriteQualityGateConfig::llm_timeout_ms` (milliseconds).
fn default_quality_gate_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 500;
    LLM_TIMEOUT_MS
}
2257
/// Serde default for `WriteQualityGateConfig::llm_weight`.
fn default_quality_gate_llm_weight() -> f32 {
    const LLM_WEIGHT: f32 = 0.5;
    LLM_WEIGHT
}
2261
/// Serde default for `WriteQualityGateConfig::reference_check_lang_en`.
fn default_quality_gate_reference_check_lang_en() -> bool {
    const REFERENCE_CHECK_ENABLED: bool = true;
    REFERENCE_CHECK_ENABLED
}
2265
/// Write quality gate configuration (`[memory.quality_gate]`).
///
/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
/// scoring is opt-in via `quality_gate_provider`.
///
/// NOTE(review): none of the numeric fields has a `deserialize_with` validator, so
/// out-of-range or non-finite thresholds and weights are accepted at load time — confirm
/// they are sanitized where the gate is constructed.
///
/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct WriteQualityGateConfig {
    /// Enable the write quality gate. Default: `false`.
    pub enabled: bool,
    /// Combined score threshold below which writes are rejected. Default: `0.55`.
    #[serde(default = "default_quality_gate_threshold")]
    pub threshold: f32,
    /// Number of recent writes compared for information-value scoring. Default: `32`.
    #[serde(default = "default_quality_gate_recent_window")]
    pub recent_window: usize,
    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
    pub contradiction_grace_seconds: u64,
    /// Weight of `information_value` sub-score. Default: `0.4`.
    #[serde(default = "default_quality_gate_information_value_weight")]
    pub information_value_weight: f32,
    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_reference_completeness_weight")]
    pub reference_completeness_weight: f32,
    /// Weight of `contradiction` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_contradiction_weight")]
    pub contradiction_weight: f32,
    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
    pub rejection_rate_alarm_ratio: f32,
    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
    #[serde(default)]
    pub quality_gate_provider: ProviderName,
    /// LLM timeout in milliseconds. Default: `500`.
    #[serde(default = "default_quality_gate_llm_timeout_ms")]
    pub llm_timeout_ms: u64,
    /// LLM blend weight into final score. Default: `0.5`.
    #[serde(default = "default_quality_gate_llm_weight")]
    pub llm_weight: f32,
    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
    #[serde(default = "default_quality_gate_reference_check_lang_en")]
    pub reference_check_lang_en: bool,
}
2312
2313impl Default for WriteQualityGateConfig {
2314 fn default() -> Self {
2315 Self {
2316 enabled: false,
2317 threshold: default_quality_gate_threshold(),
2318 recent_window: default_quality_gate_recent_window(),
2319 contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
2320 information_value_weight: default_quality_gate_information_value_weight(),
2321 reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
2322 contradiction_weight: default_quality_gate_contradiction_weight(),
2323 rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
2324 quality_gate_provider: ProviderName::default(),
2325 llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
2326 llm_weight: default_quality_gate_llm_weight(),
2327 reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
2328 }
2329 }
2330}
2331
2332impl Default for GraphConfig {
2333 fn default() -> Self {
2334 Self {
2335 enabled: false,
2336 extract_model: String::new(),
2337 max_entities_per_message: default_graph_max_entities_per_message(),
2338 max_edges_per_message: default_graph_max_edges_per_message(),
2339 community_refresh_interval: default_graph_community_refresh_interval(),
2340 entity_similarity_threshold: default_graph_entity_similarity_threshold(),
2341 extraction_timeout_secs: default_graph_extraction_timeout_secs(),
2342 use_embedding_resolution: false,
2343 entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
2344 max_hops: default_graph_max_hops(),
2345 recall_limit: default_graph_recall_limit(),
2346 expired_edge_retention_days: default_graph_expired_edge_retention_days(),
2347 max_entities: 0,
2348 community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
2349 community_summary_concurrency: default_graph_community_summary_concurrency(),
2350 lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
2351 temporal_decay_rate: default_graph_temporal_decay_rate(),
2352 edge_history_limit: default_graph_edge_history_limit(),
2353 note_linking: NoteLinkingConfig::default(),
2354 spreading_activation: SpreadingActivationConfig::default(),
2355 retrieval_strategy: GraphRetrievalStrategy::default(),
2356 extract_provider: ProviderName::default(),
2357 strategy_classifier_provider: None,
2358 beam_search: BeamSearchConfig::default(),
2359 watercircles: WaterCirclesConfig::default(),
2360 experience: ExperienceConfig::default(),
2361 link_weight_decay_lambda: default_link_weight_decay_lambda(),
2362 link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
2363 belief_revision: BeliefRevisionConfig::default(),
2364 rpe: RpeConfig::default(),
2365 pool_size: default_graph_pool_size(),
2366 apex_mem: ApexMemConfig::default(),
2367 llm_timeout_secs: default_graph_llm_timeout_secs(),
2368 query_sensitive_cost: false,
2369 }
2370 }
2371}
2372
/// Serde default for `ConsolidationConfig::confidence_threshold`.
fn default_consolidation_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.7;
    CONFIDENCE_THRESHOLD
}
2376
/// Serde default for `ConsolidationConfig::sweep_interval_secs` (one hour).
fn default_consolidation_sweep_interval_secs() -> u64 {
    const SWEEP_INTERVAL_SECS: u64 = 3600;
    SWEEP_INTERVAL_SECS
}
2380
/// Serde default for `ConsolidationConfig::sweep_batch_size`.
fn default_consolidation_sweep_batch_size() -> usize {
    const SWEEP_BATCH_SIZE: usize = 50;
    SWEEP_BATCH_SIZE
}
2384
/// Serde default for `ConsolidationConfig::similarity_threshold`.
fn default_consolidation_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
2388
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// NOTE(review): `confidence_threshold` and `similarity_threshold` are plain `f32`s with no
/// validator attached — confirm out-of-range values are handled by the sweep.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
    #[serde(default = "default_consolidation_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
}
2420
2421impl Default for ConsolidationConfig {
2422 fn default() -> Self {
2423 Self {
2424 enabled: false,
2425 consolidation_provider: ProviderName::default(),
2426 confidence_threshold: default_consolidation_confidence_threshold(),
2427 sweep_interval_secs: default_consolidation_sweep_interval_secs(),
2428 sweep_batch_size: default_consolidation_sweep_batch_size(),
2429 similarity_threshold: default_consolidation_similarity_threshold(),
2430 llm_timeout_secs: default_consolidation_llm_timeout_secs(),
2431 }
2432 }
2433}
2434
/// Serde default for `ConsolidationConfig::llm_timeout_secs`.
fn default_consolidation_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
2438
/// Serde default for `GraphConfig::link_weight_decay_lambda`.
fn default_link_weight_decay_lambda() -> f64 {
    const DECAY_LAMBDA: f64 = 0.95;
    DECAY_LAMBDA
}
2442
/// Serde default for `GraphConfig::link_weight_decay_interval_secs` (24 hours).
fn default_link_weight_decay_interval_secs() -> u64 {
    const DECAY_INTERVAL_SECS: u64 = 86400;
    DECAY_INTERVAL_SECS
}
2446
2447fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
2448where
2449 D: serde::Deserializer<'de>,
2450{
2451 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
2452 if value.is_nan() || value.is_infinite() {
2453 return Err(serde::de::Error::custom(
2454 "link_weight_decay_lambda must be a finite number",
2455 ));
2456 }
2457 if !(value > 0.0 && value <= 1.0) {
2458 return Err(serde::de::Error::custom(
2459 "link_weight_decay_lambda must be in (0.0, 1.0]",
2460 ));
2461 }
2462 Ok(value)
2463}
2464
2465fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
2466where
2467 D: serde::Deserializer<'de>,
2468{
2469 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2470 if value.is_nan() || value.is_infinite() {
2471 return Err(serde::de::Error::custom(
2472 "threshold must be a finite number",
2473 ));
2474 }
2475 if !(0.0..=1.0).contains(&value) {
2476 return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
2477 }
2478 Ok(value)
2479}
2480
2481fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
2482where
2483 D: serde::Deserializer<'de>,
2484{
2485 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2486 if value.is_nan() || value.is_infinite() {
2487 return Err(serde::de::Error::custom(
2488 "fast_path_margin must be a finite number",
2489 ));
2490 }
2491 if !(0.0..=1.0).contains(&value) {
2492 return Err(serde::de::Error::custom(
2493 "fast_path_margin must be in [0.0, 1.0]",
2494 ));
2495 }
2496 Ok(value)
2497}
2498
/// Default admission `threshold` value (`0.40`).
fn default_admission_threshold() -> f32 {
    const ADMISSION_THRESHOLD: f32 = 0.40;
    ADMISSION_THRESHOLD
}
2502
/// Default admission `fast_path_margin` value (`0.15`).
fn default_admission_fast_path_margin() -> f32 {
    const FAST_PATH_MARGIN: f32 = 0.15;
    FAST_PATH_MARGIN
}
2506
/// Default for `rl_min_samples` (`500`).
fn default_rl_min_samples() -> u32 {
    const RL_MIN_SAMPLES: u32 = 500;
    RL_MIN_SAMPLES
}
2510
/// Default RL retrain interval, in seconds (`3600`).
fn default_rl_retrain_interval_secs() -> u64 {
    const RL_RETRAIN_INTERVAL_SECS: u64 = 3600;
    RL_RETRAIN_INTERVAL_SECS
}
2514
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in `snake_case`, so the accepted config values are `"heuristic"` and `"rl"`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
2529
2530fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
2531where
2532 D: serde::Deserializer<'de>,
2533{
2534 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
2535 if value < 0.0 {
2536 return Err(serde::de::Error::custom(
2537 "admission weight must be non-negative (>= 0.0)",
2538 ));
2539 }
2540 Ok(value)
2541}
2542
2543/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
2544///
2545/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
2546/// All values must be non-negative.
2547#[derive(Debug, Clone, Deserialize, Serialize)]
2548#[serde(default)]
2549pub struct AdmissionWeights {
2550 /// LLM-estimated future reuse probability. Default: `0.30`.
2551 #[serde(deserialize_with = "validate_admission_weight")]
2552 pub future_utility: f32,
2553 /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
2554 #[serde(deserialize_with = "validate_admission_weight")]
2555 pub factual_confidence: f32,
2556 /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
2557 #[serde(deserialize_with = "validate_admission_weight")]
2558 pub semantic_novelty: f32,
2559 /// Temporal recency: always 1.0 at write time. Default: `0.10`.
2560 #[serde(deserialize_with = "validate_admission_weight")]
2561 pub temporal_recency: f32,
2562 /// Content type prior based on role. Default: `0.15`.
2563 #[serde(deserialize_with = "validate_admission_weight")]
2564 pub content_type_prior: f32,
2565 /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
2566 /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
2567 /// Normalized automatically at runtime. Default: `0.0`.
2568 #[serde(deserialize_with = "validate_admission_weight")]
2569 pub goal_utility: f32,
2570}
2571
2572impl Default for AdmissionWeights {
2573 fn default() -> Self {
2574 Self {
2575 future_utility: 0.30,
2576 factual_confidence: 0.15,
2577 semantic_novelty: 0.30,
2578 temporal_recency: 0.10,
2579 content_type_prior: 0.15,
2580 goal_utility: 0.0,
2581 }
2582 }
2583}
2584
2585impl AdmissionWeights {
2586 /// Return weights normalized so they sum to 1.0.
2587 ///
2588 /// All weights are non-negative; the sum is always > 0 when defaults are used.
2589 #[must_use]
2590 pub fn normalized(&self) -> Self {
2591 let sum = self.future_utility
2592 + self.factual_confidence
2593 + self.semantic_novelty
2594 + self.temporal_recency
2595 + self.content_type_prior
2596 + self.goal_utility;
2597 if sum <= f32::EPSILON {
2598 return Self::default();
2599 }
2600 Self {
2601 future_utility: self.future_utility / sum,
2602 factual_confidence: self.factual_confidence / sum,
2603 semantic_novelty: self.semantic_novelty / sum,
2604 temporal_recency: self.temporal_recency / sum,
2605 content_type_prior: self.content_type_prior / sum,
2606 goal_utility: self.goal_utility / sum,
2607 }
2608 }
2609}
2610
2611/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
2612///
2613/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
2614/// Messages below the composite admission threshold are rejected and not persisted.
2615#[derive(Debug, Clone, Deserialize, Serialize)]
2616#[serde(default)]
2617pub struct AdmissionConfig {
2618 /// Enable A-MAC admission control. Default: `false`.
2619 pub enabled: bool,
2620 /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
2621 /// Default: `0.40`.
2622 #[serde(deserialize_with = "validate_admission_threshold")]
2623 pub threshold: f32,
2624 /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
2625 /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
2626 #[serde(deserialize_with = "validate_admission_fast_path_margin")]
2627 pub fast_path_margin: f32,
2628 /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
2629 /// Falls back to the primary provider when empty. Default: `""`.
2630 pub admission_provider: ProviderName,
2631 /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
2632 pub weights: AdmissionWeights,
2633 /// Admission decision strategy. Default: `heuristic`.
2634 #[serde(default)]
2635 pub admission_strategy: AdmissionStrategy,
2636 /// Minimum training samples before the RL model is activated.
2637 /// Below this count the system falls back to `Heuristic`. Default: `500`.
2638 #[serde(default = "default_rl_min_samples")]
2639 pub rl_min_samples: u32,
2640 /// Background RL model retraining interval in seconds. Default: `3600`.
2641 #[serde(default = "default_rl_retrain_interval_secs")]
2642 pub rl_retrain_interval_secs: u64,
2643 /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
2644 /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
2645 /// Zero regression when `false`. Default: `false`.
2646 #[serde(default)]
2647 pub goal_conditioned_write: bool,
2648 /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
2649 /// Used only for borderline cases (similarity within 0.1 of threshold).
2650 /// Falls back to the primary provider when empty. Default: `""`.
2651 #[serde(default)]
2652 pub goal_utility_provider: ProviderName,
2653 /// Minimum cosine similarity between goal embedding and candidate memory
2654 /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
2655 #[serde(default = "default_goal_utility_threshold")]
2656 pub goal_utility_threshold: f32,
2657 /// Weight of the `goal_utility` factor in the composite admission score.
2658 /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
2659 #[serde(default = "default_goal_utility_weight")]
2660 pub goal_utility_weight: f32,
2661}
2662
/// Serde default for `AdmissionConfig::goal_utility_threshold`: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    const DEFAULT_GOAL_THRESHOLD: f32 = 0.4;
    DEFAULT_GOAL_THRESHOLD
}
2666
/// Serde default for `AdmissionConfig::goal_utility_weight`: `0.25`.
fn default_goal_utility_weight() -> f32 {
    const DEFAULT_GOAL_WEIGHT: f32 = 0.25;
    DEFAULT_GOAL_WEIGHT
}
2670
2671impl Default for AdmissionConfig {
2672 fn default() -> Self {
2673 Self {
2674 enabled: false,
2675 threshold: default_admission_threshold(),
2676 fast_path_margin: default_admission_fast_path_margin(),
2677 admission_provider: ProviderName::default(),
2678 weights: AdmissionWeights::default(),
2679 admission_strategy: AdmissionStrategy::default(),
2680 rl_min_samples: default_rl_min_samples(),
2681 rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
2682 goal_conditioned_write: false,
2683 goal_utility_provider: ProviderName::default(),
2684 goal_utility_threshold: default_goal_utility_threshold(),
2685 goal_utility_weight: default_goal_utility_weight(),
2686 }
2687 }
2688}
2689
2690/// Routing strategy for `[memory.store_routing]`.
2691#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
2692#[serde(rename_all = "snake_case")]
2693pub enum StoreRoutingStrategy {
2694 /// Pure heuristic pattern matching. Zero LLM calls. Default.
2695 #[default]
2696 Heuristic,
2697 /// LLM-based classification via `routing_classifier_provider`.
2698 Llm,
2699 /// Heuristic first; escalates to LLM only when confidence is low.
2700 Hybrid,
2701}
2702
2703/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
2704///
2705/// Controls how each query is classified and routed to the appropriate memory
2706/// backend(s), avoiding unnecessary store queries for simple lookups.
2707#[derive(Debug, Clone, Deserialize, Serialize)]
2708#[serde(default)]
2709pub struct StoreRoutingConfig {
2710 /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
2711 /// directly (existing behavior). Default: `false`.
2712 pub enabled: bool,
2713 /// Routing strategy. Default: `heuristic`.
2714 pub strategy: StoreRoutingStrategy,
2715 /// Provider name from `[[llm.providers]]` for LLM-based classification.
2716 /// Falls back to the primary provider when empty. Default: `""`.
2717 pub routing_classifier_provider: ProviderName,
2718 /// Route to use when the classifier is uncertain (confidence < threshold).
2719 ///
2720 /// Defaults to [`MemoryRoute::Hybrid`].
2721 pub fallback_route: MemoryRoute,
2722 /// Confidence threshold below which `HybridRouter` escalates to LLM.
2723 /// Range: `[0.0, 1.0]`. Default: `0.7`.
2724 pub confidence_threshold: f32,
2725}
2726
2727impl Default for StoreRoutingConfig {
2728 fn default() -> Self {
2729 Self {
2730 enabled: false,
2731 strategy: StoreRoutingStrategy::Heuristic,
2732 routing_classifier_provider: ProviderName::default(),
2733 fallback_route: MemoryRoute::Hybrid,
2734 confidence_threshold: 0.7,
2735 }
2736 }
2737}
2738
2739/// Persona memory layer configuration (#2461).
2740///
2741/// When `enabled = true`, user preferences and domain knowledge are extracted from
2742/// conversation history via a cheap LLM provider and injected after the system prompt.
2743#[derive(Debug, Clone, Deserialize, Serialize)]
2744#[serde(default)]
2745pub struct PersonaConfig {
2746 /// Enable persona memory extraction and injection. Default: `false`.
2747 pub enabled: bool,
2748 /// Provider name from `[[llm.providers]]` for persona extraction.
2749 /// Should be a cheap/fast model. Falls back to the primary provider when empty.
2750 pub persona_provider: ProviderName,
2751 /// Minimum confidence threshold for facts included in context. Default: `0.6`.
2752 pub min_confidence: f64,
2753 /// Minimum user messages before extraction runs in a session. Default: `3`.
2754 pub min_messages: usize,
2755 /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
2756 pub max_messages: usize,
2757 /// LLM timeout for the extraction call in seconds. Default: `10`.
2758 pub extraction_timeout_secs: u64,
2759 /// Token budget allocated to persona context in assembly. Default: `500`.
2760 pub context_budget_tokens: usize,
2761}
2762
2763impl Default for PersonaConfig {
2764 fn default() -> Self {
2765 Self {
2766 enabled: false,
2767 persona_provider: ProviderName::default(),
2768 min_confidence: 0.6,
2769 min_messages: 3,
2770 max_messages: 10,
2771 extraction_timeout_secs: 10,
2772 context_budget_tokens: 500,
2773 }
2774 }
2775}
2776
2777/// Trajectory-informed memory configuration (#2498).
2778///
2779/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
2780/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
2781/// Procedural entries are injected into context as "past experience" during assembly.
2782#[derive(Debug, Clone, Deserialize, Serialize)]
2783#[serde(default)]
2784pub struct TrajectoryConfig {
2785 /// Enable trajectory extraction and context injection. Default: `false`.
2786 pub enabled: bool,
2787 /// Provider name from `[[llm.providers]]` for extraction.
2788 /// Should be a fast/cheap model. Falls back to the primary provider when empty.
2789 pub trajectory_provider: ProviderName,
2790 /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
2791 pub context_budget_tokens: usize,
2792 /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
2793 pub max_messages: usize,
2794 /// LLM timeout for the extraction call in seconds. Default: `10`.
2795 pub extraction_timeout_secs: u64,
2796 /// Number of procedural entries retrieved for context injection. Default: `5`.
2797 pub recall_top_k: usize,
2798 /// Minimum confidence score for entries included in context. Default: `0.6`.
2799 pub min_confidence: f64,
2800}
2801
2802impl Default for TrajectoryConfig {
2803 fn default() -> Self {
2804 Self {
2805 enabled: false,
2806 trajectory_provider: ProviderName::default(),
2807 context_budget_tokens: 400,
2808 max_messages: 10,
2809 extraction_timeout_secs: 10,
2810 recall_top_k: 5,
2811 min_confidence: 0.6,
2812 }
2813 }
2814}
2815
2816/// Category-aware memory configuration (#2428).
2817///
2818/// When `enabled = true`, messages are auto-tagged with a category derived from the active
2819/// skill or tool context. The category is stored in the `messages.category` column and used
2820/// as a Qdrant payload filter during recall.
2821#[derive(Debug, Clone, Deserialize, Serialize)]
2822#[serde(default)]
2823pub struct CategoryConfig {
2824 /// Enable category tagging and category-filtered recall. Default: `false`.
2825 pub enabled: bool,
2826 /// Automatically assign category from skill metadata or tool type. Default: `true`.
2827 pub auto_tag: bool,
2828}
2829
2830impl Default for CategoryConfig {
2831 fn default() -> Self {
2832 Self {
2833 enabled: false,
2834 auto_tag: true,
2835 }
2836 }
2837}
2838
2839/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
2840///
2841/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
2842/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
2843/// for complex queries.
2844#[derive(Debug, Clone, Deserialize, Serialize)]
2845#[serde(default)]
2846pub struct TreeConfig {
2847 /// Enable the memory tree and background consolidation loop. Default: `false`.
2848 pub enabled: bool,
2849 /// Provider name from `[[llm.providers]]` for node consolidation.
2850 /// Should be a fast/cheap model. Falls back to the primary provider when empty.
2851 pub consolidation_provider: ProviderName,
2852 /// Interval between consolidation sweeps in seconds. Default: `300`.
2853 pub sweep_interval_secs: u64,
2854 /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
2855 pub batch_size: usize,
2856 /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
2857 pub similarity_threshold: f32,
2858 /// Maximum tree depth (levels above leaves). Default: `3`.
2859 pub max_level: u32,
2860 /// Token budget allocated to tree memory in context assembly. Default: `400`.
2861 pub context_budget_tokens: usize,
2862 /// Number of tree nodes retrieved for context. Default: `5`.
2863 pub recall_top_k: usize,
2864 /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
2865 pub min_cluster_size: usize,
2866}
2867
2868impl Default for TreeConfig {
2869 fn default() -> Self {
2870 Self {
2871 enabled: false,
2872 consolidation_provider: ProviderName::default(),
2873 sweep_interval_secs: 300,
2874 batch_size: 20,
2875 similarity_threshold: 0.8,
2876 max_level: 3,
2877 context_budget_tokens: 400,
2878 recall_top_k: 5,
2879 min_cluster_size: 2,
2880 }
2881 }
2882}
2883
2884/// Time-based microcompact configuration (#2699).
2885///
2886/// When `enabled = true`, low-value tool outputs are cleared from context
2887/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
2888/// The most recent `keep_recent` tool messages are preserved unconditionally.
2889#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2890#[serde(default)]
2891pub struct MicrocompactConfig {
2892 /// Enable time-based microcompaction. Default: `false`.
2893 pub enabled: bool,
2894 /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
2895 pub gap_threshold_minutes: u32,
2896 /// Number of most recent compactable tool messages to preserve. Default: `3`.
2897 pub keep_recent: usize,
2898}
2899
2900impl Default for MicrocompactConfig {
2901 fn default() -> Self {
2902 Self {
2903 enabled: false,
2904 gap_threshold_minutes: 60,
2905 keep_recent: 3,
2906 }
2907 }
2908}
2909
2910/// autoDream background memory consolidation configuration (#2697).
2911///
2912/// When `enabled = true`, a constrained consolidation subagent runs after
2913/// a session ends if both `min_sessions` and `min_hours` gates pass.
2914#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2915#[serde(default)]
2916pub struct AutoDreamConfig {
2917 /// Enable autoDream consolidation. Default: `false`.
2918 pub enabled: bool,
2919 /// Minimum number of sessions between consolidations. Default: `3`.
2920 pub min_sessions: u32,
2921 /// Minimum hours between consolidations. Default: `24`.
2922 pub min_hours: u32,
2923 /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
2924 /// Falls back to the primary provider when empty. Default: `""`.
2925 pub consolidation_provider: ProviderName,
2926 /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
2927 pub max_iterations: u8,
2928 /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
2929 #[serde(default = "default_autodream_llm_timeout_secs")]
2930 pub llm_timeout_secs: u64,
2931}
2932
2933impl Default for AutoDreamConfig {
2934 fn default() -> Self {
2935 Self {
2936 enabled: false,
2937 min_sessions: 3,
2938 min_hours: 24,
2939 consolidation_provider: ProviderName::default(),
2940 max_iterations: 8,
2941 llm_timeout_secs: default_autodream_llm_timeout_secs(),
2942 }
2943 }
2944}
2945
/// Serde default for `AutoDreamConfig::llm_timeout_secs`: `30` seconds.
fn default_autodream_llm_timeout_secs() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 30;
    DEFAULT_TIMEOUT_SECS
}
2949
2950/// `MagicDocs` auto-maintained markdown configuration (#2702).
2951///
2952/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
2953/// are registered and periodically updated by a constrained subagent.
2954#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
2955#[serde(default)]
2956pub struct MagicDocsConfig {
2957 /// Enable `MagicDocs` auto-maintenance. Default: `false`.
2958 pub enabled: bool,
2959 /// Minimum turns between updates for a given doc path. Default: `5`.
2960 pub min_turns_between_updates: u32,
2961 /// Provider name from `[[llm.providers]]` for doc update LLM calls.
2962 /// Falls back to the primary provider when empty. Default: `""`.
2963 pub update_provider: ProviderName,
2964 /// Maximum agent loop iterations per doc update. Default: `4`.
2965 pub max_iterations: u8,
2966}
2967
2968impl Default for MagicDocsConfig {
2969 fn default() -> Self {
2970 Self {
2971 enabled: false,
2972 min_turns_between_updates: 5,
2973 update_provider: ProviderName::default(),
2974 max_iterations: 4,
2975 }
2976 }
2977}
2978
/// Unit tests for the memory configuration types: TOML deserialization, validator
/// rejection paths, and documented default values.
#[cfg(test)]
mod tests {
    use super::*;

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every supported strategy name must deserialize to its matching variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Names that were never valid strategies must still be a hard error.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    /// Deserialize a `SemanticConfig` from a one-line TOML document that sets only
    /// `importance_weight` to the given raw TOML value.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // Boundary check: a value only slightly below the lower bound (0.0) must still
        // be rejected by the validator, not rounded or clamped into range.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // `goal_utility` is non-zero and part of the checked sum so that all six factors
    // are exercised.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 2.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior
            + n.goal_utility;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior
            + n.goal_utility;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    // Every field is compared so a partial fallback cannot go unnoticed.
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        for (actual, expected) in [
            (n.future_utility, default.future_utility),
            (n.factual_confidence, default.factual_confidence),
            (n.semantic_novelty, default.semantic_novelty),
            (n.temporal_recency, default.temporal_recency),
            (n.content_type_prior, default.content_type_prior),
            (n.goal_utility, default.goal_utility),
        ] {
            assert!(
                (actual - expected).abs() < 0.001,
                "zero-sum weights must fall back to defaults"
            );
        }
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
3254
3255/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
3256///
3257/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
3258/// Successful and failed reasoning chains are compressed into short, generalizable strategy
3259/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
3260/// and injected into the prompt preamble.
3261///
3262/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
3263///
3264/// # Example
3265///
3266/// ```toml
3267/// [memory.reasoning]
3268/// enabled = true
3269/// extract_provider = "fast"
3270/// distill_provider = "fast"
3271/// top_k = 3
3272/// store_limit = 1000
3273/// ```
3274#[derive(Debug, Clone, Deserialize, Serialize)]
3275#[serde(default)]
3276pub struct ReasoningConfig {
3277 /// Enable the reasoning-bank pipeline. Default: `false`.
3278 pub enabled: bool,
3279 /// Provider name from `[[llm.providers]]` for the self-judge step.
3280 /// Falls back to the primary provider when empty. Default: `""`.
3281 pub extract_provider: ProviderName,
3282 /// Provider name from `[[llm.providers]]` for the distillation step.
3283 /// Falls back to the primary provider when empty. Default: `""`.
3284 pub distill_provider: ProviderName,
3285 /// Number of strategies retrieved per turn for context injection. Default: `3`.
3286 pub top_k: usize,
3287 /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
3288 pub store_limit: usize,
3289 /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
3290 pub max_messages: usize,
3291 /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
3292 pub max_message_chars: usize,
3293 /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
3294 pub context_budget_tokens: usize,
3295 /// Minimum number of messages required before self-judge fires. Default: `2`.
3296 pub min_messages: usize,
3297 /// Timeout in seconds for the self-judge LLM call. Default: `30`.
3298 pub extraction_timeout_secs: u64,
3299 /// Timeout in seconds for the distillation LLM call. Default: `30`.
3300 pub distill_timeout_secs: u64,
3301 /// Maximum number of recent messages passed to the self-judge evaluator.
3302 /// Narrowing to the last user+assistant pair improves classification accuracy.
3303 /// Default: `2`.
3304 pub self_judge_window: usize,
3305 /// Minimum characters in the assistant response to trigger self-judge.
3306 /// Short or trivial responses are skipped. Default: `50`.
3307 pub min_assistant_chars: usize,
3308}
3309
3310impl Default for ReasoningConfig {
3311 fn default() -> Self {
3312 Self {
3313 enabled: false,
3314 extract_provider: ProviderName::default(),
3315 distill_provider: ProviderName::default(),
3316 top_k: 3,
3317 store_limit: 1000,
3318 max_messages: 6,
3319 max_message_chars: 2000,
3320 context_budget_tokens: 500,
3321 min_messages: 2,
3322 extraction_timeout_secs: 30,
3323 distill_timeout_secs: 30,
3324 self_judge_window: 2,
3325 min_assistant_chars: 50,
3326 }
3327 }
3328}
3329
3330// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
3331
3332/// Eviction policy variant.
3333///
3334/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid.
3335#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
3336#[serde(rename_all = "lowercase")]
3337pub enum EvictionPolicy {
3338 /// Ebbinghaus forgetting-curve eviction.
3339 #[default]
3340 Ebbinghaus,
3341}
3342
3343/// Configuration for the memory eviction policy.
3344///
3345/// Controls which policy runs during the periodic sweep and how many entries
3346/// are retained. `zeph-memory` re-exports this type from here.
3347#[derive(Debug, Clone, Deserialize, Serialize)]
3348pub struct EvictionConfig {
3349 /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
3350 pub policy: EvictionPolicy,
3351 /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
3352 pub max_entries: usize,
3353 /// How often to run the eviction sweep, in seconds.
3354 pub sweep_interval_secs: u64,
3355}
3356
3357impl Default for EvictionConfig {
3358 fn default() -> Self {
3359 Self {
3360 policy: EvictionPolicy::Ebbinghaus,
3361 max_entries: 0,
3362 sweep_interval_secs: 3600,
3363 }
3364 }
3365}
3366
3367// ── Compression guidelines config (moved from zeph-memory) ───────────────────
3368
3369/// Configuration for ACON failure-driven compression guidelines.
3370///
3371/// `zeph-memory` re-exports this type from here.
3372#[derive(Debug, Clone, Deserialize, Serialize)]
3373#[serde(default)]
3374pub struct CompressionGuidelinesConfig {
3375 /// Enable the feature. Default: `false`.
3376 pub enabled: bool,
3377 /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
3378 pub update_threshold: u16,
3379 /// Maximum token budget for the guidelines document. Default: `500`.
3380 pub max_guidelines_tokens: usize,
3381 /// Maximum failure pairs consumed per update cycle. Default: `10`.
3382 pub max_pairs_per_update: usize,
3383 /// Number of turns after hard compaction to watch for context loss. Default: `10`.
3384 pub detection_window_turns: u64,
3385 /// Interval in seconds between background updater checks. Default: `300`.
3386 pub update_interval_secs: u64,
3387 /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
3388 pub max_stored_pairs: usize,
3389 /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
3390 /// `None` (or `Some("")`) falls back to the primary provider.
3391 #[serde(default, skip_serializing_if = "Option::is_none")]
3392 pub guidelines_provider: Option<ProviderName>,
3393 /// Maintain separate guideline documents per content category.
3394 #[serde(default)]
3395 pub categorized_guidelines: bool,
3396}
3397
3398impl Default for CompressionGuidelinesConfig {
3399 fn default() -> Self {
3400 Self {
3401 enabled: false,
3402 update_threshold: 5,
3403 max_guidelines_tokens: 500,
3404 max_pairs_per_update: 10,
3405 detection_window_turns: 10,
3406 update_interval_secs: 300,
3407 max_stored_pairs: 100,
3408 guidelines_provider: None,
3409 categorized_guidelines: false,
3410 }
3411 }
3412}
3413
3414// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
3415
3416/// Functional category of a compaction probe question.
3417///
3418/// `zeph-memory` re-exports this type from here.
3419#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
3420#[serde(rename_all = "lowercase")]
3421pub enum ProbeCategory {
3422 /// Did specific facts survive? (file paths, function names, values, decisions)
3423 Recall,
3424 /// Does the agent know which files/tools/URLs it used?
3425 Artifact,
3426 /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
3427 Continuation,
3428 /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
3429 Decision,
3430}
3431
3432/// Configuration for the compaction probe.
3433///
3434/// `zeph-memory` re-exports this type from here.
3435#[derive(Debug, Clone, Serialize, Deserialize)]
3436#[serde(default)]
3437pub struct CompactionProbeConfig {
3438 /// Enable compaction probe validation. Default: `false`.
3439 pub enabled: bool,
3440 /// Provider name from `[[llm.providers]]` for probe LLM calls.
3441 /// `None` (or `Some("")`) uses the summary provider.
3442 #[serde(default, skip_serializing_if = "Option::is_none")]
3443 pub probe_provider: Option<ProviderName>,
3444 /// Minimum score to pass without warnings. Default: `0.6`.
3445 pub threshold: f32,
3446 /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
3447 pub hard_fail_threshold: f32,
3448 /// Maximum number of probe questions to generate. Default: `5`.
3449 pub max_questions: usize,
3450 /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
3451 pub timeout_secs: u64,
3452 /// Optional per-category weight multipliers for the overall score.
3453 #[serde(default)]
3454 pub category_weights: Option<HashMap<ProbeCategory, f32>>,
3455}
3456
3457impl Default for CompactionProbeConfig {
3458 fn default() -> Self {
3459 Self {
3460 enabled: false,
3461 probe_provider: None,
3462 threshold: 0.6,
3463 hard_fail_threshold: 0.35,
3464 max_questions: 5,
3465 timeout_secs: 15,
3466 category_weights: None,
3467 }
3468 }
3469}
3470
3471// ── MemCoT semantic state config ─────────────────────────────────────────────
3472
3473/// `MemCoT` semantic-state distillation configuration.
3474///
3475/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
3476/// summarizing conceptual progress across turns. This buffer is injected into graph
3477/// recall queries to improve retrieval relevance.
3478///
3479/// All LLM work (distillation) runs asynchronously — never on the turn thread.
3480/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
3481///
3482/// # Config example
3483///
3484/// ```toml
3485/// [memory.memcot]
3486/// enabled = true
3487/// distill_provider = "fast"
3488/// distill_timeout_secs = 5
3489/// min_assistant_chars = 200
3490/// min_distill_interval_secs = 30
3491/// max_distills_per_session = 50
3492/// max_state_chars = 800
3493/// recall_view = "head"
3494/// ```
3495#[derive(Debug, Clone, Serialize, Deserialize)]
3496#[serde(default)]
3497pub struct MemCotConfig {
3498 /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
3499 ///
3500 /// When `false`, the accumulator is never allocated and no LLM calls are made.
3501 pub enabled: bool,
3502 /// Provider name from `[[llm.providers]]` for distillation.
3503 ///
3504 /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
3505 /// A startup warning is emitted when the resolved model does not look fast-tier.
3506 /// Falls back to the primary provider when empty. Default: `""`.
3507 pub distill_provider: ProviderName,
3508 /// Timeout in seconds for each distillation LLM call. Default: `5`.
3509 pub distill_timeout_secs: u64,
3510 /// Minimum characters in the assistant response to trigger distillation.
3511 /// Short or trivial replies are skipped. Default: `200`.
3512 pub min_assistant_chars: usize,
3513 /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
3514 ///
3515 /// Prevents runaway costs on long sessions with rapid turns.
3516 /// Clearing `/new` resets this counter.
3517 pub min_distill_interval_secs: u64,
3518 /// Maximum distillation spawns per conversation session. Default: `50`.
3519 ///
3520 /// Once this cap is reached the accumulator stops distilling for the rest of the
3521 /// session. Counter is reset when the user sends `/new`.
3522 pub max_distills_per_session: u64,
3523 /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
3524 /// Default: `800`.
3525 pub max_state_chars: usize,
3526 /// Recall view applied when `MemCoT` is active. Default: `Head`.
3527 ///
3528 /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
3529 /// - `zoom_in`: adds source-message provenance to each returned fact.
3530 /// - `zoom_out`: expands 1-hop neighbors per returned fact.
3531 ///
3532 /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
3533 pub recall_view: RecallViewConfig,
3534 /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
3535 pub zoom_out_neighbor_cap: usize,
3536 /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
3537 /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
3538 #[serde(default, skip_serializing_if = "Vec::is_empty")]
3539 pub fast_tier_models: Vec<String>,
3540}
3541
3542/// Recall view variant exposed in config.
3543///
3544/// Maps 1-to-1 to `zeph_memory::RecallView`.
3545#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
3546#[serde(rename_all = "snake_case")]
3547pub enum RecallViewConfig {
3548 /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
3549 #[default]
3550 Head,
3551 /// Adds source-message provenance to each returned fact.
3552 ZoomIn,
3553 /// Expands 1-hop neighbor facts per returned fact.
3554 ZoomOut,
3555}
3556
3557impl Default for MemCotConfig {
3558 fn default() -> Self {
3559 Self {
3560 enabled: false,
3561 distill_provider: ProviderName::default(),
3562 distill_timeout_secs: 5,
3563 min_assistant_chars: 200,
3564 min_distill_interval_secs: 30,
3565 max_distills_per_session: 50,
3566 max_state_chars: 800,
3567 recall_view: RecallViewConfig::Head,
3568 zoom_out_neighbor_cap: 3,
3569 fast_tier_models: Vec::new(),
3570 }
3571 }
3572}
3573
3574/// `OmniMem` retrieval failure tracking configuration (issue #3576).
3575///
3576/// Controls the async logger that records no-hit and low-confidence recall events
3577/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
3578#[derive(Debug, Clone, Deserialize, Serialize)]
3579#[serde(default)]
3580pub struct RetrievalFailuresConfig {
3581 /// Enable retrieval failure logging. Default: `false`.
3582 pub enabled: bool,
3583 /// Composite recall score below which a result is classified as low-confidence.
3584 ///
3585 /// The threshold applies to the post-reranking composite score (which incorporates
3586 /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
3587 /// the scoring pipeline in use. Default: `0.3`.
3588 #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
3589 pub low_confidence_threshold: f32,
3590 /// Days to retain failure records before automatic cleanup. Default: `90`.
3591 #[serde(default = "default_retrieval_failures_retention_days")]
3592 pub retention_days: u32,
3593 /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
3594 #[serde(default = "default_retrieval_failures_channel_capacity")]
3595 pub channel_capacity: usize,
3596 /// Maximum records collected before flushing a batch INSERT. Default: `16`.
3597 #[serde(default = "default_retrieval_failures_batch_size")]
3598 pub batch_size: usize,
3599 /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
3600 #[serde(default = "default_retrieval_failures_flush_interval_ms")]
3601 pub flush_interval_ms: u64,
3602}
3603
3604impl Default for RetrievalFailuresConfig {
3605 fn default() -> Self {
3606 Self {
3607 enabled: false,
3608 low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
3609 retention_days: default_retrieval_failures_retention_days(),
3610 channel_capacity: default_retrieval_failures_channel_capacity(),
3611 batch_size: default_retrieval_failures_batch_size(),
3612 flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
3613 }
3614 }
3615}
3616
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// `MemCotConfig::default()` must be disabled and carry every documented default.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key set in the TOML fixture must land in the matching field.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        // Previously set in the fixture but never verified.
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        // Previously set in the fixture but never verified.
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        // Not present in the fixture, so the container default applies.
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// An empty table must deserialize to the defaults via `#[serde(default)]`.
    #[test]
    fn memcot_config_empty_toml_uses_defaults() {
        let cfg: MemCotConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.max_state_chars, 800);
    }
}
3658
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    /// APEX-MEM is opt-in.
    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    /// `enabled = true` in TOML must switch the feature on.
    #[test]
    fn apex_mem_config_serde_round_trip() {
        let toml = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
    }

    /// An empty table must deserialize to the (disabled) default.
    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    /// `WriteQualityGateConfig::default()` must be disabled and carry the expected defaults.
    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert!((cfg.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert!((cfg.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(cfg.reference_check_lang_en);
    }

    /// Every key set in the TOML fixture must land in the matching field.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let toml = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert!((cfg.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        // The weights and alarm ratio were set in the fixture but never verified.
        assert!((cfg.information_value_weight - 0.5).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.50).abs() < f32::EPSILON);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert!((cfg.llm_weight - 0.3).abs() < f32::EPSILON);
        assert!(!cfg.reference_check_lang_en);
    }

    /// An empty table must deserialize to the defaults.
    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }

    /// `shutdown_summary_provider` must be read from TOML when present.
    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    /// `shutdown_summary_provider` must be empty when the key is absent.
    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    /// `compaction_provider` must be read from TOML when present.
    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            compaction_provider = "mid"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    /// `compaction_provider` must be empty when the key is absent.
    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }
}