zeph_config/memory.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Serde fallback for `MemoryConfig::sqlite_pool_size` when the key is omitted: `5`.
fn default_sqlite_pool_size() -> u32 {
    const POOL_SIZE: u32 = 5;
    POOL_SIZE
}
17
/// Built-in default for `max_history`: `100`.
fn default_max_history() -> usize {
    const MAX_HISTORY: usize = 100;
    MAX_HISTORY
}
21
/// Built-in default for `title_max_chars`: `60`.
fn default_title_max_chars() -> usize {
    const TITLE_MAX_CHARS: usize = 60;
    TITLE_MAX_CHARS
}
25
/// Built-in default document collection name: `"zeph_documents"`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}
29
/// Built-in default for the document `chunk_size`: `1000`.
fn default_document_chunk_size() -> usize {
    const CHUNK_SIZE: usize = 1000;
    CHUNK_SIZE
}
33
/// Built-in default for the document `chunk_overlap`: `100`.
fn default_document_chunk_overlap() -> usize {
    const CHUNK_OVERLAP: usize = 100;
    CHUNK_OVERLAP
}
37
/// Built-in default for the document `top_k`: `3`.
fn default_document_top_k() -> usize {
    const TOP_K: usize = 3;
    TOP_K
}
41
/// Serde fallback for `MemoryConfig::autosave_min_length` when the key is omitted: `20`.
fn default_autosave_min_length() -> usize {
    const MIN_LENGTH: usize = 20;
    MIN_LENGTH
}
45
/// Serde fallback for `MemoryConfig::tool_call_cutoff` when the key is omitted: `6`.
fn default_tool_call_cutoff() -> usize {
    const CUTOFF: usize = 6;
    CUTOFF
}
49
/// Serde fallback for `MemoryConfig::token_safety_margin` when the key is omitted: `1.0`.
fn default_token_safety_margin() -> f32 {
    const SAFETY_MARGIN: f32 = 1.0;
    SAFETY_MARGIN
}
53
/// Serde fallback for `MemoryConfig::redact_credentials` when the key is omitted: `true`.
fn default_redact_credentials() -> bool {
    const REDACT: bool = true;
    REDACT
}
57
/// Serde fallback for `MemoryConfig::qdrant_url` when the key is omitted:
/// `"http://localhost:6334"`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}
61
/// Serde fallback for `MemoryConfig::summarization_threshold` when the key is omitted: `50`.
fn default_summarization_threshold() -> usize {
    const THRESHOLD: usize = 50;
    THRESHOLD
}
65
/// Serde fallback for `MemoryConfig::summarization_llm_timeout_secs`: `60` seconds.
fn default_summarization_llm_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 60;
    TIMEOUT_SECS
}
69
/// Serde fallback for `MemoryConfig::context_budget_tokens` when the key is omitted: `0`.
fn default_context_budget_tokens() -> usize {
    const BUDGET_TOKENS: usize = 0;
    BUDGET_TOKENS
}
73
/// Serde fallback for `MemoryConfig::soft_compaction_threshold` when the key is omitted: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    const SOFT_THRESHOLD: f32 = 0.60;
    SOFT_THRESHOLD
}
77
/// Serde fallback for `MemoryConfig::hard_compaction_threshold` when the key is omitted: `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    const HARD_THRESHOLD: f32 = 0.90;
    HARD_THRESHOLD
}
81
/// Serde fallback for `MemoryConfig::compaction_preserve_tail` when the key is omitted: `6`.
fn default_compaction_preserve_tail() -> usize {
    const PRESERVE_TAIL: usize = 6;
    PRESERVE_TAIL
}
85
/// Serde fallback for `MemoryConfig::compaction_cooldown_turns` when the key is omitted: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    const COOLDOWN_TURNS: u8 = 2;
    COOLDOWN_TURNS
}
89
/// Serde fallback for `MemoryConfig::auto_budget` when the key is omitted: `true`.
fn default_auto_budget() -> bool {
    const AUTO_BUDGET: bool = true;
    AUTO_BUDGET
}
93
/// Serde fallback for `MemoryConfig::prune_protect_tokens` when the key is omitted: `40_000`.
fn default_prune_protect_tokens() -> usize {
    const PROTECT_TOKENS: usize = 40_000;
    PROTECT_TOKENS
}
97
/// Serde fallback for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    const SCORE_THRESHOLD: f32 = 0.35;
    SCORE_THRESHOLD
}
101
/// Built-in default for `temporal_decay_half_life_days`: `30` days.
fn default_temporal_decay_half_life_days() -> u32 {
    const HALF_LIFE_DAYS: u32 = 30;
    HALF_LIFE_DAYS
}
105
/// Built-in default for `mmr_lambda`: `0.7`.
fn default_mmr_lambda() -> f32 {
    const MMR_LAMBDA: f32 = 0.7;
    MMR_LAMBDA
}
109
/// Built-in default for the semantic `enabled` flag: `true`.
fn default_semantic_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
113
/// Built-in default for `recall_limit`: `5`.
fn default_recall_limit() -> usize {
    const RECALL_LIMIT: usize = 5;
    RECALL_LIMIT
}
117
/// Built-in default for `vector_weight`: `0.7`.
fn default_vector_weight() -> f64 {
    const VECTOR_WEIGHT: f64 = 0.7;
    VECTOR_WEIGHT
}
121
/// Built-in default for `keyword_weight`: `0.3`.
fn default_keyword_weight() -> f64 {
    const KEYWORD_WEIGHT: f64 = 0.3;
    KEYWORD_WEIGHT
}
125
/// Built-in default for the graph `max_entities_per_message`: `10`.
fn default_graph_max_entities_per_message() -> usize {
    const MAX_ENTITIES: usize = 10;
    MAX_ENTITIES
}
129
/// Built-in default for the graph `max_edges_per_message`: `15`.
fn default_graph_max_edges_per_message() -> usize {
    const MAX_EDGES: usize = 15;
    MAX_EDGES
}
133
/// Built-in default for the graph `community_refresh_interval`: `100`.
fn default_graph_community_refresh_interval() -> usize {
    const REFRESH_INTERVAL: usize = 100;
    REFRESH_INTERVAL
}
137
/// Built-in default for the graph `community_summary_max_prompt_bytes`: `8192` bytes.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    const MAX_PROMPT_BYTES: usize = 8192;
    MAX_PROMPT_BYTES
}
141
/// Built-in default for the graph `community_summary_concurrency`: `4`.
fn default_graph_community_summary_concurrency() -> usize {
    const CONCURRENCY: usize = 4;
    CONCURRENCY
}
145
/// Built-in default for `lpa_edge_chunk_size`: `10_000`.
fn default_lpa_edge_chunk_size() -> usize {
    const EDGE_CHUNK_SIZE: usize = 10_000;
    EDGE_CHUNK_SIZE
}
149
/// Built-in default for the graph `entity_similarity_threshold`: `0.85`.
fn default_graph_entity_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
153
/// Built-in default for the graph `entity_ambiguous_threshold`: `0.70`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    const AMBIGUOUS_THRESHOLD: f32 = 0.70;
    AMBIGUOUS_THRESHOLD
}
157
/// Built-in default for the graph `extraction_timeout_secs`: `15` seconds.
fn default_graph_extraction_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 15;
    TIMEOUT_SECS
}
161
/// Built-in default for the graph `max_hops`: `2`.
fn default_graph_max_hops() -> u32 {
    const MAX_HOPS: u32 = 2;
    MAX_HOPS
}
165
/// Built-in default for the graph `recall_limit`: `10`.
fn default_graph_recall_limit() -> usize {
    const RECALL_LIMIT: usize = 10;
    RECALL_LIMIT
}
169
/// Built-in default for the graph `expired_edge_retention_days`: `90` days.
fn default_graph_expired_edge_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
173
/// Built-in default for the graph `temporal_decay_rate`: `0.0`.
fn default_graph_temporal_decay_rate() -> f64 {
    const DECAY_RATE: f64 = 0.0;
    DECAY_RATE
}
177
/// Built-in default for the graph `edge_history_limit`: `100`.
fn default_graph_edge_history_limit() -> usize {
    const HISTORY_LIMIT: usize = 100;
    HISTORY_LIMIT
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    const DECAY_LAMBDA: f32 = 0.85;
    DECAY_LAMBDA
}
185
/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    const MAX_HOPS: u32 = 3;
    MAX_HOPS
}
189
/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    const ACTIVATION_THRESHOLD: f32 = 0.1;
    ACTIVATION_THRESHOLD
}
193
/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    const INHIBITION_THRESHOLD: f32 = 0.8;
    INHIBITION_THRESHOLD
}
197
/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    const MAX_ACTIVATED_NODES: usize = 50;
    MAX_ACTIVATED_NODES
}
201
/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000` milliseconds.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    const RECALL_TIMEOUT_MS: u64 = 1000;
    RECALL_TIMEOUT_MS
}
205
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
209
/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    const TOP_K: usize = 10;
    TOP_K
}
213
/// Default for `NoteLinkingConfig::timeout_secs`: `5` seconds.
fn default_note_linking_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 5;
    TIMEOUT_SECS
}
217
/// Serde fallback for `MemoryConfig::shutdown_summary` when the key is omitted: `true`.
fn default_shutdown_summary() -> bool {
    const SHUTDOWN_SUMMARY: bool = true;
    SHUTDOWN_SUMMARY
}
221
/// Serde fallback for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    const MIN_MESSAGES: usize = 4;
    MIN_MESSAGES
}
225
/// Serde fallback for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    const MAX_MESSAGES: usize = 20;
    MAX_MESSAGES
}
229
/// Serde fallback for `MemoryConfig::shutdown_summary_timeout_secs`: `30` seconds.
fn default_shutdown_summary_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}
233
234fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
235where
236 D: serde::Deserializer<'de>,
237{
238 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
239 if value.is_nan() || value.is_infinite() {
240 return Err(serde::de::Error::custom(
241 "similarity_threshold must be a finite number",
242 ));
243 }
244 if !(0.5..=1.0).contains(&value) {
245 return Err(serde::de::Error::custom(
246 "similarity_threshold must be in [0.5, 1.0]",
247 ));
248 }
249 Ok(value)
250}
251
252fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
253where
254 D: serde::Deserializer<'de>,
255{
256 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
257 if value < 2 {
258 return Err(serde::de::Error::custom(
259 "promotion_min_sessions must be >= 2",
260 ));
261 }
262 Ok(value)
263}
264
265fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
266where
267 D: serde::Deserializer<'de>,
268{
269 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
270 if value == 0 {
271 return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
272 }
273 Ok(value)
274}
275
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    const MIN_SESSIONS: u32 = 3;
    MIN_SESSIONS
}
279
/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.92;
    SIMILARITY_THRESHOLD
}
283
/// Default for `TierConfig::sweep_interval_secs`: `3600` seconds.
fn default_tier_sweep_interval_secs() -> u64 {
    const SWEEP_INTERVAL_SECS: u64 = 3600;
    SWEEP_INTERVAL_SECS
}
287
/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    const SWEEP_BATCH_SIZE: usize = 100;
    SWEEP_BATCH_SIZE
}
291
/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    const SCENE_SIMILARITY_THRESHOLD: f32 = 0.80;
    SCENE_SIMILARITY_THRESHOLD
}
295
/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    const SCENE_BATCH_SIZE: usize = 50;
    SCENE_BATCH_SIZE
}
299
300fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
301where
302 D: serde::Deserializer<'de>,
303{
304 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
305 if value.is_nan() || value.is_infinite() {
306 return Err(serde::de::Error::custom(
307 "scene_similarity_threshold must be a finite number",
308 ));
309 }
310 if !(0.5..=1.0).contains(&value) {
311 return Err(serde::de::Error::custom(
312 "scene_similarity_threshold must be in [0.5, 1.0]",
313 ));
314 }
315 Ok(value)
316}
317
318fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
319where
320 D: serde::Deserializer<'de>,
321{
322 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
323 if value == 0 {
324 return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
325 }
326 Ok(value)
327}
328
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// The container-level `#[serde(default)]` means any key omitted from the input takes its
/// value from this type's `Default` implementation; the per-field validators below only run
/// for keys that are actually present.
///
/// # Validation
///
/// Constraints enforced at deserialization time (each via a `deserialize_with` hook):
/// - `similarity_threshold` finite and in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` finite and in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// `sweep_interval_secs` and `scene_sweep_interval_secs` have no validator attached here.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep.
    /// Must be `>= 1`. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
375
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200` seconds.
fn default_scene_sweep_interval_secs() -> u64 {
    const SCENE_SWEEP_INTERVAL_SECS: u64 = 7200;
    SCENE_SWEEP_INTERVAL_SECS
}
379
380impl Default for TierConfig {
381 fn default() -> Self {
382 Self {
383 enabled: false,
384 promotion_min_sessions: default_tier_promotion_min_sessions(),
385 similarity_threshold: default_tier_similarity_threshold(),
386 sweep_interval_secs: default_tier_sweep_interval_secs(),
387 sweep_batch_size: default_tier_sweep_batch_size(),
388 scene_enabled: false,
389 scene_similarity_threshold: default_scene_similarity_threshold(),
390 scene_batch_size: default_scene_batch_size(),
391 scene_provider: ProviderName::default(),
392 scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
393 }
394 }
395}
396
397fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
398where
399 D: serde::Deserializer<'de>,
400{
401 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
402 if value.is_nan() || value.is_infinite() {
403 return Err(serde::de::Error::custom(
404 "temporal_decay_rate must be a finite number",
405 ));
406 }
407 if !(0.0..=10.0).contains(&value) {
408 return Err(serde::de::Error::custom(
409 "temporal_decay_rate must be in [0.0, 10.0]",
410 ));
411 }
412 Ok(value)
413}
414
415fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
416where
417 D: serde::Deserializer<'de>,
418{
419 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
420 if value.is_nan() || value.is_infinite() {
421 return Err(serde::de::Error::custom(
422 "similarity_threshold must be a finite number",
423 ));
424 }
425 if !(0.0..=1.0).contains(&value) {
426 return Err(serde::de::Error::custom(
427 "similarity_threshold must be in [0.0, 1.0]",
428 ));
429 }
430 Ok(value)
431}
432
433fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
434where
435 D: serde::Deserializer<'de>,
436{
437 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
438 if value.is_nan() || value.is_infinite() {
439 return Err(serde::de::Error::custom(
440 "importance_weight must be a finite number",
441 ));
442 }
443 if value < 0.0 {
444 return Err(serde::de::Error::custom(
445 "importance_weight must be non-negative",
446 ));
447 }
448 if value > 1.0 {
449 return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
450 }
451 Ok(value)
452}
453
/// Built-in default for `importance_weight`: `0.15`.
fn default_importance_weight() -> f64 {
    const IMPORTANCE_WEIGHT: f64 = 0.15;
    IMPORTANCE_WEIGHT
}
457
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// The container-level `#[serde(default)]` fills any omitted key from this type's
/// `Default` implementation.
///
/// # Validation
///
/// Enforced at deserialization time by per-field `deserialize_with` hooks:
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
///
/// Not enforced by deserialization:
/// - `activation_threshold < inhibition_threshold` is checked by
///   [`SpreadingActivationConfig::validate`], which callers must invoke themselves.
/// - `recall_timeout_ms` has no validator here; per its field docs, values below 1 are
///   clamped to 100ms at runtime (the clamping code is not part of this type).
/// - `seed_structural_weight` documents a `[0.0, 1.0]` range, but no validator is
///   attached to it in this struct.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
}
500
501fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
502where
503 D: serde::Deserializer<'de>,
504{
505 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
506 if value.is_nan() || value.is_infinite() {
507 return Err(serde::de::Error::custom(
508 "decay_lambda must be a finite number",
509 ));
510 }
511 if !(value > 0.0 && value <= 1.0) {
512 return Err(serde::de::Error::custom(
513 "decay_lambda must be in (0.0, 1.0]",
514 ));
515 }
516 Ok(value)
517}
518
519fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
520where
521 D: serde::Deserializer<'de>,
522{
523 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
524 if value == 0 {
525 return Err(serde::de::Error::custom("max_hops must be >= 1"));
526 }
527 Ok(value)
528}
529
530impl SpreadingActivationConfig {
531 /// Validate cross-field constraints that cannot be expressed in per-field validators.
532 ///
533 /// # Errors
534 ///
535 /// Returns an error string if `activation_threshold >= inhibition_threshold`.
536 pub fn validate(&self) -> Result<(), String> {
537 if self.activation_threshold >= self.inhibition_threshold {
538 return Err(format!(
539 "activation_threshold ({}) must be < inhibition_threshold ({})",
540 self.activation_threshold, self.inhibition_threshold
541 ));
542 }
543 Ok(())
544 }
545}
546
/// Serde fallback for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    const STRUCTURAL_WEIGHT: f32 = 0.4;
    STRUCTURAL_WEIGHT
}
550
/// Serde fallback for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    const COMMUNITY_CAP: usize = 3;
    COMMUNITY_CAP
}
554
555impl Default for SpreadingActivationConfig {
556 fn default() -> Self {
557 Self {
558 enabled: false,
559 decay_lambda: default_spreading_activation_decay_lambda(),
560 max_hops: default_spreading_activation_max_hops(),
561 activation_threshold: default_spreading_activation_activation_threshold(),
562 inhibition_threshold: default_spreading_activation_inhibition_threshold(),
563 max_activated_nodes: default_spreading_activation_max_activated_nodes(),
564 seed_structural_weight: default_seed_structural_weight(),
565 seed_community_cap: default_seed_community_cap(),
566 recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
567 }
568 }
569}
570
/// Kumiho belief revision configuration.
///
/// The container-level `#[serde(default)]` fills any omitted key from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    ///
    /// Validated on deserialization: must be finite and in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
582
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
586
587impl Default for BeliefRevisionConfig {
588 fn default() -> Self {
589 Self {
590 enabled: false,
591 similarity_threshold: default_belief_revision_similarity_threshold(),
592 }
593 }
594}
595
/// D-MEM RPE-based tiered graph extraction routing configuration.
///
/// The container-level `#[serde(default)]` fills any omitted key from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    ///
    /// Validated on deserialization via the shared `validate_similarity_threshold` hook, so
    /// a rejected value is reported with a message that names `similarity_threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}
609
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    const RPE_THRESHOLD: f32 = 0.3;
    RPE_THRESHOLD
}
613
/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    const MAX_SKIP_TURNS: u32 = 5;
    MAX_SKIP_TURNS
}
617
618impl Default for RpeConfig {
619 fn default() -> Self {
620 Self {
621 enabled: false,
622 threshold: default_rpe_threshold(),
623 max_skip_turns: default_rpe_max_skip_turns(),
624 }
625 }
626}
627
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// The container-level `#[serde(default)]` fills any omitted key from this type's
/// `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    ///
    /// Validated on deserialization: must be finite and in `[0.0, 1.0]`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
646
647impl Default for NoteLinkingConfig {
648 fn default() -> Self {
649 Self {
650 enabled: false,
651 similarity_threshold: default_note_linking_similarity_threshold(),
652 top_k: default_note_linking_top_k(),
653 timeout_secs: default_note_linking_timeout_secs(),
654 }
655 }
656}
657
/// Vector backend selector for embedding storage.
///
/// Variants are (de)serialized by their lowercase names (`"qdrant"`, `"sqlite"`) because of
/// `#[serde(rename_all = "lowercase")]`. The enum is `#[non_exhaustive]`, so code outside
/// this crate must include a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum VectorBackend {
    /// Qdrant backend; serialized as `"qdrant"`.
    Qdrant,
    /// `SQLite` backend; serialized as `"sqlite"`. This is the `Default` variant.
    #[default]
    Sqlite,
}
667
668impl VectorBackend {
669 /// Return the lowercase identifier string for this backend.
670 ///
671 /// # Examples
672 ///
673 /// ```
674 /// use zeph_config::VectorBackend;
675 ///
676 /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
677 /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
678 /// ```
679 #[must_use]
680 pub fn as_str(&self) -> &'static str {
681 match self {
682 Self::Qdrant => "qdrant",
683 Self::Sqlite => "sqlite",
684 }
685 }
686}
687
688/// Memory subsystem configuration, nested under `[memory]` in TOML.
689///
690/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
691/// multi-tier promotion, and all memory-related background tasks.
692///
693/// # Example (TOML)
694///
695/// ```toml
696/// [memory]
697/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
698/// qdrant_url = "http://localhost:6334"
699/// history_limit = 50
700/// summarization_threshold = 50
701/// auto_budget = true
702/// ```
703#[derive(Debug, Deserialize, Serialize)]
704#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
705pub struct MemoryConfig {
706 #[serde(default)]
707 pub compression_guidelines: CompressionGuidelinesConfig,
708 #[serde(default = "default_sqlite_path_field")]
709 pub sqlite_path: String,
710 pub history_limit: u32,
711 #[serde(default = "default_qdrant_url")]
712 pub qdrant_url: String,
713 /// Optional API key for authenticating to a remote or managed Qdrant cluster.
714 ///
715 /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
716 /// Leave `None` for local dev instances. The actual key is resolved from the vault:
717 /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
718 ///
719 /// The value is wrapped in [`Secret`] to prevent accidental logging.
720 /// `skip_serializing` prevents the key from being written back to TOML on config save.
721 #[serde(default, skip_serializing)]
722 pub qdrant_api_key: Option<Secret>,
723 #[serde(default)]
724 pub semantic: SemanticConfig,
725 #[serde(default = "default_summarization_threshold")]
726 pub summarization_threshold: usize,
727 /// LLM call timeout for summarization, in seconds. Default: `60`.
728 #[serde(default = "default_summarization_llm_timeout_secs")]
729 pub summarization_llm_timeout_secs: u64,
730 #[serde(default = "default_context_budget_tokens")]
731 pub context_budget_tokens: usize,
732 #[serde(default = "default_soft_compaction_threshold")]
733 pub soft_compaction_threshold: f32,
734 #[serde(
735 default = "default_hard_compaction_threshold",
736 alias = "compaction_threshold"
737 )]
738 pub hard_compaction_threshold: f32,
739 #[serde(default = "default_compaction_preserve_tail")]
740 pub compaction_preserve_tail: usize,
741 #[serde(default = "default_compaction_cooldown_turns")]
742 pub compaction_cooldown_turns: u8,
743 #[serde(default = "default_auto_budget")]
744 pub auto_budget: bool,
745 #[serde(default = "default_prune_protect_tokens")]
746 pub prune_protect_tokens: usize,
747 #[serde(default = "default_cross_session_score_threshold")]
748 pub cross_session_score_threshold: f32,
749 #[serde(default)]
750 pub vector_backend: VectorBackend,
751 #[serde(default = "default_token_safety_margin")]
752 pub token_safety_margin: f32,
753 #[serde(default = "default_redact_credentials")]
754 pub redact_credentials: bool,
755 #[serde(default = "default_true")]
756 pub autosave_assistant: bool,
757 #[serde(default = "default_autosave_min_length")]
758 pub autosave_min_length: usize,
759 #[serde(default = "default_tool_call_cutoff")]
760 pub tool_call_cutoff: usize,
761 #[serde(default = "default_sqlite_pool_size")]
762 pub sqlite_pool_size: u32,
763 #[serde(default)]
764 pub sessions: SessionsConfig,
765 #[serde(default)]
766 pub documents: DocumentConfig,
767 #[serde(default)]
768 pub eviction: EvictionConfig,
769 #[serde(default)]
770 pub compression: CompressionConfig,
771 #[serde(default)]
772 pub sidequest: SidequestConfig,
773 #[serde(default)]
774 pub graph: GraphConfig,
775 /// Store a lightweight session summary to the vector store on shutdown when no session
776 /// summary exists yet for this conversation. Enables cross-session recall for short or
777 /// interrupted sessions that never triggered hard compaction. Default: `true`.
778 #[serde(default = "default_shutdown_summary")]
779 pub shutdown_summary: bool,
780 /// Minimum number of user-turn messages required before a shutdown summary is generated.
781 /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
782 #[serde(default = "default_shutdown_summary_min_messages")]
783 pub shutdown_summary_min_messages: usize,
784 /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
785 /// summarization. Caps token cost for long sessions that never triggered hard compaction.
786 /// Default: `20`.
787 #[serde(default = "default_shutdown_summary_max_messages")]
788 pub shutdown_summary_max_messages: usize,
789 /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
790 /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `30`.
792 #[serde(default = "default_shutdown_summary_timeout_secs")]
793 pub shutdown_summary_timeout_secs: u64,
794 /// LLM provider used for shutdown summarization calls.
795 ///
796 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
797 /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
798 ///
799 /// Example:
800 /// ```toml
801 /// [memory]
802 /// shutdown_summary_provider = "fast"
803 /// ```
804 #[serde(default)]
805 pub shutdown_summary_provider: ProviderName,
806 /// LLM provider used for deferred tool-pair summarization (context compaction).
807 ///
808 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
809 /// provider. A mid-tier model is usually sufficient for compaction summaries.
810 ///
811 /// Example:
812 /// ```toml
813 /// [memory]
814 /// compaction_provider = "fast"
815 /// ```
816 #[serde(default)]
817 pub compaction_provider: ProviderName,
818 /// Use structured anchored summaries for context compaction.
819 ///
820 /// When enabled, hard compaction requests a JSON schema from the LLM
821 /// instead of free-form prose. Falls back to prose if the LLM fails
822 /// to produce valid JSON. Default: `false`.
823 #[serde(default)]
824 pub structured_summaries: bool,
825 /// AOI three-layer memory tier promotion system.
826 ///
827 /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
828 /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
829 #[serde(default)]
830 pub tiers: TierConfig,
831 /// A-MAC adaptive memory admission control.
832 ///
833 /// When `admission.enabled = true`, each message is evaluated before saving and rejected
834 /// if its composite admission score falls below the configured threshold.
835 #[serde(default)]
836 pub admission: AdmissionConfig,
837 /// Session digest generation at session end. Default: disabled.
838 #[serde(default)]
839 pub digest: DigestConfig,
840 /// Context assembly strategy. Default: `full_history` (current behavior).
841 #[serde(default)]
842 pub context_strategy: ContextStrategy,
843 /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
844 #[serde(default = "default_crossover_turn_threshold")]
845 pub crossover_turn_threshold: u32,
846 /// All-Mem lifelong memory consolidation sweep.
847 ///
848 /// When `consolidation.enabled = true`, a background loop clusters semantically similar
849 /// messages and merges them into consolidated entries via LLM.
850 #[serde(default)]
851 pub consolidation: ConsolidationConfig,
852 /// `SleepGate` forgetting sweep (#2397).
853 ///
854 /// When `forgetting.enabled = true`, a background loop periodically decays importance
855 /// scores and prunes memories below the forgetting floor.
856 #[serde(default)]
857 pub forgetting: ForgettingConfig,
858 /// `PostgreSQL` connection URL.
859 ///
860 /// Used when the binary is compiled with `--features postgres`.
861 /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
862 /// Example: `postgres://user:pass@localhost:5432/zeph`
863 /// Default: `None` (uses `sqlite_path` instead).
864 #[serde(default)]
865 pub database_url: Option<String>,
866 /// Cost-sensitive store routing (#2444).
867 ///
868 /// When `store_routing.enabled = true`, query intent is classified and routed to
869 /// the cheapest sufficient backend instead of querying all stores on every turn.
870 #[serde(default)]
871 pub store_routing: StoreRoutingConfig,
872 /// Persona memory layer (#2461).
873 ///
874 /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
875 /// from conversation history and injected into context after the system prompt.
876 #[serde(default)]
877 pub persona: PersonaConfig,
878 /// Trajectory-informed memory (#2498).
879 #[serde(default)]
880 pub trajectory: TrajectoryConfig,
881 /// Category-aware memory (#2428).
882 #[serde(default)]
883 pub category: CategoryConfig,
884 /// `TiMem` temporal-hierarchical memory tree (#2262).
885 #[serde(default)]
886 pub tree: TreeConfig,
887 /// Time-based microcompact (#2699).
888 ///
889 /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
890 /// from context when the session has been idle longer than `gap_threshold_minutes`.
891 #[serde(default)]
892 pub microcompact: MicrocompactConfig,
893 /// autoDream background memory consolidation (#2697).
894 ///
895 /// When `autodream.enabled = true`, a constrained consolidation subagent runs
896 /// after a session ends if both `min_sessions` and `min_hours` gates pass.
897 #[serde(default)]
898 pub autodream: AutoDreamConfig,
899 /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
900 ///
901 /// Before inserting a new key fact, its nearest neighbour is looked up in the
902 /// `zeph_key_facts` collection. If the best score is ≥ this threshold the fact is
903 /// considered a near-duplicate and skipped. Set to a value greater than `1.0` (e.g.
904 /// `2.0`) to disable dedup entirely. Default: `0.95`.
905 #[serde(default = "default_key_facts_dedup_threshold")]
906 pub key_facts_dedup_threshold: f32,
907 /// Experience compression spectrum (#3305).
908 ///
909 /// Controls three-tier retrieval policy and background skill-promotion engine.
910 #[serde(default)]
911 pub compression_spectrum: crate::features::CompressionSpectrumConfig,
912 /// MemMachine-inspired retrieval-stage tuning (#3340).
913 ///
914 /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
915 /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
916 /// uniformly across graph, hybrid, and vector-only recall paths.
917 ///
918 /// # Example (TOML)
919 ///
920 /// ```toml
921 /// [memory.retrieval]
922 /// depth = 40
923 /// search_prompt_template = ""
924 /// context_format = "structured"
925 /// ```
926 #[serde(default)]
927 pub retrieval: RetrievalConfig,
928 /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
929 ///
930 /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
931 /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
932 /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
933 /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
934 /// and injected before the LLM call.
935 #[serde(default)]
936 pub reasoning: ReasoningConfig,
937 /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
938 ///
939 /// When `enabled = true`, the weight of each `graph_edges` row is incremented
940 /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
941 ///
942 /// # Example (TOML)
943 ///
944 /// ```toml
945 /// [memory.hebbian]
946 /// enabled = true
947 /// hebbian_lr = 0.1
948 /// ```
949 #[serde(default)]
950 pub hebbian: HebbianConfig,
951 /// `MemCoT` rolling semantic state configuration (#3574).
952 ///
953 /// When `enabled = true`, each completed assistant turn spawns a background distillation
954 /// task that compresses the response into a short semantic state buffer. The buffer is
955 /// prepended to graph recall queries so retrieval stays contextually relevant across long
956 /// multi-turn sessions.
957 ///
958 /// # Example (TOML)
959 ///
960 /// ```toml
961 /// [memory.memcot]
962 /// enabled = true
963 /// distill_provider = "fast"
964 /// min_assistant_chars = 200
965 /// max_distills_per_session = 50
966 /// ```
967 #[serde(default)]
968 pub memcot: MemCotConfig,
969 /// `OmniMem` retrieval failure tracking (issue #3576).
970 ///
971 /// When `enabled = true`, no-hit and low-confidence recall events are logged
972 /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
973 ///
974 /// # Example (TOML)
975 ///
976 /// ```toml
977 /// [memory.retrieval_failures]
978 /// enabled = true
979 /// low_confidence_threshold = 0.3
980 /// retention_days = 90
981 /// ```
982 #[serde(default)]
983 pub retrieval_failures: RetrievalFailuresConfig,
984 /// Write quality gate (#3629).
985 ///
986 /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
987 /// writes are rejected before persistence. Evaluated after A-MAC admission control.
988 #[serde(default)]
989 pub quality_gate: WriteQualityGateConfig,
990 /// `MemFlow` tiered intent-driven retrieval (issue #3712).
991 ///
992 /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
993 /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
994 /// `DeepReasoning`) with optional validation and tier escalation.
995 #[serde(default)]
996 pub tiered_retrieval: TieredRetrievalConfig,
997 /// `ScrapMem` optical forgetting (issue #3713).
998 ///
999 /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
1000 /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1001 #[serde(default)]
1002 pub optical_forgetting: OpticalForgettingConfig,
1003 /// EM-Graph episodic event extraction and causal linking (issue #3713).
1004 ///
1005 /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1006 /// and linked via causal relationships, enabling causal-chain retrieval.
1007 #[serde(default)]
1008 pub em_graph: EmGraphConfig,
1009 /// Episodic-to-semantic consolidation daemon (issue #3799).
1010 ///
1011 /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1012 /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1013 /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1014 #[serde(default)]
1015 pub episodic_consolidation: EpisodicConsolidationConfig,
1016 /// MAGE shadow memory trajectory risk accumulator (spec 004-16).
1017 ///
1018 /// Maintains a per-session rolling risk score fed by sanitizer audit signals.
1019 /// When `shadow_memory.enabled = true`, tool execution is gated if cumulative
1020 /// trajectory risk exceeds `risk_threshold`. When `false`, all code paths are
1021 /// zero-cost no-ops.
1022 ///
1023 /// # Example (TOML)
1024 ///
1025 /// ```toml
1026 /// [memory.shadow_memory]
1027 /// enabled = true
1028 /// risk_threshold = 0.75
1029 /// risk_halflife_turns = 10
1030 /// ```
1031 #[serde(default)]
1032 pub shadow_memory: TrajectoryRiskAccumulatorConfig,
1033 /// Five-signal SYNAPSE retrieval (issue #4374).
1034 ///
1035 /// When `five_signal.enabled = true`, SYNAPSE recall weights five signals: recency,
1036 /// relevance, access frequency, causal distance, and novelty. All new signals default
1037 /// to weight `0.0`, preserving exact backward compatibility.
1038 #[serde(default)]
1039 pub five_signal: FiveSignalConfig,
1040 /// Context-Adaptive Memory fidelity scoring (CAM Phase 1, #4547).
1041 ///
1042 /// When `fidelity.enabled = true`, the heuristic fidelity scorer runs after each
1043 /// `apply_prepared_context()` call and assigns `Full / Compressed / Placeholder`
1044 /// levels to historical messages. Default: disabled.
1045 ///
1046 /// # Example (TOML)
1047 ///
1048 /// ```toml
1049 /// [memory.fidelity]
1050 /// enabled = false
1051 /// w_semantic = 0.3
1052 /// w_temporal = 0.3
1053 /// w_importance = 0.2
1054 /// w_plan = 0.2
1055 /// full_threshold = 0.7
1056 /// compressed_threshold = 0.3
1057 /// compressed_max_tokens = 50
1058 /// regrade_threshold = 0.6
1059 /// min_query_length = 8
1060 /// max_scored_messages = 500
1061 /// ```
1062 #[serde(default, skip_serializing_if = "Option::is_none")]
1063 pub fidelity: Option<crate::fidelity::FidelityConfig>,
1064}
1065
1066// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1067
/// `MemFlow` tiered intent-driven retrieval configuration.
///
/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
///
/// The container-level `#[serde(default)]` makes every key optional: keys missing from
/// the TOML table take their value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.tiered_retrieval]
/// enabled = false
/// classifier_provider = ""
/// validator_provider = ""
/// token_budget = 4096
/// validation_enabled = false
/// validation_threshold = 0.6
/// max_escalations = 1
/// classifier_timeout_secs = 5
/// validator_timeout_secs = 5
///
/// # Signal weights: `similarity_weight` defaults to 1.0; the recency, TF-IDF,
/// # cognitive and tier-boost weights default to 0.0 (set them to activate each signal)
/// similarity_weight = 1.0
/// recency_weight = 0.0
/// recency_half_life_days = 7
/// tfidf_weight = 0.0
/// cognitive_signal_weight = 0.0
/// tier_boost_weight = 0.0
/// semantic_tier_boost = 1.0
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TieredRetrievalConfig {
    /// Enable `MemFlow` tiered retrieval. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for intent classification.
    ///
    /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
    /// is set but the call fails, falls back to the heuristic (fail-open).
    pub classifier_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for evidence validation.
    ///
    /// When empty or when `validation_enabled = false`, no validation call is made.
    pub validator_provider: ProviderName,
    /// Maximum tokens to gather for evidence per query. Default: `4096`.
    pub token_budget: usize,
    /// Enable evidence validation and tier escalation. Default: `false`.
    pub validation_enabled: bool,
    /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
    pub validation_threshold: f32,
    /// Maximum tier escalations per query. Default: `1`.
    pub max_escalations: u8,
    /// Timeout in seconds for the classifier LLM call. Default: `5`.
    ///
    /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
    pub classifier_timeout_secs: u64,
    /// Timeout in seconds for the validator LLM call. Default: `5`.
    ///
    /// On timeout the validator is treated as sufficient (fail-open).
    pub validator_timeout_secs: u64,

    // ── Signal weights ────────────────────────────────────────────────────────
    /// Weight applied to the raw similarity score from vector/keyword recall. Default: `1.0`.
    ///
    /// Set to `1.0` and all other weights to `0.0` to reproduce pre-signal behaviour.
    pub similarity_weight: f64,
    /// Weight applied to the recency decay signal. Default: `0.0` (disabled).
    pub recency_weight: f64,
    /// Half-life for recency decay in days. Default: `7`.
    ///
    /// A message that is `recency_half_life_days` old receives a recency score of `0.5`.
    /// Set `recency_weight = 0.0` to disable recency scoring entirely.
    pub recency_half_life_days: u32,
    /// Weight applied to the TF-IDF signal. Default: `0.0` (disabled).
    pub tfidf_weight: f64,
    /// Weight applied to the cognitive signal (message access frequency). Default: `0.0` (disabled).
    pub cognitive_signal_weight: f64,
    /// Weight applied to the tier boost signal for consolidated/semantic entries. Default: `0.0` (disabled).
    pub tier_boost_weight: f64,
    /// Additive score awarded to entries in the `semantic` tier when `tier_boost_weight > 0`. Default: `1.0`.
    ///
    /// The final contribution is `tier_boost_weight * semantic_tier_boost` for semantic entries
    /// and `0.0` for episodic entries.
    pub semantic_tier_boost: f64,
}
1152
1153impl Default for TieredRetrievalConfig {
1154 fn default() -> Self {
1155 Self {
1156 enabled: false,
1157 classifier_provider: ProviderName::default(),
1158 validator_provider: ProviderName::default(),
1159 token_budget: 4096,
1160 validation_enabled: false,
1161 validation_threshold: 0.6,
1162 max_escalations: 1,
1163 classifier_timeout_secs: 5,
1164 validator_timeout_secs: 5,
1165 similarity_weight: 1.0,
1166 recency_weight: 0.0,
1167 recency_half_life_days: 7,
1168 tfidf_weight: 0.0,
1169 cognitive_signal_weight: 0.0,
1170 tier_boost_weight: 0.0,
1171 semantic_tier_boost: 1.0,
1172 }
1173 }
1174}
1175
1176// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1177
/// `ScrapMem` optical forgetting configuration.
///
/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
/// forgetting compresses content in place based on age.
///
/// The container-level `#[serde(default)]` makes every key optional: keys missing from
/// the TOML table take their value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.optical_forgetting]
/// enabled = false
/// compress_provider = ""
/// compress_after_turns = 100
/// summarize_after_turns = 500
/// sweep_interval_secs = 3600
/// sweep_batch_size = 50
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct OpticalForgettingConfig {
    /// Enable optical forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for LLM-based content compression.
    /// Falls back to the primary provider when empty.
    pub compress_provider: ProviderName,
    /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
    pub compress_after_turns: u32,
    /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
    ///
    /// NOTE(review): presumably expected to be larger than `compress_after_turns`; no such
    /// check is performed by this struct — confirm where (or whether) it is validated.
    pub summarize_after_turns: u32,
    /// How often the sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to compress per sweep iteration. Default: `50`.
    pub sweep_batch_size: usize,
}
1212
1213impl Default for OpticalForgettingConfig {
1214 fn default() -> Self {
1215 Self {
1216 enabled: false,
1217 compress_provider: ProviderName::default(),
1218 compress_after_turns: 100,
1219 summarize_after_turns: 500,
1220 sweep_interval_secs: 3600,
1221 sweep_batch_size: 50,
1222 }
1223 }
1224}
1225
1226// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1227
/// EM-Graph episodic event extraction and causal linking configuration.
///
/// When enabled, episodic events are extracted from conversation turns and linked
/// via causal relationships stored in `episodic_events` and `causal_links` tables.
///
/// The container-level `#[serde(default)]` makes every key optional: keys missing from
/// the TOML table take their value from this type's [`Default`] implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.em_graph]
/// enabled = false
/// extract_provider = ""
/// max_chain_depth = 3
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EmGraphConfig {
    /// Enable EM-Graph event extraction and causal linking. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for event extraction.
    /// Falls back to the primary provider when empty.
    pub extract_provider: ProviderName,
    /// Maximum hops when traversing causal chains during recall. Default: `3`.
    pub max_chain_depth: u32,
}
1252
1253impl Default for EmGraphConfig {
1254 fn default() -> Self {
1255 Self {
1256 enabled: false,
1257 extract_provider: ProviderName::default(),
1258 max_chain_depth: 3,
1259 }
1260 }
1261}
1262
1263// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1264
/// Default for `EpisodicConsolidationConfig::interval_secs`: 30 minutes, in seconds.
fn default_episodic_consolidation_interval_secs() -> u64 {
    const MINUTES: u64 = 30;
    MINUTES * 60
}
1268
/// Default for `EpisodicConsolidationConfig::batch_size`: 30 events per sweep.
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1272
/// Default for `EpisodicConsolidationConfig::min_age_secs`: 5 minutes, in seconds.
fn default_episodic_consolidation_min_age_secs() -> u64 {
    const MINUTES: u64 = 5;
    MINUTES * 60
}
1276
/// Default for `EpisodicConsolidationConfig::dedup_jaccard_threshold`: `0.6`.
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const THRESHOLD: f32 = 0.6;
    THRESHOLD
}
1280
1281// ── Five-signal SYNAPSE retrieval config (issue #4374) ────────────────────────
1282
/// Default for `FiveSignalConfig::w_recency`: `0.35`.
fn default_five_signal_w_recency() -> f64 {
    const WEIGHT: f64 = 0.35;
    WEIGHT
}
1286
/// Default for `FiveSignalConfig::w_relevance`: `0.35`.
fn default_five_signal_w_relevance() -> f64 {
    const WEIGHT: f64 = 0.35;
    WEIGHT
}
1290
/// Default for `FiveSignalConfig::causal_bfs_max_depth`: 10 levels.
fn default_causal_bfs_max_depth() -> u32 {
    const MAX_DEPTH: u32 = 10;
    MAX_DEPTH
}
1294
/// Default for `FiveSignalConfig::neutral_causal_distance`: `5`.
fn default_neutral_causal_distance() -> u32 {
    const NEUTRAL_DISTANCE: u32 = 5;
    NEUTRAL_DISTANCE
}
1298
/// Default for `FiveSignalConfig::novelty_decay_rate`: `0.1`.
fn default_novelty_decay_rate() -> f64 {
    const DECAY_RATE: f64 = 0.1;
    DECAY_RATE
}
1302
/// Default for `FiveSignalConsolidationConfig::interval_seconds`: 2 hours, in seconds.
fn default_five_signal_interval_seconds() -> u64 {
    const HOURS: u64 = 2;
    HOURS * 60 * 60
}
1306
/// Default for `FiveSignalConsolidationConfig::batch_size`: 500 facts per run.
fn default_five_signal_batch_size() -> usize {
    const FACTS_PER_RUN: usize = 500;
    FACTS_PER_RUN
}
1310
/// Default for `FiveSignalConsolidationConfig::daemon_max_runtime_ms`: 30 seconds,
/// in milliseconds.
fn default_five_signal_daemon_max_runtime_ms() -> u64 {
    const SECONDS: u64 = 30;
    SECONDS * 1_000
}
1314
/// Default for `FiveSignalConsolidationConfig::promotion_score_threshold`: `0.70`.
fn default_five_signal_promotion_score_threshold() -> f64 {
    const PROMOTE_ABOVE: f64 = 0.70;
    PROMOTE_ABOVE
}
1318
/// Default for `FiveSignalConsolidationConfig::demotion_score_threshold`: `0.20`.
fn default_five_signal_demotion_score_threshold() -> f64 {
    const DEMOTE_BELOW: f64 = 0.20;
    DEMOTE_BELOW
}
1322
/// Default for `FiveSignalConsolidationConfig::top_k_per_run`: 500 facts queried per run.
fn default_five_signal_top_k_per_run() -> usize {
    const TOP_K: usize = 500;
    TOP_K
}
1326
/// Five-signal SYNAPSE retrieval configuration (issue #4374).
///
/// Extends SYNAPSE recall with three additional signals — access frequency, causal
/// distance, and novelty — beyond the two-signal baseline (recency + relevance).
/// All new signal weights default to `0.0`, preserving exact backward compatibility.
///
/// Every field carries its own `#[serde(default ...)]`, so any key may be omitted from
/// the TOML table. The example below sets the three new weights to non-zero values to
/// show an activated configuration; it does not reflect the defaults.
///
/// NOTE(review): the default weights (`0.35 + 0.35`) do not sum to `1.0` and nothing in
/// this struct normalizes them — confirm whether the scorer normalizes at use site.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.five_signal]
/// enabled = true
/// w_recency = 0.35
/// w_relevance = 0.35
/// w_frequency = 0.15
/// w_causal = 0.10
/// w_novelty = 0.05
///
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConfig {
    /// Master switch. When `false`, the five-signal code path contributes zero overhead.
    /// Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Weight for the recency signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_recency")]
    pub w_recency: f64,
    /// Weight for the semantic relevance signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_relevance")]
    pub w_relevance: f64,
    /// Weight for the access frequency signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_frequency: f64,
    /// Weight for the causal distance signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_causal: f64,
    /// Weight for the novelty signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_novelty: f64,
    /// Maximum BFS depth for causal distance computation. Default: `10`.
    #[serde(default = "default_causal_bfs_max_depth")]
    pub causal_bfs_max_depth: u32,
    /// Causal distance assigned when no goal entity is set or a fact lies beyond
    /// `causal_bfs_max_depth`. Default: `5`.
    #[serde(default = "default_neutral_causal_distance")]
    pub neutral_causal_distance: u32,
    /// Decay rate λ in `exp(-λ × days)` for the novelty signal. Default: `0.1`.
    #[serde(default = "default_novelty_decay_rate")]
    pub novelty_decay_rate: f64,
    /// Async consolidation daemon that promotes hot episodic facts to Qdrant.
    /// Defaults to [`FiveSignalConsolidationConfig::default`] (daemon disabled).
    #[serde(default)]
    pub consolidation_daemon: FiveSignalConsolidationConfig,
}
1382
1383impl Default for FiveSignalConfig {
1384 fn default() -> Self {
1385 Self {
1386 enabled: false,
1387 w_recency: default_five_signal_w_recency(),
1388 w_relevance: default_five_signal_w_relevance(),
1389 w_frequency: 0.0,
1390 w_causal: 0.0,
1391 w_novelty: 0.0,
1392 causal_bfs_max_depth: default_causal_bfs_max_depth(),
1393 neutral_causal_distance: default_neutral_causal_distance(),
1394 novelty_decay_rate: default_novelty_decay_rate(),
1395 consolidation_daemon: FiveSignalConsolidationConfig::default(),
1396 }
1397 }
1398}
1399
/// Async consolidation daemon configuration for five-signal retrieval (issue #4374).
///
/// When `enabled = true`, a background task runs at `interval_seconds` intervals,
/// evaluates the top `top_k_per_run` episodic facts by five-signal score, promotes
/// facts above `promotion_score_threshold` to Qdrant, and demotes facts below
/// `demotion_score_threshold` to `episodic_only` tier.
///
/// Every field carries its own `#[serde(default ...)]`, so any key may be omitted from
/// the TOML table.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// batch_size = 500
/// daemon_max_runtime_ms = 30000
/// promotion_score_threshold = 0.70
/// demotion_score_threshold = 0.20
/// top_k_per_run = 500
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConsolidationConfig {
    /// Enable the daemon. Requires the `scheduler` feature. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Interval between daemon runs in seconds. Default: `7200` (2 hours).
    #[serde(default = "default_five_signal_interval_seconds")]
    pub interval_seconds: u64,
    /// Maximum facts processed (embed + upsert) per run. Default: `500`.
    #[serde(default = "default_five_signal_batch_size")]
    pub batch_size: usize,
    /// Hard timeout per run in milliseconds. Default: `30000`.
    #[serde(default = "default_five_signal_daemon_max_runtime_ms")]
    pub daemon_max_runtime_ms: u64,
    /// Five-signal score above which a fact is promoted to Qdrant. Default: `0.70`.
    #[serde(default = "default_five_signal_promotion_score_threshold")]
    pub promotion_score_threshold: f64,
    /// Five-signal score below which a promoted fact is demoted. Default: `0.20`.
    #[serde(default = "default_five_signal_demotion_score_threshold")]
    pub demotion_score_threshold: f64,
    /// Number of episodic facts queried per run (SQL LIMIT). Must be >= `batch_size`.
    /// Default: `500`.
    ///
    /// NOTE(review): the `>= batch_size` constraint is not checked during
    /// deserialization here — confirm where it is enforced.
    #[serde(default = "default_five_signal_top_k_per_run")]
    pub top_k_per_run: usize,
}
1442
1443impl Default for FiveSignalConsolidationConfig {
1444 fn default() -> Self {
1445 Self {
1446 enabled: false,
1447 interval_seconds: default_five_signal_interval_seconds(),
1448 batch_size: default_five_signal_batch_size(),
1449 daemon_max_runtime_ms: default_five_signal_daemon_max_runtime_ms(),
1450 promotion_score_threshold: default_five_signal_promotion_score_threshold(),
1451 demotion_score_threshold: default_five_signal_demotion_score_threshold(),
1452 top_k_per_run: default_five_signal_top_k_per_run(),
1453 }
1454 }
1455}
1456
/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
///
/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
/// extracts durable factual statements via LLM, deduplicates them against existing
/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
///
/// Every key is optional in TOML: the container-level `#[serde(default)]` covers
/// `enabled` and `consolidation_provider`, while the remaining fields name their
/// default-provider function explicitly (the same functions the [`Default`] impl calls).
///
/// # Example (TOML)
///
/// ```toml
/// [memory.episodic_consolidation]
/// enabled = false
/// consolidation_provider = ""
/// interval_secs = 1800
/// batch_size = 30
/// min_age_secs = 300
/// dedup_jaccard_threshold = 0.6
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EpisodicConsolidationConfig {
    /// Enable the episodic consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
    /// Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
    #[serde(default = "default_episodic_consolidation_interval_secs")]
    pub interval_secs: u64,
    /// Maximum number of episodic events to process per sweep. Default: `30`.
    #[serde(default = "default_episodic_consolidation_batch_size")]
    pub batch_size: usize,
    /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
    /// Prevents consolidating events from the active conversation.
    #[serde(default = "default_episodic_consolidation_min_age_secs")]
    pub min_age_secs: u64,
    /// Jaccard similarity threshold for deduplication against existing key facts.
    /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
    #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
    pub dedup_jaccard_threshold: f32,
}
1498
1499impl Default for EpisodicConsolidationConfig {
1500 fn default() -> Self {
1501 Self {
1502 enabled: false,
1503 consolidation_provider: ProviderName::default(),
1504 interval_secs: default_episodic_consolidation_interval_secs(),
1505 batch_size: default_episodic_consolidation_batch_size(),
1506 min_age_secs: default_episodic_consolidation_min_age_secs(),
1507 dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1508 }
1509 }
1510}
1511
/// Default low-confidence threshold for retrieval-failure tracking: `0.3`.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const THRESHOLD: f32 = 0.3;
    THRESHOLD
}
1515
/// Default retention period for retrieval-failure tracking: 90 days.
fn default_retrieval_failures_retention_days() -> u32 {
    const DAYS: u32 = 90;
    DAYS
}
1519
/// Default channel capacity for retrieval-failure tracking: `256`.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CAPACITY: usize = 256;
    CAPACITY
}
1523
/// Default batch size for retrieval-failure tracking: `16`.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH: usize = 16;
    BATCH
}
1527
/// Default flush interval for retrieval-failure tracking: 100 milliseconds.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const MILLIS: u64 = 100;
    MILLIS
}
1531
/// Default crossover turn threshold: 20 turns.
fn default_crossover_turn_threshold() -> u32 {
    const TURNS: u32 = 20;
    TURNS
}
1535
/// Default key-facts deduplication threshold: `0.95`.
fn default_key_facts_dedup_threshold() -> f32 {
    const THRESHOLD: f32 = 0.95;
    THRESHOLD
}
1539
/// Session digest configuration (#2289).
///
/// The container-level `#[serde(default)]` makes every key optional: keys missing from
/// the TOML table take their value from this type's [`Default`] implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when `None`. Default: `None`.
    #[serde(default)]
    pub provider: Option<ProviderName>,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
1555
1556impl Default for DigestConfig {
1557 fn default() -> Self {
1558 Self {
1559 enabled: false,
1560 provider: None,
1561 max_tokens: 500,
1562 max_input_messages: 50,
1563 }
1564 }
1565}
1566
/// Context assembly strategy (#2288).
///
/// Serialized in `snake_case` (`rename_all`), i.e. `"full_history"`, `"memory_first"`
/// and `"adaptive"`. Marked `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
1583
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
///
/// Both keys are optional; each names its default-provider function explicitly.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
1595
1596impl Default for SessionsConfig {
1597 fn default() -> Self {
1598 Self {
1599 max_history: default_max_history(),
1600 title_max_chars: default_title_max_chars(),
1601 }
1602 }
1603}
1604
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Every field carries its own `#[serde(default ...)]`, so any key may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Collection name used for documents. Default: `"zeph_documents"`.
    ///
    /// NOTE(review): presumably the vector-store collection that holds ingested
    /// chunks — confirm against the ingestion code.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Chunk size used for documents. Default: `1000`.
    ///
    /// NOTE(review): the unit (characters vs. tokens) is not visible here — confirm.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between adjacent chunks, in the same unit as `chunk_size`
    /// (assumed — TODO confirm). Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
1621
1622impl Default for DocumentConfig {
1623 fn default() -> Self {
1624 Self {
1625 collection: default_document_collection(),
1626 chunk_size: default_document_chunk_size(),
1627 chunk_overlap: default_document_chunk_overlap(),
1628 top_k: default_document_top_k(),
1629 rag_enabled: false,
1630 }
1631 }
1632}
1633
1634/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
1635///
1636/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
1637/// re-ranking, and hybrid BM25+vector weighting.
1638///
1639/// # Example (TOML)
1640///
1641/// ```toml
1642/// [memory.semantic]
1643/// enabled = true
1644/// recall_limit = 5
1645/// vector_weight = 0.7
1646/// keyword_weight = 0.3
1647/// mmr_lambda = 0.7
1648/// ```
1649#[derive(Debug, Deserialize, Serialize)]
1650#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1651pub struct SemanticConfig {
1652 /// Enable vector-based semantic recall. Default: `true`.
1653 #[serde(default = "default_semantic_enabled")]
1654 pub enabled: bool,
1655 #[serde(default = "default_recall_limit")]
1656 pub recall_limit: usize,
1657 #[serde(default = "default_vector_weight")]
1658 pub vector_weight: f64,
1659 #[serde(default = "default_keyword_weight")]
1660 pub keyword_weight: f64,
1661 #[serde(default = "default_true")]
1662 pub temporal_decay_enabled: bool,
1663 #[serde(default = "default_temporal_decay_half_life_days")]
1664 pub temporal_decay_half_life_days: u32,
1665 #[serde(default = "default_true")]
1666 pub mmr_enabled: bool,
1667 #[serde(default = "default_mmr_lambda")]
1668 pub mmr_lambda: f32,
1669 #[serde(default = "default_true")]
1670 pub importance_enabled: bool,
1671 #[serde(
1672 default = "default_importance_weight",
1673 deserialize_with = "validate_importance_weight"
1674 )]
1675 pub importance_weight: f64,
1676 /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
1677 /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
1678 /// from contending with the guardrail at the API server level (rate limits, Ollama
1679 /// single-model lock). Falls back to the main agent provider when `None`.
1680 #[serde(default)]
1681 pub embedding_provider: Option<ProviderName>,
1682 /// Timeout in seconds applied to every `embed()` call inside `zeph-memory`.
1683 ///
1684 /// Applies to all embedding call sites: admission control, quality gate, recall,
1685 /// summarization, graph retrieval, consolidation, and tree consolidation.
1686 /// Set to a higher value when using slow remote embedding providers.
1687 /// Default: `5`.
1688 #[serde(default = "default_embed_timeout_secs")]
1689 pub embed_timeout_secs: u64,
1690}
1691
1692impl Default for SemanticConfig {
1693 fn default() -> Self {
1694 Self {
1695 enabled: default_semantic_enabled(),
1696 recall_limit: default_recall_limit(),
1697 vector_weight: default_vector_weight(),
1698 keyword_weight: default_keyword_weight(),
1699 temporal_decay_enabled: true,
1700 temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1701 mmr_enabled: true,
1702 mmr_lambda: default_mmr_lambda(),
1703 importance_enabled: true,
1704 importance_weight: default_importance_weight(),
1705 embedding_provider: None,
1706 embed_timeout_secs: default_embed_timeout_secs(),
1707 }
1708 }
1709}
1710
/// Default for `SemanticConfig::embed_timeout_secs`: 5 seconds.
fn default_embed_timeout_secs() -> u64 {
    const SECONDS: u64 = 5;
    SECONDS
}
1714
/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
///
/// Controls how each recalled memory entry is presented in the assembled prompt.
/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
/// always contain the raw message text. The format is applied exclusively during
/// context assembly and is never persisted.
///
/// Serialized in `snake_case` (`rename_all`), i.e. `"structured"` or `"plain"`.
///
/// # Token cost
///
/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ContextFormat {
    /// Emit a labeled header per snippet:
    /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
    ///
    /// This is the default. Gives the LLM structured provenance metadata for each recalled
    /// memory without re-parsing the recall body.
    #[default]
    Structured,
    /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
    ///
    /// Use `Plain` when downstream consumers rely on the old format or when token budget
    /// is tight and provenance headers are not needed.
    Plain,
}
1743
/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
///
/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
/// Nested under `[memory.retrieval]` in TOML. All fields have defaults so existing
/// configs parse unchanged: the container-level `#[serde(default)]` fills missing keys
/// from this type's [`Default`] implementation, and the query-bias fields additionally
/// name their default-provider functions.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.retrieval]
/// # depth = 0                      # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
/// # search_prompt_template = ""
/// # context_format = "structured"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalConfig {
    /// Number of ANN candidates fetched from the vector store before keyword merge,
    /// temporal decay, and MMR re-ranking.
    ///
    /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
    ///   to pre-#3340 deployments.
    /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
    ///   `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
    ///   size, or higher for better MMR diversity.
    ///
    /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
    /// cannot saturate the requested top-k.
    pub depth: u32,
    /// Template applied to the raw user query before embedding.
    ///
    /// Supports a single `{query}` placeholder which is replaced with the raw query string.
    /// Empty string (default) = identity: the query is embedded as-is.
    ///
    /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
    /// is never wrapped. Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
    pub search_prompt_template: String,
    /// Shape of memory snippets injected into agent context.
    ///
    /// See [`ContextFormat`] for the exact rendering and token-cost implications.
    /// Default: `Structured`.
    pub context_format: ContextFormat,
    /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
    ///
    /// When `true` and the query is classified as first-person, the query embedding is
    /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
    /// towards persona-relevant content for self-referential queries.
    ///
    /// Default: `true` (low blast-radius: no-op when the persona table is empty).
    #[serde(default = "default_query_bias_correction")]
    pub query_bias_correction: bool,
    /// Blend weight for query-bias correction (MM-F3, #3341).
    ///
    /// Controls how much the query embedding shifts towards the profile centroid.
    /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
    ///
    /// NOTE(review): no clamping happens during deserialization here; presumably it is
    /// applied where the weight is consumed — confirm.
    #[serde(default = "default_query_bias_profile_weight")]
    pub query_bias_profile_weight: f32,
    /// Centroid TTL in seconds (MM-F3, #3341).
    ///
    /// The profile centroid computed from persona facts is cached for this many seconds.
    /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
    #[serde(default = "default_query_bias_centroid_ttl_secs")]
    pub query_bias_centroid_ttl_secs: u64,
}
1808
/// Serde default for `RetrievalConfig::query_bias_correction`: correction is on unless
/// the configuration turns it off.
fn default_query_bias_correction() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1812
/// Serde default for `RetrievalConfig::query_bias_profile_weight`: a quarter of the way
/// from the raw query embedding towards the profile centroid.
fn default_query_bias_profile_weight() -> f32 {
    const DEFAULT_BLEND: f32 = 0.25;
    DEFAULT_BLEND
}
1816
/// Serde default for `RetrievalConfig::query_bias_centroid_ttl_secs`: five minutes,
/// expressed in seconds.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
1820
1821impl Default for RetrievalConfig {
1822 fn default() -> Self {
1823 Self {
1824 depth: 0,
1825 search_prompt_template: String::new(),
1826 context_format: ContextFormat::default(),
1827 query_bias_correction: default_query_bias_correction(),
1828 query_bias_profile_weight: default_query_bias_profile_weight(),
1829 query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1830 }
1831 }
1832}
1833
/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
///
/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
/// recall traversal increments the `weight` column of the traversed edges, building
/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
/// runs a background sweep that identifies high-traffic entity clusters and distills
/// them into `graph_rules` entries via an LLM.
///
/// The container-level `#[serde(default)]` means every key is optional: omitted keys
/// take the value from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct HebbianConfig {
    /// Master switch. When `false`, no `weight` updates are written to the database
    /// and the consolidation loop does not start. Default: `false`.
    pub enabled: bool,
    /// Weight increment per co-activation (HL-F2, #3344).
    ///
    /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
    /// startup when `enabled = true`. Default: `0.1`.
    pub hebbian_lr: f32,
    /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
    ///
    /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
    /// Default: `3600` (one hour).
    pub consolidation_interval_secs: u64,
    /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
    /// candidate (HL-F3, #3345). Default: `5.0`.
    pub consolidation_threshold: f64,
    /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
    ///
    /// Falls back to the main provider when `None` or unresolvable.
    // The field-level `#[serde(default)]` yields `None` for a missing key, which matches
    // the value the `Default` impl assigns.
    #[serde(default)]
    pub consolidate_provider: Option<ProviderName>,
    /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
    pub max_candidates_per_sweep: usize,
    /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
    ///
    /// An entity is skipped if its `consolidated_at` timestamp is within this window.
    /// Default: `86400` (24 hours).
    pub consolidation_cooldown_secs: u64,
    /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
    /// Default: `30`.
    pub consolidation_prompt_timeout_secs: u64,
    /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
    /// (HL-F4, #3345). Default: `20`.
    pub consolidation_max_neighbors: usize,
    /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
    ///
    /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
    /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
    pub spreading_activation: bool,
    /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
    // NOTE(review): no clamp is applied when this struct is deserialized; presumably the
    // traversal code clamps the value at use — confirm.
    pub spread_depth: u32,
    /// MAGMA edge-type filter for HL-F5 spreading activation.
    ///
    /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
    /// Empty = traverse all edge types. Default: `[]`.
    pub spread_edge_types: Vec<EdgeType>,
    /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
    ///
    /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
    /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
    pub step_budget_ms: u64,
    /// Timeout for the initial query embedding call in HL-F5, in seconds.
    ///
    /// `0` disables the timeout. Default: `5`.
    pub embed_timeout_secs: u64,
}
1900
1901impl Default for HebbianConfig {
1902 fn default() -> Self {
1903 Self {
1904 enabled: false,
1905 hebbian_lr: 0.1,
1906 consolidation_interval_secs: 3600,
1907 consolidation_threshold: 5.0,
1908 consolidate_provider: None,
1909 max_candidates_per_sweep: 10,
1910 consolidation_cooldown_secs: 86_400,
1911 consolidation_prompt_timeout_secs: 30,
1912 consolidation_max_neighbors: 20,
1913 spreading_activation: false,
1914 spread_depth: 2,
1915 spread_edge_types: Vec::new(),
1916 step_budget_ms: 8,
1917 embed_timeout_secs: 5,
1918 }
1919 }
1920}
1921
/// Compression strategy for active context compression (#1161).
///
/// Serialized as an internally tagged enum: the variant name is stored under the
/// `strategy` key in `snake_case` (`reactive`, `proactive`, `autonomous`, `focus`), and
/// the fields of a struct variant appear as sibling keys of that tag.
///
/// # Example (TOML)
///
/// ```toml
/// strategy = "proactive"
/// threshold_tokens = 80000
/// max_summary_tokens = 4000
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(tag = "strategy", rename_all = "snake_case")]
#[non_exhaustive]
pub enum CompressionStrategy {
    /// Compress only when reactive compaction fires (current behavior).
    ///
    /// This is the variant returned by `Default`.
    #[default]
    Reactive,
    /// Compress proactively when context exceeds `threshold_tokens`.
    Proactive {
        /// Token count that triggers proactive compression.
        threshold_tokens: usize,
        /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
        max_summary_tokens: usize,
    },
    /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
    /// safety net. The `compress_context` tool is also available in all other strategies.
    Autonomous,
    /// Knowledge-block-aware compression strategy (#2510).
    ///
    /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
    /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
    Focus,
}
1946
/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
///
/// When `context-compression` feature is enabled, this replaces the default oldest-first
/// heuristic with scored eviction.
///
/// `Serialize` is derived and writes variant names in `snake_case` (`reactive`,
/// `task_aware`, `mig`, `subgoal`, `subgoal_mig`). `Deserialize` is deliberately *not*
/// derived: it is implemented by hand on top of `FromStr` so that the accepted spellings
/// are defined in a single place.
#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum PruningStrategy {
    /// Oldest-first eviction — current default behavior.
    ///
    /// This is the variant returned by `Default`.
    #[default]
    Reactive,
    /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
    /// lowest-first. Requires `context-compression` feature.
    TaskAware,
    /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
    /// Requires `context-compression` feature.
    Mig,
    /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
    /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
    /// Requires `context-compression` feature.
    Subgoal,
    /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
    /// Requires `context-compression` feature.
    SubgoalMig,
}
1972
1973impl PruningStrategy {
1974 /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
1975 #[must_use]
1976 pub fn is_subgoal(self) -> bool {
1977 matches!(self, Self::Subgoal | Self::SubgoalMig)
1978 }
1979}
1980
1981// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
1982// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
1983impl<'de> serde::Deserialize<'de> for PruningStrategy {
1984 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1985 let s = String::deserialize(deserializer)?;
1986 s.parse().map_err(serde::de::Error::custom)
1987 }
1988}
1989
1990impl std::str::FromStr for PruningStrategy {
1991 type Err = String;
1992
1993 fn from_str(s: &str) -> Result<Self, Self::Err> {
1994 match s {
1995 "reactive" => Ok(Self::Reactive),
1996 "task_aware" | "task-aware" => Ok(Self::TaskAware),
1997 "mig" => Ok(Self::Mig),
1998 // task_aware_mig was removed (dead code — was routed to scored path only).
1999 // Fall back to Reactive so existing TOML configs do not hard-error on startup.
2000 "task_aware_mig" | "task-aware-mig" => {
2001 tracing::warn!(
2002 "pruning strategy `task_aware_mig` has been removed; \
2003 falling back to `reactive`. Use `task_aware` or `mig` instead."
2004 );
2005 Ok(Self::Reactive)
2006 }
2007 "subgoal" => Ok(Self::Subgoal),
2008 "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
2009 other => Err(format!(
2010 "unknown pruning strategy `{other}`, expected \
2011 reactive|task_aware|mig|subgoal|subgoal_mig"
2012 )),
2013 }
2014 }
2015}
2016
/// Serde default for `CompressionConfig::high_density_budget`: 70% of the token budget.
fn default_high_density_budget() -> f32 {
    const HIGH_DENSITY_SHARE: f32 = 0.7;
    HIGH_DENSITY_SHARE
}
2020
/// Serde default for `CompressionConfig::low_density_budget`: 30% of the token budget.
fn default_low_density_budget() -> f32 {
    const LOW_DENSITY_SHARE: f32 = 0.3;
    LOW_DENSITY_SHARE
}
2024
/// Configuration for the `SleepGate` forgetting sweep (#2397).
///
/// When `enabled = true`, a background loop periodically decays importance scores
/// (synaptic downscaling), restores recently-accessed memories (selective replay),
/// and prunes memories below `forgetting_floor` (targeted forgetting).
///
/// The container-level `#[serde(default)]` means every key is optional: omitted keys
/// take the value from this type's `Default` impl.
// NOTE(review): the ranges quoted on `decay_rate` and `forgetting_floor` are not enforced
// by this struct (no validating deserializer is attached) — presumably they are checked
// or tolerated by the sweep itself; confirm.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ForgettingConfig {
    /// Enable the `SleepGate` forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
    pub decay_rate: f32,
    /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
    pub forgetting_floor: f32,
    /// How often the forgetting sweep runs, in seconds. Default: `7200`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to process per sweep. Default: `500`.
    pub sweep_batch_size: usize,
    /// Hours: messages accessed within this window get replay protection. Default: `24`.
    pub replay_window_hours: u32,
    /// Messages with `access_count` >= this get replay protection. Default: `3`.
    pub replay_min_access_count: u32,
    /// Hours: never prune messages accessed within this window. Default: `24`.
    pub protect_recent_hours: u32,
    /// Never prune messages with `access_count` >= this. Default: `3`.
    pub protect_min_access_count: u32,
}
2052
2053impl Default for ForgettingConfig {
2054 fn default() -> Self {
2055 Self {
2056 enabled: false,
2057 decay_rate: 0.1,
2058 forgetting_floor: 0.05,
2059 sweep_interval_secs: 7200,
2060 sweep_batch_size: 500,
2061 replay_window_hours: 24,
2062 replay_min_access_count: 3,
2063 protect_recent_hours: 24,
2064 protect_min_access_count: 3,
2065 }
2066 }
2067}
2068
2069/// Configuration for active context compression (#1161).
2070#[derive(Debug, Clone, Default, Deserialize, Serialize)]
2071#[serde(default)]
2072pub struct CompressionConfig {
2073 /// Compression strategy.
2074 #[serde(flatten)]
2075 pub strategy: CompressionStrategy,
2076 /// Tool-output pruning strategy (requires `context-compression` feature).
2077 pub pruning_strategy: PruningStrategy,
2078 /// Model to use for compression summaries.
2079 ///
2080 /// Currently unused — the primary summary provider is used regardless of this value.
2081 /// Reserved for future per-compression model selection. Setting this field has no effect.
2082 pub model: String,
2083 /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
2084 /// Falls back to the primary provider when empty. Default: `""`.
2085 pub compress_provider: ProviderName,
2086 /// Compaction probe: validates summary quality before committing it (#1609).
2087 #[serde(default)]
2088 pub probe: CompactionProbeConfig,
2089 /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
2090 ///
2091 /// When enabled, tool output bodies in the compaction range are saved to
2092 /// `tool_overflow` with `archive_type = 'archive'` before summarization.
2093 /// The LLM summarizes placeholder messages; archived content is appended as
2094 /// a postfix after summarization so references survive compaction.
2095 /// Default: `false`.
2096 #[serde(default)]
2097 pub archive_tool_outputs: bool,
2098 /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
2099 /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
2100 /// Falls back to the primary provider when empty. Default: `""`.
2101 pub focus_scorer_provider: ProviderName,
2102 /// Token-budget fraction for high-density content in density-aware compression (#2481).
2103 /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
2104 #[serde(default = "default_high_density_budget")]
2105 pub high_density_budget: f32,
2106 /// Token-budget fraction for low-density content in density-aware compression (#2481).
2107 /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
2108 #[serde(default = "default_low_density_budget")]
2109 pub low_density_budget: f32,
2110 /// Typed-page classification and batch-level assertion checking (#3630).
2111 #[serde(default)]
2112 pub typed_pages: TypedPagesConfig,
2113 /// Acon tool-result compression settings (#4021).
2114 ///
2115 /// Controls per-result and batch-level token budgets for tool outputs before they enter
2116 /// message history. Distinct from `[tools.compression]` (TACO), which applies regex-based
2117 /// rule compression at the executor level.
2118 #[serde(default)]
2119 pub acon: AconConfig,
2120 /// ARC agent-initiated compaction settings (#4020).
2121 ///
2122 /// When `allow_agent_compaction = true`, the agent can call the `request_compaction`
2123 /// internal tool to trigger context summarization on demand.
2124 #[serde(default)]
2125 pub arc: ArcCompactionConfig,
2126}
2127
/// Serde default for `AconConfig::passthrough_threshold`, in tokens.
fn default_acon_passthrough_threshold() -> usize {
    const PASSTHROUGH_TOKENS: usize = 2_000;
    PASSTHROUGH_TOKENS
}
2131
/// Serde default for `AconConfig::summarize_threshold`, in tokens.
fn default_acon_summarize_threshold() -> usize {
    const SUMMARIZE_TOKENS: usize = 4_000;
    SUMMARIZE_TOKENS
}
2135
/// Serde default for `AconConfig::total_budget`, in tokens.
fn default_acon_total_budget() -> usize {
    const TOTAL_BUDGET_TOKENS: usize = 8_000;
    TOTAL_BUDGET_TOKENS
}
2139
2140fn validate_acon_passthrough_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2141where
2142 D: serde::Deserializer<'de>,
2143{
2144 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2145 if value == 0 {
2146 return Err(serde::de::Error::custom(
2147 "acon.passthrough_threshold must be >= 1",
2148 ));
2149 }
2150 Ok(value)
2151}
2152
2153fn validate_acon_summarize_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2154where
2155 D: serde::Deserializer<'de>,
2156{
2157 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2158 if value == 0 {
2159 return Err(serde::de::Error::custom(
2160 "acon.summarize_threshold must be >= 1",
2161 ));
2162 }
2163 Ok(value)
2164}
2165
2166fn validate_acon_total_budget<'de, D>(deserializer: D) -> Result<usize, D::Error>
2167where
2168 D: serde::Deserializer<'de>,
2169{
2170 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2171 if value == 0 {
2172 return Err(serde::de::Error::custom("acon.total_budget must be >= 1"));
2173 }
2174 Ok(value)
2175}
2176
/// Token budget configuration for Acon tool-result compression (#4021).
///
/// Controls per-result and batch-level token budgets for tool outputs injected into context.
/// Distinct from `[tools.compression]` (TACO), which applies regex-based rule compression
/// at the executor level.
///
/// # Invariants
///
/// The following ordering must hold: `passthrough_threshold < summarize_threshold <= total_budget`.
/// A config where `passthrough_threshold >= summarize_threshold` would make the summarization path
/// unreachable, silently producing incorrect compression behavior.
///
/// Deserialization alone only rejects a zero in any of the three token fields (each has a
/// per-field `deserialize_with` hook). The cross-field ordering is checked separately by
/// `AconConfig::validate`, which must be called explicitly after loading.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.acon]
/// enabled = true
/// passthrough_threshold = 2000
/// summarize_threshold = 4000
/// total_budget = 8000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AconConfig {
    /// Enable Acon tool-result compression. Default: `true`.
    pub enabled: bool,
    /// Token count below which results pass through unchanged.
    /// Also the truncation target: results above this get char-truncated to this size.
    /// Must be < `summarize_threshold`. Default: `2000`.
    // The zero check runs only when the key is present; a missing key uses the default
    // function and bypasses the hook.
    #[serde(default = "default_acon_passthrough_threshold")]
    #[serde(deserialize_with = "validate_acon_passthrough_threshold")]
    pub passthrough_threshold: usize,
    /// Token count above which LLM summarization should be attempted before truncation.
    /// Must be > `passthrough_threshold` and <= `total_budget`. Default: `4000`.
    #[serde(default = "default_acon_summarize_threshold")]
    #[serde(deserialize_with = "validate_acon_summarize_threshold")]
    pub summarize_threshold: usize,
    /// Maximum total tokens for all tool results in a single turn.
    /// Must be >= `summarize_threshold`. Default: `8000`.
    #[serde(default = "default_acon_total_budget")]
    #[serde(deserialize_with = "validate_acon_total_budget")]
    pub total_budget: usize,
    /// Provider name from `[[llm.providers]]` for LLM summarization of large results.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub summarize_provider: ProviderName,
}
2224
2225impl AconConfig {
2226 /// Validate threshold ordering invariants after deserialization.
2227 ///
2228 /// Returns an error string if `passthrough_threshold >= summarize_threshold` or
2229 /// `summarize_threshold > total_budget`.
2230 ///
2231 /// # Errors
2232 ///
2233 /// Returns a descriptive error string when any threshold invariant is violated.
2234 pub fn validate(&self) -> Result<(), String> {
2235 if self.passthrough_threshold >= self.summarize_threshold {
2236 return Err(format!(
2237 "acon: passthrough_threshold ({}) must be < summarize_threshold ({})",
2238 self.passthrough_threshold, self.summarize_threshold
2239 ));
2240 }
2241 if self.summarize_threshold > self.total_budget {
2242 return Err(format!(
2243 "acon: summarize_threshold ({}) must be <= total_budget ({})",
2244 self.summarize_threshold, self.total_budget
2245 ));
2246 }
2247 Ok(())
2248 }
2249}
2250
2251impl Default for AconConfig {
2252 fn default() -> Self {
2253 Self {
2254 enabled: true,
2255 passthrough_threshold: default_acon_passthrough_threshold(),
2256 summarize_threshold: default_acon_summarize_threshold(),
2257 total_budget: default_acon_total_budget(),
2258 summarize_provider: ProviderName::default(),
2259 }
2260 }
2261}
2262
/// Configuration for ARC agent-initiated compaction (#4020).
///
/// When `allow_agent_compaction = true`, the `request_compaction` internal tool is
/// registered and the agent can call it to trigger context summarization on demand.
/// Rate limiting is handled by `CompactionState` — only one compaction fires per turn.
///
/// The container-level `#[serde(default)]` makes the key optional; when it is omitted the
/// value comes from this type's `Default` impl.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.arc]
/// allow_agent_compaction = true
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ArcCompactionConfig {
    /// Allow the agent to request compaction via the `request_compaction` tool call.
    /// Default: `true`.
    pub allow_agent_compaction: bool,
}
2282
2283impl Default for ArcCompactionConfig {
2284 fn default() -> Self {
2285 Self {
2286 allow_agent_compaction: true,
2287 }
2288 }
2289}
2290
/// Configuration for typed-page compaction invariants (#3630).
///
/// Controls classification, batch-level assertion checking, and audit logging.
/// All behavior is disabled by default; set `enabled = true` to activate.
///
/// The container-level `#[serde(default)]` makes every key optional; omitted keys take
/// the value from this type's `Default` impl.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.compression.typed_pages]
/// enabled = true
/// enforcement = "active"
/// audit_path = ""
/// audit_channel_capacity = 256
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(default)]
pub struct TypedPagesConfig {
    /// Enable typed-page classification and batch-level assertion checking.
    /// Default: `false`.
    pub enabled: bool,
    /// Enforcement mode:
    ///
    /// - `observe`: classify and emit audit records only; no behavioral change.
    /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
    ///
    /// Default: `"observe"`.
    pub enforcement: TypedPagesEnforcement,
    /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
    /// Default: `""`.
    ///
    /// # Security
    ///
    /// This field is **operator-only trusted input** read from the agent's configuration file.
    /// Write access to the config file implies file-system write access, so no additional
    /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
    /// configuration sources.
    pub audit_path: String,
    /// Bounded channel capacity for the async audit writer. Default: `256`.
    // NOTE(review): a value of `0` is not rejected by this struct; whether the audit
    // writer tolerates a zero-capacity channel should be confirmed against its constructor.
    pub audit_channel_capacity: usize,
}
2331
2332impl Default for TypedPagesConfig {
2333 fn default() -> Self {
2334 Self {
2335 enabled: false,
2336 enforcement: TypedPagesEnforcement::Observe,
2337 audit_path: String::new(),
2338 audit_channel_capacity: 256,
2339 }
2340 }
2341}
2342
/// Enforcement mode for typed-page compaction (#3630).
///
/// Serialized in `snake_case`, i.e. as `"observe"` or `"active"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum TypedPagesEnforcement {
    /// Classify and audit only. Zero behavioral change relative to the untyped path.
    ///
    /// This is the variant returned by `Default`.
    #[default]
    Observe,
    /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
    Active,
}
2354
/// Serde default for `SidequestConfig::interval_turns`: run eviction every fourth user turn.
fn default_sidequest_interval_turns() -> u32 {
    const EVERY_N_TURNS: u32 = 4;
    EVERY_N_TURNS
}
2358
/// Serde default for `SidequestConfig::max_eviction_ratio`: at most half of the tool
/// outputs per pass.
fn default_sidequest_max_eviction_ratio() -> f32 {
    const MAX_EVICTED_FRACTION: f32 = 0.5;
    MAX_EVICTED_FRACTION
}
2362
/// Serde default for `SidequestConfig::max_cursors`: up to thirty cursor entries in the
/// eviction prompt.
fn default_sidequest_max_cursors() -> usize {
    const MAX_PROMPT_CURSORS: usize = 30;
    MAX_PROMPT_CURSORS
}
2366
/// Serde default for `SidequestConfig::min_cursor_tokens`: outputs under one hundred
/// tokens are not eviction candidates.
fn default_sidequest_min_cursor_tokens() -> usize {
    const MIN_CANDIDATE_TOKENS: usize = 100;
    MIN_CANDIDATE_TOKENS
}
2370
/// Configuration for LLM-driven side-thread tool output eviction (#1885).
///
/// Every key is optional. `enabled` falls back to this type's `Default` impl through the
/// container-level `#[serde(default)]`; the remaining fields name their own default
/// functions, which are the same functions the `Default` impl calls.
// NOTE(review): none of the numeric fields is range-checked here (e.g. `interval_turns = 0`
// or `max_eviction_ratio` outside `[0.0, 1.0]`); presumably the eviction loop guards
// against such values — confirm.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SidequestConfig {
    /// Enable `SideQuest` eviction. Default: `false`.
    pub enabled: bool,
    /// Run eviction every N user turns. Default: `4`.
    #[serde(default = "default_sidequest_interval_turns")]
    pub interval_turns: u32,
    /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
    #[serde(default = "default_sidequest_max_eviction_ratio")]
    pub max_eviction_ratio: f32,
    /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
    #[serde(default = "default_sidequest_max_cursors")]
    pub max_cursors: usize,
    /// Exclude tool outputs smaller than this token count from eviction candidates.
    /// Default: `100`.
    #[serde(default = "default_sidequest_min_cursor_tokens")]
    pub min_cursor_tokens: usize,
}
2391
2392impl Default for SidequestConfig {
2393 fn default() -> Self {
2394 Self {
2395 enabled: false,
2396 interval_turns: default_sidequest_interval_turns(),
2397 max_eviction_ratio: default_sidequest_max_eviction_ratio(),
2398 max_cursors: default_sidequest_max_cursors(),
2399 min_cursor_tokens: default_sidequest_min_cursor_tokens(),
2400 }
2401 }
2402}
2403
/// Graph retrieval strategy for `[memory.graph]`.
///
/// Selects the algorithm used to traverse the knowledge graph during recall.
/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
///
/// Variant names are serialized in `snake_case`: `synapse`, `bfs`, `astar`,
/// `water_circles`, `beam_search`, `hybrid`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum GraphRetrievalStrategy {
    /// SYNAPSE spreading activation (default, existing behavior).
    #[default]
    Synapse,
    /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
    Bfs,
    /// A* shortest-path traversal via petgraph.
    // Explicit rename: the container-wide `snake_case` rule would otherwise spell this
    // variant `a_star`.
    #[serde(rename = "astar")]
    AStar,
    /// Concentric BFS expanding outward from seed nodes.
    WaterCircles,
    /// Beam search: keep top-K candidates per hop.
    BeamSearch,
    /// Dynamic: LLM classifier selects strategy per query.
    Hybrid,
}
2427
/// Serde default for `BeamSearchConfig::beam_width`: keep the ten best candidates per hop.
fn default_beam_width() -> usize {
    const CANDIDATES_PER_HOP: usize = 10;
    CANDIDATES_PER_HOP
}
2431
/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
///
/// Controls the width of the beam during graph traversal: how many top candidates
/// are retained at each hop.
///
/// There is no container-level `#[serde(default)]`; the single field carries its own
/// default function, so an empty table still deserializes successfully.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BeamSearchConfig {
    /// Number of top candidates kept per hop. Default: `10`.
    // NOTE(review): `0` is not rejected here; presumably the traversal treats a zero-width
    // beam sensibly — confirm.
    #[serde(default = "default_beam_width")]
    pub beam_width: usize,
}
2442
2443impl Default for BeamSearchConfig {
2444 fn default() -> Self {
2445 Self {
2446 beam_width: default_beam_width(),
2447 }
2448 }
2449}
2450
/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
///
/// Controls ring-by-ring concentric BFS traversal from seed nodes.
///
/// `Default` is derived, so the default instance has `ring_limit = 0` — the same value
/// serde assigns when the key is missing.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct WaterCirclesConfig {
    /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
    #[serde(default)]
    pub ring_limit: usize,
}
2460
/// Serde default for `ExperienceConfig::evolution_sweep_interval`: one sweep every fifty turns.
fn default_evolution_sweep_interval() -> usize {
    const TURNS_BETWEEN_SWEEPS: usize = 50;
    TURNS_BETWEEN_SWEEPS
}
2464
/// Serde default for `ExperienceConfig::confidence_prune_threshold`.
fn default_confidence_prune_threshold() -> f32 {
    const PRUNE_BELOW_CONFIDENCE: f32 = 0.1;
    PRUNE_BELOW_CONFIDENCE
}
2468
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
///
/// There is no container-level `#[serde(default)]`; each field declares its own default
/// instead, so an empty table deserializes to the same values as `Default::default()`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    // NOTE(review): `0` is accepted by this struct; how the sweep scheduler interprets a
    // zero interval should be confirmed against its implementation.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
2487
2488impl Default for ExperienceConfig {
2489 fn default() -> Self {
2490 Self {
2491 enabled: false,
2492 evolution_sweep_enabled: false,
2493 confidence_prune_threshold: default_confidence_prune_threshold(),
2494 evolution_sweep_interval: default_evolution_sweep_interval(),
2495 }
2496 }
2497}
2498
2499/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
2500///
2501/// # Security
2502///
2503/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
2504/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
2505/// when processing conversations that may contain personal, medical, or sensitive data until
2506/// a redaction pass is implemented on the write path.
2507#[derive(Debug, Clone, Deserialize, Serialize)]
2508#[serde(default)]
2509pub struct GraphConfig {
2510 pub enabled: bool,
2511 pub extract_model: String,
2512 #[serde(default = "default_graph_max_entities_per_message")]
2513 pub max_entities_per_message: usize,
2514 #[serde(default = "default_graph_max_edges_per_message")]
2515 pub max_edges_per_message: usize,
2516 #[serde(default = "default_graph_community_refresh_interval")]
2517 pub community_refresh_interval: usize,
2518 #[serde(default = "default_graph_entity_similarity_threshold")]
2519 pub entity_similarity_threshold: f32,
2520 #[serde(default = "default_graph_extraction_timeout_secs")]
2521 pub extraction_timeout_secs: u64,
2522 #[serde(default)]
2523 pub use_embedding_resolution: bool,
2524 #[serde(default = "default_graph_entity_ambiguous_threshold")]
2525 pub entity_ambiguous_threshold: f32,
2526 #[serde(default = "default_graph_max_hops")]
2527 pub max_hops: u32,
2528 #[serde(default = "default_graph_recall_limit")]
2529 pub recall_limit: usize,
2530 /// Days to retain expired (superseded) edges before deletion. Default: 90.
2531 #[serde(default = "default_graph_expired_edge_retention_days")]
2532 pub expired_edge_retention_days: u32,
2533 /// Maximum entities to retain in the graph. 0 = unlimited.
2534 #[serde(default)]
2535 pub max_entities: usize,
2536 /// Maximum prompt size in bytes for community summary generation. Default: 8192.
2537 #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
2538 pub community_summary_max_prompt_bytes: usize,
2539 /// Maximum concurrent LLM calls during community summarization. Default: 4.
2540 #[serde(default = "default_graph_community_summary_concurrency")]
2541 pub community_summary_concurrency: usize,
2542 /// Number of edges fetched per chunk during community detection. Default: 10000.
2543 /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
2544 #[serde(default = "default_lpa_edge_chunk_size")]
2545 pub lpa_edge_chunk_size: usize,
2546 /// Temporal recency decay rate for graph recall scoring (units: 1/day).
2547 ///
2548 /// When > 0, recent edges receive a small additive score boost over older edges.
2549 /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
2550 /// composite score. Default 0.0 preserves existing scoring behavior exactly.
2551 #[serde(
2552 default = "default_graph_temporal_decay_rate",
2553 deserialize_with = "validate_temporal_decay_rate"
2554 )]
2555 pub temporal_decay_rate: f64,
2556 /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
2557 ///
2558 /// Caps the result set returned for a given source entity + predicate pair. Prevents
2559 /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
2560 /// or API endpoints.
2561 #[serde(default = "default_graph_edge_history_limit")]
2562 pub edge_history_limit: usize,
2563 /// A-MEM dynamic note linking configuration.
2564 ///
2565 /// When `note_linking.enabled = true`, entities extracted from each message are linked to
2566 /// semantically similar entities via `similar_to` edges. Requires an embedding store
2567 /// (`qdrant` or `sqlite` vector backend) to be configured.
2568 #[serde(default)]
2569 pub note_linking: NoteLinkingConfig,
2570 /// SYNAPSE spreading activation retrieval configuration.
2571 ///
2572 /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
2573 /// with lateral inhibition and temporal decay instead of BFS.
2574 #[serde(default)]
2575 pub spreading_activation: SpreadingActivationConfig,
2576 /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
2577 ///
2578 /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
2579 /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
2580 #[serde(default)]
2581 pub retrieval_strategy: GraphRetrievalStrategy,
2582 /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
2583 ///
2584 /// When non-empty, graph extraction (and downstream note linking and community
2585 /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
2586 /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
2587 /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
2588 /// which causes systematic quality gate rejections. A named provider built via
2589 /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
2590 /// has no quality gate attached.
2591 ///
2592 /// Falls back to the primary provider when empty. Default: `""` (use primary).
2593 #[serde(default)]
2594 pub extract_provider: ProviderName,
2595 /// Named LLM provider for hybrid strategy classification.
2596 /// Falls back to the default provider when `None`.
2597 #[serde(default)]
2598 pub strategy_classifier_provider: Option<ProviderName>,
2599 /// Beam search configuration.
2600 #[serde(default)]
2601 pub beam_search: BeamSearchConfig,
2602 /// `WaterCircles` BFS configuration.
2603 #[serde(default)]
2604 pub watercircles: WaterCirclesConfig,
2605 /// Experience memory configuration.
2606 #[serde(default)]
2607 pub experience: ExperienceConfig,
2608 /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
2609 /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
2610 #[serde(
2611 default = "default_link_weight_decay_lambda",
2612 deserialize_with = "validate_link_weight_decay_lambda"
2613 )]
2614 pub link_weight_decay_lambda: f64,
2615 /// Seconds between link weight decay passes. Default: `86400` (24 hours).
2616 #[serde(default = "default_link_weight_decay_interval_secs")]
2617 pub link_weight_decay_interval_secs: u64,
2618 /// Kumiho AGM-inspired belief revision configuration.
2619 ///
2620 /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
2621 /// edges for the same entity pair trigger revision: the old edge is invalidated with a
2622 /// `superseded_by` pointer and the new edge becomes the current belief.
2623 #[serde(default)]
2624 pub belief_revision: BeliefRevisionConfig,
2625 /// D-MEM RPE-based tiered graph extraction routing.
2626 ///
2627 /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
2628 /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
2629 #[serde(default)]
2630 pub rpe: RpeConfig,
2631 /// `SQLite` connection pool size dedicated to graph operations.
2632 ///
2633 /// Graph tables share the same database file as messages/embeddings but use a
2634 /// separate pool to prevent pool starvation when community detection or spreading
2635 /// activation runs concurrently with regular memory operations. Default: `3`.
2636 #[serde(default = "default_graph_pool_size")]
2637 pub pool_size: u32,
2638 /// APEX-MEM append-only write path (#3631).
2639 ///
2640 /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
2641 /// supersession chains instead of the legacy destructive-update path.
2642 #[serde(default)]
2643 pub apex_mem: ApexMemConfig,
2644 /// LLM call timeout per extraction request, in seconds. Default: `30`.
2645 #[serde(default = "default_graph_llm_timeout_secs")]
2646 pub llm_timeout_secs: u64,
2647 /// PRISM query-sensitive edge costing in A* graph recall.
2648 ///
2649 /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
2650 /// between the query embedding and the target entity embedding:
2651 /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
2652 /// Edges toward semantically relevant entities receive lower cost and are therefore
2653 /// preferred by A*, producing query-aligned recall paths.
2654 ///
2655 /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
2656 /// store is unavailable or a target entity has no stored embedding, falls back to the
2657 /// baseline cost `1.0 - confidence`.
2658 ///
2659 /// Default: `false` (preserves existing A* behaviour).
2660 #[serde(default)]
2661 pub query_sensitive_cost: bool,
2662
2663 /// Implicit conflict detection for SYNAPSE recall (spec 004-17, STALE/CUPMem).
2664 ///
2665 /// When enabled, write-time fuzzy predicate matching detects implicit conflicts
2666 /// between graph edges and annotates SYNAPSE recall results accordingly.
2667 #[serde(default)]
2668 pub implicit_conflict: ImplicitConflictConfig,
2669}
2670
2671/// Similarity method for implicit conflict detection.
2672#[derive(
2673 Debug,
2674 Clone,
2675 Copy,
2676 PartialEq,
2677 Eq,
2678 Default,
2679 serde::Serialize,
2680 serde::Deserialize,
2681 schemars::JsonSchema,
2682)]
2683#[serde(rename_all = "snake_case")]
2684#[non_exhaustive]
2685pub enum SimilarityMethod {
2686 /// Normalized Levenshtein edit distance.
2687 #[default]
2688 Levenshtein,
2689 /// Cosine similarity over pre-computed predicate embeddings.
2690 Embedding,
2691 /// Either method triggers detection.
2692 Both,
2693}
2694
2695/// Resolution strategy when an implicit conflict is detected.
2696#[derive(
2697 Debug,
2698 Clone,
2699 Copy,
2700 PartialEq,
2701 Eq,
2702 Default,
2703 serde::Serialize,
2704 serde::Deserialize,
2705 schemars::JsonSchema,
2706)]
2707#[serde(rename_all = "snake_case")]
2708#[non_exhaustive]
2709pub enum ConflictResolutionStrategy {
2710 /// Mark the pair as a candidate but do not supersede either edge.
2711 #[default]
2712 FlagOnly,
2713 /// Supersede the older edge via APEX-MEM `insert_or_supersede`.
2714 Recency,
2715 /// Supersede the lower-confidence edge.
2716 Confidence,
2717 /// Delegate resolution to an LLM provider; fall back to `flag_only` on timeout.
2718 Llm,
2719}
2720
2721/// Configuration for the optional background consolidation daemon (spec 004-17).
2722#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
2723#[serde(default)]
2724pub struct ConsolidationDaemonConfig {
2725 /// Enable the background consolidation daemon.
2726 pub enabled: bool,
2727 /// How often the daemon runs, in seconds. Default: 7200 (2 hours).
2728 #[serde(default = "default_ic_daemon_interval_secs")]
2729 pub interval_seconds: u64,
2730 /// Maximum number of candidates processed per daemon run. Default: 100.
2731 #[serde(default = "default_ic_daemon_batch_size")]
2732 pub batch_size: usize,
2733}
2734
2735impl Default for ConsolidationDaemonConfig {
2736 fn default() -> Self {
2737 Self {
2738 enabled: false,
2739 interval_seconds: default_ic_daemon_interval_secs(),
2740 batch_size: default_ic_daemon_batch_size(),
2741 }
2742 }
2743}
2744
/// Default for `ConsolidationDaemonConfig::interval_seconds`: `7200` seconds (2 hours).
fn default_ic_daemon_interval_secs() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
2748
/// Default for `ConsolidationDaemonConfig::batch_size`: `100` candidates per run.
fn default_ic_daemon_batch_size() -> usize {
    const BATCH_SIZE: usize = 100;
    BATCH_SIZE
}
2752
2753/// Configuration for implicit conflict detection (spec 004-17, STALE/CUPMem).
2754///
2755/// Controls write-time fuzzy predicate matching and SYNAPSE recall annotation.
2756/// All detection is gated behind `enabled = false` by default — no overhead when disabled.
2757///
2758/// TOML path: `[memory.graph.implicit_conflict]`
2759///
2760/// # Examples
2761///
2762/// ```toml
2763/// [memory.graph.implicit_conflict]
2764/// enabled = true
2765/// similarity_method = "levenshtein"
2766/// conflict_similarity_threshold = 0.80
2767/// resolution_strategy = "flag_only"
2768/// candidate_ttl_days = 30
2769/// propagation_depth = 2
2770/// ```
2771#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2772#[serde(default)]
2773pub struct ImplicitConflictConfig {
2774 /// Enable implicit conflict detection. Default: `false`.
2775 pub enabled: bool,
2776 /// Similarity method used to detect candidate pairs.
2777 #[serde(default)]
2778 pub similarity_method: SimilarityMethod,
2779 /// Minimum similarity score to flag a pair as a conflict candidate. Default: 0.80.
2780 #[serde(default = "default_ic_similarity_threshold")]
2781 pub conflict_similarity_threshold: f64,
2782 /// How to resolve detected conflicts. Default: `flag_only`.
2783 #[serde(default)]
2784 pub resolution_strategy: ConflictResolutionStrategy,
2785 /// Provider name (from `[[llm.providers]]`) for LLM-mediated resolution.
2786 #[serde(default)]
2787 pub implicit_conflict_provider: crate::providers::ProviderName,
2788 /// LLM resolution timeout in milliseconds. Default: 800.
2789 #[serde(default = "default_ic_llm_timeout_ms")]
2790 pub conflict_llm_timeout_ms: u64,
2791 /// Days before an unresolved candidate entry expires. Default: 30.
2792 #[serde(default = "default_ic_candidate_ttl_days")]
2793 pub candidate_ttl_days: u32,
2794 /// SYNAPSE propagation depth for surfacing superseding facts. Default: 2.
2795 #[serde(default = "default_ic_propagation_depth")]
2796 pub propagation_depth: u32,
2797 /// Background consolidation daemon configuration.
2798 #[serde(default)]
2799 pub consolidation_daemon: ConsolidationDaemonConfig,
2800}
2801
2802impl Default for ImplicitConflictConfig {
2803 fn default() -> Self {
2804 Self {
2805 enabled: false,
2806 similarity_method: SimilarityMethod::default(),
2807 conflict_similarity_threshold: default_ic_similarity_threshold(),
2808 resolution_strategy: ConflictResolutionStrategy::default(),
2809 implicit_conflict_provider: crate::providers::ProviderName::default(),
2810 conflict_llm_timeout_ms: default_ic_llm_timeout_ms(),
2811 candidate_ttl_days: default_ic_candidate_ttl_days(),
2812 propagation_depth: default_ic_propagation_depth(),
2813 consolidation_daemon: ConsolidationDaemonConfig::default(),
2814 }
2815 }
2816}
2817
/// Default for `ImplicitConflictConfig::conflict_similarity_threshold`: `0.80`.
fn default_ic_similarity_threshold() -> f64 {
    const SIMILARITY_THRESHOLD: f64 = 0.80;
    SIMILARITY_THRESHOLD
}
2821
/// Default for `ImplicitConflictConfig::conflict_llm_timeout_ms`: `800` milliseconds.
fn default_ic_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 800;
    LLM_TIMEOUT_MS
}
2825
/// Default for `ImplicitConflictConfig::candidate_ttl_days`: `30` days.
fn default_ic_candidate_ttl_days() -> u32 {
    const CANDIDATE_TTL_DAYS: u32 = 30;
    CANDIDATE_TTL_DAYS
}
2829
/// Default for `ImplicitConflictConfig::propagation_depth`: `2`.
fn default_ic_propagation_depth() -> u32 {
    const PROPAGATION_DEPTH: u32 = 2;
    PROPAGATION_DEPTH
}
2833
/// Default for `GraphConfig::pool_size`: `3` connections.
fn default_graph_pool_size() -> u32 {
    const GRAPH_POOL_SIZE: u32 = 3;
    GRAPH_POOL_SIZE
}
2837
/// Default for `GraphConfig::llm_timeout_secs`: `30` seconds.
fn default_graph_llm_timeout_secs() -> u64 {
    const GRAPH_LLM_TIMEOUT_SECS: u64 = 30;
    GRAPH_LLM_TIMEOUT_SECS
}
2841
2842/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
2843///
2844/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
2845/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
2846/// the full supersession chain and enables conflict resolution.
2847///
2848/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
2849#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
2850#[serde(default)]
2851pub struct ApexMemConfig {
2852 /// Enable the APEX-MEM append-only write path. Default: `false`.
2853 pub enabled: bool,
2854}
2855
/// Default for `WriteQualityGateConfig::threshold`: `0.55`.
fn default_quality_gate_threshold() -> f32 {
    const REJECTION_THRESHOLD: f32 = 0.55;
    REJECTION_THRESHOLD
}
2859
/// Default for `WriteQualityGateConfig::recent_window`: `32` recent writes.
fn default_quality_gate_recent_window() -> usize {
    const RECENT_WINDOW: usize = 32;
    RECENT_WINDOW
}
2863
/// Default for `WriteQualityGateConfig::contradiction_grace_seconds`: `300` seconds.
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
2867
/// Default for `WriteQualityGateConfig::information_value_weight`: `0.4`.
fn default_quality_gate_information_value_weight() -> f32 {
    const INFORMATION_VALUE_WEIGHT: f32 = 0.4;
    INFORMATION_VALUE_WEIGHT
}
2871
/// Default for `WriteQualityGateConfig::reference_completeness_weight`: `0.3`.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    const REFERENCE_COMPLETENESS_WEIGHT: f32 = 0.3;
    REFERENCE_COMPLETENESS_WEIGHT
}
2875
/// Default for `WriteQualityGateConfig::contradiction_weight`: `0.3`.
fn default_quality_gate_contradiction_weight() -> f32 {
    const CONTRADICTION_WEIGHT: f32 = 0.3;
    CONTRADICTION_WEIGHT
}
2879
/// Default for `WriteQualityGateConfig::rejection_rate_alarm_ratio`: `0.35`.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    const ALARM_RATIO: f32 = 0.35;
    ALARM_RATIO
}
2883
/// Default for `WriteQualityGateConfig::llm_timeout_ms`: `500` milliseconds.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    const LLM_TIMEOUT_MS: u64 = 500;
    LLM_TIMEOUT_MS
}
2887
/// Default for `WriteQualityGateConfig::llm_weight`: `0.5`.
fn default_quality_gate_llm_weight() -> f32 {
    const LLM_BLEND_WEIGHT: f32 = 0.5;
    LLM_BLEND_WEIGHT
}
2891
/// Default for `WriteQualityGateConfig::reference_check_lang_en`: `true`.
fn default_quality_gate_reference_check_lang_en() -> bool {
    const REFERENCE_CHECK_ENABLED: bool = true;
    REFERENCE_CHECK_ENABLED
}
2895
2896/// Write quality gate configuration (`[memory.quality_gate]`).
2897///
2898/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
2899/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
2900/// scoring is opt-in via `quality_gate_provider`.
2901///
2902/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
2903#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2904#[serde(default)]
2905pub struct WriteQualityGateConfig {
2906 /// Enable the write quality gate. Default: `false`.
2907 pub enabled: bool,
2908 /// Combined score threshold below which writes are rejected. Default: `0.55`.
2909 #[serde(default = "default_quality_gate_threshold")]
2910 pub threshold: f32,
2911 /// Number of recent writes compared for information-value scoring. Default: `32`.
2912 #[serde(default = "default_quality_gate_recent_window")]
2913 pub recent_window: usize,
2914 /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
2915 #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
2916 pub contradiction_grace_seconds: u64,
2917 /// Weight of `information_value` sub-score. Default: `0.4`.
2918 #[serde(default = "default_quality_gate_information_value_weight")]
2919 pub information_value_weight: f32,
2920 /// Weight of `reference_completeness` sub-score. Default: `0.3`.
2921 #[serde(default = "default_quality_gate_reference_completeness_weight")]
2922 pub reference_completeness_weight: f32,
2923 /// Weight of `contradiction` sub-score. Default: `0.3`.
2924 #[serde(default = "default_quality_gate_contradiction_weight")]
2925 pub contradiction_weight: f32,
2926 /// Rolling rejection-rate alarm ratio. Default: `0.35`.
2927 #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
2928 pub rejection_rate_alarm_ratio: f32,
2929 /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
2930 #[serde(default)]
2931 pub quality_gate_provider: ProviderName,
2932 /// LLM timeout in milliseconds. Default: `500`.
2933 #[serde(default = "default_quality_gate_llm_timeout_ms")]
2934 pub llm_timeout_ms: u64,
2935 /// LLM blend weight into final score. Default: `0.5`.
2936 #[serde(default = "default_quality_gate_llm_weight")]
2937 pub llm_weight: f32,
2938 /// Enable pronoun/deictic reference checks (English only). Default: `true`.
2939 #[serde(default = "default_quality_gate_reference_check_lang_en")]
2940 pub reference_check_lang_en: bool,
2941}
2942
2943impl Default for WriteQualityGateConfig {
2944 fn default() -> Self {
2945 Self {
2946 enabled: false,
2947 threshold: default_quality_gate_threshold(),
2948 recent_window: default_quality_gate_recent_window(),
2949 contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
2950 information_value_weight: default_quality_gate_information_value_weight(),
2951 reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
2952 contradiction_weight: default_quality_gate_contradiction_weight(),
2953 rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
2954 quality_gate_provider: ProviderName::default(),
2955 llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
2956 llm_weight: default_quality_gate_llm_weight(),
2957 reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
2958 }
2959 }
2960}
2961
2962impl Default for GraphConfig {
2963 fn default() -> Self {
2964 Self {
2965 enabled: false,
2966 extract_model: String::new(),
2967 max_entities_per_message: default_graph_max_entities_per_message(),
2968 max_edges_per_message: default_graph_max_edges_per_message(),
2969 community_refresh_interval: default_graph_community_refresh_interval(),
2970 entity_similarity_threshold: default_graph_entity_similarity_threshold(),
2971 extraction_timeout_secs: default_graph_extraction_timeout_secs(),
2972 use_embedding_resolution: false,
2973 entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
2974 max_hops: default_graph_max_hops(),
2975 recall_limit: default_graph_recall_limit(),
2976 expired_edge_retention_days: default_graph_expired_edge_retention_days(),
2977 max_entities: 0,
2978 community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
2979 community_summary_concurrency: default_graph_community_summary_concurrency(),
2980 lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
2981 temporal_decay_rate: default_graph_temporal_decay_rate(),
2982 edge_history_limit: default_graph_edge_history_limit(),
2983 note_linking: NoteLinkingConfig::default(),
2984 spreading_activation: SpreadingActivationConfig::default(),
2985 retrieval_strategy: GraphRetrievalStrategy::default(),
2986 extract_provider: ProviderName::default(),
2987 strategy_classifier_provider: None,
2988 beam_search: BeamSearchConfig::default(),
2989 watercircles: WaterCirclesConfig::default(),
2990 experience: ExperienceConfig::default(),
2991 link_weight_decay_lambda: default_link_weight_decay_lambda(),
2992 link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
2993 belief_revision: BeliefRevisionConfig::default(),
2994 rpe: RpeConfig::default(),
2995 pool_size: default_graph_pool_size(),
2996 apex_mem: ApexMemConfig::default(),
2997 llm_timeout_secs: default_graph_llm_timeout_secs(),
2998 query_sensitive_cost: false,
2999 implicit_conflict: ImplicitConflictConfig::default(),
3000 }
3001 }
3002}
3003
/// Default for `ConsolidationConfig::confidence_threshold`: `0.7`.
fn default_consolidation_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.7;
    CONFIDENCE_THRESHOLD
}
3007
/// Default for `ConsolidationConfig::sweep_interval_secs`: `3600` seconds (1 hour).
fn default_consolidation_sweep_interval_secs() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
3011
/// Default for `ConsolidationConfig::sweep_batch_size`: `50` messages per sweep.
fn default_consolidation_sweep_batch_size() -> usize {
    const SWEEP_BATCH_SIZE: usize = 50;
    SWEEP_BATCH_SIZE
}
3015
/// Default for `ConsolidationConfig::similarity_threshold`: `0.85`.
fn default_consolidation_similarity_threshold() -> f32 {
    const SIMILARITY_THRESHOLD: f32 = 0.85;
    SIMILARITY_THRESHOLD
}
3019
3020/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
3021///
3022/// When `enabled = true`, a background loop periodically clusters semantically similar messages
3023/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
3024/// they are marked as consolidated and deprioritized in recall via temporal decay.
3025#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
3026#[serde(default)]
3027pub struct ConsolidationConfig {
3028 /// Enable the consolidation background loop. Default: `false`.
3029 pub enabled: bool,
3030 /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
3031 /// Falls back to the primary provider when empty. Default: `""`.
3032 #[serde(default)]
3033 pub consolidation_provider: ProviderName,
3034 /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
3035 #[serde(default = "default_consolidation_confidence_threshold")]
3036 pub confidence_threshold: f32,
3037 /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
3038 #[serde(default = "default_consolidation_sweep_interval_secs")]
3039 pub sweep_interval_secs: u64,
3040 /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
3041 #[serde(default = "default_consolidation_sweep_batch_size")]
3042 pub sweep_batch_size: usize,
3043 /// Minimum cosine similarity for two messages to be considered consolidation candidates.
3044 /// Default: `0.85`.
3045 #[serde(default = "default_consolidation_similarity_threshold")]
3046 pub similarity_threshold: f32,
3047 /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
3048 #[serde(default = "default_consolidation_llm_timeout_secs")]
3049 pub llm_timeout_secs: u64,
3050 /// Per-call timeout for every `embed()` invocation in the consolidation sweep, in seconds.
3051 /// Default: `5`.
3052 #[serde(default = "default_embed_timeout_secs")]
3053 pub embed_timeout_secs: u64,
3054}
3055
3056impl Default for ConsolidationConfig {
3057 fn default() -> Self {
3058 Self {
3059 enabled: false,
3060 consolidation_provider: ProviderName::default(),
3061 confidence_threshold: default_consolidation_confidence_threshold(),
3062 sweep_interval_secs: default_consolidation_sweep_interval_secs(),
3063 sweep_batch_size: default_consolidation_sweep_batch_size(),
3064 similarity_threshold: default_consolidation_similarity_threshold(),
3065 llm_timeout_secs: default_consolidation_llm_timeout_secs(),
3066 embed_timeout_secs: default_embed_timeout_secs(),
3067 }
3068 }
3069}
3070
/// Default for `ConsolidationConfig::llm_timeout_secs`: `30` seconds.
fn default_consolidation_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
3074
/// Default for `GraphConfig::link_weight_decay_lambda`: `0.95`.
fn default_link_weight_decay_lambda() -> f64 {
    const DECAY_LAMBDA: f64 = 0.95;
    DECAY_LAMBDA
}
3078
/// Default for `GraphConfig::link_weight_decay_interval_secs`: `86400` seconds (24 hours).
fn default_link_weight_decay_interval_secs() -> u64 {
    const ONE_DAY_SECS: u64 = 24 * 60 * 60;
    ONE_DAY_SECS
}
3082
3083fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
3084where
3085 D: serde::Deserializer<'de>,
3086{
3087 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
3088 if value.is_nan() || value.is_infinite() {
3089 return Err(serde::de::Error::custom(
3090 "link_weight_decay_lambda must be a finite number",
3091 ));
3092 }
3093 if !(value > 0.0 && value <= 1.0) {
3094 return Err(serde::de::Error::custom(
3095 "link_weight_decay_lambda must be in (0.0, 1.0]",
3096 ));
3097 }
3098 Ok(value)
3099}
3100
3101fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
3102where
3103 D: serde::Deserializer<'de>,
3104{
3105 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3106 if value.is_nan() || value.is_infinite() {
3107 return Err(serde::de::Error::custom(
3108 "threshold must be a finite number",
3109 ));
3110 }
3111 if !(0.0..=1.0).contains(&value) {
3112 return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
3113 }
3114 Ok(value)
3115}
3116
3117fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
3118where
3119 D: serde::Deserializer<'de>,
3120{
3121 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3122 if value.is_nan() || value.is_infinite() {
3123 return Err(serde::de::Error::custom(
3124 "fast_path_margin must be a finite number",
3125 ));
3126 }
3127 if !(0.0..=1.0).contains(&value) {
3128 return Err(serde::de::Error::custom(
3129 "fast_path_margin must be in [0.0, 1.0]",
3130 ));
3131 }
3132 Ok(value)
3133}
3134
/// Default for `AdmissionConfig::threshold`: `0.40`.
fn default_admission_threshold() -> f32 {
    const ADMISSION_THRESHOLD: f32 = 0.40;
    ADMISSION_THRESHOLD
}
3138
/// Default for `AdmissionConfig::fast_path_margin`: `0.15`.
fn default_admission_fast_path_margin() -> f32 {
    const FAST_PATH_MARGIN: f32 = 0.15;
    FAST_PATH_MARGIN
}
3142
/// Default for `AdmissionConfig::rl_min_samples`: `500` training samples.
fn default_rl_min_samples() -> u32 {
    const MIN_TRAINING_SAMPLES: u32 = 500;
    MIN_TRAINING_SAMPLES
}
3146
/// Default for `AdmissionConfig::rl_retrain_interval_secs`: `3600` seconds (1 hour).
fn default_rl_retrain_interval_secs() -> u64 {
    const ONE_HOUR_SECS: u64 = 60 * 60;
    ONE_HOUR_SECS
}
3150
3151/// Admission decision strategy.
3152///
3153/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
3154/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
3155#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
3156#[serde(rename_all = "snake_case")]
3157#[non_exhaustive]
3158pub enum AdmissionStrategy {
3159 /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
3160 #[default]
3161 Heuristic,
3162 /// Learned model: logistic regression trained on recall feedback.
3163 /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
3164 Rl,
3165}
3166
3167fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
3168where
3169 D: serde::Deserializer<'de>,
3170{
3171 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3172 if value < 0.0 {
3173 return Err(serde::de::Error::custom(
3174 "admission weight must be non-negative (>= 0.0)",
3175 ));
3176 }
3177 Ok(value)
3178}
3179
3180/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
3181///
3182/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
3183/// All values must be non-negative.
3184#[derive(Debug, Clone, Deserialize, Serialize)]
3185#[serde(default)]
3186pub struct AdmissionWeights {
3187 /// LLM-estimated future reuse probability. Default: `0.30`.
3188 #[serde(deserialize_with = "validate_admission_weight")]
3189 pub future_utility: f32,
3190 /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
3191 #[serde(deserialize_with = "validate_admission_weight")]
3192 pub factual_confidence: f32,
3193 /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
3194 #[serde(deserialize_with = "validate_admission_weight")]
3195 pub semantic_novelty: f32,
3196 /// Temporal recency: always 1.0 at write time. Default: `0.10`.
3197 #[serde(deserialize_with = "validate_admission_weight")]
3198 pub temporal_recency: f32,
3199 /// Content type prior based on role. Default: `0.15`.
3200 #[serde(deserialize_with = "validate_admission_weight")]
3201 pub content_type_prior: f32,
3202 /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
3203 /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
3204 /// Normalized automatically at runtime. Default: `0.0`.
3205 #[serde(deserialize_with = "validate_admission_weight")]
3206 pub goal_utility: f32,
3207}
3208
3209impl Default for AdmissionWeights {
3210 fn default() -> Self {
3211 Self {
3212 future_utility: 0.30,
3213 factual_confidence: 0.15,
3214 semantic_novelty: 0.30,
3215 temporal_recency: 0.10,
3216 content_type_prior: 0.15,
3217 goal_utility: 0.0,
3218 }
3219 }
3220}
3221
3222impl AdmissionWeights {
3223 /// Return weights normalized so they sum to 1.0.
3224 ///
3225 /// All weights are non-negative; the sum is always > 0 when defaults are used.
3226 #[must_use]
3227 pub fn normalized(&self) -> Self {
3228 let sum = self.future_utility
3229 + self.factual_confidence
3230 + self.semantic_novelty
3231 + self.temporal_recency
3232 + self.content_type_prior
3233 + self.goal_utility;
3234 if sum <= f32::EPSILON {
3235 return Self::default();
3236 }
3237 Self {
3238 future_utility: self.future_utility / sum,
3239 factual_confidence: self.factual_confidence / sum,
3240 semantic_novelty: self.semantic_novelty / sum,
3241 temporal_recency: self.temporal_recency / sum,
3242 content_type_prior: self.content_type_prior / sum,
3243 goal_utility: self.goal_utility / sum,
3244 }
3245 }
3246}
3247
3248/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
3249///
3250/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
3251/// Messages below the composite admission threshold are rejected and not persisted.
3252#[derive(Debug, Clone, Deserialize, Serialize)]
3253#[serde(default)]
3254pub struct AdmissionConfig {
3255 /// Enable A-MAC admission control. Default: `false`.
3256 pub enabled: bool,
3257 /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
3258 /// Default: `0.40`.
3259 #[serde(deserialize_with = "validate_admission_threshold")]
3260 pub threshold: f32,
3261 /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
3262 /// When heuristic score >= threshold + margin, LLM call is skipped. Default: `0.15`.
3263 #[serde(deserialize_with = "validate_admission_fast_path_margin")]
3264 pub fast_path_margin: f32,
3265 /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
3266 /// Falls back to the primary provider when empty. Default: `""`.
3267 pub admission_provider: ProviderName,
3268 /// Per-factor weights. Normalized at runtime. Default: `{0.30, 0.15, 0.30, 0.10, 0.15}`.
3269 pub weights: AdmissionWeights,
3270 /// Admission decision strategy. Default: `heuristic`.
3271 #[serde(default)]
3272 pub admission_strategy: AdmissionStrategy,
3273 /// Minimum training samples before the RL model is activated.
3274 /// Below this count the system falls back to `Heuristic`. Default: `500`.
3275 #[serde(default = "default_rl_min_samples")]
3276 pub rl_min_samples: u32,
3277 /// Background RL model retraining interval in seconds. Default: `3600`.
3278 #[serde(default = "default_rl_retrain_interval_secs")]
3279 pub rl_retrain_interval_secs: u64,
3280 /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
3281 /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
3282 /// Zero regression when `false`. Default: `false`.
3283 #[serde(default)]
3284 pub goal_conditioned_write: bool,
3285 /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
3286 /// Used only for borderline cases (similarity within 0.1 of threshold).
3287 /// Falls back to the primary provider when empty. Default: `""`.
3288 #[serde(default)]
3289 pub goal_utility_provider: ProviderName,
3290 /// Minimum cosine similarity between goal embedding and candidate memory
3291 /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
3292 #[serde(default = "default_goal_utility_threshold")]
3293 pub goal_utility_threshold: f32,
3294 /// Weight of the `goal_utility` factor in the composite admission score.
3295 /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
3296 #[serde(default = "default_goal_utility_weight")]
3297 pub goal_utility_weight: f32,
3298}
3299
/// Default for `AdmissionConfig::goal_utility_threshold`: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    const GOAL_UTILITY_THRESHOLD: f32 = 0.4;
    GOAL_UTILITY_THRESHOLD
}
3303
/// Default for `AdmissionConfig::goal_utility_weight`: `0.25`.
fn default_goal_utility_weight() -> f32 {
    const GOAL_UTILITY_WEIGHT: f32 = 0.25;
    GOAL_UTILITY_WEIGHT
}
3307
3308impl Default for AdmissionConfig {
3309 fn default() -> Self {
3310 Self {
3311 enabled: false,
3312 threshold: default_admission_threshold(),
3313 fast_path_margin: default_admission_fast_path_margin(),
3314 admission_provider: ProviderName::default(),
3315 weights: AdmissionWeights::default(),
3316 admission_strategy: AdmissionStrategy::default(),
3317 rl_min_samples: default_rl_min_samples(),
3318 rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
3319 goal_conditioned_write: false,
3320 goal_utility_provider: ProviderName::default(),
3321 goal_utility_threshold: default_goal_utility_threshold(),
3322 goal_utility_weight: default_goal_utility_weight(),
3323 }
3324 }
3325}
3326
3327/// Routing strategy for `[memory.store_routing]`.
3328#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
3329#[serde(rename_all = "snake_case")]
3330#[non_exhaustive]
3331pub enum StoreRoutingStrategy {
3332 /// Pure heuristic pattern matching. Zero LLM calls. Default.
3333 #[default]
3334 Heuristic,
3335 /// LLM-based classification via `routing_classifier_provider`.
3336 Llm,
3337 /// Heuristic first; escalates to LLM only when confidence is low.
3338 Hybrid,
3339}
3340
3341/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
3342///
3343/// Controls how each query is classified and routed to the appropriate memory
3344/// backend(s), avoiding unnecessary store queries for simple lookups.
3345#[derive(Debug, Clone, Deserialize, Serialize)]
3346#[serde(default)]
3347pub struct StoreRoutingConfig {
3348 /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
3349 /// directly (existing behavior). Default: `false`.
3350 pub enabled: bool,
3351 /// Routing strategy. Default: `heuristic`.
3352 pub strategy: StoreRoutingStrategy,
3353 /// Provider name from `[[llm.providers]]` for LLM-based classification.
3354 /// Falls back to the primary provider when empty. Default: `""`.
3355 pub routing_classifier_provider: ProviderName,
3356 /// Route to use when the classifier is uncertain (confidence < threshold).
3357 ///
3358 /// Defaults to [`MemoryRoute::Hybrid`].
3359 pub fallback_route: MemoryRoute,
3360 /// Confidence threshold below which `HybridRouter` escalates to LLM.
3361 /// Range: `[0.0, 1.0]`. Default: `0.7`.
3362 pub confidence_threshold: f32,
3363}
3364
3365impl Default for StoreRoutingConfig {
3366 fn default() -> Self {
3367 Self {
3368 enabled: false,
3369 strategy: StoreRoutingStrategy::Heuristic,
3370 routing_classifier_provider: ProviderName::default(),
3371 fallback_route: MemoryRoute::Hybrid,
3372 confidence_threshold: 0.7,
3373 }
3374 }
3375}
3376
3377/// Persona memory layer configuration (#2461).
3378///
3379/// When `enabled = true`, user preferences and domain knowledge are extracted from
3380/// conversation history via a cheap LLM provider and injected after the system prompt.
3381#[derive(Debug, Clone, Deserialize, Serialize)]
3382#[serde(default)]
3383pub struct PersonaConfig {
3384 /// Enable persona memory extraction and injection. Default: `false`.
3385 pub enabled: bool,
3386 /// Provider name from `[[llm.providers]]` for persona extraction.
3387 /// Should be a cheap/fast model. Falls back to the primary provider when empty.
3388 pub persona_provider: ProviderName,
3389 /// Minimum confidence threshold for facts included in context. Default: `0.6`.
3390 pub min_confidence: f64,
3391 /// Minimum user messages before extraction runs in a session. Default: `3`.
3392 pub min_messages: usize,
3393 /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
3394 pub max_messages: usize,
3395 /// LLM timeout for the extraction call in seconds. Default: `10`.
3396 pub extraction_timeout_secs: u64,
3397 /// Token budget allocated to persona context in assembly. Default: `500`.
3398 pub context_budget_tokens: usize,
3399}
3400
3401impl Default for PersonaConfig {
3402 fn default() -> Self {
3403 Self {
3404 enabled: false,
3405 persona_provider: ProviderName::default(),
3406 min_confidence: 0.6,
3407 min_messages: 3,
3408 max_messages: 10,
3409 extraction_timeout_secs: 10,
3410 context_budget_tokens: 500,
3411 }
3412 }
3413}
3414
3415/// Trajectory-informed memory configuration (#2498).
3416///
3417/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
3418/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
3419/// Procedural entries are injected into context as "past experience" during assembly.
3420#[derive(Debug, Clone, Deserialize, Serialize)]
3421#[serde(default)]
3422pub struct TrajectoryConfig {
3423 /// Enable trajectory extraction and context injection. Default: `false`.
3424 pub enabled: bool,
3425 /// Provider name from `[[llm.providers]]` for extraction.
3426 /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3427 pub trajectory_provider: ProviderName,
3428 /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
3429 pub context_budget_tokens: usize,
3430 /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
3431 pub max_messages: usize,
3432 /// LLM timeout for the extraction call in seconds. Default: `10`.
3433 pub extraction_timeout_secs: u64,
3434 /// Number of procedural entries retrieved for context injection. Default: `5`.
3435 pub recall_top_k: usize,
3436 /// Minimum confidence score for entries included in context. Default: `0.6`.
3437 pub min_confidence: f64,
3438}
3439
3440impl Default for TrajectoryConfig {
3441 fn default() -> Self {
3442 Self {
3443 enabled: false,
3444 trajectory_provider: ProviderName::default(),
3445 context_budget_tokens: 400,
3446 max_messages: 10,
3447 extraction_timeout_secs: 10,
3448 recall_top_k: 5,
3449 min_confidence: 0.6,
3450 }
3451 }
3452}
3453
3454/// Category-aware memory configuration (#2428).
3455///
3456/// When `enabled = true`, messages are auto-tagged with a category derived from the active
3457/// skill or tool context. The category is stored in the `messages.category` column and used
3458/// as a Qdrant payload filter during recall.
3459#[derive(Debug, Clone, Deserialize, Serialize)]
3460#[serde(default)]
3461pub struct CategoryConfig {
3462 /// Enable category tagging and category-filtered recall. Default: `false`.
3463 pub enabled: bool,
3464 /// Automatically assign category from skill metadata or tool type. Default: `true`.
3465 pub auto_tag: bool,
3466}
3467
3468impl Default for CategoryConfig {
3469 fn default() -> Self {
3470 Self {
3471 enabled: false,
3472 auto_tag: true,
3473 }
3474 }
3475}
3476
3477/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
3478///
3479/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
3480/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
3481/// for complex queries.
3482#[derive(Debug, Clone, Deserialize, Serialize)]
3483#[serde(default)]
3484pub struct TreeConfig {
3485 /// Enable the memory tree and background consolidation loop. Default: `false`.
3486 pub enabled: bool,
3487 /// Provider name from `[[llm.providers]]` for node consolidation.
3488 /// Should be a fast/cheap model. Falls back to the primary provider when empty.
3489 pub consolidation_provider: ProviderName,
3490 /// Interval between consolidation sweeps in seconds. Default: `300`.
3491 pub sweep_interval_secs: u64,
3492 /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
3493 pub batch_size: usize,
3494 /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
3495 pub similarity_threshold: f32,
3496 /// Maximum tree depth (levels above leaves). Default: `3`.
3497 pub max_level: u32,
3498 /// Token budget allocated to tree memory in context assembly. Default: `400`.
3499 pub context_budget_tokens: usize,
3500 /// Number of tree nodes retrieved for context. Default: `5`.
3501 pub recall_top_k: usize,
3502 /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
3503 pub min_cluster_size: usize,
3504}
3505
3506impl Default for TreeConfig {
3507 fn default() -> Self {
3508 Self {
3509 enabled: false,
3510 consolidation_provider: ProviderName::default(),
3511 sweep_interval_secs: 300,
3512 batch_size: 20,
3513 similarity_threshold: 0.8,
3514 max_level: 3,
3515 context_budget_tokens: 400,
3516 recall_top_k: 5,
3517 min_cluster_size: 2,
3518 }
3519 }
3520}
3521
3522/// Time-based microcompact configuration (#2699).
3523///
3524/// When `enabled = true`, low-value tool outputs are cleared from context
3525/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
3526/// The most recent `keep_recent` tool messages are preserved unconditionally.
3527#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3528#[serde(default)]
3529pub struct MicrocompactConfig {
3530 /// Enable time-based microcompaction. Default: `false`.
3531 pub enabled: bool,
3532 /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
3533 pub gap_threshold_minutes: u32,
3534 /// Number of most recent compactable tool messages to preserve. Default: `3`.
3535 pub keep_recent: usize,
3536}
3537
3538impl Default for MicrocompactConfig {
3539 fn default() -> Self {
3540 Self {
3541 enabled: false,
3542 gap_threshold_minutes: 60,
3543 keep_recent: 3,
3544 }
3545 }
3546}
3547
3548/// autoDream background memory consolidation configuration (#2697).
3549///
3550/// When `enabled = true`, a constrained consolidation subagent runs after
3551/// a session ends if both `min_sessions` and `min_hours` gates pass.
3552#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3553#[serde(default)]
3554pub struct AutoDreamConfig {
3555 /// Enable autoDream consolidation. Default: `false`.
3556 pub enabled: bool,
3557 /// Minimum number of sessions between consolidations. Default: `3`.
3558 pub min_sessions: u32,
3559 /// Minimum hours between consolidations. Default: `24`.
3560 pub min_hours: u32,
3561 /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
3562 /// Falls back to the primary provider when empty. Default: `""`.
3563 pub consolidation_provider: ProviderName,
3564 /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
3565 pub max_iterations: u8,
3566 /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
3567 #[serde(default = "default_autodream_llm_timeout_secs")]
3568 pub llm_timeout_secs: u64,
3569}
3570
3571impl Default for AutoDreamConfig {
3572 fn default() -> Self {
3573 Self {
3574 enabled: false,
3575 min_sessions: 3,
3576 min_hours: 24,
3577 consolidation_provider: ProviderName::default(),
3578 max_iterations: 8,
3579 llm_timeout_secs: default_autodream_llm_timeout_secs(),
3580 }
3581 }
3582}
3583
/// Default for `AutoDreamConfig::llm_timeout_secs`: `30` seconds.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by the
/// struct's `Default` implementation.
fn default_autodream_llm_timeout_secs() -> u64 {
    const DEFAULT_LLM_TIMEOUT_SECS: u64 = 30;
    DEFAULT_LLM_TIMEOUT_SECS
}
3587
3588/// `MagicDocs` auto-maintained markdown configuration (#2702).
3589///
3590/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
3591/// are registered and periodically updated by a constrained subagent.
3592#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
3593#[serde(default)]
3594pub struct MagicDocsConfig {
3595 /// Enable `MagicDocs` auto-maintenance. Default: `false`.
3596 pub enabled: bool,
3597 /// Minimum turns between updates for a given doc path. Default: `5`.
3598 pub min_turns_between_updates: u32,
3599 /// Provider name from `[[llm.providers]]` for doc update LLM calls.
3600 /// Falls back to the primary provider when empty. Default: `""`.
3601 pub update_provider: ProviderName,
3602 /// Maximum agent loop iterations per doc update. Default: `4`.
3603 pub max_iterations: u8,
3604}
3605
3606impl Default for MagicDocsConfig {
3607 fn default() -> Self {
3608 Self {
3609 enabled: false,
3610 min_turns_between_updates: 5,
3611 update_provider: ProviderName::default(),
3612 max_iterations: 4,
3613 }
3614 }
3615}
3616
// Unit tests for config deserialization, defaults, and validation.
#[cfg(test)]
mod tests {
    use super::*;

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every currently supported strategy name deserializes to its variant.
    // NOTE(review): despite the name, only the deserialize direction is exercised here.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // A strategy name that was never valid must be a hard error (no silent fallback).
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    // Pins the default values of `TierConfig`.
    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    // `promotion_min_sessions = 1` must fail deserialization.
    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // `similarity_threshold = 0.4` must fail deserialization.
    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // `sweep_batch_size = 0` must fail deserialization.
    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    // Helper: deserialize a `SemanticConfig` from a one-line TOML document that sets
    // only `importance_weight` to the given raw TOML value.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    // Lower bound of the accepted range is inclusive.
    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    // Upper bound of the accepted range is inclusive.
    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // A value just below zero must be rejected: `0.0` is the smallest accepted
        // weight (see `importance_weight_valid_zero`). Non-finite inputs such as
        // `nan` are not covered by this test.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // NOTE(review): `goal_utility` (0.0 here) is not part of the checked sum — confirm
    // whether `normalized()` is meant to include it.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    // Test: already-normalized weights are preserved.
    // NOTE(review): only the sum of five fields is checked (again without `goal_utility`),
    // not that each individual weight is unchanged.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = n.future_utility
            + n.factual_confidence
            + n.semantic_novelty
            + n.temporal_recency
            + n.content_type_prior;
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    // Only `future_utility` is compared against the default as a representative field.
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        assert!(
            (n.future_utility - default.future_utility).abs() < 0.001,
            "zero-sum weights must fall back to defaults"
        );
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    // NOTE(review): this deserializes into a local `Wrapper`, not into
    // `SpreadingActivationConfig`, so it only shows that the TOML key parses as a `u64`.
    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    // Explicitly configured density budgets override the serde defaults.
    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    // A configured scorer provider name is carried through deserialization unchanged.
    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
3892
3893/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
3894///
3895/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
3896/// Successful and failed reasoning chains are compressed into short, generalizable strategy
3897/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
3898/// and injected into the prompt preamble.
3899///
3900/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
3901///
3902/// # Example
3903///
3904/// ```toml
3905/// [memory.reasoning]
3906/// enabled = true
3907/// extract_provider = "fast"
3908/// distill_provider = "fast"
3909/// top_k = 3
3910/// store_limit = 1000
3911/// ```
3912#[derive(Debug, Clone, Deserialize, Serialize)]
3913#[serde(default)]
3914pub struct ReasoningConfig {
3915 /// Enable the reasoning-bank pipeline. Default: `false`.
3916 pub enabled: bool,
3917 /// Provider name from `[[llm.providers]]` for the self-judge step.
3918 /// Falls back to the primary provider when empty. Default: `""`.
3919 pub extract_provider: ProviderName,
3920 /// Provider name from `[[llm.providers]]` for the distillation step.
3921 /// Falls back to the primary provider when empty. Default: `""`.
3922 pub distill_provider: ProviderName,
3923 /// Number of strategies retrieved per turn for context injection. Default: `3`.
3924 pub top_k: usize,
3925 /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
3926 pub store_limit: usize,
3927 /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
3928 pub max_messages: usize,
3929 /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
3930 pub max_message_chars: usize,
3931 /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
3932 pub context_budget_tokens: usize,
3933 /// Minimum number of messages required before self-judge fires. Default: `2`.
3934 pub min_messages: usize,
3935 /// Timeout in seconds for the self-judge LLM call. Default: `30`.
3936 pub extraction_timeout_secs: u64,
3937 /// Timeout in seconds for the distillation LLM call. Default: `30`.
3938 pub distill_timeout_secs: u64,
3939 /// Maximum number of recent messages passed to the self-judge evaluator.
3940 /// Narrowing to the last user+assistant pair improves classification accuracy.
3941 /// Default: `2`.
3942 pub self_judge_window: usize,
3943 /// Minimum characters in the assistant response to trigger self-judge.
3944 /// Short or trivial responses are skipped. Default: `50`.
3945 pub min_assistant_chars: usize,
3946}
3947
3948impl Default for ReasoningConfig {
3949 fn default() -> Self {
3950 Self {
3951 enabled: false,
3952 extract_provider: ProviderName::default(),
3953 distill_provider: ProviderName::default(),
3954 top_k: 3,
3955 store_limit: 1000,
3956 max_messages: 6,
3957 max_message_chars: 2000,
3958 context_budget_tokens: 500,
3959 min_messages: 2,
3960 extraction_timeout_secs: 30,
3961 distill_timeout_secs: 30,
3962 self_judge_window: 2,
3963 min_assistant_chars: 50,
3964 }
3965 }
3966}
3967
3968// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
3969
/// Eviction policy variant.
///
/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid.
/// The enum is `#[non_exhaustive]`, so further policies can be added later without
/// a breaking change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum EvictionPolicy {
    /// Ebbinghaus forgetting-curve eviction. This is the default (and currently only) policy.
    #[default]
    Ebbinghaus,
}
3981
3982/// Configuration for the memory eviction policy.
3983///
3984/// Controls which policy runs during the periodic sweep and how many entries
3985/// are retained. `zeph-memory` re-exports this type from here.
3986#[derive(Debug, Clone, Deserialize, Serialize)]
3987pub struct EvictionConfig {
3988 /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
3989 pub policy: EvictionPolicy,
3990 /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
3991 pub max_entries: usize,
3992 /// How often to run the eviction sweep, in seconds.
3993 pub sweep_interval_secs: u64,
3994}
3995
3996impl Default for EvictionConfig {
3997 fn default() -> Self {
3998 Self {
3999 policy: EvictionPolicy::Ebbinghaus,
4000 max_entries: 0,
4001 sweep_interval_secs: 3600,
4002 }
4003 }
4004}
4005
4006// ── Compression guidelines config (moved from zeph-memory) ───────────────────
4007
4008/// Configuration for ACON failure-driven compression guidelines.
4009///
4010/// `zeph-memory` re-exports this type from here.
4011#[derive(Debug, Clone, Deserialize, Serialize)]
4012#[serde(default)]
4013pub struct CompressionGuidelinesConfig {
4014 /// Enable the feature. Default: `false`.
4015 pub enabled: bool,
4016 /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
4017 pub update_threshold: u16,
4018 /// Maximum token budget for the guidelines document. Default: `500`.
4019 pub max_guidelines_tokens: usize,
4020 /// Maximum failure pairs consumed per update cycle. Default: `10`.
4021 pub max_pairs_per_update: usize,
4022 /// Number of turns after hard compaction to watch for context loss. Default: `10`.
4023 pub detection_window_turns: u64,
4024 /// Interval in seconds between background updater checks. Default: `300`.
4025 pub update_interval_secs: u64,
4026 /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
4027 pub max_stored_pairs: usize,
4028 /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
4029 /// `None` (or `Some("")`) falls back to the primary provider.
4030 #[serde(default, skip_serializing_if = "Option::is_none")]
4031 pub guidelines_provider: Option<ProviderName>,
4032 /// Maintain separate guideline documents per content category.
4033 #[serde(default)]
4034 pub categorized_guidelines: bool,
4035}
4036
4037impl Default for CompressionGuidelinesConfig {
4038 fn default() -> Self {
4039 Self {
4040 enabled: false,
4041 update_threshold: 5,
4042 max_guidelines_tokens: 500,
4043 max_pairs_per_update: 10,
4044 detection_window_turns: 10,
4045 update_interval_secs: 300,
4046 max_stored_pairs: 100,
4047 guidelines_provider: None,
4048 categorized_guidelines: false,
4049 }
4050 }
4051}
4052
4053// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
4054
/// Functional category of a compaction probe question.
///
/// Variants are (de)serialized in lowercase (`"recall"`, `"artifact"`, `"continuation"`,
/// `"decision"`). The type derives `Hash` and `Eq` and is used as the map key of
/// `CompactionProbeConfig::category_weights`.
///
/// `zeph-memory` re-exports this type from here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ProbeCategory {
    /// Did specific facts survive? (file paths, function names, values, decisions)
    Recall,
    /// Does the agent know which files/tools/URLs it used?
    Artifact,
    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
    Continuation,
    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
    Decision,
}
4071
4072/// Configuration for the compaction probe.
4073///
4074/// `zeph-memory` re-exports this type from here.
4075#[derive(Debug, Clone, Serialize, Deserialize)]
4076#[serde(default)]
4077pub struct CompactionProbeConfig {
4078 /// Enable compaction probe validation. Default: `false`.
4079 pub enabled: bool,
4080 /// Provider name from `[[llm.providers]]` for probe LLM calls.
4081 /// `None` (or `Some("")`) uses the summary provider.
4082 #[serde(default, skip_serializing_if = "Option::is_none")]
4083 pub probe_provider: Option<ProviderName>,
4084 /// Minimum score to pass without warnings. Default: `0.6`.
4085 pub threshold: f32,
4086 /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
4087 pub hard_fail_threshold: f32,
4088 /// Maximum number of probe questions to generate. Default: `5`.
4089 pub max_questions: usize,
4090 /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
4091 pub timeout_secs: u64,
4092 /// Optional per-category weight multipliers for the overall score.
4093 #[serde(default)]
4094 pub category_weights: Option<HashMap<ProbeCategory, f32>>,
4095}
4096
4097impl Default for CompactionProbeConfig {
4098 fn default() -> Self {
4099 Self {
4100 enabled: false,
4101 probe_provider: None,
4102 threshold: 0.6,
4103 hard_fail_threshold: 0.35,
4104 max_questions: 5,
4105 timeout_secs: 15,
4106 category_weights: None,
4107 }
4108 }
4109}
4110
4111// ── MemCoT semantic state config ─────────────────────────────────────────────
4112
/// `MemCoT` semantic-state distillation configuration.
///
/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
/// summarizing conceptual progress across turns. This buffer is injected into graph
/// recall queries to improve retrieval relevance.
///
/// All LLM work (distillation) runs asynchronously — never on the turn thread.
/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
///
/// # Config example
///
/// ```toml
/// [memory.memcot]
/// enabled = true
/// distill_provider = "fast"
/// distill_timeout_secs = 5
/// min_assistant_chars = 200
/// min_distill_interval_secs = 30
/// max_distills_per_session = 50
/// max_state_chars = 800
/// recall_view = "head"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MemCotConfig {
    /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
    ///
    /// When `false`, the accumulator is never allocated and no LLM calls are made.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for distillation.
    ///
    /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
    /// A startup warning is emitted when the resolved model does not look fast-tier
    /// (see `fast_tier_models` for overriding the model-name hints).
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Timeout in seconds for each distillation LLM call. Default: `5`.
    pub distill_timeout_secs: u64,
    /// Minimum characters in the assistant response to trigger distillation.
    /// Short or trivial replies are skipped. Default: `200`.
    pub min_assistant_chars: usize,
    /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
    ///
    /// Prevents runaway costs on long sessions with rapid turns.
    /// The interval tracking is reset when the session is cleared with `/new`.
    pub min_distill_interval_secs: u64,
    /// Maximum distillation spawns per conversation session. Default: `50`.
    ///
    /// Once this cap is reached the accumulator stops distilling for the rest of the
    /// session. Counter is reset when the user sends `/new`.
    pub max_distills_per_session: u64,
    /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
    /// Default: `800`.
    pub max_state_chars: usize,
    /// Recall view applied when `MemCoT` is active. Default: `head`.
    ///
    /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
    /// - `zoom_in`: adds source-message provenance to each returned fact.
    /// - `zoom_out`: expands 1-hop neighbors per returned fact.
    ///
    /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
    pub recall_view: RecallViewConfig,
    /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
    pub zoom_out_neighbor_cap: usize,
    /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
    /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
    /// Omitted from serialized output while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub fast_tier_models: Vec<String>,
}
4181
/// Recall view variant exposed in config.
///
/// Variants are (de)serialized in `snake_case`: `"head"`, `"zoom_in"`, `"zoom_out"`.
///
/// Maps 1-to-1 to `zeph_memory::RecallView`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum RecallViewConfig {
    /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
    /// This is the default variant.
    #[default]
    Head,
    /// Adds source-message provenance to each returned fact.
    ZoomIn,
    /// Expands 1-hop neighbor facts per returned fact.
    ZoomOut,
}
4197
4198impl Default for MemCotConfig {
4199 fn default() -> Self {
4200 Self {
4201 enabled: false,
4202 distill_provider: ProviderName::default(),
4203 distill_timeout_secs: 5,
4204 min_assistant_chars: 200,
4205 min_distill_interval_secs: 30,
4206 max_distills_per_session: 50,
4207 max_state_chars: 800,
4208 recall_view: RecallViewConfig::Head,
4209 zoom_out_neighbor_cap: 3,
4210 fast_tier_models: Vec::new(),
4211 }
4212 }
4213}
4214
/// `OmniMem` retrieval failure tracking configuration (issue #3576).
///
/// Controls the async logger that records no-hit and low-confidence recall events
/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
///
/// Every field is optional when deserializing: the container-level `#[serde(default)]`
/// and the per-field default functions supply any value that is omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RetrievalFailuresConfig {
    /// Enable retrieval failure logging. Default: `false`.
    pub enabled: bool,
    /// Composite recall score below which a result is classified as low-confidence.
    ///
    /// The threshold applies to the post-reranking composite score (which incorporates
    /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
    /// the scoring pipeline in use. Default: `0.3`.
    #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
    pub low_confidence_threshold: f32,
    /// Days to retain failure records before automatic cleanup. Default: `90`.
    #[serde(default = "default_retrieval_failures_retention_days")]
    pub retention_days: u32,
    /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
    #[serde(default = "default_retrieval_failures_channel_capacity")]
    pub channel_capacity: usize,
    /// Maximum records collected before flushing a batch INSERT. Default: `16`.
    #[serde(default = "default_retrieval_failures_batch_size")]
    pub batch_size: usize,
    /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
    #[serde(default = "default_retrieval_failures_flush_interval_ms")]
    pub flush_interval_ms: u64,
}
4244
4245impl Default for RetrievalFailuresConfig {
4246 fn default() -> Self {
4247 Self {
4248 enabled: false,
4249 low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
4250 retention_days: default_retrieval_failures_retention_days(),
4251 channel_capacity: default_retrieval_failures_channel_capacity(),
4252 batch_size: default_retrieval_failures_batch_size(),
4253 flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
4254 }
4255 }
4256}
4257
4258// ── TrajectoryRiskAccumulator config (spec 004-16) ─────────────────────────────
4259
4260fn validate_tra_nonneg_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
4261where
4262 D: serde::Deserializer<'de>,
4263{
4264 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
4265 if value.is_nan() || value.is_infinite() || value < 0.0 {
4266 return Err(serde::de::Error::custom(
4267 "signal weight and severity multiplier values must be finite and non-negative",
4268 ));
4269 }
4270 Ok(value)
4271}
4272
/// Per-signal-type base weights for the trajectory risk accumulator.
///
/// Each weight is multiplied by the severity multiplier before being added to
/// `trajectory_risk`. The documented range is `(0.0, 1.0]` and all shipped defaults
/// lie inside it, but deserialization (`validate_tra_nonneg_weight`) only rejects NaN,
/// infinite, and negative values — `0.0` and values above `1.0` are accepted.
/// NOTE(review): confirm whether the upper bound is meant to be enforced.
///
/// Any field omitted from the TOML table falls back to its per-field default.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory.signal_weights]
/// prompt_injection = 0.6
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectorySignalWeights {
    /// Weight for `PolicyViolation` signals. Default: `0.30`.
    #[serde(
        default = "default_sw_policy_violation",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub policy_violation: f64,
    /// Weight for `PromptInjectionPattern` signals. Default: `0.50`.
    #[serde(
        default = "default_sw_prompt_injection",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub prompt_injection: f64,
    /// Weight for `ToolChainAnomaly` signals. Default: `0.25`.
    #[serde(
        default = "default_sw_tool_chain_anomaly",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub tool_chain_anomaly: f64,
    /// Weight for `ConfidenceDrop` signals. Default: `0.15`.
    #[serde(
        default = "default_sw_confidence_drop",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub confidence_drop: f64,
}
4311
/// Default base weight for `PolicyViolation` signals.
fn default_sw_policy_violation() -> f64 {
    const POLICY_VIOLATION_WEIGHT: f64 = 0.30;
    POLICY_VIOLATION_WEIGHT
}
/// Default base weight for `PromptInjectionPattern` signals.
fn default_sw_prompt_injection() -> f64 {
    const PROMPT_INJECTION_WEIGHT: f64 = 0.50;
    PROMPT_INJECTION_WEIGHT
}
/// Default base weight for `ToolChainAnomaly` signals.
fn default_sw_tool_chain_anomaly() -> f64 {
    const TOOL_CHAIN_ANOMALY_WEIGHT: f64 = 0.25;
    TOOL_CHAIN_ANOMALY_WEIGHT
}
/// Default base weight for `ConfidenceDrop` signals.
fn default_sw_confidence_drop() -> f64 {
    const CONFIDENCE_DROP_WEIGHT: f64 = 0.15;
    CONFIDENCE_DROP_WEIGHT
}
4324
4325impl Default for TrajectorySignalWeights {
4326 fn default() -> Self {
4327 Self {
4328 policy_violation: default_sw_policy_violation(),
4329 prompt_injection: default_sw_prompt_injection(),
4330 tool_chain_anomaly: default_sw_tool_chain_anomaly(),
4331 confidence_drop: default_sw_confidence_drop(),
4332 }
4333 }
4334}
4335
/// Per-severity multipliers applied on top of signal base weights.
///
/// Each multiplier must be finite and non-negative; NaN, infinite, and negative
/// values are rejected at deserialization by `validate_tra_nonneg_weight`.
/// Any field omitted from the TOML table falls back to its per-field default.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory.severity_multipliers]
/// high = 3.0
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectorySeverityMultipliers {
    /// Multiplier for low-severity signals. Default: `0.5`.
    #[serde(
        default = "default_sev_low",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub low: f64,
    /// Multiplier for medium-severity signals. Default: `1.0`.
    #[serde(
        default = "default_sev_medium",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub medium: f64,
    /// Multiplier for high-severity signals. Default: `2.0`.
    #[serde(
        default = "default_sev_high",
        deserialize_with = "validate_tra_nonneg_weight"
    )]
    pub high: f64,
}
4365
/// Default multiplier for low-severity signals.
fn default_sev_low() -> f64 {
    const LOW_SEVERITY_MULTIPLIER: f64 = 0.5;
    LOW_SEVERITY_MULTIPLIER
}
/// Default multiplier for medium-severity signals.
fn default_sev_medium() -> f64 {
    const MEDIUM_SEVERITY_MULTIPLIER: f64 = 1.0;
    MEDIUM_SEVERITY_MULTIPLIER
}
/// Default multiplier for high-severity signals.
fn default_sev_high() -> f64 {
    const HIGH_SEVERITY_MULTIPLIER: f64 = 2.0;
    HIGH_SEVERITY_MULTIPLIER
}
4375
4376impl Default for TrajectorySeverityMultipliers {
4377 fn default() -> Self {
4378 Self {
4379 low: default_sev_low(),
4380 medium: default_sev_medium(),
4381 high: default_sev_high(),
4382 }
4383 }
4384}
4385
/// Configuration for the MAGE trajectory risk accumulator (spec 004-16).
///
/// Controls how per-turn safety signals accumulate into a session-level risk score
/// and when tool execution is blocked or escalated.
///
/// Every field carries a serde default, so any subset of keys may be given in TOML.
///
/// NOTE(review): unlike the nested weight/multiplier tables, `risk_threshold` and
/// `escalation_threshold` have no `deserialize_with` validator here, so finiteness and
/// the ordering `escalation_threshold <= risk_threshold` are not checked at this layer —
/// confirm the consumer validates them.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.shadow_memory]
/// enabled = true
/// risk_threshold = 0.75
/// escalation_threshold = 0.50
/// risk_halflife_turns = 10
/// signal_history_cap = 200
/// tui_show_risk_gauge = true
/// reset_on_compaction = false
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectoryRiskAccumulatorConfig {
    /// Enable shadow memory. When `false`, `TrajectoryRiskAccumulator` is a zero-cost noop.
    #[serde(default)]
    pub enabled: bool,
    /// Block tool execution when `trajectory_risk >= risk_threshold`. Default: `0.75`.
    #[serde(default = "default_tra_risk_threshold")]
    pub risk_threshold: f64,
    /// Escalate to human confirmation when risk is in `[escalation_threshold, risk_threshold)`.
    /// Default: `0.50`.
    #[serde(default = "default_tra_escalation_threshold")]
    pub escalation_threshold: f64,
    /// Number of turns after which accumulated risk halves (exponential decay). Default: `10`.
    ///
    /// NOTE(review): `0` is accepted by deserialization — confirm how the accumulator
    /// treats a zero half-life.
    #[serde(default = "default_tra_risk_halflife_turns")]
    pub risk_halflife_turns: u32,
    /// Maximum number of signal events kept in the ring buffer. Default: `200`.
    #[serde(default = "default_tra_signal_history_cap")]
    pub signal_history_cap: usize,
    /// Show a risk gauge in the TUI security panel when the TUI is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub tui_show_risk_gauge: bool,
    /// Reset `trajectory_risk` to zero when a context compaction occurs. Default: `false`.
    #[serde(default)]
    pub reset_on_compaction: bool,
    /// Per-signal-type base weights.
    #[serde(default)]
    pub signal_weights: TrajectorySignalWeights,
    /// Per-severity multipliers applied on top of signal weights.
    #[serde(default)]
    pub severity_multipliers: TrajectorySeverityMultipliers,
}
4434
/// Default for `TrajectoryRiskAccumulatorConfig::risk_threshold`.
fn default_tra_risk_threshold() -> f64 {
    const RISK_THRESHOLD: f64 = 0.75;
    RISK_THRESHOLD
}
/// Default for `TrajectoryRiskAccumulatorConfig::escalation_threshold`.
fn default_tra_escalation_threshold() -> f64 {
    const ESCALATION_THRESHOLD: f64 = 0.50;
    ESCALATION_THRESHOLD
}
/// Default for `TrajectoryRiskAccumulatorConfig::risk_halflife_turns`.
fn default_tra_risk_halflife_turns() -> u32 {
    const RISK_HALFLIFE_TURNS: u32 = 10;
    RISK_HALFLIFE_TURNS
}
/// Default for `TrajectoryRiskAccumulatorConfig::signal_history_cap`.
fn default_tra_signal_history_cap() -> usize {
    const SIGNAL_HISTORY_CAP: usize = 200;
    SIGNAL_HISTORY_CAP
}
4447
4448impl Default for TrajectoryRiskAccumulatorConfig {
4449 fn default() -> Self {
4450 Self {
4451 enabled: false,
4452 risk_threshold: default_tra_risk_threshold(),
4453 escalation_threshold: default_tra_escalation_threshold(),
4454 risk_halflife_turns: default_tra_risk_halflife_turns(),
4455 signal_history_cap: default_tra_signal_history_cap(),
4456 tui_show_risk_gauge: true,
4457 reset_on_compaction: false,
4458 signal_weights: TrajectorySignalWeights::default(),
4459 severity_multipliers: TrajectorySeverityMultipliers::default(),
4460 }
4461 }
4462}
4463
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// `MemCotConfig::default()` keeps the feature off and exposes the built-in defaults.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        // `Default` builds an empty allowlist.
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key supplied in TOML must land in the matching field; keys left out
    /// (`fast_tier_models`) must fall back to their serde default.
    #[test]
    fn memcot_config_round_trip() {
        let input = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(input).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        // Previously set in the fixture but never verified.
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        // Previously set in the fixture but never verified.
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        // Omitted key: `#[serde(default)]` on the field yields an empty list.
        assert!(cfg.fast_tier_models.is_empty());
    }
}
4505
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    /// Parses `input` as a [`MemoryConfig`], panicking with the shared message on failure.
    fn parse_memory_config(input: &str) -> MemoryConfig {
        toml::from_str(input).expect("must deserialize")
    }

    // ── ApexMemConfig ────────────────────────────────────────────────────────

    #[test]
    fn apex_mem_config_default_disabled() {
        let defaults = ApexMemConfig::default();
        assert!(!defaults.enabled, "APEX-MEM must be disabled by default");
    }

    #[test]
    fn apex_mem_config_serde_round_trip() {
        let parsed: ApexMemConfig = toml::from_str("enabled = true").unwrap();
        assert!(parsed.enabled);
    }

    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let parsed: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!parsed.enabled, "empty TOML must produce default (disabled)");
    }

    // ── WriteQualityGateConfig ───────────────────────────────────────────────

    #[test]
    fn write_quality_gate_config_default_disabled() {
        let defaults = WriteQualityGateConfig::default();
        assert!(!defaults.enabled);
        assert!((defaults.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(defaults.recent_window, 32);
        assert_eq!(defaults.contradiction_grace_seconds, 300);
        assert!((defaults.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((defaults.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((defaults.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((defaults.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(defaults.quality_gate_provider.is_empty());
        assert_eq!(defaults.llm_timeout_ms, 500);
        assert!((defaults.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(defaults.reference_check_lang_en);
    }

    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let input = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let parsed: WriteQualityGateConfig = toml::from_str(input).unwrap();
        assert!(parsed.enabled);
        assert!((parsed.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(parsed.recent_window, 16);
        assert_eq!(parsed.contradiction_grace_seconds, 600);
        assert_eq!(parsed.quality_gate_provider.as_str(), "fast");
        assert_eq!(parsed.llm_timeout_ms, 1000);
        assert!(!parsed.reference_check_lang_en);
    }

    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let parsed: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!parsed.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(parsed.recent_window, 32);
    }

    // ── MemoryConfig: provider fields and five-signal weights ────────────────

    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let parsed = parse_memory_config(
            r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#,
        );
        assert_eq!(
            parsed.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    #[test]
    fn five_signal_config_default_is_disabled() {
        let parsed = parse_memory_config("history_limit = 50");
        let signals = &parsed.five_signal;
        assert!(!signals.enabled);
        assert!((signals.w_recency - 0.35).abs() < 1e-9);
        assert!((signals.w_relevance - 0.35).abs() < 1e-9);
        assert!(signals.w_frequency.abs() < 1e-9);
        assert!(signals.w_causal.abs() < 1e-9);
        assert!(signals.w_novelty.abs() < 1e-9);
    }

    #[test]
    fn five_signal_config_toml_roundtrip() {
        let parsed = parse_memory_config(
            r"
            history_limit = 50
            [five_signal]
            enabled = true
            w_recency = 0.35
            w_relevance = 0.35
            w_frequency = 0.15
            w_causal = 0.10
            w_novelty = 0.05
        ",
        );
        assert!(parsed.five_signal.enabled);
        assert!((parsed.five_signal.w_frequency - 0.15).abs() < 1e-9);
    }

    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let parsed = parse_memory_config("history_limit = 50");
        assert_eq!(
            parsed.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let parsed = parse_memory_config(
            r#"
            history_limit = 50
            compaction_provider = "mid"
        "#,
        );
        assert_eq!(
            parsed.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let parsed = parse_memory_config("history_limit = 50");
        assert_eq!(
            parsed.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }
}