zeph_config/memory.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashMap;
5
6use schemars::JsonSchema;
7use serde::{Deserialize, Serialize};
8use zeph_common::memory::{EdgeType, MemoryRoute};
9use zeph_common::secret::Secret;
10
11use crate::defaults::{default_sqlite_path_field, default_true};
12use crate::providers::ProviderName;
13
/// Default for `MemoryConfig::sqlite_pool_size`: `5`.
fn default_sqlite_pool_size() -> u32 {
    5
}

/// `max_history` default: `100`.
fn default_max_history() -> usize {
    100
}

/// `title_max_chars` default: `60`.
fn default_title_max_chars() -> usize {
    60
}

/// `document_collection` default: `"zeph_documents"`.
fn default_document_collection() -> String {
    String::from("zeph_documents")
}

/// `document_chunk_size` default: `1000`.
fn default_document_chunk_size() -> usize {
    1_000
}

/// `document_chunk_overlap` default: `100`.
fn default_document_chunk_overlap() -> usize {
    100
}

/// `document_top_k` default: `3`.
fn default_document_top_k() -> usize {
    3
}

/// Default for `MemoryConfig::autosave_min_length`: `20`.
fn default_autosave_min_length() -> usize {
    20
}

/// Default for `MemoryConfig::tool_call_cutoff`: `6`.
fn default_tool_call_cutoff() -> usize {
    6
}

/// Default for `MemoryConfig::token_safety_margin`: `1.0`.
fn default_token_safety_margin() -> f32 {
    1.0
}

/// Default for `MemoryConfig::redact_credentials`: `true`.
fn default_redact_credentials() -> bool {
    true
}
57
/// Default for `MemoryConfig::qdrant_url`: `"http://localhost:6334"`.
fn default_qdrant_url() -> String {
    String::from("http://localhost:6334")
}

/// Default for `MemoryConfig::summarization_threshold`: `50`.
fn default_summarization_threshold() -> usize {
    50
}

/// Default for `MemoryConfig::summarization_llm_timeout_secs`: `60`.
fn default_summarization_llm_timeout_secs() -> u64 {
    60
}

/// Default for `MemoryConfig::context_budget_tokens`: `0`.
fn default_context_budget_tokens() -> usize {
    0
}

/// Default for `MemoryConfig::soft_compaction_threshold`: `0.60`.
fn default_soft_compaction_threshold() -> f32 {
    0.6
}

/// Default for `MemoryConfig::hard_compaction_threshold`: `0.90`.
fn default_hard_compaction_threshold() -> f32 {
    0.9
}

/// Default for `MemoryConfig::compaction_preserve_tail`: `6`.
fn default_compaction_preserve_tail() -> usize {
    6
}

/// Default for `MemoryConfig::compaction_cooldown_turns`: `2`.
fn default_compaction_cooldown_turns() -> u8 {
    2
}

/// Default for `MemoryConfig::auto_budget`: `true`.
fn default_auto_budget() -> bool {
    true
}

/// Default for `MemoryConfig::prune_protect_tokens`: `40_000`.
fn default_prune_protect_tokens() -> usize {
    40_000
}

/// Default for `MemoryConfig::cross_session_score_threshold`: `0.35`.
fn default_cross_session_score_threshold() -> f32 {
    0.35
}
101
/// `temporal_decay_half_life_days` default: `30`.
fn default_temporal_decay_half_life_days() -> u32 {
    30
}

/// `mmr_lambda` default: `0.7`.
fn default_mmr_lambda() -> f32 {
    0.7
}

/// `semantic_enabled` default: `true`.
fn default_semantic_enabled() -> bool {
    true
}

/// `recall_limit` default: `5`.
fn default_recall_limit() -> usize {
    5
}

/// `vector_weight` default: `0.7`.
fn default_vector_weight() -> f64 {
    0.7
}

/// `keyword_weight` default: `0.3`.
fn default_keyword_weight() -> f64 {
    0.3
}
125
/// `graph_max_entities_per_message` default: `10`.
fn default_graph_max_entities_per_message() -> usize {
    10
}

/// `graph_max_edges_per_message` default: `15`.
fn default_graph_max_edges_per_message() -> usize {
    15
}

/// `graph_community_refresh_interval` default: `100`.
fn default_graph_community_refresh_interval() -> usize {
    100
}

/// `graph_community_summary_max_prompt_bytes` default: `8192`.
fn default_graph_community_summary_max_prompt_bytes() -> usize {
    8_192
}

/// `graph_community_summary_concurrency` default: `4`.
fn default_graph_community_summary_concurrency() -> usize {
    4
}

/// `lpa_edge_chunk_size` default: `10_000`.
fn default_lpa_edge_chunk_size() -> usize {
    10_000
}

/// `graph_entity_similarity_threshold` default: `0.85`.
fn default_graph_entity_similarity_threshold() -> f32 {
    0.85
}

/// `graph_entity_ambiguous_threshold` default: `0.70`.
fn default_graph_entity_ambiguous_threshold() -> f32 {
    0.7
}

/// `graph_extraction_timeout_secs` default: `15`.
fn default_graph_extraction_timeout_secs() -> u64 {
    15
}

/// `graph_max_hops` default: `2`.
fn default_graph_max_hops() -> u32 {
    2
}

/// `graph_recall_limit` default: `10`.
fn default_graph_recall_limit() -> usize {
    10
}

/// `graph_expired_edge_retention_days` default: `90`.
fn default_graph_expired_edge_retention_days() -> u32 {
    90
}

/// `graph_temporal_decay_rate` default: `0.0`.
fn default_graph_temporal_decay_rate() -> f64 {
    0.0
}

/// `graph_edge_history_limit` default: `100`.
fn default_graph_edge_history_limit() -> usize {
    100
}
181
/// Default for `SpreadingActivationConfig::decay_lambda`: `0.85`.
fn default_spreading_activation_decay_lambda() -> f32 {
    0.85
}

/// Default for `SpreadingActivationConfig::max_hops`: `3`.
fn default_spreading_activation_max_hops() -> u32 {
    3
}

/// Default for `SpreadingActivationConfig::activation_threshold`: `0.1`.
fn default_spreading_activation_activation_threshold() -> f32 {
    0.1
}

/// Default for `SpreadingActivationConfig::inhibition_threshold`: `0.8`.
fn default_spreading_activation_inhibition_threshold() -> f32 {
    0.8
}

/// Default for `SpreadingActivationConfig::max_activated_nodes`: `50`.
fn default_spreading_activation_max_activated_nodes() -> usize {
    50
}

/// Default for `SpreadingActivationConfig::recall_timeout_ms`: `1000`.
fn default_spreading_activation_recall_timeout_ms() -> u64 {
    1_000
}
205
/// Default for `SpreadingActivationConfig::alpha`: `0.3`.
fn default_benna_alpha() -> f32 {
    0.3
}

/// Default for `SpreadingActivationConfig::benna_fast_rate`: `0.5`.
fn default_benna_fast_rate() -> f32 {
    0.5
}

/// Default for `SpreadingActivationConfig::benna_slow_rate`: `0.05`.
fn default_benna_slow_rate() -> f32 {
    0.05
}

/// Default for `WriteGateConfig::min_edge_relevance`: `0.3`.
fn default_write_gate_min_edge_relevance() -> f32 {
    0.3
}

/// Default for `ConflictRecencyConfig::recency_slow_threshold`: `0.2`.
fn default_conflict_recency_slow_threshold() -> f32 {
    0.2
}
225
/// Default for `NoteLinkingConfig::similarity_threshold`: `0.85`.
fn default_note_linking_similarity_threshold() -> f32 {
    0.85
}

/// Default for `NoteLinkingConfig::top_k`: `10`.
fn default_note_linking_top_k() -> usize {
    10
}

/// Default for `NoteLinkingConfig::timeout_secs`: `5`.
fn default_note_linking_timeout_secs() -> u64 {
    5
}

/// Default for `MemoryConfig::shutdown_summary`: `true`.
fn default_shutdown_summary() -> bool {
    true
}

/// Default for `MemoryConfig::shutdown_summary_min_messages`: `4`.
fn default_shutdown_summary_min_messages() -> usize {
    4
}

/// Default for `MemoryConfig::shutdown_summary_max_messages`: `20`.
fn default_shutdown_summary_max_messages() -> usize {
    20
}

/// Default for `MemoryConfig::shutdown_summary_timeout_secs`: `30`.
fn default_shutdown_summary_timeout_secs() -> u64 {
    30
}
253
254fn validate_tier_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
255where
256 D: serde::Deserializer<'de>,
257{
258 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
259 if value.is_nan() || value.is_infinite() {
260 return Err(serde::de::Error::custom(
261 "similarity_threshold must be a finite number",
262 ));
263 }
264 if !(0.5..=1.0).contains(&value) {
265 return Err(serde::de::Error::custom(
266 "similarity_threshold must be in [0.5, 1.0]",
267 ));
268 }
269 Ok(value)
270}
271
272fn validate_tier_promotion_min_sessions<'de, D>(deserializer: D) -> Result<u32, D::Error>
273where
274 D: serde::Deserializer<'de>,
275{
276 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
277 if value < 2 {
278 return Err(serde::de::Error::custom(
279 "promotion_min_sessions must be >= 2",
280 ));
281 }
282 Ok(value)
283}
284
285fn validate_tier_sweep_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
286where
287 D: serde::Deserializer<'de>,
288{
289 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
290 if value == 0 {
291 return Err(serde::de::Error::custom("sweep_batch_size must be >= 1"));
292 }
293 Ok(value)
294}
295
/// Default for `TierConfig::promotion_min_sessions`: `3`.
fn default_tier_promotion_min_sessions() -> u32 {
    3
}

/// Default for `TierConfig::similarity_threshold`: `0.92`.
fn default_tier_similarity_threshold() -> f32 {
    0.92
}

/// Default for `TierConfig::sweep_interval_secs`: `3600`.
fn default_tier_sweep_interval_secs() -> u64 {
    3_600
}

/// Default for `TierConfig::sweep_batch_size`: `100`.
fn default_tier_sweep_batch_size() -> usize {
    100
}

/// Default for `TierConfig::scene_similarity_threshold`: `0.80`.
fn default_scene_similarity_threshold() -> f32 {
    0.8
}

/// Default for `TierConfig::scene_batch_size`: `50`.
fn default_scene_batch_size() -> usize {
    50
}
319
320fn validate_scene_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
321where
322 D: serde::Deserializer<'de>,
323{
324 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
325 if value.is_nan() || value.is_infinite() {
326 return Err(serde::de::Error::custom(
327 "scene_similarity_threshold must be a finite number",
328 ));
329 }
330 if !(0.5..=1.0).contains(&value) {
331 return Err(serde::de::Error::custom(
332 "scene_similarity_threshold must be in [0.5, 1.0]",
333 ));
334 }
335 Ok(value)
336}
337
338fn validate_scene_batch_size<'de, D>(deserializer: D) -> Result<usize, D::Error>
339where
340 D: serde::Deserializer<'de>,
341{
342 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
343 if value == 0 {
344 return Err(serde::de::Error::custom("scene_batch_size must be >= 1"));
345 }
346 Ok(value)
347}
348
/// Configuration for the AOI three-layer memory tier promotion system (`[memory.tiers]`).
///
/// When `enabled = true`, a background sweep promotes frequently-accessed episodic messages
/// to semantic tier by clustering near-duplicates and distilling them via an LLM call.
///
/// Missing keys fall back to the values produced by this type's `Default` implementation
/// (container-level `#[serde(default)]`).
///
/// # Validation
///
/// Constraints enforced at deserialization time:
/// - `similarity_threshold` in `[0.5, 1.0]`
/// - `promotion_min_sessions >= 2`
/// - `sweep_batch_size >= 1`
/// - `scene_similarity_threshold` in `[0.5, 1.0]`
/// - `scene_batch_size >= 1`
///
/// `sweep_interval_secs` and `scene_sweep_interval_secs` carry no validator.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct TierConfig {
    /// Enable the tier promotion system. When `false`, all messages remain episodic.
    /// Default: `false`.
    pub enabled: bool,
    /// Minimum number of distinct sessions a fact must appear in before promotion.
    /// Must be `>= 2`. Default: `3`.
    #[serde(deserialize_with = "validate_tier_promotion_min_sessions")]
    pub promotion_min_sessions: u32,
    /// Cosine similarity threshold for clustering near-duplicate facts during sweep.
    /// Must be in `[0.5, 1.0]`. Default: `0.92`.
    #[serde(deserialize_with = "validate_tier_similarity_threshold")]
    pub similarity_threshold: f32,
    /// How often the background promotion sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Must be `>= 1`. Default: `100`.
    #[serde(deserialize_with = "validate_tier_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Enable `MemScene` consolidation of semantic-tier messages. Default: `false`.
    pub scene_enabled: bool,
    /// Cosine similarity threshold for `MemScene` clustering. Must be in `[0.5, 1.0]`. Default: `0.80`.
    #[serde(deserialize_with = "validate_scene_similarity_threshold")]
    pub scene_similarity_threshold: f32,
    /// Maximum unassigned semantic messages processed per scene consolidation sweep.
    /// Must be `>= 1`. Default: `50`.
    #[serde(deserialize_with = "validate_scene_batch_size")]
    pub scene_batch_size: usize,
    /// Provider name from `[[llm.providers]]` for scene label/profile generation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub scene_provider: ProviderName,
    /// How often the background scene consolidation sweep runs, in seconds. Default: `7200`.
    pub scene_sweep_interval_secs: u64,
}
395
/// Default for `TierConfig::scene_sweep_interval_secs`: `7200`.
fn default_scene_sweep_interval_secs() -> u64 {
    7_200
}
399
400impl Default for TierConfig {
401 fn default() -> Self {
402 Self {
403 enabled: false,
404 promotion_min_sessions: default_tier_promotion_min_sessions(),
405 similarity_threshold: default_tier_similarity_threshold(),
406 sweep_interval_secs: default_tier_sweep_interval_secs(),
407 sweep_batch_size: default_tier_sweep_batch_size(),
408 scene_enabled: false,
409 scene_similarity_threshold: default_scene_similarity_threshold(),
410 scene_batch_size: default_scene_batch_size(),
411 scene_provider: ProviderName::default(),
412 scene_sweep_interval_secs: default_scene_sweep_interval_secs(),
413 }
414 }
415}
416
417fn validate_temporal_decay_rate<'de, D>(deserializer: D) -> Result<f64, D::Error>
418where
419 D: serde::Deserializer<'de>,
420{
421 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
422 if value.is_nan() || value.is_infinite() {
423 return Err(serde::de::Error::custom(
424 "temporal_decay_rate must be a finite number",
425 ));
426 }
427 if !(0.0..=10.0).contains(&value) {
428 return Err(serde::de::Error::custom(
429 "temporal_decay_rate must be in [0.0, 10.0]",
430 ));
431 }
432 Ok(value)
433}
434
435fn validate_similarity_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
436where
437 D: serde::Deserializer<'de>,
438{
439 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
440 if value.is_nan() || value.is_infinite() {
441 return Err(serde::de::Error::custom(
442 "similarity_threshold must be a finite number",
443 ));
444 }
445 if !(0.0..=1.0).contains(&value) {
446 return Err(serde::de::Error::custom(
447 "similarity_threshold must be in [0.0, 1.0]",
448 ));
449 }
450 Ok(value)
451}
452
453fn validate_importance_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
454where
455 D: serde::Deserializer<'de>,
456{
457 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
458 if value.is_nan() || value.is_infinite() {
459 return Err(serde::de::Error::custom(
460 "importance_weight must be a finite number",
461 ));
462 }
463 if value < 0.0 {
464 return Err(serde::de::Error::custom(
465 "importance_weight must be non-negative",
466 ));
467 }
468 if value > 1.0 {
469 return Err(serde::de::Error::custom("importance_weight must be <= 1.0"));
470 }
471 Ok(value)
472}
473
/// `importance_weight` default: `0.15`.
fn default_importance_weight() -> f64 {
    0.15_f64
}
477
/// Configuration for SYNAPSE spreading activation retrieval over the entity graph.
///
/// When `enabled = true`, spreading activation replaces BFS-based graph recall.
/// Seeds are initialized from fuzzy entity matches, then activation propagates
/// hop-by-hop with exponential decay and lateral inhibition.
///
/// # Validation
///
/// Constraints enforced at deserialization time (per-field validators):
/// - `0.0 < decay_lambda <= 1.0`
/// - `max_hops >= 1`
/// - `alpha` in `[0.0, 1.0]`
/// - `benna_fast_rate` and `benna_slow_rate` in `(0.0, 1.0]`
///
/// Constraint enforced only by an explicit call to `validate()`:
/// - `activation_threshold < inhibition_threshold`
///
/// `activation_threshold`, `inhibition_threshold`, `seed_structural_weight` and
/// `recall_timeout_ms` have no deserialization-time validator.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SpreadingActivationConfig {
    /// Enable spreading activation (replaces BFS in graph recall when `true`). Default: `false`.
    pub enabled: bool,
    /// Per-hop activation decay factor. Range: `(0.0, 1.0]`. Default: `0.85`.
    #[serde(deserialize_with = "validate_decay_lambda")]
    pub decay_lambda: f32,
    /// Maximum propagation depth. Must be `>= 1`. Default: `3`.
    #[serde(deserialize_with = "validate_max_hops")]
    pub max_hops: u32,
    /// Minimum activation score to include a node in results. Default: `0.1`.
    pub activation_threshold: f32,
    /// Activation level at which a node stops receiving more activation. Default: `0.8`.
    pub inhibition_threshold: f32,
    /// Cap on total activated nodes per spread pass. Default: `50`.
    pub max_activated_nodes: usize,
    /// Weight of structural score in hybrid seed ranking. Range: `[0.0, 1.0]`. Default: `0.4`.
    ///
    /// The range is documentation only; no validator is attached to this field.
    #[serde(default = "default_seed_structural_weight")]
    pub seed_structural_weight: f32,
    /// Maximum seeds per community. `0` = unlimited. Default: `3`.
    #[serde(default = "default_seed_community_cap")]
    pub seed_community_cap: usize,
    /// Timeout in milliseconds for a single spreading activation recall call. Default: `1000`.
    /// Values below 1 are clamped to 100ms at runtime. Benchmark data shows FTS5 + graph
    /// traversal completes within 200–400ms; 1000ms provides headroom for cold caches.
    #[serde(default = "default_spreading_activation_recall_timeout_ms")]
    pub recall_timeout_ms: u64,
    /// SYNAPSE blend coefficient for Benna-Fusi fast/slow variables (#3709).
    ///
    /// `blended = alpha * confidence_fast + (1 - alpha) * confidence_slow`.
    /// Range: `[0.0, 1.0]`. Default: `0.3`.
    #[serde(
        default = "default_benna_alpha",
        deserialize_with = "validate_benna_alpha"
    )]
    pub alpha: f32,
    /// Benna-Fusi fast-variable learning rate applied on each confidence merge (#3709).
    ///
    /// `fast' = fast + eta_f * (c - fast)`. Range: `(0.0, 1.0]`. Default: `0.5`.
    #[serde(
        default = "default_benna_fast_rate",
        deserialize_with = "validate_benna_rate"
    )]
    pub benna_fast_rate: f32,
    /// Benna-Fusi slow-variable learning rate applied on each confidence merge (#3709).
    ///
    /// `slow' = slow + eta_s * (fast' - slow)`. Range: `(0.0, 1.0]`. Default: `0.05`.
    #[serde(
        default = "default_benna_slow_rate",
        deserialize_with = "validate_benna_rate"
    )]
    pub benna_slow_rate: f32,
}
545
546fn validate_decay_lambda<'de, D>(deserializer: D) -> Result<f32, D::Error>
547where
548 D: serde::Deserializer<'de>,
549{
550 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
551 if value.is_nan() || value.is_infinite() {
552 return Err(serde::de::Error::custom(
553 "decay_lambda must be a finite number",
554 ));
555 }
556 if !(value > 0.0 && value <= 1.0) {
557 return Err(serde::de::Error::custom(
558 "decay_lambda must be in (0.0, 1.0]",
559 ));
560 }
561 Ok(value)
562}
563
564fn validate_max_hops<'de, D>(deserializer: D) -> Result<u32, D::Error>
565where
566 D: serde::Deserializer<'de>,
567{
568 let value = <u32 as serde::Deserialize>::deserialize(deserializer)?;
569 if value == 0 {
570 return Err(serde::de::Error::custom("max_hops must be >= 1"));
571 }
572 Ok(value)
573}
574
575fn validate_unit_f32<'de, D>(deserializer: D) -> Result<f32, D::Error>
576where
577 D: serde::Deserializer<'de>,
578{
579 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
580 if !value.is_finite() {
581 return Err(serde::de::Error::custom("value must be a finite number"));
582 }
583 if !(0.0..=1.0).contains(&value) {
584 return Err(serde::de::Error::custom("value must be in [0.0, 1.0]"));
585 }
586 Ok(value)
587}
588
589fn validate_benna_alpha<'de, D>(deserializer: D) -> Result<f32, D::Error>
590where
591 D: serde::Deserializer<'de>,
592{
593 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
594 if !value.is_finite() {
595 return Err(serde::de::Error::custom("alpha must be a finite number"));
596 }
597 if !(0.0..=1.0).contains(&value) {
598 return Err(serde::de::Error::custom("alpha must be in [0.0, 1.0]"));
599 }
600 Ok(value)
601}
602
603fn validate_benna_rate<'de, D>(deserializer: D) -> Result<f32, D::Error>
604where
605 D: serde::Deserializer<'de>,
606{
607 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
608 if !value.is_finite() {
609 return Err(serde::de::Error::custom(
610 "benna_fast_rate/benna_slow_rate must be a finite number",
611 ));
612 }
613 if !(value > 0.0 && value <= 1.0) {
614 return Err(serde::de::Error::custom(
615 "benna_fast_rate/benna_slow_rate must be in (0.0, 1.0]",
616 ));
617 }
618 Ok(value)
619}
620
621impl SpreadingActivationConfig {
622 /// Validate cross-field constraints that cannot be expressed in per-field validators.
623 ///
624 /// # Errors
625 ///
626 /// Returns an error string if `activation_threshold >= inhibition_threshold`.
627 pub fn validate(&self) -> Result<(), String> {
628 if self.activation_threshold >= self.inhibition_threshold {
629 return Err(format!(
630 "activation_threshold ({}) must be < inhibition_threshold ({})",
631 self.activation_threshold, self.inhibition_threshold
632 ));
633 }
634 Ok(())
635 }
636}
637
/// Default for `SpreadingActivationConfig::seed_structural_weight`: `0.4`.
fn default_seed_structural_weight() -> f32 {
    0.4_f32
}

/// Default for `SpreadingActivationConfig::seed_community_cap`: `3`.
fn default_seed_community_cap() -> usize {
    3_usize
}
645
646impl Default for SpreadingActivationConfig {
647 fn default() -> Self {
648 Self {
649 enabled: false,
650 decay_lambda: default_spreading_activation_decay_lambda(),
651 max_hops: default_spreading_activation_max_hops(),
652 activation_threshold: default_spreading_activation_activation_threshold(),
653 inhibition_threshold: default_spreading_activation_inhibition_threshold(),
654 max_activated_nodes: default_spreading_activation_max_activated_nodes(),
655 seed_structural_weight: default_seed_structural_weight(),
656 seed_community_cap: default_seed_community_cap(),
657 recall_timeout_ms: default_spreading_activation_recall_timeout_ms(),
658 alpha: default_benna_alpha(),
659 benna_fast_rate: default_benna_fast_rate(),
660 benna_slow_rate: default_benna_slow_rate(),
661 }
662 }
663}
664
/// `MemORAI` write-gate prefilter configuration (#3709).
///
/// When `enabled = true`, low-signal edges (confidence below threshold + generic relation type)
/// are silently dropped before write, reducing noise in the knowledge graph.
///
/// TOML path: `[memory.graph.write_gate]`
///
/// Missing keys fall back to this type's `Default` values (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
#[serde(default)]
pub struct WriteGateConfig {
    /// Enable write-gate prefilter. Default: `false` (opt-in).
    pub enabled: bool,
    /// Minimum edge confidence to pass the gate when the relation is low-signal. Default: `0.3`.
    ///
    /// Range: `[0.0, 1.0]`, enforced at deserialization by `validate_unit_f32`.
    #[serde(
        default = "default_write_gate_min_edge_relevance",
        deserialize_with = "validate_unit_f32"
    )]
    pub min_edge_relevance: f32,
}
685
686impl Default for WriteGateConfig {
687 fn default() -> Self {
688 Self {
689 enabled: false,
690 min_edge_relevance: default_write_gate_min_edge_relevance(),
691 }
692 }
693}
694
/// Recency fallback threshold for the conflict resolver (#3709).
///
/// TOML path: `[memory.graph.conflict]`
///
/// Missing keys fall back to this type's `Default` values (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ConflictRecencyConfig {
    /// Minimum `confidence_slow` for the recency strategy to prefer an edge. Default: `0.2`.
    ///
    /// When two cardinality-1 heads conflict and recency is the resolution strategy,
    /// only edges with `confidence_slow >= recency_slow_threshold` are preferred by recency;
    /// edges below the threshold fall back to `valid_from` comparison. Range: `[0.0, 1.0]`,
    /// enforced at deserialization by `validate_unit_f32`.
    #[serde(
        default = "default_conflict_recency_slow_threshold",
        deserialize_with = "validate_unit_f32"
    )]
    pub recency_slow_threshold: f32,
}
712
713impl Default for ConflictRecencyConfig {
714 fn default() -> Self {
715 Self {
716 recency_slow_threshold: default_conflict_recency_slow_threshold(),
717 }
718 }
719}
720
/// Kumiho belief revision configuration.
///
/// Missing keys fall back to this type's `Default` values (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct BeliefRevisionConfig {
    /// Enable semantic contradiction detection for graph edges. Default: `false`.
    pub enabled: bool,
    /// Cosine similarity threshold for considering two facts as contradictory.
    /// Only edges with similarity >= this value are candidates for revision. Default: `0.85`.
    ///
    /// Must be finite and in `[0.0, 1.0]`; enforced at deserialization by
    /// `validate_similarity_threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
}
732
/// Default for `BeliefRevisionConfig::similarity_threshold`: `0.85`.
fn default_belief_revision_similarity_threshold() -> f32 {
    0.85_f32
}
736
737impl Default for BeliefRevisionConfig {
738 fn default() -> Self {
739 Self {
740 enabled: false,
741 similarity_threshold: default_belief_revision_similarity_threshold(),
742 }
743 }
744}
745
/// D-MEM RPE-based tiered graph extraction routing configuration.
///
/// Missing keys fall back to this type's `Default` values (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct RpeConfig {
    /// Enable RPE-based routing to skip extraction on low-surprise turns. Default: `false`.
    pub enabled: bool,
    /// RPE threshold. Turns with RPE < this value skip graph extraction. Range: `[0.0, 1.0]`.
    /// Default: `0.3`.
    ///
    /// Validated by the shared `validate_similarity_threshold` hook, so a rejected value is
    /// reported under the name `similarity_threshold` rather than `threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub threshold: f32,
    /// Maximum consecutive turns to skip before forcing extraction (safety valve). Default: `5`.
    pub max_skip_turns: u32,
}
759
/// Default for `RpeConfig::threshold`: `0.3`.
fn default_rpe_threshold() -> f32 {
    0.3_f32
}

/// Default for `RpeConfig::max_skip_turns`: `5`.
fn default_rpe_max_skip_turns() -> u32 {
    5_u32
}
767
768impl Default for RpeConfig {
769 fn default() -> Self {
770 Self {
771 enabled: false,
772 threshold: default_rpe_threshold(),
773 max_skip_turns: default_rpe_max_skip_turns(),
774 }
775 }
776}
777
/// Configuration for A-MEM dynamic note linking.
///
/// When enabled, after each graph extraction pass, entities extracted from the message are
/// compared against the entity embedding collection. Pairs with cosine similarity above
/// `similarity_threshold` receive a `similar_to` edge in the graph.
///
/// Missing keys fall back to this type's `Default` values (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct NoteLinkingConfig {
    /// Enable A-MEM note linking after graph extraction. Default: `false`.
    pub enabled: bool,
    /// Minimum cosine similarity score to create a `similar_to` edge. Default: `0.85`.
    ///
    /// Must be finite and in `[0.0, 1.0]`; enforced at deserialization by
    /// `validate_similarity_threshold`.
    #[serde(deserialize_with = "validate_similarity_threshold")]
    pub similarity_threshold: f32,
    /// Maximum number of similar entities to link per extracted entity. Default: `10`.
    pub top_k: usize,
    /// Timeout for the entire linking pass in seconds. Default: `5`.
    pub timeout_secs: u64,
}
796
797impl Default for NoteLinkingConfig {
798 fn default() -> Self {
799 Self {
800 enabled: false,
801 similarity_threshold: default_note_linking_similarity_threshold(),
802 top_k: default_note_linking_top_k(),
803 timeout_secs: default_note_linking_timeout_secs(),
804 }
805 }
806}
807
/// Vector backend selector for embedding storage.
///
/// Serialized in lowercase (`"qdrant"` / `"sqlite"`). `Sqlite` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum VectorBackend {
    /// Serialized as `"qdrant"`.
    Qdrant,
    /// Serialized as `"sqlite"`; the default variant.
    #[default]
    Sqlite,
}
817
818impl VectorBackend {
819 /// Return the lowercase identifier string for this backend.
820 ///
821 /// # Examples
822 ///
823 /// ```
824 /// use zeph_config::VectorBackend;
825 ///
826 /// assert_eq!(VectorBackend::Sqlite.as_str(), "sqlite");
827 /// assert_eq!(VectorBackend::Qdrant.as_str(), "qdrant");
828 /// ```
829 #[must_use]
830 pub fn as_str(&self) -> &'static str {
831 match self {
832 Self::Qdrant => "qdrant",
833 Self::Sqlite => "sqlite",
834 }
835 }
836}
837
838/// Memory subsystem configuration, nested under `[memory]` in TOML.
839///
840/// Controls `SQLite` and Qdrant storage, semantic recall, context compaction,
841/// multi-tier promotion, and all memory-related background tasks.
842///
843/// # Example (TOML)
844///
845/// ```toml
846/// [memory]
847/// sqlite_path = "~/.local/share/zeph/data/zeph.db"
848/// qdrant_url = "http://localhost:6334"
849/// history_limit = 50
850/// summarization_threshold = 50
851/// auto_budget = true
852/// ```
853#[derive(Debug, Deserialize, Serialize)]
854#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
855pub struct MemoryConfig {
856 #[serde(default)]
857 pub compression_guidelines: CompressionGuidelinesConfig,
858 #[serde(default = "default_sqlite_path_field")]
859 pub sqlite_path: String,
860 pub history_limit: u32,
861 #[serde(default = "default_qdrant_url")]
862 pub qdrant_url: String,
863 /// Optional API key for authenticating to a remote or managed Qdrant cluster.
864 ///
865 /// Required when `qdrant_url` points to a non-localhost host (e.g. Qdrant Cloud).
866 /// Leave `None` for local dev instances. The actual key is resolved from the vault:
867 /// `zeph vault set ZEPH_QDRANT_API_KEY "<key>"`.
868 ///
869 /// The value is wrapped in [`Secret`] to prevent accidental logging.
870 /// `skip_serializing` prevents the key from being written back to TOML on config save.
871 #[serde(default, skip_serializing)]
872 pub qdrant_api_key: Option<Secret>,
873 #[serde(default)]
874 pub semantic: SemanticConfig,
875 #[serde(default = "default_summarization_threshold")]
876 pub summarization_threshold: usize,
877 /// LLM call timeout for summarization, in seconds. Default: `60`.
878 #[serde(default = "default_summarization_llm_timeout_secs")]
879 pub summarization_llm_timeout_secs: u64,
880 #[serde(default = "default_context_budget_tokens")]
881 pub context_budget_tokens: usize,
882 #[serde(default = "default_soft_compaction_threshold")]
883 pub soft_compaction_threshold: f32,
884 #[serde(
885 default = "default_hard_compaction_threshold",
886 alias = "compaction_threshold"
887 )]
888 pub hard_compaction_threshold: f32,
889 #[serde(default = "default_compaction_preserve_tail")]
890 pub compaction_preserve_tail: usize,
891 #[serde(default = "default_compaction_cooldown_turns")]
892 pub compaction_cooldown_turns: u8,
893 #[serde(default = "default_auto_budget")]
894 pub auto_budget: bool,
895 #[serde(default = "default_prune_protect_tokens")]
896 pub prune_protect_tokens: usize,
897 #[serde(default = "default_cross_session_score_threshold")]
898 pub cross_session_score_threshold: f32,
899 #[serde(default)]
900 pub vector_backend: VectorBackend,
901 #[serde(default = "default_token_safety_margin")]
902 pub token_safety_margin: f32,
903 #[serde(default = "default_redact_credentials")]
904 pub redact_credentials: bool,
905 #[serde(default = "default_true")]
906 pub autosave_assistant: bool,
907 #[serde(default = "default_autosave_min_length")]
908 pub autosave_min_length: usize,
909 #[serde(default = "default_tool_call_cutoff")]
910 pub tool_call_cutoff: usize,
911 #[serde(default = "default_sqlite_pool_size")]
912 pub sqlite_pool_size: u32,
913 #[serde(default)]
914 pub sessions: SessionsConfig,
915 #[serde(default)]
916 pub documents: DocumentConfig,
917 #[serde(default)]
918 pub eviction: EvictionConfig,
919 #[serde(default)]
920 pub compression: CompressionConfig,
921 #[serde(default)]
922 pub sidequest: SidequestConfig,
923 #[serde(default)]
924 pub graph: GraphConfig,
925 /// Store a lightweight session summary to the vector store on shutdown when no session
926 /// summary exists yet for this conversation. Enables cross-session recall for short or
927 /// interrupted sessions that never triggered hard compaction. Default: `true`.
928 #[serde(default = "default_shutdown_summary")]
929 pub shutdown_summary: bool,
930 /// Minimum number of user-turn messages required before a shutdown summary is generated.
931 /// Sessions below this threshold are considered trivial and skipped. Default: `4`.
932 #[serde(default = "default_shutdown_summary_min_messages")]
933 pub shutdown_summary_min_messages: usize,
934 /// Maximum number of recent messages (user + assistant) sent to the LLM for shutdown
935 /// summarization. Caps token cost for long sessions that never triggered hard compaction.
936 /// Default: `20`.
937 #[serde(default = "default_shutdown_summary_max_messages")]
938 pub shutdown_summary_max_messages: usize,
939 /// Per-attempt timeout in seconds for each LLM call during shutdown summarization.
940 /// Applies independently to the structured call and to the plain-text fallback.
    /// Default: `30`.
942 #[serde(default = "default_shutdown_summary_timeout_secs")]
943 pub shutdown_summary_timeout_secs: u64,
944 /// LLM provider used for shutdown summarization calls.
945 ///
946 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
947 /// provider. Use a fast, cost-efficient model (e.g. `"fast"`) to minimise shutdown latency.
948 ///
949 /// Example:
950 /// ```toml
951 /// [memory]
952 /// shutdown_summary_provider = "fast"
953 /// ```
954 #[serde(default)]
955 pub shutdown_summary_provider: ProviderName,
956 /// LLM provider used for deferred tool-pair summarization (context compaction).
957 ///
958 /// Accepts a provider name from `[[llm.providers]]`. When empty, falls back to the primary
959 /// provider. A mid-tier model is usually sufficient for compaction summaries.
960 ///
961 /// Example:
962 /// ```toml
963 /// [memory]
964 /// compaction_provider = "fast"
965 /// ```
966 #[serde(default)]
967 pub compaction_provider: ProviderName,
968 /// Use structured anchored summaries for context compaction.
969 ///
970 /// When enabled, hard compaction requests a JSON schema from the LLM
971 /// instead of free-form prose. Falls back to prose if the LLM fails
972 /// to produce valid JSON. Default: `false`.
973 #[serde(default)]
974 pub structured_summaries: bool,
975 /// AOI three-layer memory tier promotion system.
976 ///
977 /// When `tiers.enabled = true`, a background sweep promotes frequently-accessed episodic
978 /// messages to a semantic tier by clustering near-duplicates and distilling via LLM.
979 #[serde(default)]
980 pub tiers: TierConfig,
981 /// A-MAC adaptive memory admission control.
982 ///
983 /// When `admission.enabled = true`, each message is evaluated before saving and rejected
984 /// if its composite admission score falls below the configured threshold.
985 #[serde(default)]
986 pub admission: AdmissionConfig,
987 /// Session digest generation at session end. Default: disabled.
988 #[serde(default)]
989 pub digest: DigestConfig,
990 /// Context assembly strategy. Default: `full_history` (current behavior).
991 #[serde(default)]
992 pub context_strategy: ContextStrategy,
993 /// Number of turns at which `Adaptive` strategy switches to `MemoryFirst`. Default: `20`.
994 #[serde(default = "default_crossover_turn_threshold")]
995 pub crossover_turn_threshold: u32,
996 /// All-Mem lifelong memory consolidation sweep.
997 ///
998 /// When `consolidation.enabled = true`, a background loop clusters semantically similar
999 /// messages and merges them into consolidated entries via LLM.
1000 #[serde(default)]
1001 pub consolidation: ConsolidationConfig,
1002 /// `SleepGate` forgetting sweep (#2397).
1003 ///
1004 /// When `forgetting.enabled = true`, a background loop periodically decays importance
1005 /// scores and prunes memories below the forgetting floor.
1006 #[serde(default)]
1007 pub forgetting: ForgettingConfig,
1008 /// `PostgreSQL` connection URL.
1009 ///
1010 /// Used when the binary is compiled with `--features postgres`.
1011 /// Can be overridden by the vault key `ZEPH_DATABASE_URL`.
1012 /// Example: `postgres://user:pass@localhost:5432/zeph`
1013 /// Default: `None` (uses `sqlite_path` instead).
1014 #[serde(default)]
1015 pub database_url: Option<String>,
1016 /// Cost-sensitive store routing (#2444).
1017 ///
1018 /// When `store_routing.enabled = true`, query intent is classified and routed to
1019 /// the cheapest sufficient backend instead of querying all stores on every turn.
1020 #[serde(default)]
1021 pub store_routing: StoreRoutingConfig,
1022 /// Persona memory layer (#2461).
1023 ///
1024 /// When `persona.enabled = true`, user preferences and domain knowledge are extracted
1025 /// from conversation history and injected into context after the system prompt.
1026 #[serde(default)]
1027 pub persona: PersonaConfig,
1028 /// Trajectory-informed memory (#2498).
1029 #[serde(default)]
1030 pub trajectory: TrajectoryConfig,
1031 /// Category-aware memory (#2428).
1032 #[serde(default)]
1033 pub category: CategoryConfig,
1034 /// `TiMem` temporal-hierarchical memory tree (#2262).
1035 #[serde(default)]
1036 pub tree: TreeConfig,
1037 /// Time-based microcompact (#2699).
1038 ///
1039 /// When `microcompact.enabled = true`, stale low-value tool outputs are cleared
1040 /// from context when the session has been idle longer than `gap_threshold_minutes`.
1041 #[serde(default)]
1042 pub microcompact: MicrocompactConfig,
1043 /// autoDream background memory consolidation (#2697).
1044 ///
1045 /// When `autodream.enabled = true`, a constrained consolidation subagent runs
1046 /// after a session ends if both `min_sessions` and `min_hours` gates pass.
1047 #[serde(default)]
1048 pub autodream: AutoDreamConfig,
1049 /// Cosine similarity threshold for deduplicating key facts in `zeph_key_facts` (#2717).
1050 ///
1051 /// Before inserting a new key fact, its nearest neighbour is looked up in the
1052 /// `zeph_key_facts` collection. If the best score is ≥ this threshold the fact is
1053 /// considered a near-duplicate and skipped. Set to a value greater than `1.0` (e.g.
1054 /// `2.0`) to disable dedup entirely. Default: `0.95`.
1055 #[serde(default = "default_key_facts_dedup_threshold")]
1056 pub key_facts_dedup_threshold: f32,
1057 /// Experience compression spectrum (#3305).
1058 ///
1059 /// Controls three-tier retrieval policy and background skill-promotion engine.
1060 #[serde(default)]
1061 pub compression_spectrum: crate::features::CompressionSpectrumConfig,
1062 /// MemMachine-inspired retrieval-stage tuning (#3340).
1063 ///
1064 /// Controls ANN candidate depth, search-prompt formatting, and the shape of memory snippets
1065 /// injected into agent context. Separate from `SemanticConfig` because these knobs apply
1066 /// uniformly across graph, hybrid, and vector-only recall paths.
1067 ///
1068 /// # Example (TOML)
1069 ///
1070 /// ```toml
1071 /// [memory.retrieval]
1072 /// depth = 40
1073 /// search_prompt_template = ""
1074 /// context_format = "structured"
1075 /// ```
1076 #[serde(default)]
1077 pub retrieval: RetrievalConfig,
1078 /// `ReasoningBank`: distilled reasoning strategy memory (#3342).
1079 ///
1080 /// When `reasoning.enabled = true`, each completed agent turn is evaluated by a self-judge
1081 /// LLM call; successful and failed reasoning chains are compressed into short, generalizable
1082 /// strategy summaries stored in `reasoning_strategies` (`SQLite`) and a matching Qdrant
1083 /// collection. Top-k strategies are retrieved by embedding similarity at context-build time
1084 /// and injected before the LLM call.
1085 #[serde(default)]
1086 pub reasoning: ReasoningConfig,
1087 /// Hebbian edge-weight reinforcement configuration (HL-F1/F2, #3344).
1088 ///
1089 /// When `enabled = true`, the weight of each `graph_edges` row is incremented
1090 /// by `hebbian_lr` every time that edge is traversed during a recall. Default: disabled.
1091 ///
1092 /// # Example (TOML)
1093 ///
1094 /// ```toml
1095 /// [memory.hebbian]
1096 /// enabled = true
1097 /// hebbian_lr = 0.1
1098 /// ```
1099 #[serde(default)]
1100 pub hebbian: HebbianConfig,
1101 /// `MemCoT` rolling semantic state configuration (#3574).
1102 ///
1103 /// When `enabled = true`, each completed assistant turn spawns a background distillation
1104 /// task that compresses the response into a short semantic state buffer. The buffer is
1105 /// prepended to graph recall queries so retrieval stays contextually relevant across long
1106 /// multi-turn sessions.
1107 ///
1108 /// # Example (TOML)
1109 ///
1110 /// ```toml
1111 /// [memory.memcot]
1112 /// enabled = true
1113 /// distill_provider = "fast"
1114 /// min_assistant_chars = 200
1115 /// max_distills_per_session = 50
1116 /// ```
1117 #[serde(default)]
1118 pub memcot: MemCotConfig,
1119 /// `OmniMem` retrieval failure tracking (issue #3576).
1120 ///
1121 /// When `enabled = true`, no-hit and low-confidence recall events are logged
1122 /// asynchronously to `memory_retrieval_failures` for closed-loop parameter tuning.
1123 ///
1124 /// # Example (TOML)
1125 ///
1126 /// ```toml
1127 /// [memory.retrieval_failures]
1128 /// enabled = true
1129 /// low_confidence_threshold = 0.3
1130 /// retention_days = 90
1131 /// ```
1132 #[serde(default)]
1133 pub retrieval_failures: RetrievalFailuresConfig,
1134 /// Write quality gate (#3629).
1135 ///
1136 /// When `quality_gate.enabled = true`, each `remember()` call is scored and low-quality
1137 /// writes are rejected before persistence. Evaluated after A-MAC admission control.
1138 #[serde(default)]
1139 pub quality_gate: WriteQualityGateConfig,
1140 /// `MemFlow` tiered intent-driven retrieval (issue #3712).
1141 ///
1142 /// When `tiered_retrieval.enabled = true`, recall queries are classified by intent and
1143 /// dispatched to the cheapest sufficient tier (`ProfileLookup` → `TargetedRetrieval` →
1144 /// `DeepReasoning`) with optional validation and tier escalation.
1145 #[serde(default)]
1146 pub tiered_retrieval: TieredRetrievalConfig,
1147 /// `ScrapMem` optical forgetting (issue #3713).
1148 ///
1149 /// When `optical_forgetting.enabled = true`, a background sweep progressively compresses
1150 /// old messages: `Full` → `Compressed` → `SummaryOnly`, saving token budget in context assembly.
1151 #[serde(default)]
1152 pub optical_forgetting: OpticalForgettingConfig,
1153 /// EM-Graph episodic event extraction and causal linking (issue #3713).
1154 ///
1155 /// When `em_graph.enabled = true`, episodic events are extracted from conversation turns
1156 /// and linked via causal relationships, enabling causal-chain retrieval.
1157 #[serde(default)]
1158 pub em_graph: EmGraphConfig,
1159 /// Episodic-to-semantic consolidation daemon (issue #3799).
1160 ///
1161 /// When `episodic_consolidation.enabled = true`, a background loop periodically sweeps
1162 /// mature `episodic_events`, extracts durable facts via LLM, deduplicates against existing
1163 /// key facts, and promotes them to the semantic tier in `zeph_key_facts`.
1164 #[serde(default)]
1165 pub episodic_consolidation: EpisodicConsolidationConfig,
1166 /// MAGE shadow memory trajectory risk accumulator (spec 004-16).
1167 ///
1168 /// Maintains a per-session rolling risk score fed by sanitizer audit signals.
1169 /// When `shadow_memory.enabled = true`, tool execution is gated if cumulative
1170 /// trajectory risk exceeds `risk_threshold`. When `false`, all code paths are
1171 /// zero-cost no-ops.
1172 ///
1173 /// # Example (TOML)
1174 ///
1175 /// ```toml
1176 /// [memory.shadow_memory]
1177 /// enabled = true
1178 /// risk_threshold = 0.75
1179 /// risk_halflife_turns = 10
1180 /// ```
1181 #[serde(default)]
1182 pub shadow_memory: TrajectoryRiskAccumulatorConfig,
1183 /// Five-signal SYNAPSE retrieval (issue #4374).
1184 ///
1185 /// When `five_signal.enabled = true`, SYNAPSE recall weights five signals: recency,
1186 /// relevance, access frequency, causal distance, and novelty. All new signals default
1187 /// to weight `0.0`, preserving exact backward compatibility.
1188 #[serde(default)]
1189 pub five_signal: FiveSignalConfig,
1190 /// Context-Adaptive Memory fidelity scoring (CAM Phase 1, #4547).
1191 ///
1192 /// When `fidelity.enabled = true`, the heuristic fidelity scorer runs after each
1193 /// `apply_prepared_context()` call and assigns `Full / Compressed / Placeholder`
1194 /// levels to historical messages. Default: disabled.
1195 ///
1196 /// # Example (TOML)
1197 ///
1198 /// ```toml
1199 /// [memory.fidelity]
1200 /// enabled = false
1201 /// w_semantic = 0.3
1202 /// w_temporal = 0.3
1203 /// w_importance = 0.2
1204 /// w_plan = 0.2
1205 /// full_threshold = 0.7
1206 /// compressed_threshold = 0.3
1207 /// compressed_max_tokens = 50
1208 /// regrade_threshold = 0.6
1209 /// min_query_length = 8
1210 /// max_scored_messages = 500
1211 /// ```
1212 #[serde(default, skip_serializing_if = "Option::is_none")]
1213 pub fidelity: Option<crate::fidelity::FidelityConfig>,
1214}
1215
1216// ── MemFlow tiered retrieval config (issue #3712) ──────────────────────────────
1217
/// `MemFlow` tiered intent-driven retrieval configuration.
///
/// Classifies each recall query into one of three intent tiers (`ProfileLookup`,
/// `TargetedRetrieval`, `DeepReasoning`) and dispatches to the cheapest sufficient backend.
/// An optional validation step can escalate to a heavier tier when evidence confidence is low.
///
/// Every key is optional: because of the container-level `#[serde(default)]`, omitted keys
/// take their values from this type's `Default` implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.tiered_retrieval]
/// enabled = false
/// classifier_provider = ""
/// validator_provider = ""
/// token_budget = 4096
/// validation_enabled = false
/// validation_threshold = 0.6
/// max_escalations = 1
/// classifier_timeout_secs = 5
/// validator_timeout_secs = 5
///
/// # Signal weights: `similarity_weight` defaults to 1.0; every other `*_weight`
/// # defaults to 0.0 (disabled) and must be raised to activate that signal.
/// similarity_weight = 1.0
/// recency_weight = 0.0
/// recency_half_life_days = 7
/// tfidf_weight = 0.0
/// cognitive_signal_weight = 0.0
/// tier_boost_weight = 0.0
/// semantic_tier_boost = 1.0
///
/// # Opt-in HELA spreading activation for the `DeepReasoning` graph step.
/// deep_reasoning_query_conditioned = false
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TieredRetrievalConfig {
    /// Enable `MemFlow` tiered retrieval. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for intent classification.
    ///
    /// When empty, the `HeuristicRouter` is used (no LLM call). When a provider
    /// is set but the call fails, falls back to the heuristic (fail-open).
    pub classifier_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for evidence validation.
    ///
    /// When empty or when `validation_enabled = false`, no validation call is made.
    pub validator_provider: ProviderName,
    /// Maximum tokens to gather for evidence per query. Default: `4096`.
    pub token_budget: usize,
    /// Enable evidence validation and tier escalation. Default: `false`.
    pub validation_enabled: bool,
    /// Confidence threshold below which validation triggers tier escalation. Default: `0.6`.
    pub validation_threshold: f32,
    /// Maximum tier escalations per query. Default: `1`.
    pub max_escalations: u8,
    /// Timeout in seconds for the classifier LLM call. Default: `5`.
    ///
    /// On timeout the pipeline falls back to the `HeuristicRouter` (fail-open).
    pub classifier_timeout_secs: u64,
    /// Timeout in seconds for the validator LLM call. Default: `5`.
    ///
    /// On timeout the validator is treated as sufficient (fail-open).
    pub validator_timeout_secs: u64,

    // ── Signal weights ────────────────────────────────────────────────────────
    /// Weight applied to the raw similarity score from vector/keyword recall. Default: `1.0`.
    ///
    /// Set to `1.0` and all other weights to `0.0` to reproduce pre-signal behaviour.
    pub similarity_weight: f64,
    /// Weight applied to the recency decay signal. Default: `0.0` (disabled).
    pub recency_weight: f64,
    /// Half-life for recency decay in days. Default: `7`.
    ///
    /// A message that is `recency_half_life_days` old receives a recency score of `0.5`.
    /// Set `recency_weight = 0.0` to disable recency scoring entirely.
    pub recency_half_life_days: u32,
    /// Weight applied to the TF-IDF signal. Default: `0.0` (disabled).
    pub tfidf_weight: f64,
    /// Weight applied to the cognitive signal (message access frequency). Default: `0.0` (disabled).
    pub cognitive_signal_weight: f64,
    /// Weight applied to the tier boost signal for consolidated/semantic entries. Default: `0.0` (disabled).
    pub tier_boost_weight: f64,
    /// Additive score awarded to entries in the `semantic` tier when `tier_boost_weight > 0`. Default: `1.0`.
    ///
    /// The final contribution is `tier_boost_weight * semantic_tier_boost` for semantic entries
    /// and `0.0` for episodic entries.
    pub semantic_tier_boost: f64,
    /// Route the `DeepReasoning` tier graph step through query-conditioned recall (#3994).
    ///
    /// When `true`, the graph recall step for `IntentClass::DeepReasoning` uses
    /// `recall_graph_hela` (HELA spreading activation) instead of static-weight BFS,
    /// producing query-aligned results. Requires an embedding store. Default: `false` (opt-in).
    // The field-level attribute and the container-level default both yield `false` here.
    #[serde(default)]
    pub deep_reasoning_query_conditioned: bool,
}
1309
1310impl Default for TieredRetrievalConfig {
1311 fn default() -> Self {
1312 Self {
1313 enabled: false,
1314 classifier_provider: ProviderName::default(),
1315 validator_provider: ProviderName::default(),
1316 token_budget: 4096,
1317 validation_enabled: false,
1318 validation_threshold: 0.6,
1319 max_escalations: 1,
1320 classifier_timeout_secs: 5,
1321 validator_timeout_secs: 5,
1322 similarity_weight: 1.0,
1323 recency_weight: 0.0,
1324 recency_half_life_days: 7,
1325 tfidf_weight: 0.0,
1326 cognitive_signal_weight: 0.0,
1327 tier_boost_weight: 0.0,
1328 semantic_tier_boost: 1.0,
1329 deep_reasoning_query_conditioned: false,
1330 }
1331 }
1332}
1333
1334// ── ScrapMem optical forgetting config (issue #3713) ───────────────────────────
1335
/// `ScrapMem` optical forgetting configuration.
///
/// Controls progressive content-fidelity decay: `Full` → `Compressed` → `SummaryOnly`.
/// The sweep is orthogonal to `SleepGate` (which decays importance scores); optical
/// forgetting compresses content in place based on age.
///
/// Every key is optional: because of the container-level `#[serde(default)]`, omitted keys
/// take their values from this type's `Default` implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.optical_forgetting]
/// enabled = false
/// compress_provider = ""
/// compress_after_turns = 100
/// summarize_after_turns = 500
/// sweep_interval_secs = 3600
/// sweep_batch_size = 50
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct OpticalForgettingConfig {
    /// Enable optical forgetting sweep. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for LLM-based content compression.
    /// Falls back to the primary provider when empty.
    pub compress_provider: ProviderName,
    /// Number of conversation turns after which `Full` messages are compressed. Default: `100`.
    pub compress_after_turns: u32,
    /// Number of conversation turns after which `Compressed` messages become `SummaryOnly`. Default: `500`.
    ///
    /// NOTE(review): nothing in this struct checks that this value exceeds
    /// `compress_after_turns` — confirm whether the sweep validates the ordering.
    pub summarize_after_turns: u32,
    /// How often the sweep runs, in seconds. Default: `3600`.
    pub sweep_interval_secs: u64,
    /// Maximum messages to compress per sweep iteration. Default: `50`.
    pub sweep_batch_size: usize,
}
1370
1371impl Default for OpticalForgettingConfig {
1372 fn default() -> Self {
1373 Self {
1374 enabled: false,
1375 compress_provider: ProviderName::default(),
1376 compress_after_turns: 100,
1377 summarize_after_turns: 500,
1378 sweep_interval_secs: 3600,
1379 sweep_batch_size: 50,
1380 }
1381 }
1382}
1383
1384// ── EM-Graph config (issue #3713) ──────────────────────────────────────────────
1385
/// EM-Graph episodic event extraction and causal linking configuration.
///
/// When enabled, episodic events are extracted from conversation turns and linked
/// via causal relationships stored in `episodic_events` and `causal_links` tables.
///
/// Every key is optional: because of the container-level `#[serde(default)]`, omitted keys
/// take their values from this type's `Default` implementation.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.em_graph]
/// enabled = false
/// extract_provider = ""
/// max_chain_depth = 3
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EmGraphConfig {
    /// Enable EM-Graph event extraction and causal linking. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for event extraction.
    /// Falls back to the primary provider when empty.
    pub extract_provider: ProviderName,
    /// Maximum hops when traversing causal chains during recall. Default: `3`.
    pub max_chain_depth: u32,
}
1410
1411impl Default for EmGraphConfig {
1412 fn default() -> Self {
1413 Self {
1414 enabled: false,
1415 extract_provider: ProviderName::default(),
1416 max_chain_depth: 3,
1417 }
1418 }
1419}
1420
1421// ── Episodic consolidation daemon config (issue #3799) ────────────────────────
1422
/// Serde default for `EpisodicConsolidationConfig::interval_secs`: 30 minutes, in seconds.
fn default_episodic_consolidation_interval_secs() -> u64 {
    const HALF_HOUR_SECS: u64 = 30 * 60;
    HALF_HOUR_SECS
}
1426
/// Serde default for `EpisodicConsolidationConfig::batch_size`: 30 events per sweep.
fn default_episodic_consolidation_batch_size() -> usize {
    const EVENTS_PER_SWEEP: usize = 30;
    EVENTS_PER_SWEEP
}
1430
/// Serde default for `EpisodicConsolidationConfig::min_age_secs`: 5 minutes, in seconds.
fn default_episodic_consolidation_min_age_secs() -> u64 {
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    FIVE_MINUTES_SECS
}
1434
/// Serde default for `EpisodicConsolidationConfig::dedup_jaccard_threshold`: `0.6`.
fn default_episodic_consolidation_dedup_jaccard_threshold() -> f32 {
    const DUPLICATE_JACCARD: f32 = 0.6;
    DUPLICATE_JACCARD
}
1438
1439// ── Five-signal SYNAPSE retrieval config (issue #4374) ────────────────────────
1440
/// Serde default for `FiveSignalConfig::w_recency`: `0.35`.
fn default_five_signal_w_recency() -> f64 {
    const RECENCY_WEIGHT: f64 = 0.35;
    RECENCY_WEIGHT
}
1444
/// Serde default for `FiveSignalConfig::w_relevance`: `0.35`.
fn default_five_signal_w_relevance() -> f64 {
    const RELEVANCE_WEIGHT: f64 = 0.35;
    RELEVANCE_WEIGHT
}
1448
/// Serde default for `FiveSignalConfig::causal_bfs_max_depth`: 10 levels.
fn default_causal_bfs_max_depth() -> u32 {
    const MAX_BFS_DEPTH: u32 = 10;
    MAX_BFS_DEPTH
}
1452
/// Serde default for `FiveSignalConfig::neutral_causal_distance`: `5`.
fn default_neutral_causal_distance() -> u32 {
    const NEUTRAL_DISTANCE: u32 = 5;
    NEUTRAL_DISTANCE
}
1456
/// Serde default for `FiveSignalConfig::novelty_decay_rate` (λ in `exp(-λ × days)`): `0.1`.
fn default_novelty_decay_rate() -> f64 {
    const LAMBDA: f64 = 0.1;
    LAMBDA
}
1460
/// Serde default for `FiveSignalConsolidationConfig::interval_seconds`: 2 hours, in seconds.
fn default_five_signal_interval_seconds() -> u64 {
    const TWO_HOURS_SECS: u64 = 2 * 60 * 60;
    TWO_HOURS_SECS
}
1464
/// Serde default for `FiveSignalConsolidationConfig::batch_size`: 500 facts per run.
fn default_five_signal_batch_size() -> usize {
    const FACTS_PER_RUN: usize = 500;
    FACTS_PER_RUN
}
1468
/// Serde default for `FiveSignalConsolidationConfig::daemon_max_runtime_ms`: 30 000 ms.
fn default_five_signal_daemon_max_runtime_ms() -> u64 {
    const MAX_RUNTIME_MS: u64 = 30 * 1_000;
    MAX_RUNTIME_MS
}
1472
/// Serde default for `FiveSignalConsolidationConfig::promotion_score_threshold`: `0.70`.
fn default_five_signal_promotion_score_threshold() -> f64 {
    const PROMOTE_ABOVE: f64 = 0.70;
    PROMOTE_ABOVE
}
1476
/// Serde default for `FiveSignalConsolidationConfig::demotion_score_threshold`: `0.20`.
fn default_five_signal_demotion_score_threshold() -> f64 {
    const DEMOTE_BELOW: f64 = 0.20;
    DEMOTE_BELOW
}
1480
/// Serde default for `FiveSignalConsolidationConfig::top_k_per_run`: 500 facts queried per run.
fn default_five_signal_top_k_per_run() -> usize {
    const QUERY_LIMIT: usize = 500;
    QUERY_LIMIT
}
1484
/// Five-signal SYNAPSE retrieval configuration (issue #4374).
///
/// Extends SYNAPSE recall with three additional signals — access frequency, causal
/// distance, and novelty — beyond the two-signal baseline (recency + relevance).
/// All new signal weights default to `0.0`, preserving exact backward compatibility.
///
/// This struct has no container-level `#[serde(default)]`; each field declares its own
/// default, so any key may still be omitted from the TOML table.
///
/// # Example (TOML)
///
/// The weights below are illustrative and differ from the defaults for the three new signals.
///
/// ```toml
/// [memory.five_signal]
/// enabled = true
/// w_recency = 0.35
/// w_relevance = 0.35
/// w_frequency = 0.15
/// w_causal = 0.10
/// w_novelty = 0.05
///
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConfig {
    /// Master switch. When `false`, the five-signal code path contributes zero overhead.
    /// Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Weight for the recency signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_recency")]
    pub w_recency: f64,
    /// Weight for the semantic relevance signal. Default: `0.35`.
    #[serde(default = "default_five_signal_w_relevance")]
    pub w_relevance: f64,
    /// Weight for the access frequency signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_frequency: f64,
    /// Weight for the causal distance signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_causal: f64,
    /// Weight for the novelty signal. Default: `0.0` (baseline-compatible).
    #[serde(default)]
    pub w_novelty: f64,
    /// Maximum BFS depth for causal distance computation. Default: `10`.
    #[serde(default = "default_causal_bfs_max_depth")]
    pub causal_bfs_max_depth: u32,
    /// Causal distance assigned when no goal entity is set or a fact lies beyond
    /// `causal_bfs_max_depth`. Default: `5`.
    #[serde(default = "default_neutral_causal_distance")]
    pub neutral_causal_distance: u32,
    /// Decay rate λ in `exp(-λ × days)` for the novelty signal. Default: `0.1`.
    #[serde(default = "default_novelty_decay_rate")]
    pub novelty_decay_rate: f64,
    /// Async consolidation daemon that promotes hot episodic facts to Qdrant.
    /// Defaults to `FiveSignalConsolidationConfig::default()` when the sub-table is omitted.
    #[serde(default)]
    pub consolidation_daemon: FiveSignalConsolidationConfig,
}
1540
1541impl Default for FiveSignalConfig {
1542 fn default() -> Self {
1543 Self {
1544 enabled: false,
1545 w_recency: default_five_signal_w_recency(),
1546 w_relevance: default_five_signal_w_relevance(),
1547 w_frequency: 0.0,
1548 w_causal: 0.0,
1549 w_novelty: 0.0,
1550 causal_bfs_max_depth: default_causal_bfs_max_depth(),
1551 neutral_causal_distance: default_neutral_causal_distance(),
1552 novelty_decay_rate: default_novelty_decay_rate(),
1553 consolidation_daemon: FiveSignalConsolidationConfig::default(),
1554 }
1555 }
1556}
1557
/// Async consolidation daemon configuration for five-signal retrieval (issue #4374).
///
/// When `enabled = true`, a background task runs at `interval_seconds` intervals,
/// evaluates the top `top_k_per_run` episodic facts by five-signal score, promotes
/// facts above `promotion_score_threshold` to Qdrant, and demotes facts below
/// `demotion_score_threshold` to `episodic_only` tier.
///
/// Each field declares its own serde default, so any key may be omitted.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.five_signal.consolidation_daemon]
/// enabled = true
/// interval_seconds = 7200
/// batch_size = 500
/// daemon_max_runtime_ms = 30000
/// promotion_score_threshold = 0.70
/// demotion_score_threshold = 0.20
/// top_k_per_run = 500
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FiveSignalConsolidationConfig {
    /// Enable the daemon. Requires the `scheduler` feature. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Interval between daemon runs in seconds. Default: `7200` (2 hours).
    #[serde(default = "default_five_signal_interval_seconds")]
    pub interval_seconds: u64,
    /// Maximum facts processed (embed + upsert) per run. Default: `500`.
    #[serde(default = "default_five_signal_batch_size")]
    pub batch_size: usize,
    /// Hard timeout per run in milliseconds. Default: `30000`.
    #[serde(default = "default_five_signal_daemon_max_runtime_ms")]
    pub daemon_max_runtime_ms: u64,
    /// Five-signal score above which a fact is promoted to Qdrant. Default: `0.70`.
    #[serde(default = "default_five_signal_promotion_score_threshold")]
    pub promotion_score_threshold: f64,
    /// Five-signal score below which a promoted fact is demoted. Default: `0.20`.
    #[serde(default = "default_five_signal_demotion_score_threshold")]
    pub demotion_score_threshold: f64,
    /// Number of episodic facts queried per run (SQL LIMIT). Must be >= `batch_size`.
    /// Default: `500`.
    ///
    /// NOTE(review): the `>= batch_size` constraint is not checked during deserialization
    /// of this struct — confirm it is validated where the daemon is started.
    #[serde(default = "default_five_signal_top_k_per_run")]
    pub top_k_per_run: usize,
}
1600
1601impl Default for FiveSignalConsolidationConfig {
1602 fn default() -> Self {
1603 Self {
1604 enabled: false,
1605 interval_seconds: default_five_signal_interval_seconds(),
1606 batch_size: default_five_signal_batch_size(),
1607 daemon_max_runtime_ms: default_five_signal_daemon_max_runtime_ms(),
1608 promotion_score_threshold: default_five_signal_promotion_score_threshold(),
1609 demotion_score_threshold: default_five_signal_demotion_score_threshold(),
1610 top_k_per_run: default_five_signal_top_k_per_run(),
1611 }
1612 }
1613}
1614
/// Episodic-to-semantic consolidation daemon configuration (issue #3799).
///
/// When `enabled = true`, a background loop periodically sweeps mature `episodic_events`,
/// extracts durable factual statements via LLM, deduplicates them against existing
/// key facts using Jaccard similarity, and promotes accepted facts to the semantic tier
/// in both `consolidated_facts` (`SQLite` persistence) and `zeph_key_facts` (Qdrant, if available).
///
/// Every key is optional. `enabled` and `consolidation_provider` fall back to the
/// container-level `#[serde(default)]` (this type's `Default` implementation); the remaining
/// fields name their default helper explicitly, and those helpers return the same values
/// the `Default` implementation uses.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.episodic_consolidation]
/// enabled = false
/// consolidation_provider = ""
/// interval_secs = 1800
/// batch_size = 30
/// min_age_secs = 300
/// dedup_jaccard_threshold = 0.6
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct EpisodicConsolidationConfig {
    /// Enable the episodic consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for fact extraction LLM calls.
    /// Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// How often the consolidation sweep runs, in seconds. Default: `1800` (30 min).
    #[serde(default = "default_episodic_consolidation_interval_secs")]
    pub interval_secs: u64,
    /// Maximum number of episodic events to process per sweep. Default: `30`.
    #[serde(default = "default_episodic_consolidation_batch_size")]
    pub batch_size: usize,
    /// Minimum age in seconds before an episodic event is eligible. Default: `300` (5 min).
    /// Prevents consolidating events from the active conversation.
    #[serde(default = "default_episodic_consolidation_min_age_secs")]
    pub min_age_secs: u64,
    /// Jaccard similarity threshold for deduplication against existing key facts.
    /// Facts with token-set Jaccard >= this value are considered duplicates. Default: `0.6`.
    #[serde(default = "default_episodic_consolidation_dedup_jaccard_threshold")]
    pub dedup_jaccard_threshold: f32,
}
1656
1657impl Default for EpisodicConsolidationConfig {
1658 fn default() -> Self {
1659 Self {
1660 enabled: false,
1661 consolidation_provider: ProviderName::default(),
1662 interval_secs: default_episodic_consolidation_interval_secs(),
1663 batch_size: default_episodic_consolidation_batch_size(),
1664 min_age_secs: default_episodic_consolidation_min_age_secs(),
1665 dedup_jaccard_threshold: default_episodic_consolidation_dedup_jaccard_threshold(),
1666 }
1667 }
1668}
1669
/// Default low-confidence threshold for retrieval failure tracking: `0.3`.
fn default_retrieval_failures_low_confidence_threshold() -> f32 {
    const LOW_CONFIDENCE: f32 = 0.3;
    LOW_CONFIDENCE
}
1673
/// Default retention period for retrieval failure tracking: 90 days.
fn default_retrieval_failures_retention_days() -> u32 {
    const RETENTION_DAYS: u32 = 90;
    RETENTION_DAYS
}
1677
/// Default helper returning `256`.
///
/// Presumably backs a `channel_capacity` setting of the retrieval-failure logger; the
/// consuming struct is not shown next to this helper — verify at the use site.
fn default_retrieval_failures_channel_capacity() -> usize {
    const CAPACITY: usize = 256;
    CAPACITY
}
1681
/// Default helper returning `16`.
///
/// Presumably backs a `batch_size` setting of the retrieval-failure logger; the
/// consuming struct is not shown next to this helper — verify at the use site.
fn default_retrieval_failures_batch_size() -> usize {
    const BATCH: usize = 16;
    BATCH
}
1685
/// Default helper returning `100` (milliseconds, per the name).
///
/// Presumably backs a `flush_interval_ms` setting of the retrieval-failure logger; the
/// consuming struct is not shown next to this helper — verify at the use site.
fn default_retrieval_failures_flush_interval_ms() -> u64 {
    const FLUSH_EVERY_MS: u64 = 100;
    FLUSH_EVERY_MS
}
1689
/// Serde default for `crossover_turn_threshold`: the `Adaptive` context strategy switches
/// to `MemoryFirst` at 20 turns.
fn default_crossover_turn_threshold() -> u32 {
    const CROSSOVER_TURNS: u32 = 20;
    CROSSOVER_TURNS
}
1693
/// Serde default for `key_facts_dedup_threshold`: cosine similarity of `0.95`.
fn default_key_facts_dedup_threshold() -> f32 {
    const NEAR_DUPLICATE_COSINE: f32 = 0.95;
    NEAR_DUPLICATE_COSINE
}
1697
/// Session digest configuration (#2289).
///
/// Every key is optional: because of the container-level `#[serde(default)]`, omitted keys
/// take their values from this type's `Default` implementation.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct DigestConfig {
    /// Enable session digest generation at session end. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for digest generation.
    /// Falls back to the primary provider when `None`. Default: `None`.
    #[serde(default)]
    pub provider: Option<ProviderName>,
    /// Maximum tokens for the digest text. Default: `500`.
    pub max_tokens: usize,
    /// Maximum messages to feed into the digest prompt. Default: `50`.
    pub max_input_messages: usize,
}
1713
1714impl Default for DigestConfig {
1715 fn default() -> Self {
1716 Self {
1717 enabled: false,
1718 provider: None,
1719 max_tokens: 500,
1720 max_input_messages: 50,
1721 }
1722 }
1723}
1724
/// Context assembly strategy (#2288).
///
/// Variants are (de)serialized in `snake_case`, i.e. `full_history`, `memory_first`,
/// and `adaptive`. The enum is `#[non_exhaustive]`, so `match` expressions in other
/// crates need a wildcard arm.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ContextStrategy {
    /// Full conversation history trimmed to budget, with memory augmentation.
    /// This is the default and existing behavior.
    #[default]
    FullHistory,
    /// Drop conversation history; assemble context from summaries, semantic recall,
    /// cross-session memory, and session digest only.
    MemoryFirst,
    /// Start as `FullHistory`; switch to `MemoryFirst` when turn count exceeds
    /// `crossover_turn_threshold`.
    Adaptive,
}
1741
/// Session list and auto-title configuration, nested under `[memory.sessions]` in TOML.
///
/// Both keys are optional; each falls back to its named default helper.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct SessionsConfig {
    /// Maximum number of sessions returned by list operations (0 = unlimited).
    /// Default: `100`.
    #[serde(default = "default_max_history")]
    pub max_history: usize,
    /// Maximum characters for auto-generated session titles. Default: `60`.
    #[serde(default = "default_title_max_chars")]
    pub title_max_chars: usize,
}
1753
1754impl Default for SessionsConfig {
1755 fn default() -> Self {
1756 Self {
1757 max_history: default_max_history(),
1758 title_max_chars: default_title_max_chars(),
1759 }
1760 }
1761}
1762
/// Configuration for the document ingestion and RAG retrieval pipeline.
///
/// Each field declares its own serde default, so any key may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocumentConfig {
    /// Name of the collection that holds ingested documents. Default: `"zeph_documents"`.
    #[serde(default = "default_document_collection")]
    pub collection: String,
    /// Size of each document chunk. Default: `1000`.
    ///
    /// NOTE(review): the unit (characters vs. tokens) is not stated here — confirm against
    /// the chunker.
    #[serde(default = "default_document_chunk_size")]
    pub chunk_size: usize,
    /// Overlap between consecutive chunks, presumably in the same unit as `chunk_size`
    /// (verify against the chunker). Default: `100`.
    #[serde(default = "default_document_chunk_overlap")]
    pub chunk_overlap: usize,
    /// Number of document chunks to inject into agent context per turn. Default: `3`.
    #[serde(default = "default_document_top_k")]
    pub top_k: usize,
    /// Enable document RAG injection into agent context. Default: `false`.
    #[serde(default)]
    pub rag_enabled: bool,
}
1779
1780impl Default for DocumentConfig {
1781 fn default() -> Self {
1782 Self {
1783 collection: default_document_collection(),
1784 chunk_size: default_document_chunk_size(),
1785 chunk_overlap: default_document_chunk_overlap(),
1786 top_k: default_document_top_k(),
1787 rag_enabled: false,
1788 }
1789 }
1790}
1791
/// Semantic (vector) memory retrieval configuration, nested under `[memory.semantic]` in TOML.
///
/// Controls how memories are searched and ranked, including temporal decay, MMR diversity
/// re-ranking, and hybrid BM25+vector weighting.
///
/// Each field declares its own serde default, so any key may be omitted.
///
/// # Example (TOML)
///
/// ```toml
/// [memory.semantic]
/// enabled = true
/// recall_limit = 5
/// vector_weight = 0.7
/// keyword_weight = 0.3
/// mmr_lambda = 0.7
/// ```
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct SemanticConfig {
    /// Enable vector-based semantic recall. Default: `true`.
    #[serde(default = "default_semantic_enabled")]
    pub enabled: bool,
    /// Recall limit; presumably the maximum number of memories returned per recall —
    /// confirm against the recall implementation. Default comes from `default_recall_limit`.
    #[serde(default = "default_recall_limit")]
    pub recall_limit: usize,
    /// Weight of the vector-similarity component in hybrid ranking.
    /// Default comes from `default_vector_weight`.
    #[serde(default = "default_vector_weight")]
    pub vector_weight: f64,
    /// Weight of the keyword component in hybrid ranking (presumably the BM25 score).
    /// Default comes from `default_keyword_weight`.
    #[serde(default = "default_keyword_weight")]
    pub keyword_weight: f64,
    /// Enable temporal decay of recalled memories. Default: `true`.
    #[serde(default = "default_true")]
    pub temporal_decay_enabled: bool,
    /// Half-life, in days, used by temporal decay. Default: `30`.
    #[serde(default = "default_temporal_decay_half_life_days")]
    pub temporal_decay_half_life_days: u32,
    /// Enable MMR diversity re-ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub mmr_enabled: bool,
    /// MMR lambda parameter. Default: `0.7`.
    #[serde(default = "default_mmr_lambda")]
    pub mmr_lambda: f32,
    /// Enable the importance signal in ranking. Default: `true`.
    #[serde(default = "default_true")]
    pub importance_enabled: bool,
    /// Weight of the importance signal. Default comes from `default_importance_weight`.
    ///
    /// Explicitly supplied values pass through `validate_importance_weight` during
    /// deserialization. NOTE(review): the accepted range is defined by that helper, which
    /// is not shown next to this struct.
    #[serde(
        default = "default_importance_weight",
        deserialize_with = "validate_importance_weight"
    )]
    pub importance_weight: f64,
    /// Name of a `[[llm.providers]]` entry to use exclusively for embedding calls during
    /// memory write and backfill operations. A dedicated provider prevents `embed_backfill`
    /// from contending with the guardrail at the API server level (rate limits, Ollama
    /// single-model lock). Falls back to the main agent provider when `None`.
    #[serde(default)]
    pub embedding_provider: Option<ProviderName>,
    /// Timeout in seconds applied to every `embed()` call inside `zeph-memory`.
    ///
    /// Applies to all embedding call sites: admission control, quality gate, recall,
    /// summarization, graph retrieval, consolidation, and tree consolidation.
    /// Set to a higher value when using slow remote embedding providers.
    /// Default: `5`.
    #[serde(default = "default_embed_timeout_secs")]
    pub embed_timeout_secs: u64,
}
1849
1850impl Default for SemanticConfig {
1851 fn default() -> Self {
1852 Self {
1853 enabled: default_semantic_enabled(),
1854 recall_limit: default_recall_limit(),
1855 vector_weight: default_vector_weight(),
1856 keyword_weight: default_keyword_weight(),
1857 temporal_decay_enabled: true,
1858 temporal_decay_half_life_days: default_temporal_decay_half_life_days(),
1859 mmr_enabled: true,
1860 mmr_lambda: default_mmr_lambda(),
1861 importance_enabled: true,
1862 importance_weight: default_importance_weight(),
1863 embedding_provider: None,
1864 embed_timeout_secs: default_embed_timeout_secs(),
1865 }
1866 }
1867}
1868
/// Fallback for `SemanticConfig::embed_timeout_secs`: five seconds.
fn default_embed_timeout_secs() -> u64 {
    const FALLBACK_SECS: u64 = 5;
    FALLBACK_SECS
}
1872
1873/// Memory snippet rendering format injected into agent context (MM-F5, #3340).
1874///
1875/// Controls how each recalled memory entry is presented in the assembled prompt.
1876/// Flipping this value does not affect stored content — `SQLite` rows and Qdrant points
1877/// always contain the raw message text. The format is applied exclusively during
1878/// context assembly and is never persisted.
1879///
1880/// # Token cost
1881///
1882/// `Structured` headers add roughly 2–3× more tokens per entry than `Plain`.
1883/// Consider raising `memory.recall_tokens` proportionally when switching to `Structured`.
1884#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, Hash)]
1885#[serde(rename_all = "snake_case")]
1886#[non_exhaustive]
1887pub enum ContextFormat {
1888 /// Emit a labeled header per snippet:
1889 /// `[Memory | <source> | <date> | relevance: <score>]` followed by the content.
1890 ///
1891 /// This is the default. Gives the LLM structured provenance metadata for each recalled
1892 /// memory without re-parsing the recall body.
1893 #[default]
1894 Structured,
1895 /// Legacy plain format: `- [role] content` per snippet, byte-identical to pre-#3340.
1896 ///
1897 /// Use `Plain` when downstream consumers rely on the old format or when token budget
1898 /// is tight and provenance headers are not needed.
1899 Plain,
1900}
1901
1902/// Retrieval-stage tuning for semantic memory (MemMachine-inspired, #3340).
1903///
1904/// Controls ANN candidate depth, search-prompt template, and memory snippet rendering.
1905/// Nested under `[memory.retrieval]` in TOML. All fields have defaults so existing
1906/// configs parse unchanged.
1907///
1908/// # Example (TOML)
1909///
1910/// ```toml
1911/// [memory.retrieval]
1912/// # depth = 0 # 0 = legacy (recall_limit * 2); set ≥ 1 to override directly
1913/// # search_prompt_template = ""
1914/// # context_format = "structured"
1915/// ```
1916#[derive(Debug, Clone, Deserialize, Serialize)]
1917#[serde(default)]
1918pub struct RetrievalConfig {
1919 /// Number of ANN candidates fetched from the vector store before keyword merge,
1920 /// temporal decay, and MMR re-ranking.
1921 ///
1922 /// - `0` (default): legacy behavior — `recall_limit * 2` candidates, byte-identical
1923 /// to pre-#3340 deployments.
1924 /// - `≥ 1`: the configured value is passed directly to `qdrant.search` /
1925 /// `keyword_search`. Set to at least `recall_limit * 2` to match the legacy pool
1926 /// size, or higher for better MMR diversity.
1927 ///
1928 /// A value below `recall_limit` triggers a one-shot WARN because the ANN pool
1929 /// cannot saturate the requested top-k.
1930 pub depth: u32,
1931 /// Template applied to the raw user query before embedding.
1932 ///
1933 /// Supports a single `{query}` placeholder which is replaced with the raw query string.
1934 /// Empty string (default) = identity: the query is embedded as-is.
1935 ///
1936 /// Applied **only** at query-side embedding sites — stored content (summaries, documents)
1937 /// is never wrapped. Use this for asymmetric embedding models (e.g. E5 `"query: {query}"`).
1938 pub search_prompt_template: String,
1939 /// Shape of memory snippets injected into agent context.
1940 ///
1941 /// See [`ContextFormat`] for the exact rendering and token-cost implications.
1942 /// Default: `Structured`.
1943 pub context_format: ContextFormat,
1944 /// Enable query-bias correction towards the user's profile centroid (MM-F3, #3341).
1945 ///
1946 /// When `true` and the query is classified as first-person, the query embedding is
1947 /// shifted towards the centroid of persona-fact embeddings. This nudges recall results
1948 /// towards persona-relevant content for self-referential queries.
1949 ///
1950 /// Default: `true` (low blast-radius: no-op when the persona table is empty).
1951 #[serde(default = "default_query_bias_correction")]
1952 pub query_bias_correction: bool,
1953 /// Blend weight for query-bias correction (MM-F3, #3341).
1954 ///
1955 /// Controls how much the query embedding shifts towards the profile centroid.
1956 /// `0.0` = no shift; `1.0` = full centroid. Clamped to `[0.0, 1.0]`. Default: `0.25`.
1957 #[serde(default = "default_query_bias_profile_weight")]
1958 pub query_bias_profile_weight: f32,
1959 /// Centroid TTL in seconds (MM-F3, #3341).
1960 ///
1961 /// The profile centroid computed from persona facts is cached for this many seconds.
1962 /// After expiry it is recomputed on the next first-person query. Default: 300 (5 min).
1963 #[serde(default = "default_query_bias_centroid_ttl_secs")]
1964 pub query_bias_centroid_ttl_secs: u64,
1965}
1966
/// Fallback for `RetrievalConfig::query_bias_correction`: correction is on.
fn default_query_bias_correction() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1970
/// Fallback for `RetrievalConfig::query_bias_profile_weight`: a blend weight of `0.25`.
fn default_query_bias_profile_weight() -> f32 {
    0.25_f32
}
1974
/// Fallback for `RetrievalConfig::query_bias_centroid_ttl_secs`: 300 seconds.
fn default_query_bias_centroid_ttl_secs() -> u64 {
    const FALLBACK_TTL_SECS: u64 = 300;
    FALLBACK_TTL_SECS
}
1978
1979impl Default for RetrievalConfig {
1980 fn default() -> Self {
1981 Self {
1982 depth: 0,
1983 search_prompt_template: String::new(),
1984 context_format: ContextFormat::default(),
1985 query_bias_correction: default_query_bias_correction(),
1986 query_bias_profile_weight: default_query_bias_profile_weight(),
1987 query_bias_centroid_ttl_secs: default_query_bias_centroid_ttl_secs(),
1988 }
1989 }
1990}
1991
1992/// Hebbian edge-weight reinforcement and consolidation configuration (HL-F1/F2/F3/F4, #3344/#3345).
1993///
1994/// Controls opt-in Hebbian learning on knowledge-graph edges. When enabled, every
1995/// recall traversal increments the `weight` column of the traversed edges, building
1996/// a usage-frequency signal into the graph. The consolidation sub-feature (HL-F3/F4)
1997/// runs a background sweep that identifies high-traffic entity clusters and distills
1998/// them into `graph_rules` entries via an LLM.
1999#[derive(Debug, Clone, Deserialize, Serialize)]
2000#[serde(default)]
2001pub struct HebbianConfig {
2002 /// Master switch. When `false`, no `weight` updates are written to the database
2003 /// and the consolidation loop does not start. Default: `false`.
2004 pub enabled: bool,
2005 /// Weight increment per co-activation (HL-F2, #3344).
2006 ///
2007 /// Typical range: `0.01`–`0.5`. A value of `0.0` is accepted but logs a `WARN` at
2008 /// startup when `enabled = true`. Default: `0.1`.
2009 pub hebbian_lr: f32,
2010 /// How often the consolidation sweep runs, in seconds (HL-F3, #3345).
2011 ///
2012 /// Set to `0` to disable the consolidation loop while keeping Hebbian updates active.
2013 /// Default: `3600` (one hour).
2014 pub consolidation_interval_secs: u64,
2015 /// Minimum `degree × avg_weight` score for an entity to qualify as a consolidation
2016 /// candidate (HL-F3, #3345). Default: `5.0`.
2017 pub consolidation_threshold: f64,
2018 /// Provider name (from `[[llm.providers]]`) used for cluster distillation (HL-F4, #3345).
2019 ///
2020 /// Falls back to the main provider when `None` or unresolvable.
2021 #[serde(default)]
2022 pub consolidate_provider: Option<ProviderName>,
2023 /// Maximum number of candidates processed per sweep (HL-F3, #3345). Default: `10`.
2024 pub max_candidates_per_sweep: usize,
2025 /// Minimum seconds between consecutive consolidations of the same entity (HL-F3, #3345).
2026 ///
2027 /// An entity is skipped if its `consolidated_at` timestamp is within this window.
2028 /// Default: `86400` (24 hours).
2029 pub consolidation_cooldown_secs: u64,
2030 /// LLM prompt timeout for a single distillation call, in seconds (HL-F4, #3345).
2031 /// Default: `30`.
2032 pub consolidation_prompt_timeout_secs: u64,
2033 /// Maximum number of neighbouring entity summaries passed to the LLM per candidate
2034 /// (HL-F4, #3345). Default: `20`.
2035 pub consolidation_max_neighbors: usize,
2036 /// Enable HL-F5 spreading activation from the top-1 ANN anchor (HL-F5, #3346).
2037 ///
2038 /// When `true` and `enabled = true`, `recall_graph_hela` performs BFS from the
2039 /// nearest entity anchor, scoring nodes by `path_weight × cosine`. Default: `false`.
2040 pub spreading_activation: bool,
2041 /// BFS depth for HL-F5 spreading activation. Clamped to `[1, 6]`. Default: `2`.
2042 pub spread_depth: u32,
2043 /// MAGMA edge-type filter for HL-F5 spreading activation.
2044 ///
2045 /// Accepted values: `"semantic"`, `"temporal"`, `"causal"`, `"entity"`.
2046 /// Empty = traverse all edge types. Default: `[]`.
2047 pub spread_edge_types: Vec<EdgeType>,
2048 /// Per-step circuit-breaker timeout for HL-F5 in milliseconds.
2049 ///
2050 /// Any internal step (anchor ANN, edges batch, vectors batch) that exceeds this
2051 /// duration triggers an `Ok(Vec::new())` fallback with a `WARN`. Default: `8`.
2052 pub step_budget_ms: u64,
2053 /// Timeout for the initial query embedding call in HL-F5, in seconds.
2054 ///
2055 /// `0` disables the timeout. Default: `5`.
2056 pub embed_timeout_secs: u64,
2057}
2058
2059impl Default for HebbianConfig {
2060 fn default() -> Self {
2061 Self {
2062 enabled: false,
2063 hebbian_lr: 0.1,
2064 consolidation_interval_secs: 3600,
2065 consolidation_threshold: 5.0,
2066 consolidate_provider: None,
2067 max_candidates_per_sweep: 10,
2068 consolidation_cooldown_secs: 86_400,
2069 consolidation_prompt_timeout_secs: 30,
2070 consolidation_max_neighbors: 20,
2071 spreading_activation: false,
2072 spread_depth: 2,
2073 spread_edge_types: Vec::new(),
2074 step_budget_ms: 8,
2075 embed_timeout_secs: 5,
2076 }
2077 }
2078}
2079
2080/// Compression strategy for active context compression (#1161).
2081#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
2082#[serde(tag = "strategy", rename_all = "snake_case")]
2083#[non_exhaustive]
2084pub enum CompressionStrategy {
2085 /// Compress only when reactive compaction fires (current behavior).
2086 #[default]
2087 Reactive,
2088 /// Compress proactively when context exceeds `threshold_tokens`.
2089 Proactive {
2090 /// Token count that triggers proactive compression.
2091 threshold_tokens: usize,
2092 /// Maximum tokens for the compressed summary (passed to LLM as `max_tokens`).
2093 max_summary_tokens: usize,
2094 },
2095 /// Agent calls `compress_context` tool explicitly. Reactive compaction still fires as a
2096 /// safety net. The `compress_context` tool is also available in all other strategies.
2097 Autonomous,
2098 /// Knowledge-block-aware compression strategy (#2510).
2099 ///
2100 /// Low-relevance context segments are automatically consolidated into `AutoConsolidated`
2101 /// knowledge blocks. LLM-curated blocks are never evicted before auto-consolidated ones.
2102 Focus,
2103}
2104
2105/// Pruning strategy for tool-output eviction inside the compaction pipeline (#1851, #2022).
2106///
2107/// When `context-compression` feature is enabled, this replaces the default oldest-first
2108/// heuristic with scored eviction.
2109#[derive(Debug, Clone, Copy, Default, Serialize, PartialEq, Eq)]
2110#[serde(rename_all = "snake_case")]
2111#[non_exhaustive]
2112pub enum PruningStrategy {
2113 /// Oldest-first eviction — current default behavior.
2114 #[default]
2115 Reactive,
2116 /// Short LLM call extracts a task goal; blocks are scored by keyword overlap and pruned
2117 /// lowest-first. Requires `context-compression` feature.
2118 TaskAware,
2119 /// Coarse-to-fine MIG scoring: relevance − redundancy with temporal partitioning.
2120 /// Requires `context-compression` feature.
2121 Mig,
2122 /// Subgoal-aware pruning: tracks the agent's current subgoal via fire-and-forget LLM
2123 /// extraction and partitions tool outputs into Active/Completed/Outdated tiers (#2022).
2124 /// Requires `context-compression` feature.
2125 Subgoal,
2126 /// Subgoal-aware pruning combined with MIG redundancy scoring (#2022).
2127 /// Requires `context-compression` feature.
2128 SubgoalMig,
2129}
2130
2131impl PruningStrategy {
2132 /// Returns `true` when the strategy is subgoal-aware (`Subgoal` or `SubgoalMig`).
2133 #[must_use]
2134 pub fn is_subgoal(self) -> bool {
2135 matches!(self, Self::Subgoal | Self::SubgoalMig)
2136 }
2137}
2138
2139// Route serde deserialization through FromStr so that removed variants (e.g. task_aware_mig)
2140// emit a warning and fall back to Reactive instead of hard-erroring when found in TOML configs.
2141impl<'de> serde::Deserialize<'de> for PruningStrategy {
2142 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
2143 let s = String::deserialize(deserializer)?;
2144 s.parse().map_err(serde::de::Error::custom)
2145 }
2146}
2147
2148impl std::str::FromStr for PruningStrategy {
2149 type Err = String;
2150
2151 fn from_str(s: &str) -> Result<Self, Self::Err> {
2152 match s {
2153 "reactive" => Ok(Self::Reactive),
2154 "task_aware" | "task-aware" => Ok(Self::TaskAware),
2155 "mig" => Ok(Self::Mig),
2156 // task_aware_mig was removed (dead code — was routed to scored path only).
2157 // Fall back to Reactive so existing TOML configs do not hard-error on startup.
2158 "task_aware_mig" | "task-aware-mig" => {
2159 tracing::warn!(
2160 "pruning strategy `task_aware_mig` has been removed; \
2161 falling back to `reactive`. Use `task_aware` or `mig` instead."
2162 );
2163 Ok(Self::Reactive)
2164 }
2165 "subgoal" => Ok(Self::Subgoal),
2166 "subgoal_mig" | "subgoal-mig" => Ok(Self::SubgoalMig),
2167 other => Err(format!(
2168 "unknown pruning strategy `{other}`, expected \
2169 reactive|task_aware|mig|subgoal|subgoal_mig"
2170 )),
2171 }
2172 }
2173}
2174
/// Serde fallback for `CompressionConfig::high_density_budget`: a fraction of `0.7`.
fn default_high_density_budget() -> f32 {
    0.7_f32
}
2178
/// Serde fallback for `CompressionConfig::low_density_budget`: a fraction of `0.3`.
fn default_low_density_budget() -> f32 {
    0.3_f32
}
2182
2183/// Configuration for the `SleepGate` forgetting sweep (#2397).
2184///
2185/// When `enabled = true`, a background loop periodically decays importance scores
2186/// (synaptic downscaling), restores recently-accessed memories (selective replay),
2187/// and prunes memories below `forgetting_floor` (targeted forgetting).
2188#[derive(Debug, Clone, Deserialize, Serialize)]
2189#[serde(default)]
2190pub struct ForgettingConfig {
2191 /// Enable the `SleepGate` forgetting sweep. Default: `false`.
2192 pub enabled: bool,
2193 /// Per-sweep decay rate applied to importance scores. Range: (0.0, 1.0). Default: `0.1`.
2194 pub decay_rate: f32,
2195 /// Importance floor below which memories are pruned. Range: [0.0, 1.0]. Default: `0.05`.
2196 pub forgetting_floor: f32,
2197 /// How often the forgetting sweep runs, in seconds. Default: `7200`.
2198 pub sweep_interval_secs: u64,
2199 /// Maximum messages to process per sweep. Default: `500`.
2200 pub sweep_batch_size: usize,
2201 /// Hours: messages accessed within this window get replay protection. Default: `24`.
2202 pub replay_window_hours: u32,
2203 /// Messages with `access_count` >= this get replay protection. Default: `3`.
2204 pub replay_min_access_count: u32,
2205 /// Hours: never prune messages accessed within this window. Default: `24`.
2206 pub protect_recent_hours: u32,
2207 /// Never prune messages with `access_count` >= this. Default: `3`.
2208 pub protect_min_access_count: u32,
2209}
2210
2211impl Default for ForgettingConfig {
2212 fn default() -> Self {
2213 Self {
2214 enabled: false,
2215 decay_rate: 0.1,
2216 forgetting_floor: 0.05,
2217 sweep_interval_secs: 7200,
2218 sweep_batch_size: 500,
2219 replay_window_hours: 24,
2220 replay_min_access_count: 3,
2221 protect_recent_hours: 24,
2222 protect_min_access_count: 3,
2223 }
2224 }
2225}
2226
2227/// Configuration for active context compression (#1161).
2228#[derive(Debug, Clone, Default, Deserialize, Serialize)]
2229#[serde(default)]
2230pub struct CompressionConfig {
2231 /// Compression strategy.
2232 #[serde(flatten)]
2233 pub strategy: CompressionStrategy,
2234 /// Tool-output pruning strategy (requires `context-compression` feature).
2235 pub pruning_strategy: PruningStrategy,
2236 /// Model to use for compression summaries.
2237 ///
2238 /// Currently unused — the primary summary provider is used regardless of this value.
2239 /// Reserved for future per-compression model selection. Setting this field has no effect.
2240 pub model: String,
2241 /// Provider name from `[[llm.providers]]` for `compress_context` summaries.
2242 /// Falls back to the primary provider when empty. Default: `""`.
2243 pub compress_provider: ProviderName,
2244 /// Compaction probe: validates summary quality before committing it (#1609).
2245 #[serde(default)]
2246 pub probe: CompactionProbeConfig,
2247 /// Archive tool output bodies to `SQLite` before compaction (Memex #2432).
2248 ///
2249 /// When enabled, tool output bodies in the compaction range are saved to
2250 /// `tool_overflow` with `archive_type = 'archive'` before summarization.
2251 /// The LLM summarizes placeholder messages; archived content is appended as
2252 /// a postfix after summarization so references survive compaction.
2253 /// Default: `false`.
2254 #[serde(default)]
2255 pub archive_tool_outputs: bool,
2256 /// Provider for Focus strategy segment scoring and the auto-consolidation extraction
2257 /// LLM call (#2510, #3313). Both are cheap/mid-tier tasks, so one provider suffices.
2258 /// Falls back to the primary provider when empty. Default: `""`.
2259 pub focus_scorer_provider: ProviderName,
2260 /// Token-budget fraction for high-density content in density-aware compression (#2481).
2261 /// Must sum to 1.0 with `low_density_budget`. Default: `0.7`.
2262 #[serde(default = "default_high_density_budget")]
2263 pub high_density_budget: f32,
2264 /// Token-budget fraction for low-density content in density-aware compression (#2481).
2265 /// Must sum to 1.0 with `high_density_budget`. Default: `0.3`.
2266 #[serde(default = "default_low_density_budget")]
2267 pub low_density_budget: f32,
2268 /// Typed-page classification and batch-level assertion checking (#3630).
2269 #[serde(default)]
2270 pub typed_pages: TypedPagesConfig,
2271 /// Acon tool-result compression settings (#4021).
2272 ///
2273 /// Controls per-result and batch-level token budgets for tool outputs before they enter
2274 /// message history. Distinct from `[tools.compression]` (TACO), which applies regex-based
2275 /// rule compression at the executor level.
2276 #[serde(default)]
2277 pub acon: AconConfig,
2278 /// ARC agent-initiated compaction settings (#4020).
2279 ///
2280 /// When `allow_agent_compaction = true`, the agent can call the `request_compaction`
2281 /// internal tool to trigger context summarization on demand.
2282 #[serde(default)]
2283 pub arc: ArcCompactionConfig,
2284}
2285
/// Fallback for `AconConfig::passthrough_threshold`: 2000 tokens.
fn default_acon_passthrough_threshold() -> usize {
    2_000
}
2289
/// Fallback for `AconConfig::summarize_threshold`: 4000 tokens.
fn default_acon_summarize_threshold() -> usize {
    4_000
}
2293
/// Fallback for `AconConfig::total_budget`: 8000 tokens.
fn default_acon_total_budget() -> usize {
    8_000
}
2297
2298fn validate_acon_passthrough_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2299where
2300 D: serde::Deserializer<'de>,
2301{
2302 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2303 if value == 0 {
2304 return Err(serde::de::Error::custom(
2305 "acon.passthrough_threshold must be >= 1",
2306 ));
2307 }
2308 Ok(value)
2309}
2310
2311fn validate_acon_summarize_threshold<'de, D>(deserializer: D) -> Result<usize, D::Error>
2312where
2313 D: serde::Deserializer<'de>,
2314{
2315 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2316 if value == 0 {
2317 return Err(serde::de::Error::custom(
2318 "acon.summarize_threshold must be >= 1",
2319 ));
2320 }
2321 Ok(value)
2322}
2323
2324fn validate_acon_total_budget<'de, D>(deserializer: D) -> Result<usize, D::Error>
2325where
2326 D: serde::Deserializer<'de>,
2327{
2328 let value = <usize as serde::Deserialize>::deserialize(deserializer)?;
2329 if value == 0 {
2330 return Err(serde::de::Error::custom("acon.total_budget must be >= 1"));
2331 }
2332 Ok(value)
2333}
2334
2335/// Token budget configuration for Acon tool-result compression (#4021).
2336///
2337/// Controls per-result and batch-level token budgets for tool outputs injected into context.
2338/// Distinct from `[tools.compression]` (TACO), which applies regex-based rule compression
2339/// at the executor level.
2340///
2341/// # Invariants
2342///
2343/// The following ordering must hold: `passthrough_threshold < summarize_threshold <= total_budget`.
2344/// A config where `passthrough_threshold >= summarize_threshold` would make the summarization path
2345/// unreachable, silently producing incorrect compression behavior.
2346///
2347/// # Example (TOML)
2348///
2349/// ```toml
2350/// [memory.compression.acon]
2351/// enabled = true
2352/// passthrough_threshold = 2000
2353/// summarize_threshold = 4000
2354/// total_budget = 8000
2355/// ```
2356#[derive(Debug, Clone, Deserialize, Serialize)]
2357#[serde(default)]
2358pub struct AconConfig {
2359 /// Enable Acon tool-result compression. Default: `true`.
2360 pub enabled: bool,
2361 /// Token count below which results pass through unchanged.
2362 /// Also the truncation target: results above this get char-truncated to this size.
2363 /// Must be < `summarize_threshold`. Default: `2000`.
2364 #[serde(default = "default_acon_passthrough_threshold")]
2365 #[serde(deserialize_with = "validate_acon_passthrough_threshold")]
2366 pub passthrough_threshold: usize,
2367 /// Token count above which LLM summarization should be attempted before truncation.
2368 /// Must be > `passthrough_threshold` and <= `total_budget`. Default: `4000`.
2369 #[serde(default = "default_acon_summarize_threshold")]
2370 #[serde(deserialize_with = "validate_acon_summarize_threshold")]
2371 pub summarize_threshold: usize,
2372 /// Maximum total tokens for all tool results in a single turn.
2373 /// Must be >= `summarize_threshold`. Default: `8000`.
2374 #[serde(default = "default_acon_total_budget")]
2375 #[serde(deserialize_with = "validate_acon_total_budget")]
2376 pub total_budget: usize,
2377 /// Provider name from `[[llm.providers]]` for LLM summarization of large results.
2378 /// Falls back to the primary provider when empty. Default: `""`.
2379 #[serde(default)]
2380 pub summarize_provider: ProviderName,
2381}
2382
2383impl AconConfig {
2384 /// Validate threshold ordering invariants after deserialization.
2385 ///
2386 /// Returns an error string if `passthrough_threshold >= summarize_threshold` or
2387 /// `summarize_threshold > total_budget`.
2388 ///
2389 /// # Errors
2390 ///
2391 /// Returns a descriptive error string when any threshold invariant is violated.
2392 pub fn validate(&self) -> Result<(), String> {
2393 if self.passthrough_threshold >= self.summarize_threshold {
2394 return Err(format!(
2395 "acon: passthrough_threshold ({}) must be < summarize_threshold ({})",
2396 self.passthrough_threshold, self.summarize_threshold
2397 ));
2398 }
2399 if self.summarize_threshold > self.total_budget {
2400 return Err(format!(
2401 "acon: summarize_threshold ({}) must be <= total_budget ({})",
2402 self.summarize_threshold, self.total_budget
2403 ));
2404 }
2405 Ok(())
2406 }
2407}
2408
2409impl Default for AconConfig {
2410 fn default() -> Self {
2411 Self {
2412 enabled: true,
2413 passthrough_threshold: default_acon_passthrough_threshold(),
2414 summarize_threshold: default_acon_summarize_threshold(),
2415 total_budget: default_acon_total_budget(),
2416 summarize_provider: ProviderName::default(),
2417 }
2418 }
2419}
2420
2421/// Configuration for ARC agent-initiated compaction (#4020).
2422///
2423/// When `allow_agent_compaction = true`, the `request_compaction` internal tool is
2424/// registered and the agent can call it to trigger context summarization on demand.
2425/// Rate limiting is handled by `CompactionState` — only one compaction fires per turn.
2426///
2427/// # Example (TOML)
2428///
2429/// ```toml
2430/// [memory.compression.arc]
2431/// allow_agent_compaction = true
2432/// ```
2433#[derive(Debug, Clone, Deserialize, Serialize)]
2434#[serde(default)]
2435pub struct ArcCompactionConfig {
2436 /// Allow the agent to request compaction via the `request_compaction` tool call.
2437 /// Default: `true`.
2438 pub allow_agent_compaction: bool,
2439}
2440
2441impl Default for ArcCompactionConfig {
2442 fn default() -> Self {
2443 Self {
2444 allow_agent_compaction: true,
2445 }
2446 }
2447}
2448
2449/// Configuration for typed-page compaction invariants (#3630).
2450///
2451/// Controls classification, batch-level assertion checking, and audit logging.
2452/// All behavior is disabled by default; set `enabled = true` to activate.
2453///
2454/// # Example (TOML)
2455///
2456/// ```toml
2457/// [memory.compression.typed_pages]
2458/// enabled = true
2459/// enforcement = "active"
2460/// audit_path = ""
2461/// audit_channel_capacity = 256
2462/// ```
2463#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
2464#[serde(default)]
2465pub struct TypedPagesConfig {
2466 /// Enable typed-page classification and batch-level assertion checking.
2467 /// Default: `false`.
2468 pub enabled: bool,
2469 /// Enforcement mode:
2470 ///
2471 /// - `observe`: classify and emit audit records only; no behavioral change.
2472 /// - `active`: classify + `SystemContext` pointer-replace + batch assertions + audit.
2473 ///
2474 /// Default: `"observe"`.
2475 pub enforcement: TypedPagesEnforcement,
2476 /// Path for JSONL audit log. Empty string resolves to `{data_dir}/audit/compaction.jsonl`.
2477 /// Default: `""`.
2478 ///
2479 /// # Security
2480 ///
2481 /// This field is **operator-only trusted input** read from the agent's configuration file.
2482 /// Write access to the config file implies file-system write access, so no additional
2483 /// canonicalization is enforced here. Do not expose this field to end-users or untrusted
2484 /// configuration sources.
2485 pub audit_path: String,
2486 /// Bounded channel capacity for the async audit writer. Default: `256`.
2487 pub audit_channel_capacity: usize,
2488}
2489
2490impl Default for TypedPagesConfig {
2491 fn default() -> Self {
2492 Self {
2493 enabled: false,
2494 enforcement: TypedPagesEnforcement::Observe,
2495 audit_path: String::new(),
2496 audit_channel_capacity: 256,
2497 }
2498 }
2499}
2500
2501/// Enforcement mode for typed-page compaction (#3630).
2502#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)]
2503#[serde(rename_all = "snake_case")]
2504#[non_exhaustive]
2505pub enum TypedPagesEnforcement {
2506 /// Classify and audit only. Zero behavioral change relative to the untyped path.
2507 #[default]
2508 Observe,
2509 /// Classify + pointer-replace `SystemContext` pages + batch assertions + audit.
2510 Active,
2511}
2512
/// Fallback for `SidequestConfig::interval_turns`: every 4 turns.
fn default_sidequest_interval_turns() -> u32 {
    4_u32
}
2516
/// Fallback for `SidequestConfig::max_eviction_ratio`: a ratio of `0.5`.
fn default_sidequest_max_eviction_ratio() -> f32 {
    0.5_f32
}
2520
/// Fallback for `SidequestConfig::max_cursors`: 30 entries.
fn default_sidequest_max_cursors() -> usize {
    30_usize
}
2524
/// Fallback for `SidequestConfig::min_cursor_tokens`: 100 tokens.
fn default_sidequest_min_cursor_tokens() -> usize {
    100_usize
}
2528
2529/// Configuration for LLM-driven side-thread tool output eviction (#1885).
2530#[derive(Debug, Clone, Deserialize, Serialize)]
2531#[serde(default)]
2532pub struct SidequestConfig {
2533 /// Enable `SideQuest` eviction. Default: `false`.
2534 pub enabled: bool,
2535 /// Run eviction every N user turns. Default: `4`.
2536 #[serde(default = "default_sidequest_interval_turns")]
2537 pub interval_turns: u32,
2538 /// Maximum fraction of tool outputs to evict per pass. Default: `0.5`.
2539 #[serde(default = "default_sidequest_max_eviction_ratio")]
2540 pub max_eviction_ratio: f32,
2541 /// Maximum cursor entries in eviction prompt (largest outputs first). Default: `30`.
2542 #[serde(default = "default_sidequest_max_cursors")]
2543 pub max_cursors: usize,
2544 /// Exclude tool outputs smaller than this token count from eviction candidates.
2545 /// Default: `100`.
2546 #[serde(default = "default_sidequest_min_cursor_tokens")]
2547 pub min_cursor_tokens: usize,
2548}
2549
2550impl Default for SidequestConfig {
2551 fn default() -> Self {
2552 Self {
2553 enabled: false,
2554 interval_turns: default_sidequest_interval_turns(),
2555 max_eviction_ratio: default_sidequest_max_eviction_ratio(),
2556 max_cursors: default_sidequest_max_cursors(),
2557 min_cursor_tokens: default_sidequest_min_cursor_tokens(),
2558 }
2559 }
2560}
2561
2562/// Graph retrieval strategy for `[memory.graph]`.
2563///
2564/// Selects the algorithm used to traverse the knowledge graph during recall.
2565/// The default (`synapse`) preserves existing SYNAPSE spreading-activation behavior.
2566#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
2567#[serde(rename_all = "snake_case")]
2568#[non_exhaustive]
2569pub enum GraphRetrievalStrategy {
2570 /// SYNAPSE spreading activation (default, existing behavior).
2571 #[default]
2572 Synapse,
2573 /// Hop-limited BFS traversal (pre-SYNAPSE behavior).
2574 Bfs,
2575 /// A* shortest-path traversal via petgraph.
2576 #[serde(rename = "astar")]
2577 AStar,
2578 /// Concentric BFS expanding outward from seed nodes.
2579 WaterCircles,
2580 /// Beam search: keep top-K candidates per hop.
2581 BeamSearch,
2582 /// Dynamic: LLM classifier selects strategy per query.
2583 Hybrid,
2584}
2585
/// Fallback for `BeamSearchConfig::beam_width`: 10 candidates per hop.
fn default_beam_width() -> usize {
    10_usize
}
2589
2590/// Beam search retrieval configuration for `[memory.graph.beam_search]`.
2591///
2592/// Controls the width of the beam during graph traversal: how many top candidates
2593/// are retained at each hop.
2594#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
2595pub struct BeamSearchConfig {
2596 /// Number of top candidates kept per hop. Default: `10`.
2597 #[serde(default = "default_beam_width")]
2598 pub beam_width: usize,
2599}
2600
2601impl Default for BeamSearchConfig {
2602 fn default() -> Self {
2603 Self {
2604 beam_width: default_beam_width(),
2605 }
2606 }
2607}
2608
2609/// `WaterCircles` BFS configuration for `[memory.graph.watercircles]`.
2610///
2611/// Controls ring-by-ring concentric BFS traversal from seed nodes.
2612#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
2613pub struct WaterCirclesConfig {
2614 /// Max facts per ring (hop). `0` = auto (`limit / max_hops`). Default: `0`.
2615 #[serde(default)]
2616 pub ring_limit: usize,
2617}
2618
/// Fallback for `ExperienceConfig::evolution_sweep_interval`: 50 turns.
fn default_evolution_sweep_interval() -> usize {
    50_usize
}
2622
2623fn default_confidence_prune_threshold() -> f32 {
2624 0.1
2625}
2626
/// Experience memory configuration for `[memory.graph.experience]`.
///
/// Controls recording of tool execution outcomes and graph evolution sweeps.
///
/// Every field has a serde default, so an empty `[memory.graph.experience]` table is valid
/// and yields the same values as `ExperienceConfig::default()`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExperienceConfig {
    /// Enable experience memory recording. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Enable graph evolution sweep (prune self-loops + low-confidence edges). Default: `false`.
    #[serde(default)]
    pub evolution_sweep_enabled: bool,
    /// Confidence threshold below which zero-retrieval edges are pruned. Default: `0.1`.
    #[serde(default = "default_confidence_prune_threshold")]
    pub confidence_prune_threshold: f32,
    /// Number of turns between evolution sweeps. Default: `50`.
    #[serde(default = "default_evolution_sweep_interval")]
    pub evolution_sweep_interval: usize,
}
2645
2646impl Default for ExperienceConfig {
2647 fn default() -> Self {
2648 Self {
2649 enabled: false,
2650 evolution_sweep_enabled: false,
2651 confidence_prune_threshold: default_confidence_prune_threshold(),
2652 evolution_sweep_interval: default_evolution_sweep_interval(),
2653 }
2654 }
2655}
2656
/// Configuration for the knowledge graph memory subsystem (`[memory.graph]` TOML section).
///
/// The container-level `#[serde(default)]` means any key missing from the TOML section is
/// filled with a default; fields that name their own default helper use that helper instead.
///
/// # Security
///
/// Entity names, relation labels, and fact strings extracted by the LLM are stored verbatim
/// without PII redaction. This is a known pre-1.0 MVP limitation. Do not enable graph memory
/// when processing conversations that may contain personal, medical, or sensitive data until
/// a redaction pass is implemented on the write path.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct GraphConfig {
    /// Enable the knowledge graph memory subsystem. Default: `false`.
    pub enabled: bool,
    /// Extraction model identifier. Default: `""`.
    ///
    /// NOTE(review): presumably the model used for entity/relation extraction — confirm
    /// against the extraction call site.
    pub extract_model: String,
    /// Per-message entity cap. Default comes from `default_graph_max_entities_per_message`.
    ///
    /// NOTE(review): presumably bounds entities extracted from a single message — confirm.
    #[serde(default = "default_graph_max_entities_per_message")]
    pub max_entities_per_message: usize,
    /// Per-message edge cap. Default comes from `default_graph_max_edges_per_message`.
    ///
    /// NOTE(review): presumably bounds edges extracted from a single message — confirm.
    #[serde(default = "default_graph_max_edges_per_message")]
    pub max_edges_per_message: usize,
    /// Community refresh interval. Default comes from
    /// `default_graph_community_refresh_interval`.
    ///
    /// NOTE(review): the unit (messages, turns, ...) is not visible here — confirm.
    #[serde(default = "default_graph_community_refresh_interval")]
    pub community_refresh_interval: usize,
    /// Entity similarity threshold. Default comes from
    /// `default_graph_entity_similarity_threshold`.
    #[serde(default = "default_graph_entity_similarity_threshold")]
    pub entity_similarity_threshold: f32,
    /// Extraction timeout in seconds. Default comes from
    /// `default_graph_extraction_timeout_secs`.
    #[serde(default = "default_graph_extraction_timeout_secs")]
    pub extraction_timeout_secs: u64,
    /// Toggle for embedding-based entity resolution. Default: `false`.
    #[serde(default)]
    pub use_embedding_resolution: bool,
    /// Entity ambiguity threshold. Default comes from
    /// `default_graph_entity_ambiguous_threshold`.
    ///
    /// NOTE(review): presumably only relevant with `use_embedding_resolution` — confirm.
    #[serde(default = "default_graph_entity_ambiguous_threshold")]
    pub entity_ambiguous_threshold: f32,
    /// Maximum traversal hops. Default comes from `default_graph_max_hops`.
    #[serde(default = "default_graph_max_hops")]
    pub max_hops: u32,
    /// Graph recall result limit. Default comes from `default_graph_recall_limit`.
    #[serde(default = "default_graph_recall_limit")]
    pub recall_limit: usize,
    /// Days to retain expired (superseded) edges before deletion. Default: 90.
    #[serde(default = "default_graph_expired_edge_retention_days")]
    pub expired_edge_retention_days: u32,
    /// Maximum entities to retain in the graph. 0 = unlimited.
    #[serde(default)]
    pub max_entities: usize,
    /// Maximum prompt size in bytes for community summary generation. Default: 8192.
    #[serde(default = "default_graph_community_summary_max_prompt_bytes")]
    pub community_summary_max_prompt_bytes: usize,
    /// Maximum concurrent LLM calls during community summarization. Default: 4.
    #[serde(default = "default_graph_community_summary_concurrency")]
    pub community_summary_concurrency: usize,
    /// Number of edges fetched per chunk during community detection. Default: 10000.
    /// Set to 0 to disable chunking and load all edges at once (legacy behavior).
    #[serde(default = "default_lpa_edge_chunk_size")]
    pub lpa_edge_chunk_size: usize,
    /// Temporal recency decay rate for graph recall scoring (units: 1/day).
    ///
    /// When > 0, recent edges receive a small additive score boost over older edges.
    /// The boost formula is `1 / (1 + age_days * rate)`, blended additively with the base
    /// composite score. Default 0.0 preserves existing scoring behavior exactly.
    #[serde(
        default = "default_graph_temporal_decay_rate",
        deserialize_with = "validate_temporal_decay_rate"
    )]
    pub temporal_decay_rate: f64,
    /// Maximum number of historical edge versions returned by `edge_history()`. Default: 100.
    ///
    /// Caps the result set returned for a given source entity + predicate pair. Prevents
    /// unbounded memory usage for high-churn predicates when this method is exposed via TUI
    /// or API endpoints.
    #[serde(default = "default_graph_edge_history_limit")]
    pub edge_history_limit: usize,
    /// A-MEM dynamic note linking configuration.
    ///
    /// When `note_linking.enabled = true`, entities extracted from each message are linked to
    /// semantically similar entities via `similar_to` edges. Requires an embedding store
    /// (`qdrant` or `sqlite` vector backend) to be configured.
    #[serde(default)]
    pub note_linking: NoteLinkingConfig,
    /// SYNAPSE spreading activation retrieval configuration.
    ///
    /// When `spreading_activation.enabled = true`, graph recall uses spreading activation
    /// with lateral inhibition and temporal decay instead of BFS.
    #[serde(default)]
    pub spreading_activation: SpreadingActivationConfig,
    /// Graph retrieval strategy. Default: `synapse` (preserves existing behavior).
    ///
    /// When `spreading_activation.enabled = true` and `retrieval_strategy` is `synapse`,
    /// SYNAPSE spreading activation is used. Set to `bfs` to revert to hop-limited BFS.
    #[serde(default)]
    pub retrieval_strategy: GraphRetrievalStrategy,
    /// Named LLM provider from `[[llm.providers]]` for graph entity/relation extraction.
    ///
    /// When non-empty, graph extraction (and downstream note linking and community
    /// summarization) use this provider instead of the primary `SemanticMemory.provider`.
    /// This is the recommended fix for `quality_gate` false positives (#3601): JSON
    /// extraction tasks produce structurally low prompt/response similarity (~0.55–0.70),
    /// which causes systematic quality gate rejections. A named provider built via
    /// `resolve_background_provider` bypasses `apply_routing_signals()` and therefore
    /// has no quality gate attached.
    ///
    /// Falls back to the primary provider when empty. Default: `""` (use primary).
    #[serde(default)]
    pub extract_provider: ProviderName,
    /// Named LLM provider for hybrid strategy classification.
    /// Falls back to the default provider when `None`.
    #[serde(default)]
    pub strategy_classifier_provider: Option<ProviderName>,
    /// Beam search configuration.
    #[serde(default)]
    pub beam_search: BeamSearchConfig,
    /// `WaterCircles` BFS configuration.
    #[serde(default)]
    pub watercircles: WaterCirclesConfig,
    /// Experience memory configuration.
    #[serde(default)]
    pub experience: ExperienceConfig,
    /// A-MEM link weight decay: multiplicative factor applied to `retrieval_count`
    /// for un-retrieved edges each decay pass. Range: `(0.0, 1.0]`. Default: `0.95`.
    #[serde(
        default = "default_link_weight_decay_lambda",
        deserialize_with = "validate_link_weight_decay_lambda"
    )]
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400` (24 hours).
    #[serde(default = "default_link_weight_decay_interval_secs")]
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho AGM-inspired belief revision configuration.
    ///
    /// When `belief_revision.enabled = true`, new edges that semantically contradict existing
    /// edges for the same entity pair trigger revision: the old edge is invalidated with a
    /// `superseded_by` pointer and the new edge becomes the current belief.
    #[serde(default)]
    pub belief_revision: BeliefRevisionConfig,
    /// D-MEM RPE-based tiered graph extraction routing.
    ///
    /// When `rpe.enabled = true`, low-surprise turns skip the expensive MAGMA LLM extraction
    /// pipeline. A consecutive-skip safety valve ensures no turn is silently skipped indefinitely.
    #[serde(default)]
    pub rpe: RpeConfig,
    /// `SQLite` connection pool size dedicated to graph operations.
    ///
    /// Graph tables share the same database file as messages/embeddings but use a
    /// separate pool to prevent pool starvation when community detection or spreading
    /// activation runs concurrently with regular memory operations. Default: `3`.
    #[serde(default = "default_graph_pool_size")]
    pub pool_size: u32,
    /// APEX-MEM append-only write path (#3631).
    ///
    /// When `apex_mem.enabled = true`, edge insertion uses `insert_or_supersede` with
    /// supersession chains instead of the legacy destructive-update path.
    #[serde(default)]
    pub apex_mem: ApexMemConfig,
    /// LLM call timeout per extraction request, in seconds. Default: `30`.
    #[serde(default = "default_graph_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
    /// PRISM query-sensitive edge costing in A* graph recall.
    ///
    /// When `true`, edge cost in the A\* graph recall function is modulated by the cosine similarity
    /// between the query embedding and the target entity embedding:
    /// `cost = (1.0 - confidence) * (1.0 - target_cosine).max(0.01)`.
    /// Edges toward semantically relevant entities receive lower cost and are therefore
    /// preferred by A*, producing query-aligned recall paths.
    ///
    /// Requires an embedding store (`qdrant` or `sqlite` vector backend). When the embedding
    /// store is unavailable or a target entity has no stored embedding, falls back to the
    /// baseline cost `1.0 - confidence`.
    ///
    /// Default: `false` (preserves existing A* behaviour).
    #[serde(default)]
    pub query_sensitive_cost: bool,

    /// Implicit conflict detection for SYNAPSE recall (spec 004-17, STALE/CUPMem).
    ///
    /// When enabled, write-time fuzzy predicate matching detects implicit conflicts
    /// between graph edges and annotates SYNAPSE recall results accordingly.
    #[serde(default)]
    pub implicit_conflict: ImplicitConflictConfig,
    /// `MemORAI` write-gate prefilter (#3709).
    ///
    /// When `write_gate.enabled = true`, low-signal edges are dropped before graph write,
    /// reducing noise. Opt-in; default is `false`.
    #[serde(default)]
    pub write_gate: WriteGateConfig,
    /// Conflict resolver recency-fallback threshold (#3709).
    ///
    /// Controls when the recency strategy is allowed to override `valid_from` comparison.
    #[serde(default)]
    pub conflict_recency: ConflictRecencyConfig,
}
2839
/// Similarity method for implicit conflict detection.
///
/// Serialized in `snake_case`: `levenshtein` (default), `embedding`, `both`.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    serde::Serialize,
    serde::Deserialize,
    schemars::JsonSchema,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum SimilarityMethod {
    /// Normalized Levenshtein edit distance.
    #[default]
    Levenshtein,
    /// Cosine similarity over pre-computed predicate embeddings.
    Embedding,
    /// Either method triggers detection.
    Both,
}
2863
/// Resolution strategy when an implicit conflict is detected.
///
/// Serialized in `snake_case`: `flag_only` (default), `recency`, `confidence`, `llm`.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    serde::Serialize,
    serde::Deserialize,
    schemars::JsonSchema,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ConflictResolutionStrategy {
    /// Mark the pair as a candidate but do not supersede either edge.
    #[default]
    FlagOnly,
    /// Supersede the older edge via APEX-MEM `insert_or_supersede`.
    Recency,
    /// Supersede the lower-confidence edge.
    Confidence,
    /// Delegate resolution to an LLM provider; fall back to `flag_only` on timeout.
    Llm,
}
2889
/// Configuration for the optional background consolidation daemon (spec 004-17).
///
/// Embedded in `ImplicitConflictConfig` as the `consolidation_daemon` field. All keys are
/// optional; missing keys fall back to the values in the `Default` impl.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ConsolidationDaemonConfig {
    /// Enable the background consolidation daemon. Default: `false`.
    pub enabled: bool,
    /// How often the daemon runs, in seconds. Default: 7200 (2 hours).
    #[serde(default = "default_ic_daemon_interval_secs")]
    pub interval_seconds: u64,
    /// Maximum number of candidates processed per daemon run. Default: 100.
    #[serde(default = "default_ic_daemon_batch_size")]
    pub batch_size: usize,
}
2903
2904impl Default for ConsolidationDaemonConfig {
2905 fn default() -> Self {
2906 Self {
2907 enabled: false,
2908 interval_seconds: default_ic_daemon_interval_secs(),
2909 batch_size: default_ic_daemon_batch_size(),
2910 }
2911 }
2912}
2913
/// Serde default for `ConsolidationDaemonConfig::interval_seconds`: 2 hours (`7200` s).
fn default_ic_daemon_interval_secs() -> u64 {
    2 * 60 * 60
}

/// Serde default for `ConsolidationDaemonConfig::batch_size`: `100` candidates per run.
fn default_ic_daemon_batch_size() -> usize {
    100_usize
}
2921
/// Configuration for implicit conflict detection (spec 004-17, STALE/CUPMem).
///
/// Controls write-time fuzzy predicate matching and SYNAPSE recall annotation.
/// All detection is gated behind `enabled = false` by default — no overhead when disabled.
///
/// TOML path: `[memory.graph.implicit_conflict]`
///
/// # Examples
///
/// ```toml
/// [memory.graph.implicit_conflict]
/// enabled = true
/// similarity_method = "levenshtein"
/// conflict_similarity_threshold = 0.80
/// resolution_strategy = "flag_only"
/// candidate_ttl_days = 30
/// propagation_depth = 2
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ImplicitConflictConfig {
    /// Enable implicit conflict detection. Default: `false`.
    pub enabled: bool,
    /// Similarity method used to detect candidate pairs. Default: `levenshtein`.
    #[serde(default)]
    pub similarity_method: SimilarityMethod,
    /// Minimum similarity score to flag a pair as a conflict candidate. Default: 0.80.
    #[serde(default = "default_ic_similarity_threshold")]
    pub conflict_similarity_threshold: f64,
    /// How to resolve detected conflicts. Default: `flag_only`.
    #[serde(default)]
    pub resolution_strategy: ConflictResolutionStrategy,
    /// Provider name (from `[[llm.providers]]`) for LLM-mediated resolution.
    /// Default: `ProviderName::default()`.
    #[serde(default)]
    pub implicit_conflict_provider: crate::providers::ProviderName,
    /// LLM resolution timeout in milliseconds. Default: 800.
    #[serde(default = "default_ic_llm_timeout_ms")]
    pub conflict_llm_timeout_ms: u64,
    /// Days before an unresolved candidate entry expires. Default: 30.
    #[serde(default = "default_ic_candidate_ttl_days")]
    pub candidate_ttl_days: u32,
    /// SYNAPSE propagation depth for surfacing superseding facts. Default: 2.
    #[serde(default = "default_ic_propagation_depth")]
    pub propagation_depth: u32,
    /// Background consolidation daemon configuration. Disabled by default.
    #[serde(default)]
    pub consolidation_daemon: ConsolidationDaemonConfig,
}
2970
2971impl Default for ImplicitConflictConfig {
2972 fn default() -> Self {
2973 Self {
2974 enabled: false,
2975 similarity_method: SimilarityMethod::default(),
2976 conflict_similarity_threshold: default_ic_similarity_threshold(),
2977 resolution_strategy: ConflictResolutionStrategy::default(),
2978 implicit_conflict_provider: crate::providers::ProviderName::default(),
2979 conflict_llm_timeout_ms: default_ic_llm_timeout_ms(),
2980 candidate_ttl_days: default_ic_candidate_ttl_days(),
2981 propagation_depth: default_ic_propagation_depth(),
2982 consolidation_daemon: ConsolidationDaemonConfig::default(),
2983 }
2984 }
2985}
2986
/// Serde default for `ImplicitConflictConfig::conflict_similarity_threshold`: `0.80`.
fn default_ic_similarity_threshold() -> f64 {
    0.80_f64
}

/// Serde default for `ImplicitConflictConfig::conflict_llm_timeout_ms`: `800` ms.
fn default_ic_llm_timeout_ms() -> u64 {
    800_u64
}

/// Serde default for `ImplicitConflictConfig::candidate_ttl_days`: `30` days.
fn default_ic_candidate_ttl_days() -> u32 {
    30_u32
}

/// Serde default for `ImplicitConflictConfig::propagation_depth`: `2`.
fn default_ic_propagation_depth() -> u32 {
    2_u32
}

/// Serde default for `GraphConfig::pool_size`: `3` connections.
fn default_graph_pool_size() -> u32 {
    3_u32
}

/// Serde default for `GraphConfig::llm_timeout_secs`: `30` seconds.
fn default_graph_llm_timeout_secs() -> u64 {
    30_u64
}
3010
/// APEX-MEM append-only write path configuration (`[memory.graph.apex_mem]`).
///
/// When `enabled = true`, graph edge insertion uses `insert_or_supersede`
/// instead of the legacy destructive-update `resolve_edge_typed`. This preserves
/// the full supersession chain and enables conflict resolution.
///
/// `Default` is derived, so the write path is off unless explicitly enabled.
///
/// Spec: `/specs/004-memory/004-7-memory-apex-magma.md`
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
#[serde(default)]
pub struct ApexMemConfig {
    /// Enable the APEX-MEM append-only write path. Default: `false`.
    pub enabled: bool,
}
3024
/// Serde default for `WriteQualityGateConfig::threshold`: `0.55`.
fn default_quality_gate_threshold() -> f32 {
    0.55_f32
}

/// Serde default for `WriteQualityGateConfig::recent_window`: `32` recent writes.
fn default_quality_gate_recent_window() -> usize {
    32_usize
}

/// Serde default for `WriteQualityGateConfig::contradiction_grace_seconds`: 5 minutes (`300` s).
fn default_quality_gate_contradiction_grace_seconds() -> u64 {
    5 * 60
}

/// Serde default for `WriteQualityGateConfig::information_value_weight`: `0.4`.
fn default_quality_gate_information_value_weight() -> f32 {
    0.4_f32
}

/// Serde default for `WriteQualityGateConfig::reference_completeness_weight`: `0.3`.
fn default_quality_gate_reference_completeness_weight() -> f32 {
    0.3_f32
}

/// Serde default for `WriteQualityGateConfig::contradiction_weight`: `0.3`.
fn default_quality_gate_contradiction_weight() -> f32 {
    0.3_f32
}

/// Serde default for `WriteQualityGateConfig::rejection_rate_alarm_ratio`: `0.35`.
fn default_quality_gate_rejection_rate_alarm_ratio() -> f32 {
    0.35_f32
}

/// Serde default for `WriteQualityGateConfig::llm_timeout_ms`: `500` ms.
fn default_quality_gate_llm_timeout_ms() -> u64 {
    500_u64
}

/// Serde default for `WriteQualityGateConfig::llm_weight`: `0.5`.
fn default_quality_gate_llm_weight() -> f32 {
    0.5_f32
}

/// Serde default for `WriteQualityGateConfig::reference_check_lang_en`: enabled.
fn default_quality_gate_reference_check_lang_en() -> bool {
    true
}
3064
/// Write quality gate configuration (`[memory.quality_gate]`).
///
/// When `enabled = true`, each `remember()` call is scored before persistence. Writes
/// below `threshold` are rejected. Rule-based scoring is the default; LLM-assisted
/// scoring is opt-in via `quality_gate_provider`.
///
/// The three default sub-score weights (`0.4`, `0.3`, `0.3`) sum to `1.0`.
/// NOTE(review): whether custom weights are re-normalized at runtime is not visible in this
/// file section — confirm against the scoring code before documenting a required sum.
///
/// Spec: `/specs/004-memory/004-9-memory-write-gate.md`
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct WriteQualityGateConfig {
    /// Enable the write quality gate. Default: `false`.
    pub enabled: bool,
    /// Combined score threshold below which writes are rejected. Default: `0.55`.
    #[serde(default = "default_quality_gate_threshold")]
    pub threshold: f32,
    /// Number of recent writes compared for information-value scoring. Default: `32`.
    #[serde(default = "default_quality_gate_recent_window")]
    pub recent_window: usize,
    /// Edges older than this (seconds) are stable for contradiction detection. Default: `300`.
    #[serde(default = "default_quality_gate_contradiction_grace_seconds")]
    pub contradiction_grace_seconds: u64,
    /// Weight of `information_value` sub-score. Default: `0.4`.
    #[serde(default = "default_quality_gate_information_value_weight")]
    pub information_value_weight: f32,
    /// Weight of `reference_completeness` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_reference_completeness_weight")]
    pub reference_completeness_weight: f32,
    /// Weight of `contradiction` sub-score. Default: `0.3`.
    #[serde(default = "default_quality_gate_contradiction_weight")]
    pub contradiction_weight: f32,
    /// Rolling rejection-rate alarm ratio. Default: `0.35`.
    #[serde(default = "default_quality_gate_rejection_rate_alarm_ratio")]
    pub rejection_rate_alarm_ratio: f32,
    /// Named LLM provider for optional scoring path. Default: `""` (rule-based only).
    #[serde(default)]
    pub quality_gate_provider: ProviderName,
    /// LLM timeout in milliseconds. Default: `500`.
    #[serde(default = "default_quality_gate_llm_timeout_ms")]
    pub llm_timeout_ms: u64,
    /// LLM blend weight into final score. Default: `0.5`.
    #[serde(default = "default_quality_gate_llm_weight")]
    pub llm_weight: f32,
    /// Enable pronoun/deictic reference checks (English only). Default: `true`.
    #[serde(default = "default_quality_gate_reference_check_lang_en")]
    pub reference_check_lang_en: bool,
}
3111
3112impl Default for WriteQualityGateConfig {
3113 fn default() -> Self {
3114 Self {
3115 enabled: false,
3116 threshold: default_quality_gate_threshold(),
3117 recent_window: default_quality_gate_recent_window(),
3118 contradiction_grace_seconds: default_quality_gate_contradiction_grace_seconds(),
3119 information_value_weight: default_quality_gate_information_value_weight(),
3120 reference_completeness_weight: default_quality_gate_reference_completeness_weight(),
3121 contradiction_weight: default_quality_gate_contradiction_weight(),
3122 rejection_rate_alarm_ratio: default_quality_gate_rejection_rate_alarm_ratio(),
3123 quality_gate_provider: ProviderName::default(),
3124 llm_timeout_ms: default_quality_gate_llm_timeout_ms(),
3125 llm_weight: default_quality_gate_llm_weight(),
3126 reference_check_lang_en: default_quality_gate_reference_check_lang_en(),
3127 }
3128 }
3129}
3130
impl Default for GraphConfig {
    /// Builds the configuration used when `[memory.graph]` is absent or partially specified.
    ///
    /// Fields with a named serde default helper call that same helper here, so a missing key
    /// and an absent section produce identical values. Boolean switches start `false`,
    /// `max_entities` starts at `0`, and nested sections use their own `Default` impls.
    fn default() -> Self {
        Self {
            // Graph memory is opt-in.
            enabled: false,
            extract_model: String::new(),
            max_entities_per_message: default_graph_max_entities_per_message(),
            max_edges_per_message: default_graph_max_edges_per_message(),
            community_refresh_interval: default_graph_community_refresh_interval(),
            entity_similarity_threshold: default_graph_entity_similarity_threshold(),
            extraction_timeout_secs: default_graph_extraction_timeout_secs(),
            use_embedding_resolution: false,
            entity_ambiguous_threshold: default_graph_entity_ambiguous_threshold(),
            max_hops: default_graph_max_hops(),
            recall_limit: default_graph_recall_limit(),
            expired_edge_retention_days: default_graph_expired_edge_retention_days(),
            // 0 = unlimited entities.
            max_entities: 0,
            community_summary_max_prompt_bytes: default_graph_community_summary_max_prompt_bytes(),
            community_summary_concurrency: default_graph_community_summary_concurrency(),
            lpa_edge_chunk_size: default_lpa_edge_chunk_size(),
            temporal_decay_rate: default_graph_temporal_decay_rate(),
            edge_history_limit: default_graph_edge_history_limit(),
            note_linking: NoteLinkingConfig::default(),
            spreading_activation: SpreadingActivationConfig::default(),
            retrieval_strategy: GraphRetrievalStrategy::default(),
            extract_provider: ProviderName::default(),
            // `None` = fall back to the default provider for hybrid classification.
            strategy_classifier_provider: None,
            beam_search: BeamSearchConfig::default(),
            watercircles: WaterCirclesConfig::default(),
            experience: ExperienceConfig::default(),
            link_weight_decay_lambda: default_link_weight_decay_lambda(),
            link_weight_decay_interval_secs: default_link_weight_decay_interval_secs(),
            belief_revision: BeliefRevisionConfig::default(),
            rpe: RpeConfig::default(),
            pool_size: default_graph_pool_size(),
            apex_mem: ApexMemConfig::default(),
            llm_timeout_secs: default_graph_llm_timeout_secs(),
            query_sensitive_cost: false,
            implicit_conflict: ImplicitConflictConfig::default(),
            write_gate: WriteGateConfig::default(),
            conflict_recency: ConflictRecencyConfig::default(),
        }
    }
}
3174
/// Serde default for `ConsolidationConfig::confidence_threshold`: `0.7`.
fn default_consolidation_confidence_threshold() -> f32 {
    0.7_f32
}

/// Serde default for `ConsolidationConfig::sweep_interval_secs`: 1 hour (`3600` s).
fn default_consolidation_sweep_interval_secs() -> u64 {
    60 * 60
}

/// Serde default for `ConsolidationConfig::sweep_batch_size`: `50` messages per sweep.
fn default_consolidation_sweep_batch_size() -> usize {
    50_usize
}

/// Serde default for `ConsolidationConfig::similarity_threshold`: `0.85`.
fn default_consolidation_similarity_threshold() -> f32 {
    0.85_f32
}
3190
/// Configuration for the All-Mem lifelong memory consolidation sweep (`[memory.consolidation]`).
///
/// When `enabled = true`, a background loop periodically clusters semantically similar messages
/// and merges them into consolidated entries via an LLM call. Originals are never deleted —
/// they are marked as consolidated and deprioritized in recall via temporal decay.
///
/// All keys are optional; missing keys take the values produced by the `Default` impl.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct ConsolidationConfig {
    /// Enable the consolidation background loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub consolidation_provider: ProviderName,
    /// Minimum LLM-assigned confidence for a topology op to be applied. Default: `0.7`.
    #[serde(default = "default_consolidation_confidence_threshold")]
    pub confidence_threshold: f32,
    /// How often the background consolidation sweep runs, in seconds. Default: `3600`.
    #[serde(default = "default_consolidation_sweep_interval_secs")]
    pub sweep_interval_secs: u64,
    /// Maximum number of messages to evaluate per sweep cycle. Default: `50`.
    #[serde(default = "default_consolidation_sweep_batch_size")]
    pub sweep_batch_size: usize,
    /// Minimum cosine similarity for two messages to be considered consolidation candidates.
    /// Default: `0.85`.
    #[serde(default = "default_consolidation_similarity_threshold")]
    pub similarity_threshold: f32,
    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
    #[serde(default = "default_consolidation_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
    /// Per-call timeout for every `embed()` invocation in the consolidation sweep, in seconds.
    /// Default: `5`.
    // The helper `default_embed_timeout_secs` is defined outside this section of the file.
    #[serde(default = "default_embed_timeout_secs")]
    pub embed_timeout_secs: u64,
}
3226
3227impl Default for ConsolidationConfig {
3228 fn default() -> Self {
3229 Self {
3230 enabled: false,
3231 consolidation_provider: ProviderName::default(),
3232 confidence_threshold: default_consolidation_confidence_threshold(),
3233 sweep_interval_secs: default_consolidation_sweep_interval_secs(),
3234 sweep_batch_size: default_consolidation_sweep_batch_size(),
3235 similarity_threshold: default_consolidation_similarity_threshold(),
3236 llm_timeout_secs: default_consolidation_llm_timeout_secs(),
3237 embed_timeout_secs: default_embed_timeout_secs(),
3238 }
3239 }
3240}
3241
/// Serde default for `ConsolidationConfig::llm_timeout_secs`: `30` seconds.
fn default_consolidation_llm_timeout_secs() -> u64 {
    30_u64
}

/// Serde default for `GraphConfig::link_weight_decay_lambda`: `0.95`.
fn default_link_weight_decay_lambda() -> f64 {
    0.95_f64
}

/// Serde default for `GraphConfig::link_weight_decay_interval_secs`: 24 hours (`86400` s).
fn default_link_weight_decay_interval_secs() -> u64 {
    24 * 60 * 60
}
3253
3254fn validate_link_weight_decay_lambda<'de, D>(deserializer: D) -> Result<f64, D::Error>
3255where
3256 D: serde::Deserializer<'de>,
3257{
3258 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
3259 if value.is_nan() || value.is_infinite() {
3260 return Err(serde::de::Error::custom(
3261 "link_weight_decay_lambda must be a finite number",
3262 ));
3263 }
3264 if !(value > 0.0 && value <= 1.0) {
3265 return Err(serde::de::Error::custom(
3266 "link_weight_decay_lambda must be in (0.0, 1.0]",
3267 ));
3268 }
3269 Ok(value)
3270}
3271
3272fn validate_admission_threshold<'de, D>(deserializer: D) -> Result<f32, D::Error>
3273where
3274 D: serde::Deserializer<'de>,
3275{
3276 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3277 if value.is_nan() || value.is_infinite() {
3278 return Err(serde::de::Error::custom(
3279 "threshold must be a finite number",
3280 ));
3281 }
3282 if !(0.0..=1.0).contains(&value) {
3283 return Err(serde::de::Error::custom("threshold must be in [0.0, 1.0]"));
3284 }
3285 Ok(value)
3286}
3287
3288fn validate_admission_fast_path_margin<'de, D>(deserializer: D) -> Result<f32, D::Error>
3289where
3290 D: serde::Deserializer<'de>,
3291{
3292 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3293 if value.is_nan() || value.is_infinite() {
3294 return Err(serde::de::Error::custom(
3295 "fast_path_margin must be a finite number",
3296 ));
3297 }
3298 if !(0.0..=1.0).contains(&value) {
3299 return Err(serde::de::Error::custom(
3300 "fast_path_margin must be in [0.0, 1.0]",
3301 ));
3302 }
3303 Ok(value)
3304}
3305
/// Default admission threshold: `0.40`.
fn default_admission_threshold() -> f32 {
    0.40_f32
}

/// Default admission fast-path margin: `0.15`.
fn default_admission_fast_path_margin() -> f32 {
    0.15_f32
}

/// Serde default for `AdmissionConfig::rl_min_samples`: `500` training samples.
fn default_rl_min_samples() -> u32 {
    500_u32
}

/// Serde default for `AdmissionConfig::rl_retrain_interval_secs`: 1 hour (`3600` s).
fn default_rl_retrain_interval_secs() -> u64 {
    60 * 60
}
3321
/// Admission decision strategy.
///
/// `Heuristic` uses the existing multi-factor weighted score with an optional LLM call.
/// `Rl` replaces the LLM-based `future_utility` factor with a trained logistic regression model.
///
/// Serialized in `snake_case`: `heuristic` (default) or `rl`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum AdmissionStrategy {
    /// Current A-MAC behavior: weighted heuristics + optional LLM call. Default.
    #[default]
    Heuristic,
    /// Learned model: logistic regression trained on recall feedback.
    /// Falls back to `Heuristic` when training data is below `rl_min_samples`.
    Rl,
}
3337
3338fn validate_admission_weight<'de, D>(deserializer: D) -> Result<f32, D::Error>
3339where
3340 D: serde::Deserializer<'de>,
3341{
3342 let value = <f32 as serde::Deserialize>::deserialize(deserializer)?;
3343 if value < 0.0 {
3344 return Err(serde::de::Error::custom(
3345 "admission weight must be non-negative (>= 0.0)",
3346 ));
3347 }
3348 Ok(value)
3349}
3350
/// Per-factor weights for the A-MAC admission score (`[memory.admission.weights]`).
///
/// Weights are normalized at runtime (divided by their sum), so they do not need to sum to 1.0.
/// All values must be non-negative.
///
/// Each field is deserialized through `validate_admission_weight`, which rejects negative
/// values. Missing keys take the values from the `Default` impl (container-level
/// `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionWeights {
    /// LLM-estimated future reuse probability. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub future_utility: f32,
    /// Factual confidence heuristic (inverse of hedging markers). Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub factual_confidence: f32,
    /// Semantic novelty: 1 - max similarity to existing memories. Default: `0.30`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub semantic_novelty: f32,
    /// Temporal recency: always 1.0 at write time. Default: `0.10`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub temporal_recency: f32,
    /// Content type prior based on role. Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub content_type_prior: f32,
    /// Goal-conditioned utility (#2408). `0.0` when `goal_conditioned_write = false`.
    /// When enabled, set this alongside reducing `future_utility` so total sums remain stable.
    /// Normalized automatically at runtime. Default: `0.0`.
    #[serde(deserialize_with = "validate_admission_weight")]
    pub goal_utility: f32,
}
3379
3380impl Default for AdmissionWeights {
3381 fn default() -> Self {
3382 Self {
3383 future_utility: 0.30,
3384 factual_confidence: 0.15,
3385 semantic_novelty: 0.30,
3386 temporal_recency: 0.10,
3387 content_type_prior: 0.15,
3388 goal_utility: 0.0,
3389 }
3390 }
3391}
3392
3393impl AdmissionWeights {
3394 /// Return weights normalized so they sum to 1.0.
3395 ///
3396 /// All weights are non-negative; the sum is always > 0 when defaults are used.
3397 #[must_use]
3398 pub fn normalized(&self) -> Self {
3399 let sum = self.future_utility
3400 + self.factual_confidence
3401 + self.semantic_novelty
3402 + self.temporal_recency
3403 + self.content_type_prior
3404 + self.goal_utility;
3405 if sum <= f32::EPSILON {
3406 return Self::default();
3407 }
3408 Self {
3409 future_utility: self.future_utility / sum,
3410 factual_confidence: self.factual_confidence / sum,
3411 semantic_novelty: self.semantic_novelty / sum,
3412 temporal_recency: self.temporal_recency / sum,
3413 content_type_prior: self.content_type_prior / sum,
3414 goal_utility: self.goal_utility / sum,
3415 }
3416 }
3417}
3418
/// Configuration for A-MAC adaptive memory admission control (`[memory.admission]` TOML section).
///
/// When `enabled = true`, a write-time gate evaluates each message before saving to memory.
/// Messages below the composite admission threshold are rejected and not persisted.
///
/// The container-level `#[serde(default)]` lets any key be omitted from the TOML section;
/// omitted keys fall back to defaults instead of producing a deserialization error.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AdmissionConfig {
    /// Enable A-MAC admission control. Default: `false`.
    pub enabled: bool,
    /// Composite score threshold below which messages are rejected. Range: `[0.0, 1.0]`.
    /// Checked at deserialization time by `validate_admission_threshold`.
    /// Default: `0.40`.
    #[serde(deserialize_with = "validate_admission_threshold")]
    pub threshold: f32,
    /// Margin above threshold at which the fast path admits without an LLM call. Range: `[0.0, 1.0]`.
    /// When heuristic score >= threshold + margin, LLM call is skipped.
    /// Checked at deserialization time by `validate_admission_fast_path_margin`.
    /// Default: `0.15`.
    #[serde(deserialize_with = "validate_admission_fast_path_margin")]
    pub fast_path_margin: f32,
    /// Provider name from `[[llm.providers]]` for `future_utility` LLM evaluation.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub admission_provider: ProviderName,
    /// Per-factor weights. Normalized at runtime.
    /// Default: `{0.30, 0.15, 0.30, 0.10, 0.15}` for the five base factors, plus
    /// `goal_utility = 0.0`.
    pub weights: AdmissionWeights,
    /// Admission decision strategy. Default: `heuristic`.
    #[serde(default)]
    pub admission_strategy: AdmissionStrategy,
    /// Minimum training samples before the RL model is activated.
    /// Below this count the system falls back to `Heuristic`. Default: `500`.
    #[serde(default = "default_rl_min_samples")]
    pub rl_min_samples: u32,
    /// Background RL model retraining interval in seconds. Default: `3600`.
    #[serde(default = "default_rl_retrain_interval_secs")]
    pub rl_retrain_interval_secs: u64,
    /// Enable goal-conditioned write gate (#2408). When `true`, memories are scored
    /// against the current task goal and rejected if relevance is below `goal_utility_threshold`.
    /// Zero regression when `false`. Default: `false`.
    #[serde(default)]
    pub goal_conditioned_write: bool,
    /// Provider name from `[[llm.providers]]` for goal-utility LLM refinement.
    /// Used only for borderline cases (similarity within 0.1 of threshold).
    /// Falls back to the primary provider when empty. Default: `""`.
    #[serde(default)]
    pub goal_utility_provider: ProviderName,
    /// Minimum cosine similarity between goal embedding and candidate memory
    /// to consider it goal-relevant. Below this, `goal_utility = 0.0`. Default: `0.4`.
    // NOTE(review): unlike `threshold`, no `deserialize_with` range validator is attached
    // here — confirm whether out-of-range values are checked elsewhere.
    #[serde(default = "default_goal_utility_threshold")]
    pub goal_utility_threshold: f32,
    /// Weight of the `goal_utility` factor in the composite admission score.
    /// Set to `0.0` to disable (equivalent to `goal_conditioned_write = false`). Default: `0.25`.
    // NOTE(review): how this relates to `weights.goal_utility` is not visible in this
    // file — confirm against the scoring code.
    #[serde(default = "default_goal_utility_weight")]
    pub goal_utility_weight: f32,
}
3470
/// Serde default for `AdmissionConfig::goal_utility_threshold`: `0.4`.
fn default_goal_utility_threshold() -> f32 {
    const GOAL_UTILITY_THRESHOLD: f32 = 0.4;
    GOAL_UTILITY_THRESHOLD
}
3474
/// Serde default for `AdmissionConfig::goal_utility_weight`: `0.25`.
fn default_goal_utility_weight() -> f32 {
    const GOAL_UTILITY_WEIGHT: f32 = 0.25;
    GOAL_UTILITY_WEIGHT
}
3478
3479impl Default for AdmissionConfig {
3480 fn default() -> Self {
3481 Self {
3482 enabled: false,
3483 threshold: default_admission_threshold(),
3484 fast_path_margin: default_admission_fast_path_margin(),
3485 admission_provider: ProviderName::default(),
3486 weights: AdmissionWeights::default(),
3487 admission_strategy: AdmissionStrategy::default(),
3488 rl_min_samples: default_rl_min_samples(),
3489 rl_retrain_interval_secs: default_rl_retrain_interval_secs(),
3490 goal_conditioned_write: false,
3491 goal_utility_provider: ProviderName::default(),
3492 goal_utility_threshold: default_goal_utility_threshold(),
3493 goal_utility_weight: default_goal_utility_weight(),
3494 }
3495 }
3496}
3497
/// Routing strategy for `[memory.store_routing]`.
///
/// Variants are (de)serialized in `snake_case`: `"heuristic"`, `"llm"`, `"hybrid"`.
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum StoreRoutingStrategy {
    /// Pure heuristic pattern matching. Zero LLM calls. Default.
    #[default]
    Heuristic,
    /// LLM-based classification via `routing_classifier_provider`.
    Llm,
    /// Heuristic first; escalates to LLM only when confidence is low.
    Hybrid,
}
3511
/// Configuration for cost-sensitive store routing (`[memory.store_routing]`).
///
/// Controls how each query is classified and routed to the appropriate memory
/// backend(s), avoiding unnecessary store queries for simple lookups.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct StoreRoutingConfig {
    /// Enable configurable store routing. When `false`, `HeuristicRouter` is used
    /// directly (existing behavior). Default: `false`.
    pub enabled: bool,
    /// Routing strategy. Default: `heuristic`.
    pub strategy: StoreRoutingStrategy,
    /// Provider name from `[[llm.providers]]` for LLM-based classification.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub routing_classifier_provider: ProviderName,
    /// Route to use when the classifier is uncertain (confidence < threshold).
    ///
    /// Defaults to [`MemoryRoute::Hybrid`].
    pub fallback_route: MemoryRoute,
    /// Confidence threshold below which `HybridRouter` escalates to LLM.
    /// Range: `[0.0, 1.0]`. Default: `0.7`.
    // NOTE(review): the documented range is not enforced by a validator on this field —
    // confirm whether it is checked elsewhere.
    pub confidence_threshold: f32,
}
3535
3536impl Default for StoreRoutingConfig {
3537 fn default() -> Self {
3538 Self {
3539 enabled: false,
3540 strategy: StoreRoutingStrategy::Heuristic,
3541 routing_classifier_provider: ProviderName::default(),
3542 fallback_route: MemoryRoute::Hybrid,
3543 confidence_threshold: 0.7,
3544 }
3545 }
3546}
3547
/// Persona memory layer configuration (#2461).
///
/// When `enabled = true`, user preferences and domain knowledge are extracted from
/// conversation history via a cheap LLM provider and injected after the system prompt.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct PersonaConfig {
    /// Enable persona memory extraction and injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for persona extraction.
    /// Should be a cheap/fast model. Falls back to the primary provider when empty.
    pub persona_provider: ProviderName,
    /// Minimum confidence threshold for facts included in context. Default: `0.6`.
    pub min_confidence: f64,
    /// Minimum user messages before extraction runs in a session. Default: `3`.
    pub min_messages: usize,
    /// Maximum messages sent to the LLM per extraction pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Token budget allocated to persona context in assembly. Default: `500`.
    pub context_budget_tokens: usize,
}
3571
3572impl Default for PersonaConfig {
3573 fn default() -> Self {
3574 Self {
3575 enabled: false,
3576 persona_provider: ProviderName::default(),
3577 min_confidence: 0.6,
3578 min_messages: 3,
3579 max_messages: 10,
3580 extraction_timeout_secs: 10,
3581 context_budget_tokens: 500,
3582 }
3583 }
3584}
3585
/// Trajectory-informed memory configuration (#2498).
///
/// When `enabled = true`, tool-call turns are analyzed by a fast LLM provider to extract
/// procedural (reusable how-to) and episodic (one-off event) entries stored per-conversation.
/// Procedural entries are injected into context as "past experience" during assembly.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TrajectoryConfig {
    /// Enable trajectory extraction and context injection. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for extraction.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub trajectory_provider: ProviderName,
    /// Token budget allocated to trajectory hints in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Maximum messages fed to the extraction LLM per pass. Default: `10`.
    pub max_messages: usize,
    /// LLM timeout for the extraction call in seconds. Default: `10`.
    pub extraction_timeout_secs: u64,
    /// Number of procedural entries retrieved for context injection. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum confidence score for entries included in context. Default: `0.6`.
    pub min_confidence: f64,
}
3610
3611impl Default for TrajectoryConfig {
3612 fn default() -> Self {
3613 Self {
3614 enabled: false,
3615 trajectory_provider: ProviderName::default(),
3616 context_budget_tokens: 400,
3617 max_messages: 10,
3618 extraction_timeout_secs: 10,
3619 recall_top_k: 5,
3620 min_confidence: 0.6,
3621 }
3622 }
3623}
3624
/// Category-aware memory configuration (#2428).
///
/// When `enabled = true`, messages are auto-tagged with a category derived from the active
/// skill or tool context. The category is stored in the `messages.category` column and used
/// as a Qdrant payload filter during recall.
///
/// Both keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CategoryConfig {
    /// Enable category tagging and category-filtered recall. Default: `false`.
    pub enabled: bool,
    /// Automatically assign category from skill metadata or tool type. Default: `true`.
    pub auto_tag: bool,
}
3638
3639impl Default for CategoryConfig {
3640 fn default() -> Self {
3641 Self {
3642 enabled: false,
3643 auto_tag: true,
3644 }
3645 }
3646}
3647
/// `TiMem` temporal-hierarchical memory tree configuration (#2262).
///
/// When `enabled = true`, memories are stored as leaf nodes and periodically consolidated
/// into hierarchical summaries by a background loop. Context assembly uses tree traversal
/// for complex queries.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct TreeConfig {
    /// Enable the memory tree and background consolidation loop. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for node consolidation.
    /// Should be a fast/cheap model. Falls back to the primary provider when empty.
    pub consolidation_provider: ProviderName,
    /// Interval between consolidation sweeps in seconds. Default: `300`.
    pub sweep_interval_secs: u64,
    /// Maximum leaf nodes loaded per sweep batch. Default: `20`.
    pub batch_size: usize,
    /// Cosine similarity threshold for clustering leaves. Default: `0.8`.
    pub similarity_threshold: f32,
    /// Maximum tree depth (levels above leaves). Default: `3`.
    pub max_level: u32,
    /// Token budget allocated to tree memory in context assembly. Default: `400`.
    pub context_budget_tokens: usize,
    /// Number of tree nodes retrieved for context. Default: `5`.
    pub recall_top_k: usize,
    /// Minimum cluster size before triggering LLM consolidation. Default: `2`.
    pub min_cluster_size: usize,
}
3676
3677impl Default for TreeConfig {
3678 fn default() -> Self {
3679 Self {
3680 enabled: false,
3681 consolidation_provider: ProviderName::default(),
3682 sweep_interval_secs: 300,
3683 batch_size: 20,
3684 similarity_threshold: 0.8,
3685 max_level: 3,
3686 context_budget_tokens: 400,
3687 recall_top_k: 5,
3688 min_cluster_size: 2,
3689 }
3690 }
3691}
3692
/// Time-based microcompact configuration (#2699).
///
/// When `enabled = true`, low-value tool outputs are cleared from context
/// (replaced with a sentinel string) when the session gap exceeds `gap_threshold_minutes`.
/// The most recent `keep_recent` tool messages are preserved unconditionally.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MicrocompactConfig {
    /// Enable time-based microcompaction. Default: `false`.
    pub enabled: bool,
    /// Minimum idle gap in minutes before stale tool outputs are cleared. Default: `60`.
    pub gap_threshold_minutes: u32,
    /// Number of most recent compactable tool messages to preserve. Default: `3`.
    pub keep_recent: usize,
}
3708
3709impl Default for MicrocompactConfig {
3710 fn default() -> Self {
3711 Self {
3712 enabled: false,
3713 gap_threshold_minutes: 60,
3714 keep_recent: 3,
3715 }
3716 }
3717}
3718
/// autoDream background memory consolidation configuration (#2697).
///
/// When `enabled = true`, a constrained consolidation subagent runs after
/// a session ends if both `min_sessions` and `min_hours` gates pass.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys fall back to
/// defaults instead of producing a deserialization error.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct AutoDreamConfig {
    /// Enable autoDream consolidation. Default: `false`.
    pub enabled: bool,
    /// Minimum number of sessions between consolidations. Default: `3`.
    pub min_sessions: u32,
    /// Minimum hours between consolidations. Default: `24`.
    pub min_hours: u32,
    /// Provider name from `[[llm.providers]]` for consolidation LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub consolidation_provider: ProviderName,
    /// Maximum agent loop iterations for the consolidation subagent. Default: `8`.
    pub max_iterations: u8,
    /// LLM call timeout per `propose_merge_op` invocation, in seconds. Default: `30`.
    #[serde(default = "default_autodream_llm_timeout_secs")]
    pub llm_timeout_secs: u64,
}
3741
3742impl Default for AutoDreamConfig {
3743 fn default() -> Self {
3744 Self {
3745 enabled: false,
3746 min_sessions: 3,
3747 min_hours: 24,
3748 consolidation_provider: ProviderName::default(),
3749 max_iterations: 8,
3750 llm_timeout_secs: default_autodream_llm_timeout_secs(),
3751 }
3752 }
3753}
3754
/// Serde default for `AutoDreamConfig::llm_timeout_secs`: `30` seconds.
fn default_autodream_llm_timeout_secs() -> u64 {
    const LLM_TIMEOUT_SECS: u64 = 30;
    LLM_TIMEOUT_SECS
}
3758
/// `MagicDocs` auto-maintained markdown configuration (#2702).
///
/// When `enabled = true`, files read via file tools that contain a `# MAGIC DOC:` header
/// are registered and periodically updated by a constrained subagent.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct MagicDocsConfig {
    /// Enable `MagicDocs` auto-maintenance. Default: `false`.
    pub enabled: bool,
    /// Minimum turns between updates for a given doc path. Default: `5`.
    pub min_turns_between_updates: u32,
    /// Provider name from `[[llm.providers]]` for doc update LLM calls.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub update_provider: ProviderName,
    /// Maximum agent loop iterations per doc update. Default: `4`.
    pub max_iterations: u8,
}
3776
3777impl Default for MagicDocsConfig {
3778 fn default() -> Self {
3779 Self {
3780 enabled: false,
3781 min_turns_between_updates: 5,
3782 update_provider: ProviderName::default(),
3783 max_iterations: 4,
3784 }
3785 }
3786}
3787
/// Unit tests for the memory configuration types: TOML deserialization, validators,
/// documented defaults, and `AdmissionWeights::normalized`.
#[cfg(test)]
mod tests {
    use super::*;

    // Verify that serde deserialization routes through FromStr so that removed variants
    // (task_aware_mig) fall back to Reactive instead of hard-erroring when found in TOML.
    #[test]
    fn pruning_strategy_toml_task_aware_mig_falls_back_to_reactive() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "task_aware_mig""#;
        let w: Wrapper = toml::from_str(toml).expect("should deserialize without error");
        assert_eq!(
            w.pruning_strategy,
            PruningStrategy::Reactive,
            "task_aware_mig must fall back to Reactive"
        );
    }

    // Every currently supported strategy name must map to its variant.
    #[test]
    fn pruning_strategy_toml_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            #[allow(dead_code)]
            pruning_strategy: PruningStrategy,
        }
        for (input, expected) in [
            ("reactive", PruningStrategy::Reactive),
            ("task_aware", PruningStrategy::TaskAware),
            ("mig", PruningStrategy::Mig),
        ] {
            let toml = format!(r#"pruning_strategy = "{input}""#);
            let w: Wrapper = toml::from_str(&toml)
                .unwrap_or_else(|e| panic!("failed to deserialize `{input}`: {e}"));
            assert_eq!(w.pruning_strategy, expected, "mismatch for `{input}`");
        }
    }

    // Names that were never valid must still be rejected.
    #[test]
    fn pruning_strategy_toml_unknown_value_errors() {
        #[derive(serde::Deserialize)]
        #[allow(dead_code)]
        struct Wrapper {
            pruning_strategy: PruningStrategy,
        }
        let toml = r#"pruning_strategy = "nonexistent_strategy""#;
        assert!(
            toml::from_str::<Wrapper>(toml).is_err(),
            "unknown strategy must produce an error"
        );
    }

    #[test]
    fn tier_config_defaults_are_correct() {
        let cfg = TierConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.promotion_min_sessions, 3);
        assert!((cfg.similarity_threshold - 0.92).abs() < f32::EPSILON);
        assert_eq!(cfg.sweep_interval_secs, 3600);
        assert_eq!(cfg.sweep_batch_size, 100);
    }

    #[test]
    fn tier_config_rejects_min_sessions_below_2() {
        let toml = "promotion_min_sessions = 1";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_similarity_threshold_below_0_5() {
        let toml = "similarity_threshold = 0.4";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    #[test]
    fn tier_config_rejects_zero_sweep_batch_size() {
        let toml = "sweep_batch_size = 0";
        assert!(toml::from_str::<TierConfig>(toml).is_err());
    }

    /// Deserialize a `SemanticConfig` whose only explicit key is `importance_weight`.
    fn deserialize_importance_weight(toml_val: &str) -> Result<SemanticConfig, toml::de::Error> {
        let input = format!("importance_weight = {toml_val}");
        toml::from_str::<SemanticConfig>(&input)
    }

    #[test]
    fn importance_weight_default_is_0_15() {
        let cfg = SemanticConfig::default();
        assert!((cfg.importance_weight - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_zero() {
        let cfg = deserialize_importance_weight("0.0").unwrap();
        assert!((cfg.importance_weight - 0.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_valid_one() {
        let cfg = deserialize_importance_weight("1.0").unwrap();
        assert!((cfg.importance_weight - 1.0_f64).abs() < f64::EPSILON);
    }

    #[test]
    fn importance_weight_rejects_near_zero_negative() {
        // Boundary check: a value just below the lower bound must be rejected.
        // Non-finite inputs are not covered by this test.
        let result = deserialize_importance_weight("-0.01");
        assert!(
            result.is_err(),
            "negative importance_weight must be rejected"
        );
    }

    #[test]
    fn importance_weight_rejects_negative() {
        let result = deserialize_importance_weight("-1.0");
        assert!(result.is_err(), "negative value must be rejected");
    }

    #[test]
    fn importance_weight_rejects_greater_than_one() {
        let result = deserialize_importance_weight("1.01");
        assert!(result.is_err(), "value > 1.0 must be rejected");
    }

    // ── AdmissionWeights::normalized() tests (#2317) ────────────────────────

    /// Sum of all six admission factors, including `goal_utility`.
    fn admission_weights_total(w: &AdmissionWeights) -> f32 {
        w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility
    }

    // Test: weights that don't sum to 1.0 are normalized to sum to 1.0.
    // `goal_utility` is non-zero so that the sixth factor is exercised as well.
    #[test]
    fn admission_weights_normalized_sums_to_one() {
        let w = AdmissionWeights {
            future_utility: 2.0,
            factual_confidence: 1.0,
            semantic_novelty: 3.0,
            temporal_recency: 1.0,
            content_type_prior: 3.0,
            goal_utility: 2.0,
        };
        let n = w.normalized();
        let sum = admission_weights_total(&n);
        assert!(
            (sum - 1.0).abs() < 0.001,
            "normalized weights must sum to 1.0, got {sum}"
        );
        assert!(
            (n.goal_utility - 2.0 / 12.0).abs() < 0.001,
            "goal_utility must be rescaled by the total, got {}",
            n.goal_utility
        );
    }

    // Test: already-normalized weights are preserved.
    #[test]
    fn admission_weights_normalized_preserves_already_unit_sum() {
        let w = AdmissionWeights::default();
        let n = w.normalized();
        let sum = admission_weights_total(&n);
        assert!(
            (sum - 1.0).abs() < 0.001,
            "default weights sum to ~1.0 after normalization"
        );
        assert!(
            (n.future_utility - w.future_utility).abs() < 0.001,
            "unit-sum weights must be left unchanged"
        );
    }

    // Test: zero weights fall back to default (no divide-by-zero panic).
    #[test]
    fn admission_weights_normalized_zero_sum_falls_back_to_default() {
        let w = AdmissionWeights {
            future_utility: 0.0,
            factual_confidence: 0.0,
            semantic_novelty: 0.0,
            temporal_recency: 0.0,
            content_type_prior: 0.0,
            goal_utility: 0.0,
        };
        let n = w.normalized();
        let default = AdmissionWeights::default();
        for (name, got, want) in [
            ("future_utility", n.future_utility, default.future_utility),
            (
                "factual_confidence",
                n.factual_confidence,
                default.factual_confidence,
            ),
            (
                "semantic_novelty",
                n.semantic_novelty,
                default.semantic_novelty,
            ),
            (
                "temporal_recency",
                n.temporal_recency,
                default.temporal_recency,
            ),
            (
                "content_type_prior",
                n.content_type_prior,
                default.content_type_prior,
            ),
            ("goal_utility", n.goal_utility, default.goal_utility),
        ] {
            assert!(
                (got - want).abs() < 0.001,
                "zero-sum weights must fall back to defaults ({name}: got {got}, want {want})"
            );
        }
    }

    // Test: AdmissionConfig default values match documented defaults.
    #[test]
    fn admission_config_defaults() {
        let cfg = AdmissionConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.40).abs() < 0.001);
        assert!((cfg.fast_path_margin - 0.15).abs() < 0.001);
        assert!(cfg.admission_provider.is_empty());
        // Goal-conditioned write gate (#2408) is off by default.
        assert!(!cfg.goal_conditioned_write);
        assert!(cfg.goal_utility_provider.is_empty());
        assert!((cfg.goal_utility_threshold - 0.4).abs() < 0.001);
        assert!((cfg.goal_utility_weight - 0.25).abs() < 0.001);
    }

    // ── SpreadingActivationConfig tests (#2514) ──────────────────────────────

    #[test]
    fn spreading_activation_default_recall_timeout_ms_is_1000() {
        let cfg = SpreadingActivationConfig::default();
        assert_eq!(
            cfg.recall_timeout_ms, 1000,
            "default recall_timeout_ms must be 1000ms"
        );
    }

    // NOTE(review): this deserializes a local `Wrapper`, not `SpreadingActivationConfig`,
    // so it only proves that the key parses as a `u64` — consider targeting the real type.
    #[test]
    fn spreading_activation_toml_recall_timeout_ms_round_trip() {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            recall_timeout_ms: u64,
        }
        let toml = "recall_timeout_ms = 500";
        let w: Wrapper = toml::from_str(toml).unwrap();
        assert_eq!(w.recall_timeout_ms, 500);
    }

    #[test]
    fn spreading_activation_validate_cross_field_constraints() {
        let mut cfg = SpreadingActivationConfig::default();
        // Default activation_threshold (0.1) < inhibition_threshold (0.8) → must be Ok.
        assert!(cfg.validate().is_ok());

        // Equal thresholds must be rejected.
        cfg.activation_threshold = 0.5;
        cfg.inhibition_threshold = 0.5;
        assert!(cfg.validate().is_err());
    }

    // ─── CompressionConfig: new Focus fields deserialization (#2510, #2481) ──

    #[test]
    fn compression_config_focus_strategy_deserializes() {
        let toml = r#"strategy = "focus""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.strategy, CompressionStrategy::Focus);
    }

    #[test]
    fn compression_config_density_budget_defaults_on_deserialize() {
        // `#[serde(default = "...")]` applies during deserialization, not via Default::default().
        // Verify that omitting both fields yields the serde defaults (0.7 / 0.3).
        let toml = r#"strategy = "reactive""#;
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.7).abs() < 1e-6);
        assert!((cfg.low_density_budget - 0.3).abs() < 1e-6);
    }

    #[test]
    fn compression_config_density_budget_round_trip() {
        let toml = "strategy = \"reactive\"\nhigh_density_budget = 0.6\nlow_density_budget = 0.4";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert!((cfg.high_density_budget - 0.6).abs() < f32::EPSILON);
        assert!((cfg.low_density_budget - 0.4).abs() < f32::EPSILON);
    }

    #[test]
    fn compression_config_focus_scorer_provider_default_empty() {
        let cfg = CompressionConfig::default();
        assert!(cfg.focus_scorer_provider.is_empty());
    }

    #[test]
    fn compression_config_focus_scorer_provider_round_trip() {
        let toml = "strategy = \"focus\"\nfocus_scorer_provider = \"fast\"";
        let cfg: CompressionConfig = toml::from_str(toml).unwrap();
        assert_eq!(cfg.focus_scorer_provider.as_str(), "fast");
    }
}
4063
/// `ReasoningBank`: distilled reasoning strategy memory configuration (#3342).
///
/// When `enabled = true`, each completed agent turn is evaluated by a self-judge LLM call.
/// Successful and failed reasoning chains are compressed into short, generalizable strategy
/// summaries. At context-build time, top-k strategies are retrieved by embedding similarity
/// and injected into the prompt preamble.
///
/// All LLM work (self-judge, distillation) runs asynchronously — never on the turn thread.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys take the values
/// from the `Default` impl.
///
/// # Example
///
/// ```toml
/// [memory.reasoning]
/// enabled = true
/// extract_provider = "fast"
/// distill_provider = "fast"
/// top_k = 3
/// store_limit = 1000
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ReasoningConfig {
    /// Enable the reasoning-bank pipeline. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the self-judge step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub extract_provider: ProviderName,
    /// Provider name from `[[llm.providers]]` for the distillation step.
    /// Falls back to the primary provider when empty. Default: `""`.
    pub distill_provider: ProviderName,
    /// Number of strategies retrieved per turn for context injection. Default: `3`.
    pub top_k: usize,
    /// Maximum stored strategies; oldest unused are evicted when limit is reached. Default: `1000`.
    pub store_limit: usize,
    /// Maximum number of recent messages passed to the self-judge LLM. Default: `6`.
    pub max_messages: usize,
    /// Per-message content truncation limit (chars) before building the judge transcript. Default: `2000`.
    pub max_message_chars: usize,
    /// Maximum token budget for injected reasoning strategies in context. Default: `500`.
    pub context_budget_tokens: usize,
    /// Minimum number of messages required before self-judge fires. Default: `2`.
    pub min_messages: usize,
    /// Timeout in seconds for the self-judge LLM call. Default: `30`.
    pub extraction_timeout_secs: u64,
    /// Timeout in seconds for the distillation LLM call. Default: `30`.
    pub distill_timeout_secs: u64,
    /// Maximum number of recent messages passed to the self-judge evaluator.
    /// Narrowing to the last user+assistant pair improves classification accuracy.
    /// Default: `2`.
    // NOTE(review): `max_messages` is documented with nearly the same wording; how the
    // two limits interact is not visible in this file — confirm against the consumer.
    pub self_judge_window: usize,
    /// Minimum characters in the assistant response to trigger self-judge.
    /// Short or trivial responses are skipped. Default: `50`.
    pub min_assistant_chars: usize,
}
4118
4119impl Default for ReasoningConfig {
4120 fn default() -> Self {
4121 Self {
4122 enabled: false,
4123 extract_provider: ProviderName::default(),
4124 distill_provider: ProviderName::default(),
4125 top_k: 3,
4126 store_limit: 1000,
4127 max_messages: 6,
4128 max_message_chars: 2000,
4129 context_budget_tokens: 500,
4130 min_messages: 2,
4131 extraction_timeout_secs: 30,
4132 distill_timeout_secs: 30,
4133 self_judge_window: 2,
4134 min_assistant_chars: 50,
4135 }
4136 }
4137}
4138
4139// ── Eviction config (moved from zeph-memory) ─────────────────────────────────
4140
/// Eviction policy variant.
///
/// Serialises as `"ebbinghaus"` in TOML/JSON so existing configs remain valid
/// (variant names are lowercased via `rename_all = "lowercase"`).
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum EvictionPolicy {
    /// Ebbinghaus forgetting-curve eviction. This is the default variant.
    #[default]
    Ebbinghaus,
}
4152
4153/// Configuration for the memory eviction policy.
4154///
4155/// Controls which policy runs during the periodic sweep and how many entries
4156/// are retained. `zeph-memory` re-exports this type from here.
4157#[derive(Debug, Clone, Deserialize, Serialize)]
4158pub struct EvictionConfig {
4159 /// Eviction policy. Currently only [`EvictionPolicy::Ebbinghaus`] is supported.
4160 pub policy: EvictionPolicy,
4161 /// Maximum number of entries to retain. `0` means unlimited (eviction disabled).
4162 pub max_entries: usize,
4163 /// How often to run the eviction sweep, in seconds.
4164 pub sweep_interval_secs: u64,
4165}
4166
4167impl Default for EvictionConfig {
4168 fn default() -> Self {
4169 Self {
4170 policy: EvictionPolicy::Ebbinghaus,
4171 max_entries: 0,
4172 sweep_interval_secs: 3600,
4173 }
4174 }
4175}
4176
4177// ── Compression guidelines config (moved from zeph-memory) ───────────────────
4178
/// Configuration for ACON failure-driven compression guidelines.
///
/// `zeph-memory` re-exports this type from here.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys fall back to
/// defaults instead of producing a deserialization error.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CompressionGuidelinesConfig {
    /// Enable the feature. Default: `false`.
    pub enabled: bool,
    /// Minimum unused failure pairs before triggering a guidelines update. Default: `5`.
    pub update_threshold: u16,
    /// Maximum token budget for the guidelines document. Default: `500`.
    pub max_guidelines_tokens: usize,
    /// Maximum failure pairs consumed per update cycle. Default: `10`.
    pub max_pairs_per_update: usize,
    /// Number of turns after hard compaction to watch for context loss. Default: `10`.
    pub detection_window_turns: u64,
    /// Interval in seconds between background updater checks. Default: `300`.
    pub update_interval_secs: u64,
    /// Maximum unused failure pairs to retain (cleanup policy). Default: `100`.
    pub max_stored_pairs: usize,
    /// Provider name from `[[llm.providers]]` for guidelines update LLM calls.
    /// `None` (or `Some("")`) falls back to the primary provider.
    /// Omitted from serialized output when `None`. Default: `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub guidelines_provider: Option<ProviderName>,
    /// Maintain separate guideline documents per content category. Default: `false`.
    #[serde(default)]
    pub categorized_guidelines: bool,
}
4207
4208impl Default for CompressionGuidelinesConfig {
4209 fn default() -> Self {
4210 Self {
4211 enabled: false,
4212 update_threshold: 5,
4213 max_guidelines_tokens: 500,
4214 max_pairs_per_update: 10,
4215 detection_window_turns: 10,
4216 update_interval_secs: 300,
4217 max_stored_pairs: 100,
4218 guidelines_provider: None,
4219 categorized_guidelines: false,
4220 }
4221 }
4222}
4223
4224// ── Compaction probe config (moved from zeph-memory) ─────────────────────────
4225
/// Functional category of a compaction probe question.
///
/// `zeph-memory` re-exports this type from here.
///
/// Variants are (de)serialized in lowercase: `"recall"`, `"artifact"`,
/// `"continuation"`, `"decision"`. The enum is `#[non_exhaustive]`, so code outside
/// this crate must include a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ProbeCategory {
    /// Did specific facts survive? (file paths, function names, values, decisions)
    Recall,
    /// Does the agent know which files/tools/URLs it used?
    Artifact,
    /// Can it pick up mid-task? (current step, next steps, blockers, open questions)
    Continuation,
    /// Are past reasoning traces intact? (why X over Y, trade-offs, constraints)
    Decision,
}
4242
/// Configuration for the compaction probe.
///
/// `zeph-memory` re-exports this type from here.
///
/// All keys are optional in TOML (`#[serde(default)]`); omitted keys fall back to
/// defaults instead of producing a deserialization error.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionProbeConfig {
    /// Enable compaction probe validation. Default: `false`.
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for probe LLM calls.
    /// `None` (or `Some("")`) uses the summary provider.
    /// Omitted from serialized output when `None`. Default: `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub probe_provider: Option<ProviderName>,
    /// Minimum score to pass without warnings. Default: `0.6`.
    pub threshold: f32,
    /// Score below this triggers `HardFail` (block compaction). Default: `0.35`.
    pub hard_fail_threshold: f32,
    /// Maximum number of probe questions to generate. Default: `5`.
    pub max_questions: usize,
    /// Timeout for the entire probe (both LLM calls) in seconds. Default: `15`.
    pub timeout_secs: u64,
    /// Optional per-category weight multipliers for the overall score. Default: `None`.
    // NOTE(review): unlike `probe_provider`, this field has no `skip_serializing_if`,
    // so a `None` value is passed to the serializer — confirm that is intended.
    #[serde(default)]
    pub category_weights: Option<HashMap<ProbeCategory, f32>>,
}
4267
4268impl Default for CompactionProbeConfig {
4269 fn default() -> Self {
4270 Self {
4271 enabled: false,
4272 probe_provider: None,
4273 threshold: 0.6,
4274 hard_fail_threshold: 0.35,
4275 max_questions: 5,
4276 timeout_secs: 15,
4277 category_weights: None,
4278 }
4279 }
4280}
4281
4282// ── MemCoT semantic state config ─────────────────────────────────────────────
4283
4284/// `MemCoT` semantic-state distillation configuration.
4285///
4286/// When `enabled = true`, the agent maintains a short rolling "semantic state" buffer
4287/// summarizing conceptual progress across turns. This buffer is injected into graph
4288/// recall queries to improve retrieval relevance.
4289///
4290/// All LLM work (distillation) runs asynchronously — never on the turn thread.
4291/// When `enabled = false`, this is a **complete no-op**: no allocation, no LLM calls.
4292///
4293/// # Config example
4294///
4295/// ```toml
4296/// [memory.memcot]
4297/// enabled = true
4298/// distill_provider = "fast"
4299/// distill_timeout_secs = 5
4300/// min_assistant_chars = 200
4301/// min_distill_interval_secs = 30
4302/// max_distills_per_session = 50
4303/// max_state_chars = 800
4304/// recall_view = "head"
4305/// ```
4306#[derive(Debug, Clone, Serialize, Deserialize)]
4307#[serde(default)]
4308pub struct MemCotConfig {
4309 /// Enable the `MemCoT` semantic state pipeline. Default: `false`.
4310 ///
4311 /// When `false`, the accumulator is never allocated and no LLM calls are made.
4312 pub enabled: bool,
4313 /// Provider name from `[[llm.providers]]` for distillation.
4314 ///
4315 /// Must reference a **fast-tier** provider (e.g. `gpt-4o-mini`, `qwen3:8b`).
4316 /// A startup warning is emitted when the resolved model does not look fast-tier.
4317 /// Falls back to the primary provider when empty. Default: `""`.
4318 pub distill_provider: ProviderName,
4319 /// Timeout in seconds for each distillation LLM call. Default: `5`.
4320 pub distill_timeout_secs: u64,
4321 /// Minimum characters in the assistant response to trigger distillation.
4322 /// Short or trivial replies are skipped. Default: `200`.
4323 pub min_assistant_chars: usize,
4324 /// Minimum elapsed seconds between successive distillation spawns. Default: `30`.
4325 ///
4326 /// Prevents runaway costs on long sessions with rapid turns.
4327 /// Clearing `/new` resets this counter.
4328 pub min_distill_interval_secs: u64,
4329 /// Maximum distillation spawns per conversation session. Default: `50`.
4330 ///
4331 /// Once this cap is reached the accumulator stops distilling for the rest of the
4332 /// session. Counter is reset when the user sends `/new`.
4333 pub max_distills_per_session: u64,
4334 /// Maximum characters for the semantic state buffer (UTF-8 char boundary truncation).
4335 /// Default: `800`.
4336 pub max_state_chars: usize,
4337 /// Recall view applied when `MemCoT` is active. Default: `Head`.
4338 ///
4339 /// - `head`: standard retrieval, no enrichment (suitable for low-latency setups).
4340 /// - `zoom_in`: adds source-message provenance to each returned fact.
4341 /// - `zoom_out`: expands 1-hop neighbors per returned fact.
4342 ///
4343 /// TODO(F3): add a per-call override parameter on `recall_graph_view`.
4344 pub recall_view: RecallViewConfig,
4345 /// Maximum 1-hop neighbor facts per head fact in `zoom_out` view. Default: `3`.
4346 pub zoom_out_neighbor_cap: usize,
4347 /// Optional model name allowlist for the fast-tier soft validator (lowercase substring match).
4348 /// Empty (default) → falls back to the built-in `FAST_TIER_MODEL_HINTS` list.
4349 #[serde(default, skip_serializing_if = "Vec::is_empty")]
4350 pub fast_tier_models: Vec<String>,
4351}
4352
4353/// Recall view variant exposed in config.
4354///
4355/// Maps 1-to-1 to `zeph_memory::RecallView`.
4356#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
4357#[serde(rename_all = "snake_case")]
4358#[non_exhaustive]
4359pub enum RecallViewConfig {
4360 /// Standard retrieval — no enrichment. Byte-identical to legacy behaviour.
4361 #[default]
4362 Head,
4363 /// Adds source-message provenance to each returned fact.
4364 ZoomIn,
4365 /// Expands 1-hop neighbor facts per returned fact.
4366 ZoomOut,
4367}
4368
4369impl Default for MemCotConfig {
4370 fn default() -> Self {
4371 Self {
4372 enabled: false,
4373 distill_provider: ProviderName::default(),
4374 distill_timeout_secs: 5,
4375 min_assistant_chars: 200,
4376 min_distill_interval_secs: 30,
4377 max_distills_per_session: 50,
4378 max_state_chars: 800,
4379 recall_view: RecallViewConfig::Head,
4380 zoom_out_neighbor_cap: 3,
4381 fast_tier_models: Vec::new(),
4382 }
4383 }
4384}
4385
4386/// `OmniMem` retrieval failure tracking configuration (issue #3576).
4387///
4388/// Controls the async logger that records no-hit and low-confidence recall events
4389/// to `memory_retrieval_failures` for closed-loop memory parameter tuning.
4390#[derive(Debug, Clone, Deserialize, Serialize)]
4391#[serde(default)]
4392pub struct RetrievalFailuresConfig {
4393 /// Enable retrieval failure logging. Default: `false`.
4394 pub enabled: bool,
4395 /// Composite recall score below which a result is classified as low-confidence.
4396 ///
4397 /// The threshold applies to the post-reranking composite score (which incorporates
4398 /// MMR, temporal decay, importance weighting, and tier boost). Calibrate against
4399 /// the scoring pipeline in use. Default: `0.3`.
4400 #[serde(default = "default_retrieval_failures_low_confidence_threshold")]
4401 pub low_confidence_threshold: f32,
4402 /// Days to retain failure records before automatic cleanup. Default: `90`.
4403 #[serde(default = "default_retrieval_failures_retention_days")]
4404 pub retention_days: u32,
4405 /// Bounded mpsc channel capacity for the fire-and-forget write path. Default: `256`.
4406 #[serde(default = "default_retrieval_failures_channel_capacity")]
4407 pub channel_capacity: usize,
4408 /// Maximum records collected before flushing a batch INSERT. Default: `16`.
4409 #[serde(default = "default_retrieval_failures_batch_size")]
4410 pub batch_size: usize,
4411 /// Maximum milliseconds to wait before flushing a partial batch. Default: `100`.
4412 #[serde(default = "default_retrieval_failures_flush_interval_ms")]
4413 pub flush_interval_ms: u64,
4414}
4415
4416impl Default for RetrievalFailuresConfig {
4417 fn default() -> Self {
4418 Self {
4419 enabled: false,
4420 low_confidence_threshold: default_retrieval_failures_low_confidence_threshold(),
4421 retention_days: default_retrieval_failures_retention_days(),
4422 channel_capacity: default_retrieval_failures_channel_capacity(),
4423 batch_size: default_retrieval_failures_batch_size(),
4424 flush_interval_ms: default_retrieval_failures_flush_interval_ms(),
4425 }
4426 }
4427}
4428
4429// ── TrajectoryRiskAccumulator config (spec 004-16) ─────────────────────────────
4430
4431fn validate_tra_nonneg_weight<'de, D>(deserializer: D) -> Result<f64, D::Error>
4432where
4433 D: serde::Deserializer<'de>,
4434{
4435 let value = <f64 as serde::Deserialize>::deserialize(deserializer)?;
4436 if value.is_nan() || value.is_infinite() || value < 0.0 {
4437 return Err(serde::de::Error::custom(
4438 "signal weight and severity multiplier values must be finite and non-negative",
4439 ));
4440 }
4441 Ok(value)
4442}
4443
4444/// Per-signal-type base weights for the trajectory risk accumulator.
4445///
4446/// Each weight is in `(0.0, 1.0]` and is multiplied by the severity multiplier
4447/// before being added to `trajectory_risk`.
4448///
4449/// # Example (TOML)
4450///
4451/// ```toml
4452/// [memory.shadow_memory.signal_weights]
4453/// prompt_injection = 0.6
4454/// ```
4455#[derive(Debug, Clone, Serialize, Deserialize)]
4456pub struct TrajectorySignalWeights {
4457 /// Weight for `PolicyViolation` signals. Default: `0.30`.
4458 #[serde(
4459 default = "default_sw_policy_violation",
4460 deserialize_with = "validate_tra_nonneg_weight"
4461 )]
4462 pub policy_violation: f64,
4463 /// Weight for `PromptInjectionPattern` signals. Default: `0.50`.
4464 #[serde(
4465 default = "default_sw_prompt_injection",
4466 deserialize_with = "validate_tra_nonneg_weight"
4467 )]
4468 pub prompt_injection: f64,
4469 /// Weight for `ToolChainAnomaly` signals. Default: `0.25`.
4470 #[serde(
4471 default = "default_sw_tool_chain_anomaly",
4472 deserialize_with = "validate_tra_nonneg_weight"
4473 )]
4474 pub tool_chain_anomaly: f64,
4475 /// Weight for `ConfidenceDrop` signals. Default: `0.15`.
4476 #[serde(
4477 default = "default_sw_confidence_drop",
4478 deserialize_with = "validate_tra_nonneg_weight"
4479 )]
4480 pub confidence_drop: f64,
4481}
4482
/// Fallback base weight for `PolicyViolation` signals.
fn default_sw_policy_violation() -> f64 {
    const WEIGHT: f64 = 0.30;
    WEIGHT
}

/// Fallback base weight for `PromptInjectionPattern` signals.
fn default_sw_prompt_injection() -> f64 {
    const WEIGHT: f64 = 0.50;
    WEIGHT
}

/// Fallback base weight for `ToolChainAnomaly` signals.
fn default_sw_tool_chain_anomaly() -> f64 {
    const WEIGHT: f64 = 0.25;
    WEIGHT
}

/// Fallback base weight for `ConfidenceDrop` signals.
fn default_sw_confidence_drop() -> f64 {
    const WEIGHT: f64 = 0.15;
    WEIGHT
}
4495
4496impl Default for TrajectorySignalWeights {
4497 fn default() -> Self {
4498 Self {
4499 policy_violation: default_sw_policy_violation(),
4500 prompt_injection: default_sw_prompt_injection(),
4501 tool_chain_anomaly: default_sw_tool_chain_anomaly(),
4502 confidence_drop: default_sw_confidence_drop(),
4503 }
4504 }
4505}
4506
4507/// Per-severity multipliers applied on top of signal base weights.
4508///
4509/// # Example (TOML)
4510///
4511/// ```toml
4512/// [memory.shadow_memory.severity_multipliers]
4513/// high = 3.0
4514/// ```
4515#[derive(Debug, Clone, Serialize, Deserialize)]
4516pub struct TrajectorySeverityMultipliers {
4517 /// Multiplier for low-severity signals. Default: `0.5`.
4518 #[serde(
4519 default = "default_sev_low",
4520 deserialize_with = "validate_tra_nonneg_weight"
4521 )]
4522 pub low: f64,
4523 /// Multiplier for medium-severity signals. Default: `1.0`.
4524 #[serde(
4525 default = "default_sev_medium",
4526 deserialize_with = "validate_tra_nonneg_weight"
4527 )]
4528 pub medium: f64,
4529 /// Multiplier for high-severity signals. Default: `2.0`.
4530 #[serde(
4531 default = "default_sev_high",
4532 deserialize_with = "validate_tra_nonneg_weight"
4533 )]
4534 pub high: f64,
4535}
4536
/// Fallback multiplier for low-severity signals.
fn default_sev_low() -> f64 {
    const MULTIPLIER: f64 = 0.5;
    MULTIPLIER
}

/// Fallback multiplier for medium-severity signals.
fn default_sev_medium() -> f64 {
    const MULTIPLIER: f64 = 1.0;
    MULTIPLIER
}

/// Fallback multiplier for high-severity signals.
fn default_sev_high() -> f64 {
    const MULTIPLIER: f64 = 2.0;
    MULTIPLIER
}
4546
4547impl Default for TrajectorySeverityMultipliers {
4548 fn default() -> Self {
4549 Self {
4550 low: default_sev_low(),
4551 medium: default_sev_medium(),
4552 high: default_sev_high(),
4553 }
4554 }
4555}
4556
4557/// Configuration for the MAGE trajectory risk accumulator (spec 004-16).
4558///
4559/// Controls how per-turn safety signals accumulate into a session-level risk score
4560/// and when tool execution is blocked or escalated.
4561///
4562/// # Example (TOML)
4563///
4564/// ```toml
4565/// [memory.shadow_memory]
4566/// enabled = true
4567/// risk_threshold = 0.75
4568/// escalation_threshold = 0.50
4569/// risk_halflife_turns = 10
4570/// signal_history_cap = 200
4571/// tui_show_risk_gauge = true
4572/// reset_on_compaction = false
4573/// ```
4574#[derive(Debug, Clone, Serialize, Deserialize)]
4575pub struct TrajectoryRiskAccumulatorConfig {
4576 /// Enable shadow memory. When `false`, `TrajectoryRiskAccumulator` is a zero-cost noop.
4577 #[serde(default)]
4578 pub enabled: bool,
4579 /// Block tool execution when `trajectory_risk >= risk_threshold`. Default: `0.75`.
4580 #[serde(default = "default_tra_risk_threshold")]
4581 pub risk_threshold: f64,
4582 /// Escalate to human confirmation when risk is in `[escalation_threshold, risk_threshold)`.
4583 /// Default: `0.50`.
4584 #[serde(default = "default_tra_escalation_threshold")]
4585 pub escalation_threshold: f64,
4586 /// Number of turns after which accumulated risk halves (exponential decay). Default: `10`.
4587 #[serde(default = "default_tra_risk_halflife_turns")]
4588 pub risk_halflife_turns: u32,
4589 /// Maximum number of signal events kept in the ring buffer. Default: `200`.
4590 #[serde(default = "default_tra_signal_history_cap")]
4591 pub signal_history_cap: usize,
4592 /// Show a risk gauge in the TUI security panel when the TUI is enabled. Default: `true`.
4593 #[serde(default = "default_true")]
4594 pub tui_show_risk_gauge: bool,
4595 /// Reset `trajectory_risk` to zero when a context compaction occurs. Default: `false`.
4596 #[serde(default)]
4597 pub reset_on_compaction: bool,
4598 /// Per-signal-type base weights.
4599 #[serde(default)]
4600 pub signal_weights: TrajectorySignalWeights,
4601 /// Per-severity multipliers applied on top of signal weights.
4602 #[serde(default)]
4603 pub severity_multipliers: TrajectorySeverityMultipliers,
4604}
4605
/// Fallback risk score at which tool execution is blocked.
fn default_tra_risk_threshold() -> f64 {
    const THRESHOLD: f64 = 0.75;
    THRESHOLD
}

/// Fallback risk score at which escalation to human confirmation begins.
fn default_tra_escalation_threshold() -> f64 {
    const THRESHOLD: f64 = 0.50;
    THRESHOLD
}

/// Fallback risk half-life, in turns.
fn default_tra_risk_halflife_turns() -> u32 {
    const TURNS: u32 = 10;
    TURNS
}

/// Fallback capacity of the signal-event ring buffer.
fn default_tra_signal_history_cap() -> usize {
    const CAPACITY: usize = 200;
    CAPACITY
}
4618
4619impl Default for TrajectoryRiskAccumulatorConfig {
4620 fn default() -> Self {
4621 Self {
4622 enabled: false,
4623 risk_threshold: default_tra_risk_threshold(),
4624 escalation_threshold: default_tra_escalation_threshold(),
4625 risk_halflife_turns: default_tra_risk_halflife_turns(),
4626 signal_history_cap: default_tra_signal_history_cap(),
4627 tui_show_risk_gauge: true,
4628 reset_on_compaction: false,
4629 signal_weights: TrajectorySignalWeights::default(),
4630 severity_multipliers: TrajectorySeverityMultipliers::default(),
4631 }
4632 }
4633}
4634
#[cfg(test)]
mod memcot_config_tests {
    use super::*;

    /// `MemCotConfig::default()` must be disabled and carry the documented value for
    /// every field, including the empty fast-tier allowlist.
    #[test]
    fn memcot_config_default_disabled() {
        let cfg = MemCotConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.distill_provider.is_empty());
        assert_eq!(cfg.distill_timeout_secs, 5);
        assert_eq!(cfg.min_assistant_chars, 200);
        assert_eq!(cfg.min_distill_interval_secs, 30);
        assert_eq!(cfg.max_distills_per_session, 50);
        assert_eq!(cfg.max_state_chars, 800);
        assert_eq!(cfg.recall_view, RecallViewConfig::Head);
        assert_eq!(cfg.zoom_out_neighbor_cap, 3);
        assert!(cfg.fast_tier_models.is_empty());
    }

    /// Every key present in the TOML must land in the matching field, and a key that is
    /// absent (`fast_tier_models`) must fall back to its default.
    #[test]
    fn memcot_config_round_trip() {
        let toml = r#"
            enabled = true
            distill_provider = "fast"
            distill_timeout_secs = 10
            min_assistant_chars = 100
            min_distill_interval_secs = 60
            max_distills_per_session = 20
            max_state_chars = 400
            recall_view = "zoom_in"
            zoom_out_neighbor_cap = 5
        "#;
        let cfg: MemCotConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert_eq!(cfg.distill_provider.as_str(), "fast");
        assert_eq!(cfg.distill_timeout_secs, 10);
        // Previously parsed but never checked.
        assert_eq!(cfg.min_assistant_chars, 100);
        assert_eq!(cfg.min_distill_interval_secs, 60);
        assert_eq!(cfg.max_distills_per_session, 20);
        // Previously parsed but never checked.
        assert_eq!(cfg.max_state_chars, 400);
        assert_eq!(cfg.recall_view, RecallViewConfig::ZoomIn);
        assert_eq!(cfg.zoom_out_neighbor_cap, 5);
        // Not present in the TOML above, so the container-level default applies.
        assert!(cfg.fast_tier_models.is_empty());
    }
}
4676
#[cfg(test)]
mod apex_mem_quality_gate_config_tests {
    use super::*;

    /// APEX-MEM is opt-in: the default config must be disabled.
    #[test]
    fn apex_mem_config_default_disabled() {
        let cfg = ApexMemConfig::default();
        assert!(!cfg.enabled, "APEX-MEM must be disabled by default");
    }

    /// `enabled = true` in TOML must be honoured.
    #[test]
    fn apex_mem_config_serde_round_trip() {
        let toml = "enabled = true";
        let cfg: ApexMemConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
    }

    /// An empty document must deserialize to the disabled default.
    #[test]
    fn apex_mem_config_empty_toml_uses_defaults() {
        let cfg: ApexMemConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
    }

    /// Pins every default of `WriteQualityGateConfig`.
    #[test]
    fn write_quality_gate_config_default_disabled() {
        let cfg = WriteQualityGateConfig::default();
        assert!(!cfg.enabled);
        assert!((cfg.threshold - 0.55).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 32);
        assert_eq!(cfg.contradiction_grace_seconds, 300);
        assert!((cfg.information_value_weight - 0.4).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.3).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.35).abs() < f32::EPSILON);
        assert!(cfg.quality_gate_provider.is_empty());
        assert_eq!(cfg.llm_timeout_ms, 500);
        assert!((cfg.llm_weight - 0.5).abs() < f32::EPSILON);
        assert!(cfg.reference_check_lang_en);
    }

    /// Every key supplied in the TOML must land in the matching field.
    #[test]
    fn write_quality_gate_config_serde_round_trip() {
        let toml = r#"
            enabled = true
            threshold = 0.70
            recent_window = 16
            contradiction_grace_seconds = 600
            information_value_weight = 0.5
            reference_completeness_weight = 0.25
            contradiction_weight = 0.25
            rejection_rate_alarm_ratio = 0.50
            quality_gate_provider = "fast"
            llm_timeout_ms = 1000
            llm_weight = 0.3
            reference_check_lang_en = false
        "#;
        let cfg: WriteQualityGateConfig = toml::from_str(toml).unwrap();
        assert!(cfg.enabled);
        assert!((cfg.threshold - 0.70).abs() < f32::EPSILON);
        assert_eq!(cfg.recent_window, 16);
        assert_eq!(cfg.contradiction_grace_seconds, 600);
        // The float fields below were previously set in the TOML but never checked.
        assert!((cfg.information_value_weight - 0.5).abs() < f32::EPSILON);
        assert!((cfg.reference_completeness_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.contradiction_weight - 0.25).abs() < f32::EPSILON);
        assert!((cfg.rejection_rate_alarm_ratio - 0.50).abs() < f32::EPSILON);
        assert_eq!(cfg.quality_gate_provider.as_str(), "fast");
        assert_eq!(cfg.llm_timeout_ms, 1000);
        assert!((cfg.llm_weight - 0.3).abs() < f32::EPSILON);
        assert!(!cfg.reference_check_lang_en);
    }

    /// An empty document must deserialize to the defaults.
    #[test]
    fn write_quality_gate_config_empty_toml_uses_defaults() {
        let cfg: WriteQualityGateConfig = toml::from_str("").unwrap();
        assert!(!cfg.enabled, "empty TOML must produce default (disabled)");
        assert_eq!(cfg.recent_window, 32);
    }

    /// `shutdown_summary_provider` must be read from TOML.
    #[test]
    fn memory_config_shutdown_summary_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            shutdown_summary_provider = "fast"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "fast",
            "shutdown_summary_provider must deserialize from TOML"
        );
    }

    /// Without a `[five_signal]` table the feature is off and only the recency and
    /// relevance weights are non-zero.
    #[test]
    fn five_signal_config_default_is_disabled() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert!(!cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_recency - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_relevance - 0.35).abs() < 1e-9);
        assert!((cfg.five_signal.w_frequency).abs() < 1e-9);
        assert!((cfg.five_signal.w_causal).abs() < 1e-9);
        assert!((cfg.five_signal.w_novelty).abs() < 1e-9);
    }

    /// A `[five_signal]` table must override the defaults.
    #[test]
    fn five_signal_config_toml_roundtrip() {
        let toml = r"
            history_limit = 50
            [five_signal]
            enabled = true
            w_recency = 0.35
            w_relevance = 0.35
            w_frequency = 0.15
            w_causal = 0.10
            w_novelty = 0.05
        ";
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert!(cfg.five_signal.enabled);
        assert!((cfg.five_signal.w_frequency - 0.15).abs() < 1e-9);
    }

    /// `shutdown_summary_provider` must default to the empty string when omitted.
    #[test]
    fn memory_config_shutdown_summary_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.shutdown_summary_provider.as_str(),
            "",
            "shutdown_summary_provider must default to empty string"
        );
    }

    /// `compaction_provider` must be read from TOML.
    #[test]
    fn memory_config_compaction_provider_toml_roundtrip() {
        let toml = r#"
            history_limit = 50
            compaction_provider = "mid"
        "#;
        let cfg: MemoryConfig = toml::from_str(toml).expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "mid",
            "compaction_provider must deserialize from TOML"
        );
    }

    /// `compaction_provider` must default to the empty string when omitted.
    #[test]
    fn memory_config_compaction_provider_default_is_empty() {
        let cfg: MemoryConfig = toml::from_str("history_limit = 50").expect("must deserialize");
        assert_eq!(
            cfg.compaction_provider.as_str(),
            "",
            "compaction_provider must default to empty string"
        );
    }
}