zeph_memory/five_signal/mod.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Five-signal SYNAPSE retrieval subsystem (issue #4374).
5//!
6//! Extends the SYNAPSE recall pipeline with three additional signals beyond the
7//! two-signal baseline (recency + relevance):
8//!
9//! - **Access frequency** — facts queried more often rank higher.
10//! - **Causal distance** — facts causally closer to the current goal rank higher.
11//! - **Novelty** — facts created early in the session rank higher than late-session facts.
12//!
13//! When all new signal weights are `0.0` (the default), the five-signal formula is
14//! algebraically equivalent to the existing two-signal baseline.
15
16pub mod access_frequency;
17pub mod causal_distance;
18pub mod consolidation;
19pub mod metrics;
20pub mod novelty;
21pub mod scoring;
22pub mod weights;
23
24use std::sync::Arc;
25
26use zeph_common::SessionId;
27use zeph_config::memory::FiveSignalConfig;
28use zeph_db::DbPool;
29
30use crate::embedding_store::EmbeddingStore;
31use crate::five_signal::{
32 access_frequency::AccessFrequencyCache, causal_distance::CausalDistanceComputer,
33 metrics::FiveSignalMetrics, novelty::NoveltyComputer, weights::FiveSignalWeights,
34};
35
36/// Runtime state for the five-signal retrieval subsystem.
37///
38/// Created once at bootstrap when `five_signal.enabled = true` and attached to
39/// [`crate::semantic::SemanticMemory`] via an `Option<Arc<FiveSignalRuntime>>`.
40/// `None` when disabled — guarantees zero overhead per NFR-005.
41pub struct FiveSignalRuntime {
42 /// Normalized signal weights (computed once at startup).
43 pub weights: FiveSignalWeights,
44 /// Access frequency aggregator.
45 pub access_cache: AccessFrequencyCache,
46 /// Causal distance computer (contains BFS cache per goal entity).
47 pub causal_computer: tokio::sync::Mutex<CausalDistanceComputer>,
48 /// Novelty computer (pure arithmetic, no I/O).
49 pub novelty_computer: NoveltyComputer,
50 /// Prometheus-compatible counters.
51 pub metrics: Arc<FiveSignalMetrics>,
52 /// `SQLite` pool (shared with the rest of `SemanticMemory`).
53 pub pool: DbPool,
54 /// Qdrant store (optional; used by the consolidation daemon).
55 pub qdrant: Option<Arc<EmbeddingStore>>,
56 /// Unix timestamp of session start, used by `NoveltyComputer`.
57 pub session_start: i64,
58 /// Session identifier used to scope `fact_access_log` inserts and queries.
59 ///
60 /// Set at bootstrap from a per-process UUID so access counts are isolated
61 /// per session and do not bleed across process restarts.
62 pub session_id: SessionId,
63 /// Config snapshot (used by the consolidation daemon).
64 pub config: FiveSignalConfig,
65}
66
67impl FiveSignalRuntime {
68 /// Create a new runtime from config, pool, and optional graph + Qdrant stores.
69 ///
70 /// Normalizes signal weights (logging `WARN` if they do not sum to `1.0`).
71 /// Logs a `WARN` if `consolidation_daemon.top_k_per_run < batch_size` (MINOR-03).
72 ///
73 /// # Examples
74 ///
75 /// ```no_run
76 /// use std::sync::Arc;
77 /// use zeph_config::memory::FiveSignalConfig;
78 /// use zeph_memory::five_signal::FiveSignalRuntime;
79 ///
80 /// # async fn example(pool: zeph_db::DbPool, graph: Arc<zeph_memory::graph::GraphStore>) {
81 /// let cfg = FiveSignalConfig::default();
82 /// let session_start = std::time::SystemTime::now()
83 /// .duration_since(std::time::UNIX_EPOCH)
84 /// .map_or(0, |d| d.as_secs() as i64);
85 /// let session_id = uuid::Uuid::new_v4().to_string();
86 /// let runtime = FiveSignalRuntime::new(cfg, pool, graph, None, session_start, session_id);
87 /// # }
88 /// ```
89 #[must_use]
90 pub fn new(
91 config: FiveSignalConfig,
92 pool: DbPool,
93 graph_store: Arc<crate::graph::GraphStore>,
94 qdrant: Option<Arc<EmbeddingStore>>,
95 session_start: i64,
96 session_id: impl Into<SessionId>,
97 ) -> Self {
98 let weights = FiveSignalWeights::normalized(&config);
99
100 // MINOR-03: enforce top_k_per_run >= batch_size at startup.
101 let daemon = &config.consolidation_daemon;
102 if daemon.enabled && daemon.top_k_per_run < daemon.batch_size {
103 tracing::warn!(
104 top_k_per_run = daemon.top_k_per_run,
105 batch_size = daemon.batch_size,
106 "five_signal: top_k_per_run < batch_size; daemon will only process top_k_per_run facts"
107 );
108 }
109
110 Self {
111 weights,
112 access_cache: AccessFrequencyCache::new(pool.clone()),
113 causal_computer: tokio::sync::Mutex::new(CausalDistanceComputer::new(
114 graph_store,
115 config.causal_bfs_max_depth,
116 config.neutral_causal_distance,
117 )),
118 novelty_computer: NoveltyComputer::new(session_start, config.novelty_decay_rate),
119 metrics: Arc::new(FiveSignalMetrics::default()),
120 pool,
121 qdrant,
122 session_start,
123 session_id: session_id.into(),
124 config,
125 }
126 }
127}