zeph_memory/semantic/
mod.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0
3
4mod algorithms;
5mod corrections;
6mod cross_session;
7mod graph;
8pub(crate) mod importance;
9mod recall;
10mod summarization;
11
12#[cfg(test)]
13mod tests;
14
15use std::sync::Arc;
16use std::sync::atomic::AtomicU64;
17
18use zeph_llm::any::AnyProvider;
19
20use crate::admission::AdmissionControl;
21use crate::embedding_store::EmbeddingStore;
22use crate::error::MemoryError;
23use crate::store::SqliteStore;
24use crate::token_counter::TokenCounter;
25
/// Collection name used for session summaries.
///
/// NOTE(review): presumably a collection in the embedding store — confirm against
/// the modules that reference this constant.
pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";

/// Collection name used for key facts.
///
/// NOTE(review): presumably a collection in the embedding store — confirm against
/// the modules that reference this constant.
pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";

/// Collection name used for corrections.
///
/// NOTE(review): presumably a collection in the embedding store — confirm against
/// the modules that reference this constant.
pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";
29
30pub use algorithms::{apply_mmr, apply_temporal_decay};
31pub use cross_session::SessionSummaryResult;
32pub use graph::{
33    ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
34    PostExtractValidator, extract_and_store, link_memory_notes,
35};
36pub use recall::RecalledMessage;
37pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
38
39pub struct SemanticMemory {
40    pub(crate) sqlite: SqliteStore,
41    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
42    pub(crate) provider: AnyProvider,
43    pub(crate) embedding_model: String,
44    pub(crate) vector_weight: f64,
45    pub(crate) keyword_weight: f64,
46    pub(crate) temporal_decay_enabled: bool,
47    pub(crate) temporal_decay_half_life_days: u32,
48    pub(crate) mmr_enabled: bool,
49    pub(crate) mmr_lambda: f32,
50    pub(crate) importance_enabled: bool,
51    pub(crate) importance_weight: f64,
52    /// Multiplicative score boost for semantic-tier messages in recall ranking.
53    /// Default: `1.3`. Disabled when set to `1.0`.
54    pub(crate) tier_boost_semantic: f64,
55    pub token_counter: Arc<TokenCounter>,
56    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
57    pub(crate) community_detection_failures: Arc<AtomicU64>,
58    pub(crate) graph_extraction_count: Arc<AtomicU64>,
59    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
60    /// A-MAC admission control gate. When `Some`, each `remember()` call is evaluated.
61    pub(crate) admission_control: Option<Arc<AdmissionControl>>,
62}
63
64impl SemanticMemory {
65    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
66    ///
67    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
68    ///
69    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
70    /// a single gRPC channel across all subsystems.
71    ///
72    /// # Errors
73    ///
74    /// Returns an error if `SQLite` cannot be initialized.
75    pub async fn new(
76        sqlite_path: &str,
77        qdrant_url: &str,
78        provider: AnyProvider,
79        embedding_model: &str,
80    ) -> Result<Self, MemoryError> {
81        Self::with_weights(sqlite_path, qdrant_url, provider, embedding_model, 0.7, 0.3).await
82    }
83
84    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
85    ///
86    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
87    /// a single gRPC channel across all subsystems.
88    ///
89    /// # Errors
90    ///
91    /// Returns an error if `SQLite` cannot be initialized.
92    pub async fn with_weights(
93        sqlite_path: &str,
94        qdrant_url: &str,
95        provider: AnyProvider,
96        embedding_model: &str,
97        vector_weight: f64,
98        keyword_weight: f64,
99    ) -> Result<Self, MemoryError> {
100        Self::with_weights_and_pool_size(
101            sqlite_path,
102            qdrant_url,
103            provider,
104            embedding_model,
105            vector_weight,
106            keyword_weight,
107            5,
108        )
109        .await
110    }
111
112    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
113    ///
114    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
115    /// a single gRPC channel across all subsystems.
116    ///
117    /// # Errors
118    ///
119    /// Returns an error if `SQLite` cannot be initialized.
120    pub async fn with_weights_and_pool_size(
121        sqlite_path: &str,
122        qdrant_url: &str,
123        provider: AnyProvider,
124        embedding_model: &str,
125        vector_weight: f64,
126        keyword_weight: f64,
127        pool_size: u32,
128    ) -> Result<Self, MemoryError> {
129        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
130        let pool = sqlite.pool().clone();
131
132        let qdrant = match EmbeddingStore::new(qdrant_url, pool) {
133            Ok(store) => Some(Arc::new(store)),
134            Err(e) => {
135                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
136                None
137            }
138        };
139
140        Ok(Self {
141            sqlite,
142            qdrant,
143            provider,
144            embedding_model: embedding_model.into(),
145            vector_weight,
146            keyword_weight,
147            temporal_decay_enabled: false,
148            temporal_decay_half_life_days: 30,
149            mmr_enabled: false,
150            mmr_lambda: 0.7,
151            importance_enabled: false,
152            importance_weight: 0.15,
153            tier_boost_semantic: 1.3,
154            token_counter: Arc::new(TokenCounter::new()),
155            graph_store: None,
156            community_detection_failures: Arc::new(AtomicU64::new(0)),
157            graph_extraction_count: Arc::new(AtomicU64::new(0)),
158            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
159            admission_control: None,
160        })
161    }
162
163    /// Create a `SemanticMemory` from a pre-built `QdrantOps` instance.
164    ///
165    /// Use this at bootstrap to share one `QdrantOps` (and thus one gRPC channel)
166    /// across all subsystems. The `ops` is consumed and wrapped inside `EmbeddingStore`.
167    ///
168    /// # Errors
169    ///
170    /// Returns an error if `SQLite` cannot be initialized.
171    pub async fn with_qdrant_ops(
172        sqlite_path: &str,
173        ops: crate::QdrantOps,
174        provider: AnyProvider,
175        embedding_model: &str,
176        vector_weight: f64,
177        keyword_weight: f64,
178        pool_size: u32,
179    ) -> Result<Self, MemoryError> {
180        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
181        let pool = sqlite.pool().clone();
182        let store = EmbeddingStore::with_store(Box::new(ops), pool);
183
184        Ok(Self {
185            sqlite,
186            qdrant: Some(Arc::new(store)),
187            provider,
188            embedding_model: embedding_model.into(),
189            vector_weight,
190            keyword_weight,
191            temporal_decay_enabled: false,
192            temporal_decay_half_life_days: 30,
193            mmr_enabled: false,
194            mmr_lambda: 0.7,
195            importance_enabled: false,
196            importance_weight: 0.15,
197            tier_boost_semantic: 1.3,
198            token_counter: Arc::new(TokenCounter::new()),
199            graph_store: None,
200            community_detection_failures: Arc::new(AtomicU64::new(0)),
201            graph_extraction_count: Arc::new(AtomicU64::new(0)),
202            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
203            admission_control: None,
204        })
205    }
206
207    /// Attach a `GraphStore` for graph-aware retrieval.
208    ///
209    /// When set, `recall_graph` traverses the graph starting from entities
210    /// matched by the query.
211    #[must_use]
212    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
213        self.graph_store = Some(store);
214        self
215    }
216
217    /// Returns the cumulative count of community detection failures since startup.
218    #[must_use]
219    pub fn community_detection_failures(&self) -> u64 {
220        use std::sync::atomic::Ordering;
221        self.community_detection_failures.load(Ordering::Relaxed)
222    }
223
224    /// Returns the cumulative count of successful graph extractions since startup.
225    #[must_use]
226    pub fn graph_extraction_count(&self) -> u64 {
227        use std::sync::atomic::Ordering;
228        self.graph_extraction_count.load(Ordering::Relaxed)
229    }
230
231    /// Returns the cumulative count of failed graph extractions since startup.
232    #[must_use]
233    pub fn graph_extraction_failures(&self) -> u64 {
234        use std::sync::atomic::Ordering;
235        self.graph_extraction_failures.load(Ordering::Relaxed)
236    }
237
238    /// Configure temporal decay and MMR re-ranking options.
239    #[must_use]
240    pub fn with_ranking_options(
241        mut self,
242        temporal_decay_enabled: bool,
243        temporal_decay_half_life_days: u32,
244        mmr_enabled: bool,
245        mmr_lambda: f32,
246    ) -> Self {
247        self.temporal_decay_enabled = temporal_decay_enabled;
248        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
249        self.mmr_enabled = mmr_enabled;
250        self.mmr_lambda = mmr_lambda;
251        self
252    }
253
254    /// Configure write-time importance scoring for memory retrieval.
255    #[must_use]
256    pub fn with_importance_options(mut self, enabled: bool, weight: f64) -> Self {
257        self.importance_enabled = enabled;
258        self.importance_weight = weight;
259        self
260    }
261
262    /// Configure the multiplicative score boost applied to semantic-tier messages during recall.
263    ///
264    /// Set to `1.0` to disable the boost. Default: `1.3`.
265    #[must_use]
266    pub fn with_tier_boost(mut self, boost: f64) -> Self {
267        self.tier_boost_semantic = boost;
268        self
269    }
270
271    /// Attach an A-MAC admission controller.
272    ///
273    /// When set, `remember()` and `remember_with_parts()` evaluate each message before persisting.
274    /// Messages below the admission threshold return `Ok(None)` without incrementing counts.
275    #[must_use]
276    pub fn with_admission_control(mut self, control: AdmissionControl) -> Self {
277        self.admission_control = Some(Arc::new(control));
278        self
279    }
280
281    /// Construct a `SemanticMemory` from pre-built parts.
282    ///
283    /// Intended for tests that need full control over the backing stores.
284    #[must_use]
285    pub fn from_parts(
286        sqlite: SqliteStore,
287        qdrant: Option<Arc<EmbeddingStore>>,
288        provider: AnyProvider,
289        embedding_model: impl Into<String>,
290        vector_weight: f64,
291        keyword_weight: f64,
292        token_counter: Arc<TokenCounter>,
293    ) -> Self {
294        Self {
295            sqlite,
296            qdrant,
297            provider,
298            embedding_model: embedding_model.into(),
299            vector_weight,
300            keyword_weight,
301            temporal_decay_enabled: false,
302            temporal_decay_half_life_days: 30,
303            mmr_enabled: false,
304            mmr_lambda: 0.7,
305            importance_enabled: false,
306            importance_weight: 0.15,
307            tier_boost_semantic: 1.3,
308            token_counter,
309            graph_store: None,
310            community_detection_failures: Arc::new(AtomicU64::new(0)),
311            graph_extraction_count: Arc::new(AtomicU64::new(0)),
312            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
313            admission_control: None,
314        }
315    }
316
317    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
318    ///
319    /// # Errors
320    ///
321    /// Returns an error if `SQLite` cannot be initialized.
322    pub async fn with_sqlite_backend(
323        sqlite_path: &str,
324        provider: AnyProvider,
325        embedding_model: &str,
326        vector_weight: f64,
327        keyword_weight: f64,
328    ) -> Result<Self, MemoryError> {
329        Self::with_sqlite_backend_and_pool_size(
330            sqlite_path,
331            provider,
332            embedding_model,
333            vector_weight,
334            keyword_weight,
335            5,
336        )
337        .await
338    }
339
340    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
341    ///
342    /// # Errors
343    ///
344    /// Returns an error if `SQLite` cannot be initialized.
345    pub async fn with_sqlite_backend_and_pool_size(
346        sqlite_path: &str,
347        provider: AnyProvider,
348        embedding_model: &str,
349        vector_weight: f64,
350        keyword_weight: f64,
351        pool_size: u32,
352    ) -> Result<Self, MemoryError> {
353        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
354        let pool = sqlite.pool().clone();
355        let store = EmbeddingStore::new_sqlite(pool);
356
357        Ok(Self {
358            sqlite,
359            qdrant: Some(Arc::new(store)),
360            provider,
361            embedding_model: embedding_model.into(),
362            vector_weight,
363            keyword_weight,
364            temporal_decay_enabled: false,
365            temporal_decay_half_life_days: 30,
366            mmr_enabled: false,
367            mmr_lambda: 0.7,
368            importance_enabled: false,
369            importance_weight: 0.15,
370            tier_boost_semantic: 1.3,
371            token_counter: Arc::new(TokenCounter::new()),
372            graph_store: None,
373            community_detection_failures: Arc::new(AtomicU64::new(0)),
374            graph_extraction_count: Arc::new(AtomicU64::new(0)),
375            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
376            admission_control: None,
377        })
378    }
379
380    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
381    #[must_use]
382    pub fn sqlite(&self) -> &SqliteStore {
383        &self.sqlite
384    }
385
386    /// Check if the vector store backend is reachable.
387    ///
388    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
389    /// instead of just checking whether the client was created.
390    pub async fn is_vector_store_connected(&self) -> bool {
391        match self.qdrant.as_ref() {
392            Some(store) => store.health_check().await,
393            None => false,
394        }
395    }
396
397    /// Check if a vector store client is configured (may not be connected).
398    #[must_use]
399    pub fn has_vector_store(&self) -> bool {
400        self.qdrant.is_some()
401    }
402
403    /// Return a reference to the embedding store, if configured.
404    #[must_use]
405    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
406        self.qdrant.as_ref()
407    }
408
409    /// Count messages in a conversation.
410    ///
411    /// # Errors
412    ///
413    /// Returns an error if the query fails.
414    pub async fn message_count(
415        &self,
416        conversation_id: crate::types::ConversationId,
417    ) -> Result<i64, MemoryError> {
418        self.sqlite.count_messages(conversation_id).await
419    }
420
421    /// Count messages not yet covered by any summary.
422    ///
423    /// # Errors
424    ///
425    /// Returns an error if the query fails.
426    pub async fn unsummarized_message_count(
427        &self,
428        conversation_id: crate::types::ConversationId,
429    ) -> Result<i64, MemoryError> {
430        let after_id = self
431            .sqlite
432            .latest_summary_last_message_id(conversation_id)
433            .await?
434            .unwrap_or(crate::types::MessageId(0));
435        self.sqlite
436            .count_messages_after(conversation_id, after_id)
437            .await
438    }
439}
zeph_memory/semantic/mod.rs

zeph_memory/semantic/
mod.rs