Skip to main content

zeph_memory/semantic/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4mod algorithms;
5mod corrections;
6mod cross_session;
7mod graph;
8pub(crate) mod importance;
9mod recall;
10mod summarization;
11
12#[cfg(test)]
13mod tests;
14
15use std::sync::Arc;
16use std::sync::atomic::AtomicU64;
17
18use zeph_llm::any::AnyProvider;
19
20use crate::embedding_store::EmbeddingStore;
21use crate::error::MemoryError;
22use crate::sqlite::SqliteStore;
23use crate::token_counter::TokenCounter;
24
/// Collection identifier `zeph_session_summaries`.
///
/// NOTE(review): presumably the vector-store collection holding session summaries; the
/// consumers are not visible in this file — confirm against `cross_session`.
pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";
/// Collection identifier `zeph_key_facts`.
///
/// NOTE(review): presumably the vector-store collection holding extracted key facts; usage is
/// not visible in this file — confirm.
pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";
/// Collection identifier `zeph_corrections`.
///
/// NOTE(review): presumably the vector-store collection used by the `corrections` submodule;
/// usage is not visible in this file — confirm.
pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";
28
29pub use algorithms::{apply_mmr, apply_temporal_decay};
30pub use cross_session::SessionSummaryResult;
31pub use graph::{
32    ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
33    PostExtractValidator, extract_and_store, link_memory_notes,
34};
35pub use recall::RecalledMessage;
36pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
37
/// Semantic memory handle bundling the `SQLite` store, an optional embedding (vector) store,
/// the provider handle, and the settings used for recall ranking.
///
/// Instances are created through the constructors on this type and then customised with the
/// consuming `with_*` methods (`with_graph_store`, `with_ranking_options`,
/// `with_importance_options`, `with_tier_boost`).
pub struct SemanticMemory {
    /// Backing `SQLite` store; exposed read-only through [`SemanticMemory::sqlite`].
    pub(crate) sqlite: SqliteStore,
    /// Embedding store, if one is configured.
    ///
    /// Despite the field name, this may wrap either a Qdrant-backed store or the
    /// `SQLite`-embedded vector backend, depending on which constructor was used. It is `None`
    /// when the Qdrant-backed store could not be created.
    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
    /// Provider handle supplied by the caller at construction.
    ///
    /// NOTE(review): presumably used by the submodules for embedding/LLM calls — not visible
    /// in this file.
    pub(crate) provider: AnyProvider,
    /// Embedding model identifier, stored as passed to the constructor.
    pub(crate) embedding_model: String,
    /// Weight given to vector results in hybrid search (`0.7` when built via `new`).
    pub(crate) vector_weight: f64,
    /// Weight given to keyword results in hybrid search (`0.3` when built via `new`).
    pub(crate) keyword_weight: f64,
    /// Whether temporal decay is applied; starts as `false`, set via `with_ranking_options`.
    pub(crate) temporal_decay_enabled: bool,
    /// Half-life, in days, for temporal decay; starts at `30`.
    pub(crate) temporal_decay_half_life_days: u32,
    /// Whether MMR re-ranking is applied; starts as `false`, set via `with_ranking_options`.
    pub(crate) mmr_enabled: bool,
    /// MMR lambda parameter; starts at `0.7`.
    pub(crate) mmr_lambda: f32,
    /// Whether write-time importance scoring is used; starts as `false`, set via
    /// `with_importance_options`.
    pub(crate) importance_enabled: bool,
    /// Weight of the importance score; starts at `0.15`.
    pub(crate) importance_weight: f64,
    /// Multiplicative score boost for semantic-tier messages in recall ranking.
    /// Default: `1.3`. Disabled when set to `1.0`.
    pub(crate) tier_boost_semantic: f64,
    /// Shared token counter; the constructors create a fresh one, `from_parts` accepts one.
    pub token_counter: Arc<TokenCounter>,
    /// Optional graph store; `None` until attached with `with_graph_store`.
    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
    /// Cumulative count of community detection failures; starts at zero.
    pub(crate) community_detection_failures: Arc<AtomicU64>,
    /// Cumulative count of successful graph extractions; starts at zero.
    pub(crate) graph_extraction_count: Arc<AtomicU64>,
    /// Cumulative count of failed graph extractions; starts at zero.
    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
}
60
61impl SemanticMemory {
62    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
63    ///
64    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
65    ///
66    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
67    /// a single gRPC channel across all subsystems.
68    ///
69    /// # Errors
70    ///
71    /// Returns an error if `SQLite` cannot be initialized.
72    pub async fn new(
73        sqlite_path: &str,
74        qdrant_url: &str,
75        provider: AnyProvider,
76        embedding_model: &str,
77    ) -> Result<Self, MemoryError> {
78        Self::with_weights(sqlite_path, qdrant_url, provider, embedding_model, 0.7, 0.3).await
79    }
80
81    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
82    ///
83    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
84    /// a single gRPC channel across all subsystems.
85    ///
86    /// # Errors
87    ///
88    /// Returns an error if `SQLite` cannot be initialized.
89    pub async fn with_weights(
90        sqlite_path: &str,
91        qdrant_url: &str,
92        provider: AnyProvider,
93        embedding_model: &str,
94        vector_weight: f64,
95        keyword_weight: f64,
96    ) -> Result<Self, MemoryError> {
97        Self::with_weights_and_pool_size(
98            sqlite_path,
99            qdrant_url,
100            provider,
101            embedding_model,
102            vector_weight,
103            keyword_weight,
104            5,
105        )
106        .await
107    }
108
109    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
110    ///
111    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
112    /// a single gRPC channel across all subsystems.
113    ///
114    /// # Errors
115    ///
116    /// Returns an error if `SQLite` cannot be initialized.
117    pub async fn with_weights_and_pool_size(
118        sqlite_path: &str,
119        qdrant_url: &str,
120        provider: AnyProvider,
121        embedding_model: &str,
122        vector_weight: f64,
123        keyword_weight: f64,
124        pool_size: u32,
125    ) -> Result<Self, MemoryError> {
126        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
127        let pool = sqlite.pool().clone();
128
129        let qdrant = match EmbeddingStore::new(qdrant_url, pool) {
130            Ok(store) => Some(Arc::new(store)),
131            Err(e) => {
132                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
133                None
134            }
135        };
136
137        Ok(Self {
138            sqlite,
139            qdrant,
140            provider,
141            embedding_model: embedding_model.into(),
142            vector_weight,
143            keyword_weight,
144            temporal_decay_enabled: false,
145            temporal_decay_half_life_days: 30,
146            mmr_enabled: false,
147            mmr_lambda: 0.7,
148            importance_enabled: false,
149            importance_weight: 0.15,
150            tier_boost_semantic: 1.3,
151            token_counter: Arc::new(TokenCounter::new()),
152            graph_store: None,
153            community_detection_failures: Arc::new(AtomicU64::new(0)),
154            graph_extraction_count: Arc::new(AtomicU64::new(0)),
155            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
156        })
157    }
158
159    /// Create a `SemanticMemory` from a pre-built `QdrantOps` instance.
160    ///
161    /// Use this at bootstrap to share one `QdrantOps` (and thus one gRPC channel)
162    /// across all subsystems. The `ops` is consumed and wrapped inside `EmbeddingStore`.
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if `SQLite` cannot be initialized.
167    pub async fn with_qdrant_ops(
168        sqlite_path: &str,
169        ops: crate::QdrantOps,
170        provider: AnyProvider,
171        embedding_model: &str,
172        vector_weight: f64,
173        keyword_weight: f64,
174        pool_size: u32,
175    ) -> Result<Self, MemoryError> {
176        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
177        let pool = sqlite.pool().clone();
178        let store = EmbeddingStore::with_store(Box::new(ops), pool);
179
180        Ok(Self {
181            sqlite,
182            qdrant: Some(Arc::new(store)),
183            provider,
184            embedding_model: embedding_model.into(),
185            vector_weight,
186            keyword_weight,
187            temporal_decay_enabled: false,
188            temporal_decay_half_life_days: 30,
189            mmr_enabled: false,
190            mmr_lambda: 0.7,
191            importance_enabled: false,
192            importance_weight: 0.15,
193            tier_boost_semantic: 1.3,
194            token_counter: Arc::new(TokenCounter::new()),
195            graph_store: None,
196            community_detection_failures: Arc::new(AtomicU64::new(0)),
197            graph_extraction_count: Arc::new(AtomicU64::new(0)),
198            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
199        })
200    }
201
202    /// Attach a `GraphStore` for graph-aware retrieval.
203    ///
204    /// When set, `recall_graph` traverses the graph starting from entities
205    /// matched by the query.
206    #[must_use]
207    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
208        self.graph_store = Some(store);
209        self
210    }
211
212    /// Returns the cumulative count of community detection failures since startup.
213    #[must_use]
214    pub fn community_detection_failures(&self) -> u64 {
215        use std::sync::atomic::Ordering;
216        self.community_detection_failures.load(Ordering::Relaxed)
217    }
218
219    /// Returns the cumulative count of successful graph extractions since startup.
220    #[must_use]
221    pub fn graph_extraction_count(&self) -> u64 {
222        use std::sync::atomic::Ordering;
223        self.graph_extraction_count.load(Ordering::Relaxed)
224    }
225
226    /// Returns the cumulative count of failed graph extractions since startup.
227    #[must_use]
228    pub fn graph_extraction_failures(&self) -> u64 {
229        use std::sync::atomic::Ordering;
230        self.graph_extraction_failures.load(Ordering::Relaxed)
231    }
232
233    /// Configure temporal decay and MMR re-ranking options.
234    #[must_use]
235    pub fn with_ranking_options(
236        mut self,
237        temporal_decay_enabled: bool,
238        temporal_decay_half_life_days: u32,
239        mmr_enabled: bool,
240        mmr_lambda: f32,
241    ) -> Self {
242        self.temporal_decay_enabled = temporal_decay_enabled;
243        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
244        self.mmr_enabled = mmr_enabled;
245        self.mmr_lambda = mmr_lambda;
246        self
247    }
248
249    /// Configure write-time importance scoring for memory retrieval.
250    #[must_use]
251    pub fn with_importance_options(mut self, enabled: bool, weight: f64) -> Self {
252        self.importance_enabled = enabled;
253        self.importance_weight = weight;
254        self
255    }
256
257    /// Configure the multiplicative score boost applied to semantic-tier messages during recall.
258    ///
259    /// Set to `1.0` to disable the boost. Default: `1.3`.
260    #[must_use]
261    pub fn with_tier_boost(mut self, boost: f64) -> Self {
262        self.tier_boost_semantic = boost;
263        self
264    }
265
266    /// Construct a `SemanticMemory` from pre-built parts.
267    ///
268    /// Intended for tests that need full control over the backing stores.
269    #[must_use]
270    pub fn from_parts(
271        sqlite: SqliteStore,
272        qdrant: Option<Arc<EmbeddingStore>>,
273        provider: AnyProvider,
274        embedding_model: impl Into<String>,
275        vector_weight: f64,
276        keyword_weight: f64,
277        token_counter: Arc<TokenCounter>,
278    ) -> Self {
279        Self {
280            sqlite,
281            qdrant,
282            provider,
283            embedding_model: embedding_model.into(),
284            vector_weight,
285            keyword_weight,
286            temporal_decay_enabled: false,
287            temporal_decay_half_life_days: 30,
288            mmr_enabled: false,
289            mmr_lambda: 0.7,
290            importance_enabled: false,
291            importance_weight: 0.15,
292            tier_boost_semantic: 1.3,
293            token_counter,
294            graph_store: None,
295            community_detection_failures: Arc::new(AtomicU64::new(0)),
296            graph_extraction_count: Arc::new(AtomicU64::new(0)),
297            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
298        }
299    }
300
301    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
302    ///
303    /// # Errors
304    ///
305    /// Returns an error if `SQLite` cannot be initialized.
306    pub async fn with_sqlite_backend(
307        sqlite_path: &str,
308        provider: AnyProvider,
309        embedding_model: &str,
310        vector_weight: f64,
311        keyword_weight: f64,
312    ) -> Result<Self, MemoryError> {
313        Self::with_sqlite_backend_and_pool_size(
314            sqlite_path,
315            provider,
316            embedding_model,
317            vector_weight,
318            keyword_weight,
319            5,
320        )
321        .await
322    }
323
324    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
325    ///
326    /// # Errors
327    ///
328    /// Returns an error if `SQLite` cannot be initialized.
329    pub async fn with_sqlite_backend_and_pool_size(
330        sqlite_path: &str,
331        provider: AnyProvider,
332        embedding_model: &str,
333        vector_weight: f64,
334        keyword_weight: f64,
335        pool_size: u32,
336    ) -> Result<Self, MemoryError> {
337        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
338        let pool = sqlite.pool().clone();
339        let store = EmbeddingStore::new_sqlite(pool);
340
341        Ok(Self {
342            sqlite,
343            qdrant: Some(Arc::new(store)),
344            provider,
345            embedding_model: embedding_model.into(),
346            vector_weight,
347            keyword_weight,
348            temporal_decay_enabled: false,
349            temporal_decay_half_life_days: 30,
350            mmr_enabled: false,
351            mmr_lambda: 0.7,
352            importance_enabled: false,
353            importance_weight: 0.15,
354            tier_boost_semantic: 1.3,
355            token_counter: Arc::new(TokenCounter::new()),
356            graph_store: None,
357            community_detection_failures: Arc::new(AtomicU64::new(0)),
358            graph_extraction_count: Arc::new(AtomicU64::new(0)),
359            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
360        })
361    }
362
363    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
364    #[must_use]
365    pub fn sqlite(&self) -> &SqliteStore {
366        &self.sqlite
367    }
368
369    /// Check if the vector store backend is reachable.
370    ///
371    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
372    /// instead of just checking whether the client was created.
373    pub async fn is_vector_store_connected(&self) -> bool {
374        match self.qdrant.as_ref() {
375            Some(store) => store.health_check().await,
376            None => false,
377        }
378    }
379
380    /// Check if a vector store client is configured (may not be connected).
381    #[must_use]
382    pub fn has_vector_store(&self) -> bool {
383        self.qdrant.is_some()
384    }
385
386    /// Return a reference to the embedding store, if configured.
387    #[must_use]
388    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
389        self.qdrant.as_ref()
390    }
391
392    /// Count messages in a conversation.
393    ///
394    /// # Errors
395    ///
396    /// Returns an error if the query fails.
397    pub async fn message_count(
398        &self,
399        conversation_id: crate::types::ConversationId,
400    ) -> Result<i64, MemoryError> {
401        self.sqlite.count_messages(conversation_id).await
402    }
403
404    /// Count messages not yet covered by any summary.
405    ///
406    /// # Errors
407    ///
408    /// Returns an error if the query fails.
409    pub async fn unsummarized_message_count(
410        &self,
411        conversation_id: crate::types::ConversationId,
412    ) -> Result<i64, MemoryError> {
413        let after_id = self
414            .sqlite
415            .latest_summary_last_message_id(conversation_id)
416            .await?
417            .unwrap_or(crate::types::MessageId(0));
418        self.sqlite
419            .count_messages_after(conversation_id, after_id)
420            .await
421    }
422}