Skip to main content

zeph_memory/semantic/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4mod algorithms;
5mod corrections;
6mod cross_session;
7mod graph;
8pub(crate) mod importance;
9mod recall;
10mod summarization;
11
12#[cfg(test)]
13mod tests;
14
15use std::sync::Arc;
16use std::sync::atomic::AtomicU64;
17
18use zeph_llm::any::AnyProvider;
19
20use crate::embedding_store::EmbeddingStore;
21use crate::error::MemoryError;
22use crate::sqlite::SqliteStore;
23use crate::token_counter::TokenCounter;
24
/// Name of the collection that holds session summaries.
///
/// NOTE(review): the consumers live in sibling modules not visible here; presumably this is
/// the vector-store collection name — confirm against `cross_session`.
pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";

/// Name of the collection that holds extracted key facts.
///
/// NOTE(review): usage is outside this file — confirm against the summarization code.
pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";

/// Name of the collection that holds stored corrections.
///
/// NOTE(review): usage is outside this file — confirm against `corrections`.
pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";
28
29pub use algorithms::{apply_mmr, apply_temporal_decay};
30pub use cross_session::SessionSummaryResult;
31pub use graph::{
32    ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
33    PostExtractValidator, extract_and_store, link_memory_notes,
34};
35pub use recall::RecalledMessage;
36pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
37
38pub struct SemanticMemory {
39    pub(crate) sqlite: SqliteStore,
40    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
41    pub(crate) provider: AnyProvider,
42    pub(crate) embedding_model: String,
43    pub(crate) vector_weight: f64,
44    pub(crate) keyword_weight: f64,
45    pub(crate) temporal_decay_enabled: bool,
46    pub(crate) temporal_decay_half_life_days: u32,
47    pub(crate) mmr_enabled: bool,
48    pub(crate) mmr_lambda: f32,
49    pub(crate) importance_enabled: bool,
50    pub(crate) importance_weight: f64,
51    pub token_counter: Arc<TokenCounter>,
52    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
53    pub(crate) community_detection_failures: Arc<AtomicU64>,
54    pub(crate) graph_extraction_count: Arc<AtomicU64>,
55    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
56}
57
58impl SemanticMemory {
59    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
60    ///
61    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
62    ///
63    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
64    /// a single gRPC channel across all subsystems.
65    ///
66    /// # Errors
67    ///
68    /// Returns an error if `SQLite` cannot be initialized.
69    pub async fn new(
70        sqlite_path: &str,
71        qdrant_url: &str,
72        provider: AnyProvider,
73        embedding_model: &str,
74    ) -> Result<Self, MemoryError> {
75        Self::with_weights(sqlite_path, qdrant_url, provider, embedding_model, 0.7, 0.3).await
76    }
77
78    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
79    ///
80    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
81    /// a single gRPC channel across all subsystems.
82    ///
83    /// # Errors
84    ///
85    /// Returns an error if `SQLite` cannot be initialized.
86    pub async fn with_weights(
87        sqlite_path: &str,
88        qdrant_url: &str,
89        provider: AnyProvider,
90        embedding_model: &str,
91        vector_weight: f64,
92        keyword_weight: f64,
93    ) -> Result<Self, MemoryError> {
94        Self::with_weights_and_pool_size(
95            sqlite_path,
96            qdrant_url,
97            provider,
98            embedding_model,
99            vector_weight,
100            keyword_weight,
101            5,
102        )
103        .await
104    }
105
106    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
107    ///
108    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
109    /// a single gRPC channel across all subsystems.
110    ///
111    /// # Errors
112    ///
113    /// Returns an error if `SQLite` cannot be initialized.
114    pub async fn with_weights_and_pool_size(
115        sqlite_path: &str,
116        qdrant_url: &str,
117        provider: AnyProvider,
118        embedding_model: &str,
119        vector_weight: f64,
120        keyword_weight: f64,
121        pool_size: u32,
122    ) -> Result<Self, MemoryError> {
123        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
124        let pool = sqlite.pool().clone();
125
126        let qdrant = match EmbeddingStore::new(qdrant_url, pool) {
127            Ok(store) => Some(Arc::new(store)),
128            Err(e) => {
129                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
130                None
131            }
132        };
133
134        Ok(Self {
135            sqlite,
136            qdrant,
137            provider,
138            embedding_model: embedding_model.into(),
139            vector_weight,
140            keyword_weight,
141            temporal_decay_enabled: false,
142            temporal_decay_half_life_days: 30,
143            mmr_enabled: false,
144            mmr_lambda: 0.7,
145            importance_enabled: false,
146            importance_weight: 0.15,
147            token_counter: Arc::new(TokenCounter::new()),
148            graph_store: None,
149            community_detection_failures: Arc::new(AtomicU64::new(0)),
150            graph_extraction_count: Arc::new(AtomicU64::new(0)),
151            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
152        })
153    }
154
155    /// Create a `SemanticMemory` from a pre-built `QdrantOps` instance.
156    ///
157    /// Use this at bootstrap to share one `QdrantOps` (and thus one gRPC channel)
158    /// across all subsystems. The `ops` is consumed and wrapped inside `EmbeddingStore`.
159    ///
160    /// # Errors
161    ///
162    /// Returns an error if `SQLite` cannot be initialized.
163    pub async fn with_qdrant_ops(
164        sqlite_path: &str,
165        ops: crate::QdrantOps,
166        provider: AnyProvider,
167        embedding_model: &str,
168        vector_weight: f64,
169        keyword_weight: f64,
170        pool_size: u32,
171    ) -> Result<Self, MemoryError> {
172        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
173        let pool = sqlite.pool().clone();
174        let store = EmbeddingStore::with_store(Box::new(ops), pool);
175
176        Ok(Self {
177            sqlite,
178            qdrant: Some(Arc::new(store)),
179            provider,
180            embedding_model: embedding_model.into(),
181            vector_weight,
182            keyword_weight,
183            temporal_decay_enabled: false,
184            temporal_decay_half_life_days: 30,
185            mmr_enabled: false,
186            mmr_lambda: 0.7,
187            importance_enabled: false,
188            importance_weight: 0.15,
189            token_counter: Arc::new(TokenCounter::new()),
190            graph_store: None,
191            community_detection_failures: Arc::new(AtomicU64::new(0)),
192            graph_extraction_count: Arc::new(AtomicU64::new(0)),
193            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
194        })
195    }
196
197    /// Attach a `GraphStore` for graph-aware retrieval.
198    ///
199    /// When set, `recall_graph` traverses the graph starting from entities
200    /// matched by the query.
201    #[must_use]
202    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
203        self.graph_store = Some(store);
204        self
205    }
206
207    /// Returns the cumulative count of community detection failures since startup.
208    #[must_use]
209    pub fn community_detection_failures(&self) -> u64 {
210        use std::sync::atomic::Ordering;
211        self.community_detection_failures.load(Ordering::Relaxed)
212    }
213
214    /// Returns the cumulative count of successful graph extractions since startup.
215    #[must_use]
216    pub fn graph_extraction_count(&self) -> u64 {
217        use std::sync::atomic::Ordering;
218        self.graph_extraction_count.load(Ordering::Relaxed)
219    }
220
221    /// Returns the cumulative count of failed graph extractions since startup.
222    #[must_use]
223    pub fn graph_extraction_failures(&self) -> u64 {
224        use std::sync::atomic::Ordering;
225        self.graph_extraction_failures.load(Ordering::Relaxed)
226    }
227
228    /// Configure temporal decay and MMR re-ranking options.
229    #[must_use]
230    pub fn with_ranking_options(
231        mut self,
232        temporal_decay_enabled: bool,
233        temporal_decay_half_life_days: u32,
234        mmr_enabled: bool,
235        mmr_lambda: f32,
236    ) -> Self {
237        self.temporal_decay_enabled = temporal_decay_enabled;
238        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
239        self.mmr_enabled = mmr_enabled;
240        self.mmr_lambda = mmr_lambda;
241        self
242    }
243
244    /// Configure write-time importance scoring for memory retrieval.
245    #[must_use]
246    pub fn with_importance_options(mut self, enabled: bool, weight: f64) -> Self {
247        self.importance_enabled = enabled;
248        self.importance_weight = weight;
249        self
250    }
251
252    /// Construct a `SemanticMemory` from pre-built parts.
253    ///
254    /// Intended for tests that need full control over the backing stores.
255    #[must_use]
256    pub fn from_parts(
257        sqlite: SqliteStore,
258        qdrant: Option<Arc<EmbeddingStore>>,
259        provider: AnyProvider,
260        embedding_model: impl Into<String>,
261        vector_weight: f64,
262        keyword_weight: f64,
263        token_counter: Arc<TokenCounter>,
264    ) -> Self {
265        Self {
266            sqlite,
267            qdrant,
268            provider,
269            embedding_model: embedding_model.into(),
270            vector_weight,
271            keyword_weight,
272            temporal_decay_enabled: false,
273            temporal_decay_half_life_days: 30,
274            mmr_enabled: false,
275            mmr_lambda: 0.7,
276            importance_enabled: false,
277            importance_weight: 0.15,
278            token_counter,
279            graph_store: None,
280            community_detection_failures: Arc::new(AtomicU64::new(0)),
281            graph_extraction_count: Arc::new(AtomicU64::new(0)),
282            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
283        }
284    }
285
286    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
287    ///
288    /// # Errors
289    ///
290    /// Returns an error if `SQLite` cannot be initialized.
291    pub async fn with_sqlite_backend(
292        sqlite_path: &str,
293        provider: AnyProvider,
294        embedding_model: &str,
295        vector_weight: f64,
296        keyword_weight: f64,
297    ) -> Result<Self, MemoryError> {
298        Self::with_sqlite_backend_and_pool_size(
299            sqlite_path,
300            provider,
301            embedding_model,
302            vector_weight,
303            keyword_weight,
304            5,
305        )
306        .await
307    }
308
309    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
310    ///
311    /// # Errors
312    ///
313    /// Returns an error if `SQLite` cannot be initialized.
314    pub async fn with_sqlite_backend_and_pool_size(
315        sqlite_path: &str,
316        provider: AnyProvider,
317        embedding_model: &str,
318        vector_weight: f64,
319        keyword_weight: f64,
320        pool_size: u32,
321    ) -> Result<Self, MemoryError> {
322        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
323        let pool = sqlite.pool().clone();
324        let store = EmbeddingStore::new_sqlite(pool);
325
326        Ok(Self {
327            sqlite,
328            qdrant: Some(Arc::new(store)),
329            provider,
330            embedding_model: embedding_model.into(),
331            vector_weight,
332            keyword_weight,
333            temporal_decay_enabled: false,
334            temporal_decay_half_life_days: 30,
335            mmr_enabled: false,
336            mmr_lambda: 0.7,
337            importance_enabled: false,
338            importance_weight: 0.15,
339            token_counter: Arc::new(TokenCounter::new()),
340            graph_store: None,
341            community_detection_failures: Arc::new(AtomicU64::new(0)),
342            graph_extraction_count: Arc::new(AtomicU64::new(0)),
343            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
344        })
345    }
346
347    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
348    #[must_use]
349    pub fn sqlite(&self) -> &SqliteStore {
350        &self.sqlite
351    }
352
353    /// Check if the vector store backend is reachable.
354    ///
355    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
356    /// instead of just checking whether the client was created.
357    pub async fn is_vector_store_connected(&self) -> bool {
358        match self.qdrant.as_ref() {
359            Some(store) => store.health_check().await,
360            None => false,
361        }
362    }
363
364    /// Check if a vector store client is configured (may not be connected).
365    #[must_use]
366    pub fn has_vector_store(&self) -> bool {
367        self.qdrant.is_some()
368    }
369
370    /// Return a reference to the embedding store, if configured.
371    #[must_use]
372    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
373        self.qdrant.as_ref()
374    }
375
376    /// Count messages in a conversation.
377    ///
378    /// # Errors
379    ///
380    /// Returns an error if the query fails.
381    pub async fn message_count(
382        &self,
383        conversation_id: crate::types::ConversationId,
384    ) -> Result<i64, MemoryError> {
385        self.sqlite.count_messages(conversation_id).await
386    }
387
388    /// Count messages not yet covered by any summary.
389    ///
390    /// # Errors
391    ///
392    /// Returns an error if the query fails.
393    pub async fn unsummarized_message_count(
394        &self,
395        conversation_id: crate::types::ConversationId,
396    ) -> Result<i64, MemoryError> {
397        let after_id = self
398            .sqlite
399            .latest_summary_last_message_id(conversation_id)
400            .await?
401            .unwrap_or(crate::types::MessageId(0));
402        self.sqlite
403            .count_messages_after(conversation_id, after_id)
404            .await
405    }
406}