// zeph_memory/semantic/mod.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4mod algorithms;
5mod corrections;
6mod cross_session;
7mod graph;
8pub(crate) mod importance;
9pub mod persona;
10mod recall;
11mod summarization;
12pub(crate) mod write_buffer;
13
14#[cfg(test)]
15mod tests;
16
17use std::sync::Arc;
18use std::sync::atomic::AtomicU64;
19
20use zeph_llm::any::AnyProvider;
21
22use crate::admission::AdmissionControl;
23use crate::embedding_store::EmbeddingStore;
24use crate::error::MemoryError;
25use crate::store::SqliteStore;
26use crate::token_counter::TokenCounter;
27
28pub(crate) const SESSION_SUMMARIES_COLLECTION: &str = "zeph_session_summaries";
29pub(crate) const KEY_FACTS_COLLECTION: &str = "zeph_key_facts";
30pub(crate) const CORRECTIONS_COLLECTION: &str = "zeph_corrections";
31
32pub use algorithms::{apply_mmr, apply_temporal_decay};
33pub use cross_session::SessionSummaryResult;
34pub use graph::{
35    ExtractionResult, ExtractionStats, GraphExtractionConfig, LinkingStats, NoteLinkingConfig,
36    PostExtractValidator, extract_and_store, link_memory_notes,
37};
38pub use persona::{
39    PersonaExtractionConfig, contains_self_referential_language, extract_persona_facts,
40};
41pub use recall::{EmbedContext, RecalledMessage};
42pub use summarization::{StructuredSummary, Summary, build_summarization_prompt};
43pub use write_buffer::{BufferedWrite, WriteBuffer};
44
45pub struct SemanticMemory {
46    pub(crate) sqlite: SqliteStore,
47    pub(crate) qdrant: Option<Arc<EmbeddingStore>>,
48    pub(crate) provider: AnyProvider,
49    /// Dedicated provider for batch embedding calls (backfill, write-path embedding).
50    ///
51    /// When `Some`, all embedding I/O is routed through this provider instead of `provider`.
52    /// This prevents `embed_backfill` from saturating the main provider and causing guardrail
53    /// timeouts. When `None`, falls back to `provider`.
54    pub(crate) embed_provider: Option<AnyProvider>,
55    pub(crate) embedding_model: String,
56    pub(crate) vector_weight: f64,
57    pub(crate) keyword_weight: f64,
58    pub(crate) temporal_decay_enabled: bool,
59    pub(crate) temporal_decay_half_life_days: u32,
60    pub(crate) mmr_enabled: bool,
61    pub(crate) mmr_lambda: f32,
62    pub(crate) importance_enabled: bool,
63    pub(crate) importance_weight: f64,
64    /// Multiplicative score boost for semantic-tier messages in recall ranking.
65    /// Default: `1.3`. Disabled when set to `1.0`.
66    pub(crate) tier_boost_semantic: f64,
67    pub token_counter: Arc<TokenCounter>,
68    pub graph_store: Option<Arc<crate::graph::GraphStore>>,
69    pub(crate) community_detection_failures: Arc<AtomicU64>,
70    pub(crate) graph_extraction_count: Arc<AtomicU64>,
71    pub(crate) graph_extraction_failures: Arc<AtomicU64>,
72    /// A-MAC admission control gate. When `Some`, each `remember()` call is evaluated.
73    pub(crate) admission_control: Option<Arc<AdmissionControl>>,
74}
75
76impl SemanticMemory {
77    /// Create a new `SemanticMemory` instance with default hybrid search weights (0.7/0.3).
78    ///
79    /// Qdrant connection is best-effort: if unavailable, semantic search is disabled.
80    ///
81    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
82    /// a single gRPC channel across all subsystems.
83    ///
84    /// # Errors
85    ///
86    /// Returns an error if `SQLite` cannot be initialized.
87    pub async fn new(
88        sqlite_path: &str,
89        qdrant_url: &str,
90        provider: AnyProvider,
91        embedding_model: &str,
92    ) -> Result<Self, MemoryError> {
93        Self::with_weights(sqlite_path, qdrant_url, provider, embedding_model, 0.7, 0.3).await
94    }
95
96    /// Create a new `SemanticMemory` with custom vector/keyword weights for hybrid search.
97    ///
98    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
99    /// a single gRPC channel across all subsystems.
100    ///
101    /// # Errors
102    ///
103    /// Returns an error if `SQLite` cannot be initialized.
104    pub async fn with_weights(
105        sqlite_path: &str,
106        qdrant_url: &str,
107        provider: AnyProvider,
108        embedding_model: &str,
109        vector_weight: f64,
110        keyword_weight: f64,
111    ) -> Result<Self, MemoryError> {
112        Self::with_weights_and_pool_size(
113            sqlite_path,
114            qdrant_url,
115            provider,
116            embedding_model,
117            vector_weight,
118            keyword_weight,
119            5,
120        )
121        .await
122    }
123
124    /// Create a new `SemanticMemory` with custom weights and configurable pool size.
125    ///
126    /// For `AppBuilder` bootstrap, prefer [`SemanticMemory::with_qdrant_ops`] to share
127    /// a single gRPC channel across all subsystems.
128    ///
129    /// # Errors
130    ///
131    /// Returns an error if `SQLite` cannot be initialized.
132    pub async fn with_weights_and_pool_size(
133        sqlite_path: &str,
134        qdrant_url: &str,
135        provider: AnyProvider,
136        embedding_model: &str,
137        vector_weight: f64,
138        keyword_weight: f64,
139        pool_size: u32,
140    ) -> Result<Self, MemoryError> {
141        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
142        let pool = sqlite.pool().clone();
143
144        let qdrant = match EmbeddingStore::new(qdrant_url, pool) {
145            Ok(store) => Some(Arc::new(store)),
146            Err(e) => {
147                tracing::warn!("Qdrant unavailable, semantic search disabled: {e:#}");
148                None
149            }
150        };
151
152        Ok(Self {
153            sqlite,
154            qdrant,
155            provider,
156            embed_provider: None,
157            embedding_model: embedding_model.into(),
158            vector_weight,
159            keyword_weight,
160            temporal_decay_enabled: false,
161            temporal_decay_half_life_days: 30,
162            mmr_enabled: false,
163            mmr_lambda: 0.7,
164            importance_enabled: false,
165            importance_weight: 0.15,
166            tier_boost_semantic: 1.3,
167            token_counter: Arc::new(TokenCounter::new()),
168            graph_store: None,
169            community_detection_failures: Arc::new(AtomicU64::new(0)),
170            graph_extraction_count: Arc::new(AtomicU64::new(0)),
171            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
172            admission_control: None,
173        })
174    }
175
176    /// Create a `SemanticMemory` from a pre-built `QdrantOps` instance.
177    ///
178    /// Use this at bootstrap to share one `QdrantOps` (and thus one gRPC channel)
179    /// across all subsystems. The `ops` is consumed and wrapped inside `EmbeddingStore`.
180    ///
181    /// # Errors
182    ///
183    /// Returns an error if `SQLite` cannot be initialized.
184    pub async fn with_qdrant_ops(
185        sqlite_path: &str,
186        ops: crate::QdrantOps,
187        provider: AnyProvider,
188        embedding_model: &str,
189        vector_weight: f64,
190        keyword_weight: f64,
191        pool_size: u32,
192    ) -> Result<Self, MemoryError> {
193        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
194        let pool = sqlite.pool().clone();
195        let store = EmbeddingStore::with_store(Box::new(ops), pool);
196
197        Ok(Self {
198            sqlite,
199            qdrant: Some(Arc::new(store)),
200            provider,
201            embed_provider: None,
202            embedding_model: embedding_model.into(),
203            vector_weight,
204            keyword_weight,
205            temporal_decay_enabled: false,
206            temporal_decay_half_life_days: 30,
207            mmr_enabled: false,
208            mmr_lambda: 0.7,
209            importance_enabled: false,
210            importance_weight: 0.15,
211            tier_boost_semantic: 1.3,
212            token_counter: Arc::new(TokenCounter::new()),
213            graph_store: None,
214            community_detection_failures: Arc::new(AtomicU64::new(0)),
215            graph_extraction_count: Arc::new(AtomicU64::new(0)),
216            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
217            admission_control: None,
218        })
219    }
220
221    /// Attach a `GraphStore` for graph-aware retrieval.
222    ///
223    /// When set, `recall_graph` traverses the graph starting from entities
224    /// matched by the query.
225    #[must_use]
226    pub fn with_graph_store(mut self, store: Arc<crate::graph::GraphStore>) -> Self {
227        self.graph_store = Some(store);
228        self
229    }
230
231    /// Returns the cumulative count of community detection failures since startup.
232    #[must_use]
233    pub fn community_detection_failures(&self) -> u64 {
234        use std::sync::atomic::Ordering;
235        self.community_detection_failures.load(Ordering::Relaxed)
236    }
237
238    /// Returns the cumulative count of successful graph extractions since startup.
239    #[must_use]
240    pub fn graph_extraction_count(&self) -> u64 {
241        use std::sync::atomic::Ordering;
242        self.graph_extraction_count.load(Ordering::Relaxed)
243    }
244
245    /// Returns the cumulative count of failed graph extractions since startup.
246    #[must_use]
247    pub fn graph_extraction_failures(&self) -> u64 {
248        use std::sync::atomic::Ordering;
249        self.graph_extraction_failures.load(Ordering::Relaxed)
250    }
251
252    /// Configure temporal decay and MMR re-ranking options.
253    #[must_use]
254    pub fn with_ranking_options(
255        mut self,
256        temporal_decay_enabled: bool,
257        temporal_decay_half_life_days: u32,
258        mmr_enabled: bool,
259        mmr_lambda: f32,
260    ) -> Self {
261        self.temporal_decay_enabled = temporal_decay_enabled;
262        self.temporal_decay_half_life_days = temporal_decay_half_life_days;
263        self.mmr_enabled = mmr_enabled;
264        self.mmr_lambda = mmr_lambda;
265        self
266    }
267
268    /// Configure write-time importance scoring for memory retrieval.
269    #[must_use]
270    pub fn with_importance_options(mut self, enabled: bool, weight: f64) -> Self {
271        self.importance_enabled = enabled;
272        self.importance_weight = weight;
273        self
274    }
275
276    /// Configure the multiplicative score boost applied to semantic-tier messages during recall.
277    ///
278    /// Set to `1.0` to disable the boost. Default: `1.3`.
279    #[must_use]
280    pub fn with_tier_boost(mut self, boost: f64) -> Self {
281        self.tier_boost_semantic = boost;
282        self
283    }
284
285    /// Attach an A-MAC admission controller.
286    ///
287    /// When set, `remember()` and `remember_with_parts()` evaluate each message before persisting.
288    /// Messages below the admission threshold return `Ok(None)` without incrementing counts.
289    #[must_use]
290    pub fn with_admission_control(mut self, control: AdmissionControl) -> Self {
291        self.admission_control = Some(Arc::new(control));
292        self
293    }
294
295    /// Attach a dedicated embedding provider for write-path and backfill operations.
296    ///
297    /// When set, all batch embedding calls (backfill, `remember`) route through this provider
298    /// instead of the main `provider`. This prevents `embed_backfill` from saturating the main
299    /// provider and causing guardrail timeouts due to rate-limit contention or Ollama model-lock.
300    #[must_use]
301    pub fn with_embed_provider(mut self, embed_provider: AnyProvider) -> Self {
302        self.embed_provider = Some(embed_provider);
303        self
304    }
305
306    /// Returns the provider to use for embedding calls.
307    ///
308    /// Returns the dedicated embed provider when configured, falling back to the main provider.
309    pub(crate) fn effective_embed_provider(&self) -> &AnyProvider {
310        self.embed_provider.as_ref().unwrap_or(&self.provider)
311    }
312
313    /// Construct a `SemanticMemory` from pre-built parts.
314    ///
315    /// Intended for tests that need full control over the backing stores.
316    #[must_use]
317    pub fn from_parts(
318        sqlite: SqliteStore,
319        qdrant: Option<Arc<EmbeddingStore>>,
320        provider: AnyProvider,
321        embedding_model: impl Into<String>,
322        vector_weight: f64,
323        keyword_weight: f64,
324        token_counter: Arc<TokenCounter>,
325    ) -> Self {
326        Self {
327            sqlite,
328            qdrant,
329            provider,
330            embed_provider: None,
331            embedding_model: embedding_model.into(),
332            vector_weight,
333            keyword_weight,
334            temporal_decay_enabled: false,
335            temporal_decay_half_life_days: 30,
336            mmr_enabled: false,
337            mmr_lambda: 0.7,
338            importance_enabled: false,
339            importance_weight: 0.15,
340            tier_boost_semantic: 1.3,
341            token_counter,
342            graph_store: None,
343            community_detection_failures: Arc::new(AtomicU64::new(0)),
344            graph_extraction_count: Arc::new(AtomicU64::new(0)),
345            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
346            admission_control: None,
347        }
348    }
349
350    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend.
351    ///
352    /// # Errors
353    ///
354    /// Returns an error if `SQLite` cannot be initialized.
355    pub async fn with_sqlite_backend(
356        sqlite_path: &str,
357        provider: AnyProvider,
358        embedding_model: &str,
359        vector_weight: f64,
360        keyword_weight: f64,
361    ) -> Result<Self, MemoryError> {
362        Self::with_sqlite_backend_and_pool_size(
363            sqlite_path,
364            provider,
365            embedding_model,
366            vector_weight,
367            keyword_weight,
368            5,
369        )
370        .await
371    }
372
373    /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend with configurable pool size.
374    ///
375    /// # Errors
376    ///
377    /// Returns an error if `SQLite` cannot be initialized.
378    pub async fn with_sqlite_backend_and_pool_size(
379        sqlite_path: &str,
380        provider: AnyProvider,
381        embedding_model: &str,
382        vector_weight: f64,
383        keyword_weight: f64,
384        pool_size: u32,
385    ) -> Result<Self, MemoryError> {
386        let sqlite = SqliteStore::with_pool_size(sqlite_path, pool_size).await?;
387        let pool = sqlite.pool().clone();
388        let store = EmbeddingStore::new_sqlite(pool);
389
390        Ok(Self {
391            sqlite,
392            qdrant: Some(Arc::new(store)),
393            provider,
394            embed_provider: None,
395            embedding_model: embedding_model.into(),
396            vector_weight,
397            keyword_weight,
398            temporal_decay_enabled: false,
399            temporal_decay_half_life_days: 30,
400            mmr_enabled: false,
401            mmr_lambda: 0.7,
402            importance_enabled: false,
403            importance_weight: 0.15,
404            tier_boost_semantic: 1.3,
405            token_counter: Arc::new(TokenCounter::new()),
406            graph_store: None,
407            community_detection_failures: Arc::new(AtomicU64::new(0)),
408            graph_extraction_count: Arc::new(AtomicU64::new(0)),
409            graph_extraction_failures: Arc::new(AtomicU64::new(0)),
410            admission_control: None,
411        })
412    }
413
414    /// Access the underlying `SqliteStore` for operations that don't involve semantics.
415    #[must_use]
416    pub fn sqlite(&self) -> &SqliteStore {
417        &self.sqlite
418    }
419
420    /// Check if the vector store backend is reachable.
421    ///
422    /// Performs a real health check (Qdrant gRPC ping or `SQLite` query)
423    /// instead of just checking whether the client was created.
424    pub async fn is_vector_store_connected(&self) -> bool {
425        match self.qdrant.as_ref() {
426            Some(store) => store.health_check().await,
427            None => false,
428        }
429    }
430
431    /// Check if a vector store client is configured (may not be connected).
432    #[must_use]
433    pub fn has_vector_store(&self) -> bool {
434        self.qdrant.is_some()
435    }
436
437    /// Return a reference to the embedding store, if configured.
438    #[must_use]
439    pub fn embedding_store(&self) -> Option<&Arc<EmbeddingStore>> {
440        self.qdrant.as_ref()
441    }
442
443    /// Return a reference to the underlying LLM provider (used for RPE embedding).
444    pub fn provider(&self) -> &AnyProvider {
445        &self.provider
446    }
447
448    /// Count messages in a conversation.
449    ///
450    /// # Errors
451    ///
452    /// Returns an error if the query fails.
453    pub async fn message_count(
454        &self,
455        conversation_id: crate::types::ConversationId,
456    ) -> Result<i64, MemoryError> {
457        self.sqlite.count_messages(conversation_id).await
458    }
459
460    /// Count messages not yet covered by any summary.
461    ///
462    /// # Errors
463    ///
464    /// Returns an error if the query fails.
465    pub async fn unsummarized_message_count(
466        &self,
467        conversation_id: crate::types::ConversationId,
468    ) -> Result<i64, MemoryError> {
469        let after_id = self
470            .sqlite
471            .latest_summary_last_message_id(conversation_id)
472            .await?
473            .unwrap_or(crate::types::MessageId(0));
474        self.sqlite
475            .count_messages_after(conversation_id, after_id)
476            .await
477    }
478}