// mnemefusion_core/memory.rs
1//! Memory engine - main API entry point
2//!
3//! The MemoryEngine struct provides the primary interface for interacting
4//! with a MnemeFusion database.
5
6use crate::{
7 config::Config,
8 error::{Error, Result},
9 graph::{CausalTraversalResult, GraphManager},
10 index::{BM25Config, BM25Index, TemporalIndex, VectorIndex, VectorIndexConfig},
11 ingest::IngestionPipeline,
12 query::{profile_search::fact_embedding_key, FusedResult, IntentClassification, QueryPlanner},
13 storage::StorageEngine,
14 trace::{Trace, TraceRecorder},
15 types::{
16 AddResult, BatchResult, Entity, EntityProfile, Memory, MemoryId, MemoryInput,
17 MetadataFilter, Source, Timestamp, UpsertResult,
18 },
19};
20use std::cell::RefCell;
21use std::collections::HashMap;
22use std::path::Path;
23use std::sync::{Arc, RwLock};
24
25#[cfg(feature = "slm")]
26use crate::ingest::SlmMetadataExtractor;
27#[cfg(feature = "slm")]
28use std::sync::Mutex;
29
30#[cfg(feature = "entity-extraction")]
31use crate::extraction::{LlmEntityExtractor, ModelTier};
32#[cfg(all(feature = "entity-extraction", not(feature = "slm")))]
33use std::sync::Mutex;
34
/// Callback type for computing embeddings at ingestion time.
///
/// Provided by the caller (e.g., Python's `SentenceTransformer.encode()`).
/// Called for each fact text during ingestion to compute fact embeddings.
/// `Send + Sync` bounds let the same callback be shared across threads via the `Arc`.
pub type EmbeddingFn = Arc<dyn Fn(&str) -> Vec<f32> + Send + Sync>;
40
/// Build contextual text for embedding by prepending speaker metadata.
///
/// Prepending the speaker name helps the embedding model distinguish the same
/// statement made by different people. Only the embedding text carries the
/// speaker context; the stored content is never modified.
///
/// Returns the original content unchanged when metadata has no non-empty
/// "speaker" entry.
pub fn contextualize_for_embedding(content: &str, metadata: &HashMap<String, String>) -> String {
    match metadata.get("speaker") {
        Some(speaker) if !speaker.is_empty() => format!("{}: {}", speaker, content),
        _ => content.to_string(),
    }
}
56
57/// Convert first-person pronouns in content to third-person using the speaker's name.
58///
59/// Used at embedding time: "I joined a gym" → "Alice joined a gym" produces ~+0.25 better
60/// cosine similarity with queries like "What fitness activities does Alice do?" when using
61/// symmetric bi-encoders (BGE-base-en-v1.5).
62///
63/// Substitution order (specific before general, to avoid double-matching):
64/// contractions/phrases → standalone "I" → reflexive → possessive → object.
65///
66/// The original content is **not** modified — only the embedding text changes.
67/// Returns the original string unchanged when no first-person pronouns are found
68/// (i.e., `result == content` signals a no-op).
69pub fn first_person_to_third(content: &str, speaker: &str) -> String {
70 use regex::{NoExpand, Regex};
71
72 // Ordered rules: longer/more-specific patterns first to prevent double-substitution.
73 // Each entry is (regex_pattern, replacement_factory).
74 let rules: &[(&str, fn(&str) -> String)] = &[
75 // Contractions (case-insensitive: "I'm" and "i'm" both match)
76 (r"(?i)\bI'm\b", |s| format!("{} is", s)),
77 (r"(?i)\bI've\b", |s| format!("{} has", s)),
78 (r"(?i)\bI'll\b", |s| format!("{} will", s)),
79 (r"(?i)\bI'd\b", |s| format!("{} would", s)),
80 // 2-word phrases (always capitalized in English)
81 (r"\bI am\b", |s| format!("{} is", s)),
82 (r"\bI have\b", |s| format!("{} has", s)),
83 (r"\bI will\b", |s| format!("{} will", s)),
84 (r"\bI would\b", |s| format!("{} would", s)),
85 // Standalone "I" (always capitalized in English; case-sensitive is fine)
86 (r"\bI\b", |s| s.to_string()),
87 // "myself" / "Myself" before "my" to prevent double-substitution
88 (r"(?i)\bmyself\b", |s| s.to_string()),
89 // Possessive (case-insensitive: "My" at sentence start, "my" mid-sentence)
90 (r"(?i)\bmy\b", |s| format!("{}'s", s)),
91 (r"(?i)\bmine\b", |s| format!("{}'s", s)),
92 // Object pronoun (case-insensitive: "Me and Alice went..." edge case)
93 (r"(?i)\bme\b", |s| s.to_string()),
94 ];
95
96 let mut result = content.to_string();
97 for (pattern, make_repl) in rules {
98 let repl = make_repl(speaker);
99 // NoExpand: treat replacement literally (safe when speaker contains '$')
100 result = Regex::new(pattern)
101 .expect("valid first_person_to_third pattern")
102 .replace_all(&result, NoExpand(repl.as_str()))
103 .into_owned();
104 }
105 result
106}
107
/// Main memory engine interface
///
/// This is the primary entry point for all MnemeFusion operations.
/// It coordinates storage, indexing, and retrieval across all dimensions.
pub struct MemoryEngine {
    /// Storage engine shared (via `Arc`) with all indexes and the pipeline.
    storage: Arc<StorageEngine>,
    /// Vector (HNSW-parameterized) index; `RwLock` so ingestion and query
    /// paths can share it.
    vector_index: Arc<RwLock<VectorIndex>>,
    /// Lexical BM25 index.
    bm25_index: Arc<BM25Index>,
    /// Time-based index over memories.
    temporal_index: Arc<TemporalIndex>,
    /// Entity/relationship graph; `RwLock` for shared mutation.
    graph_manager: Arc<RwLock<GraphManager>>,
    /// Ingestion pipeline (extraction + index updates).
    pipeline: IngestionPipeline,
    /// Plans and executes queries across all indexes.
    query_planner: QueryPlanner,
    /// Engine configuration. Note: `embedding_dim` may be overwritten during
    /// `open()` to match the dimension detected from an existing database.
    config: Config,
    /// Auto-embedding engine. Set via `Config::embedding_model` or `with_embedding_engine()`.
    #[cfg(feature = "embedding-onnx")]
    embedding_engine: Option<std::sync::Arc<crate::embedding::EmbeddingEngine>>,
    /// Default namespace applied to all add/query calls when `namespace` arg is None.
    /// Set via `with_user(user_name)`.
    default_namespace: Option<String>,
    /// User entity name for first-person pronoun resolution in queries.
    /// When set, queries containing "I", "me", "my" automatically include this
    /// entity in profile injection. Separate from `default_namespace` so it can
    /// be used without enabling namespace filtering.
    /// Set via `set_user_entity(name)`.
    user_entity: Option<String>,
    /// Last query trace (diagnostic side-channel, RefCell because query() takes &self).
    last_query_trace: RefCell<Option<Trace>>,
    /// Whether pipeline tracing is enabled (copied from `Config::enable_trace`).
    enable_trace: bool,
}
138
139impl MemoryEngine {
    /// Open or create a memory database
    ///
    /// # Arguments
    ///
    /// * `path` - Path to the .mfdb file
    /// * `config` - Configuration options
    ///
    /// # Returns
    ///
    /// A new MemoryEngine instance
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The database file cannot be created or opened
    /// - The file format is invalid
    /// - The configuration is invalid
    ///
    /// # Example
    ///
    /// ```no_run
    /// use mnemefusion_core::{MemoryEngine, Config};
    ///
    /// let engine = MemoryEngine::open("./brain.mfdb", Config::default()).unwrap();
    /// ```
    pub fn open<P: AsRef<Path>>(path: P, config: Config) -> Result<Self> {
        // Validate configuration
        config.validate()?;
        // Rebind mutably: embedding_dim may be overwritten below once the true
        // dimension is detected from an existing database.
        let mut config = config;

        // Open storage
        let storage = Arc::new(StorageEngine::open(path)?);

        // Create vector index configuration from main config
        let vector_config = VectorIndexConfig {
            dimension: config.embedding_dim,
            connectivity: config.hnsw_m,
            expansion_add: config.hnsw_ef_construction,
            expansion_search: config.hnsw_ef_search,
        };

        // Create and load vector index
        let mut vector_index = VectorIndex::new(vector_config, Arc::clone(&storage))?;
        vector_index.load()?;

        // Auto-detect embedding dimension from existing DB.
        // After load(), the vector index knows the true dimension from the serialized data.
        // Override config to match, so all subsequent add()/query() validations use the
        // correct dimension. This eliminates the need for users to manually specify
        // embedding_dim when opening an existing DB.
        let detected_dim = vector_index.config().dimension;
        if detected_dim != config.embedding_dim {
            config.embedding_dim = detected_dim;
        }

        let vector_index = Arc::new(RwLock::new(vector_index));

        // Create and load BM25 index
        let bm25_config = BM25Config::default();
        let bm25_index = Arc::new(BM25Index::new(Arc::clone(&storage), bm25_config));
        bm25_index.load()?;

        // Create temporal index
        let temporal_index = Arc::new(TemporalIndex::new(Arc::clone(&storage)));

        // Create and load graph manager
        let mut graph_manager = GraphManager::new();
        crate::graph::persist::load_graph(&mut graph_manager, &storage)?;

        // One-time migration: repair Entity→Entity edges from profile facts
        // (older DBs stored relationship facts but lost graph edges on save)
        crate::graph::persist::repair_relationship_edges(&mut graph_manager, &storage)?;

        let graph_manager = Arc::new(RwLock::new(graph_manager));

        // Create ingestion pipeline
        let mut pipeline = IngestionPipeline::new(
            Arc::clone(&storage),
            Arc::clone(&vector_index),
            Arc::clone(&bm25_index),
            Arc::clone(&temporal_index),
            Arc::clone(&graph_manager),
            config.entity_extraction_enabled,
        );

        // Attach SLM metadata extractor if enabled.
        // Failure to initialize is non-fatal: ingestion falls back to
        // pattern-based extraction.
        #[cfg(feature = "slm")]
        if config.slm_metadata_extraction_enabled {
            if let Some(ref slm_config) = config.slm_config {
                tracing::info!("Initializing SLM metadata extractor for ingestion...");
                match SlmMetadataExtractor::new(slm_config.clone()) {
                    Ok(extractor) => {
                        pipeline = pipeline.with_slm_extractor(Arc::new(Mutex::new(extractor)));
                        tracing::info!("SLM metadata extractor attached to pipeline");
                    }
                    Err(e) => {
                        tracing::warn!(
                            "Failed to initialize SLM metadata extractor, using pattern-based extraction: {}",
                            e
                        );
                    }
                }
            } else {
                tracing::debug!("SLM metadata extraction enabled but no slm_config provided");
            }
        }

        // Wire extraction_passes from config to pipeline (1 is the pipeline default,
        // so only >1 needs forwarding)
        #[cfg(feature = "entity-extraction")]
        if config.extraction_passes > 1 {
            pipeline = pipeline.with_extraction_passes(config.extraction_passes);
        }

        // Wire profile entity type filter from config to pipeline
        pipeline.set_profile_entity_types(config.profile_entity_types.clone());

        // Wire async extraction threshold from config to pipeline (0 = disabled)
        if config.async_extraction_threshold > 0 {
            pipeline.set_async_extraction_threshold(config.async_extraction_threshold);
        }

        // Create query planner
        let query_planner = QueryPlanner::new(
            Arc::clone(&storage),
            Arc::clone(&vector_index),
            Arc::clone(&bm25_index),
            Arc::clone(&temporal_index),
            Arc::clone(&graph_manager),
            config.fusion_semantic_threshold,
            config.semantic_prefilter_threshold,
            config.fusion_strategy,
            config.rrf_k,
            #[cfg(feature = "slm")]
            config.slm_config.clone(),
            config.slm_query_classification_enabled,
            config.adaptive_k_threshold,
        )?;

        // Initialize embedding engine if configured. A load failure is non-fatal:
        // the engine opens, but embeddings must then be supplied explicitly.
        #[cfg(feature = "embedding-onnx")]
        let embedding_engine = if let Some(ref model_path) = config.embedding_model {
            tracing::info!("Initializing embedding engine from '{}'...", model_path);
            match crate::embedding::EmbeddingEngine::from_path(model_path) {
                Ok(engine) => {
                    tracing::info!("Embedding engine ready (dim={})", engine.dim);
                    Some(std::sync::Arc::new(engine))
                }
                Err(e) => {
                    tracing::warn!(
                        "Failed to load embedding model from '{}': {}. \
                         Embeddings must be supplied explicitly.",
                        model_path,
                        e
                    );
                    None
                }
            }
        } else {
            None
        };

        let enable_trace = config.enable_trace;
        Ok(Self {
            storage,
            vector_index,
            bm25_index,
            temporal_index,
            graph_manager,
            pipeline,
            query_planner,
            config,
            #[cfg(feature = "embedding-onnx")]
            embedding_engine,
            default_namespace: None,
            user_entity: None,
            last_query_trace: RefCell::new(None),
            enable_trace,
        })
    }
319
320 /// Enable native LLM entity extraction with the specified model tier
321 ///
322 /// This enables automatic entity and fact extraction during memory ingestion
323 /// using a locally-running LLM via llama.cpp. Extraction results are stored
324 /// in entity profiles for fast retrieval.
325 ///
326 /// # Arguments
327 ///
328 /// * `tier` - Model tier to use (Balanced = 4B, Quality = 7B)
329 ///
330 /// # Returns
331 ///
332 /// Self for method chaining
333 ///
334 /// # Errors
335 ///
336 /// Returns an error if the model cannot be loaded.
337 ///
338 /// # Example
339 ///
340 /// ```no_run
341 /// use mnemefusion_core::{MemoryEngine, Config, ModelTier};
342 ///
343 /// let engine = MemoryEngine::open("./brain.mfdb", Config::default())?
344 /// .with_llm_entity_extraction(ModelTier::Balanced)?;
345 /// # Ok::<(), mnemefusion_core::Error>(())
346 /// ```
347 #[cfg(feature = "entity-extraction")]
348 pub fn with_llm_entity_extraction(mut self, tier: ModelTier) -> Result<Self> {
349 tracing::info!("Initializing LLM entity extractor ({:?})...", tier);
350
351 let extractor = LlmEntityExtractor::load(tier)?;
352 self.pipeline = self
353 .pipeline
354 .with_llm_extractor(Arc::new(Mutex::new(extractor)));
355
356 tracing::info!("LLM entity extractor attached to pipeline");
357 Ok(self)
358 }
359
360 /// Enable native LLM entity extraction with a custom model path
361 ///
362 /// This enables automatic entity and fact extraction using a model
363 /// at the specified path.
364 ///
365 /// # Arguments
366 ///
367 /// * `model_path` - Path to the GGUF model file
368 /// * `tier` - Model tier (affects generation parameters)
369 ///
370 /// # Returns
371 ///
372 /// Self for method chaining
373 ///
374 /// # Errors
375 ///
376 /// Returns an error if the model cannot be loaded.
377 #[cfg(feature = "entity-extraction")]
378 pub fn with_llm_entity_extraction_from_path(
379 mut self,
380 model_path: impl Into<std::path::PathBuf>,
381 tier: ModelTier,
382 ) -> Result<Self> {
383 tracing::info!("Initializing LLM entity extractor from custom path...");
384
385 let extractor = LlmEntityExtractor::load_from_path(model_path, tier)?;
386 self.pipeline = self
387 .pipeline
388 .with_llm_extractor(Arc::new(Mutex::new(extractor)));
389
390 tracing::info!("LLM entity extractor attached to pipeline");
391 Ok(self)
392 }
393
394 /// Enable Triplex KG extraction for clean entity-to-entity relationships.
395 ///
396 /// Loads the SciPhi Triplex model (Phi-3 3.8B fine-tune) as a second
397 /// extraction model. During ingestion, Triplex runs after Phi-4 to produce
398 /// clean (subject, predicate, object) triples with constrained entity types.
399 ///
400 /// This is the "Full" ingestion tier — requires 8GB+ GPU VRAM for both models.
401 ///
402 /// # Arguments
403 ///
404 /// * `model_path` - Path to the Triplex GGUF model file
405 ///
406 /// # Example
407 ///
408 /// ```rust,ignore
409 /// let engine = MemoryEngine::open("./brain.mfdb", Config::default())?
410 /// .with_llm_entity_extraction(ModelTier::Balanced)?
411 /// .with_kg_extraction("models/triplex/Triplex-Q4_K_M.gguf")?;
412 /// ```
413 #[cfg(feature = "entity-extraction")]
414 pub fn with_kg_extraction(mut self, model_path: impl AsRef<std::path::Path>) -> Result<Self> {
415 tracing::info!("Initializing Triplex KG extractor...");
416
417 let extractor = crate::extraction::TriplexExtractor::load(model_path)?;
418 self.pipeline = self
419 .pipeline
420 .with_triplex_extractor(Arc::new(Mutex::new(extractor)));
421
422 tracing::info!("Triplex KG extractor attached to pipeline (Full tier)");
423 Ok(self)
424 }
425
    /// Set the number of LLM extraction passes per document.
    ///
    /// This must be called after `with_llm_entity_extraction*()` to take effect.
    /// Multiple passes capture different facts, producing richer profiles.
    ///
    /// Thin delegator: forwards directly to the ingestion pipeline.
    #[cfg(feature = "entity-extraction")]
    pub fn set_extraction_passes(&mut self, passes: usize) {
        self.pipeline.set_extraction_passes(passes);
    }
434
    /// Backfill KG triples for all existing memories using Triplex.
    ///
    /// Runs Triplex extraction on every memory in the database and stores
    /// the resulting entity-to-entity relationship triples. This adds KG
    /// edges to a DB that was originally ingested without Triplex.
    ///
    /// Requires `with_kg_extraction()` to have been called first.
    /// Returns the number of memories that produced triples.
    #[cfg(feature = "entity-extraction")]
    pub fn backfill_kg(&self) -> Result<usize> {
        // None = no progress callback; see backfill_kg_with_progress() for the
        // variant that reports progress.
        self.pipeline.backfill_kg(None)
    }
447
    /// Like `backfill_kg()` but with a progress callback.
    ///
    /// Calls `progress_callback(current, total)` after each memory is processed.
    /// Passing `None` behaves exactly like `backfill_kg()`.
    #[cfg(feature = "entity-extraction")]
    pub fn backfill_kg_with_progress(
        &self,
        progress_callback: Option<Box<dyn Fn(usize, usize)>>,
    ) -> Result<usize> {
        self.pipeline.backfill_kg(progress_callback)
    }
458
    /// Process all deferred LLM extractions queued by `add()` in async mode.
    ///
    /// When `async_extraction_threshold > 0` (set via config or
    /// `with_async_extraction_threshold()`), `add()` stores large memories
    /// immediately and defers LLM extraction here. Call this periodically
    /// (e.g., every N messages, or before querying) to build entity profiles.
    ///
    /// Returns the number of memories whose extraction was processed.
    /// Safe to call when the queue is empty (returns `Ok(0)`).
    pub fn flush_extraction_queue(&self) -> Result<usize> {
        // Thin delegator: the pipeline owns the deferred-extraction queue.
        self.pipeline.flush_extraction_queue()
    }
471
    /// Returns the number of memories with deferred LLM extractions pending.
    ///
    /// Non-zero only when `async_extraction_threshold > 0` and large `add()` calls
    /// have been made since the last `flush_extraction_queue()`.
    pub fn pending_extraction_count(&self) -> usize {
        self.pipeline.pending_extraction_count()
    }
479
480 /// Set a default namespace (user identity) for all add/query operations.
481 ///
482 /// When set, any call to `add()` or `query()` that does not supply an explicit
483 /// `namespace` argument will use this value automatically. Equivalent to always
484 /// passing `namespace = Some(user)` — enables "Memory is per-user" semantics
485 /// without changing every call site.
486 ///
487 /// # Example
488 ///
489 /// ```no_run
490 /// # use mnemefusion_core::{MemoryEngine, Config};
491 /// let engine = MemoryEngine::open("./brain.mfdb", Config::default()).unwrap()
492 /// .with_user("alice");
493 /// // All subsequent add/query calls default to namespace="alice"
494 /// ```
495 pub fn with_user(mut self, user: impl Into<String>) -> Self {
496 self.default_namespace = Some(user.into());
497 self
498 }
499
    /// Set the user entity name for first-person pronoun resolution.
    ///
    /// When set, queries containing "I", "me", "my", etc. automatically include
    /// this entity in the profile injection step (Step 2.1), ensuring the user's
    /// own memories get the entity score boost.
    ///
    /// Unlike `with_user()`, this does NOT enable namespace filtering — it only
    /// affects entity detection at query time. Use this when memories are stored
    /// without namespace but you want pronoun resolution.
    pub fn set_user_entity(&mut self, name: impl Into<String>) {
        self.user_entity = Some(name.into());
    }
512
    /// Attach an embedding engine for automatic text vectorization.
    ///
    /// After this call, `add()` and `query()` can be called without supplying
    /// explicit embedding vectors.
    ///
    /// Requires the `embedding-onnx` feature at compile time.
    /// Replaces any engine previously set (including one loaded via
    /// `Config::embedding_model` during `open()`).
    #[cfg(feature = "embedding-onnx")]
    pub fn with_embedding_engine(mut self, engine: crate::embedding::EmbeddingEngine) -> Self {
        self.embedding_engine = Some(std::sync::Arc::new(engine));
        self
    }
524
525 /// Auto-compute an embedding using the configured engine or embedding_fn.
526 ///
527 /// Returns `Err(Error::NoEmbeddingEngine)` if neither is configured.
528 #[cfg(feature = "embedding-onnx")]
529 fn auto_embed(&self, text: &str) -> Result<Vec<f32>> {
530 if let Some(engine) = self.embedding_engine.as_ref() {
531 return engine.embed(text);
532 }
533 if let Some(f) = self.pipeline.embedding_fn() {
534 return Ok(f(text));
535 }
536 Err(Error::NoEmbeddingEngine)
537 }
538
539 /// Auto-compute an embedding (no ONNX engine — fall back to embedding_fn).
540 #[cfg(not(feature = "embedding-onnx"))]
541 fn auto_embed(&self, text: &str) -> Result<Vec<f32>> {
542 if let Some(f) = self.pipeline.embedding_fn() {
543 Ok(f(text))
544 } else {
545 Err(Error::NoEmbeddingEngine)
546 }
547 }
548
    /// Set the embedding function for computing fact embeddings at ingestion time.
    ///
    /// When set, the pipeline will compute and store embeddings for each extracted
    /// entity fact during ingestion. These embeddings enable semantic matching in
    /// ProfileSearch (cosine similarity vs word-overlap).
    ///
    /// The function should return an embedding vector for the given text input.
    /// Typically this wraps the same embedding model used for memory embeddings
    /// (e.g., `SentenceTransformer.encode()`).
    ///
    /// # Arguments
    ///
    /// * `f` - Embedding function: `Fn(&str) -> Vec<f32>`
    pub fn set_embedding_fn(&mut self, f: EmbeddingFn) {
        self.pipeline.set_embedding_fn(f);
    }
565
566 /// Precompute missing fact embeddings for all entity profiles.
567 ///
568 /// Iterates all stored profiles, checks each fact for a stored embedding,
569 /// and computes + stores any missing ones using the registered EmbeddingFn.
570 /// This is a one-time backfill operation — "pay the cost once."
571 ///
572 /// Returns the number of fact embeddings computed.
573 pub fn precompute_fact_embeddings(&self) -> Result<usize> {
574 let embed_fn = self.pipeline.embedding_fn().ok_or_else(|| {
575 Error::Configuration("No embedding function set. Call set_embedding_fn() first.".into())
576 })?;
577
578 let profiles = self.storage.list_entity_profiles()?;
579 let mut computed = 0;
580
581 for profile in &profiles {
582 for (fact_type, facts) in &profile.facts {
583 for fact in facts {
584 let key = fact_embedding_key(&profile.name, fact_type, &fact.value);
585 if self.storage.get_fact_embedding(&key)?.is_none() {
586 let fact_text = format!("{} {}", fact_type.replace('_', " "), fact.value);
587 let embedding = embed_fn(&fact_text);
588 self.storage.store_fact_embedding(&key, &embedding)?;
589 computed += 1;
590 }
591 }
592 }
593 }
594
595 Ok(computed)
596 }
597
598 /// Rebuild embeddings for memories with first-person content using speaker-aware
599 /// pronoun substitution.
600 ///
601 /// For each memory that has a `"speaker"` in its metadata and first-person content
602 /// (e.g., `"I joined a gym"`), recomputes the embedding on the third-person form
603 /// (`"Alice joined a gym"`) to improve semantic similarity with entity-centric queries.
604 ///
605 /// This is a one-time backfill for databases ingested before this feature was added.
606 /// Safe to call multiple times — only updates memories where pronoun substitution
607 /// changes the text (i.e., skips memories without first-person pronouns).
608 ///
609 /// Uses the registered `EmbeddingFn` (set via `set_embedding_fn()`) when available,
610 /// falling back to the internal `auto_embed()` engine otherwise.
611 ///
612 /// Returns the number of memory embeddings updated.
613 pub fn rebuild_speaker_embeddings(&self) -> Result<usize> {
614 let embed_fn = self.pipeline.embedding_fn();
615 let ids = self.storage.list_memory_ids()?;
616 let mut updated = 0;
617
618 for id in &ids {
619 let memory = match self.storage.get_memory(id)? {
620 Some(m) => m,
621 None => continue,
622 };
623 let speaker = memory
624 .metadata
625 .get("speaker")
626 .map(String::as_str)
627 .unwrap_or("");
628 if speaker.is_empty() {
629 continue;
630 }
631
632 let substituted = first_person_to_third(&memory.content, speaker);
633 if substituted == memory.content {
634 continue; // no first-person pronouns found — nothing to do
635 }
636
637 let new_embedding = match embed_fn.as_ref() {
638 Some(ef) => ef(&substituted),
639 None => self.auto_embed(&substituted)?,
640 };
641
642 self.update_embedding(id, new_embedding)?;
643 updated += 1;
644 }
645
646 tracing::info!(
647 "rebuild_speaker_embeddings: updated {} memory embeddings",
648 updated
649 );
650 Ok(updated)
651 }
652
    /// Run entity extraction on text without adding to the database.
    ///
    /// Useful for testing extraction quality or comparing model outputs.
    /// Requires `with_llm_entity_extraction*()` to have been called first.
    ///
    /// # Arguments
    /// * `content` - The text to extract entities from
    /// * `speaker` - Optional speaker name for first-person attribution
    ///
    /// # Returns
    /// The extraction result with entities, facts, records, and relationships.
    #[cfg(feature = "entity-extraction")]
    pub fn extract_text(
        &self,
        content: &str,
        speaker: Option<&str>,
    ) -> Result<crate::extraction::ExtractionResult> {
        // Pure delegation: the pipeline owns the extractor state.
        self.pipeline.extract_text(content, speaker)
    }
672
    /// Apply an externally-produced extraction result to a memory's entity profiles.
    ///
    /// This enables API-based extraction backends (e.g., NScale cloud inference)
    /// to inject entity profiles without requiring a local LLM. The extraction
    /// result must match the same JSON schema as the local Qwen3 extractor.
    ///
    /// # Arguments
    /// * `memory_id` - The memory ID to associate the extraction with
    /// * `extraction` - The extraction result from an external source
    ///
    /// # Errors
    ///
    /// Propagates errors from the profile update or relationship storage steps.
    #[cfg(feature = "entity-extraction")]
    pub fn apply_extraction(
        &self,
        memory_id: &MemoryId,
        extraction: &crate::extraction::ExtractionResult,
    ) -> Result<()> {
        // Update entity profiles from facts
        self.pipeline
            .update_entity_profiles_from_llm(memory_id, extraction)?;

        // Store entity-to-entity relationships (skipped when none were extracted)
        if !extraction.relationships.is_empty() {
            self.pipeline
                .store_relationships(memory_id, &extraction.relationships)?;
        }

        // Annotate parent memory with typed record metadata (record_type, event_date).
        // We do NOT create child memories — they flood the vector index and degrade
        // recall. Instead, typed decomposition is stored as metadata on the parent
        // for type-aware retrieval balancing.
        if !extraction.records.is_empty() {
            self.pipeline
                .annotate_parent_with_types(memory_id, &extraction.records);
        }

        Ok(())
    }
709
710 /// Generate summaries for all entity profiles.
711 ///
712 /// For each profile with facts, generates a dense summary paragraph that
713 /// condenses the profile's facts into one text block. When present, query()
714 /// injects summaries as single context items instead of N individual facts,
715 /// addressing RANK failures where evidence is present but buried.
716 ///
717 /// Returns the number of profiles summarized.
718 pub fn summarize_profiles(&self) -> Result<usize> {
719 let profiles = self.storage.list_entity_profiles()?;
720 let mut summarized = 0;
721 for mut profile in profiles {
722 if profile.generate_summary().is_some() {
723 self.storage.store_entity_profile(&profile)?;
724 summarized += 1;
725 }
726 }
727 Ok(summarized)
728 }
729
    /// Consolidate entity profiles by removing noise and deduplicating facts.
    ///
    /// Performs the following cleanup operations:
    /// 0. Merge alias profiles into their canonical forms (fuzzy name matching)
    /// 1. Remove null-indicator values ("none", "N/A", etc.)
    /// 2. Remove overly verbose values (>100 chars)
    /// 3. Semantic dedup within same fact_type using embedding similarity (threshold: 0.85)
    ///    — keeps fact with higher confidence, or first encountered on tie
    /// 4. Delete garbage entity profiles (non-person entities with ≤2 facts)
    ///
    /// Phase 3 only runs when an `EmbeddingFn` has been registered.
    ///
    /// Returns (facts_removed, profiles_deleted). Note: merged alias profiles
    /// (Phase 0) count toward `profiles_deleted`.
    pub fn consolidate_profiles(&self) -> Result<(usize, usize)> {
        use crate::query::profile_search::{cosine_similarity, resolve_entity_alias};

        let embed_fn = self.pipeline.embedding_fn();

        let mut total_facts_removed = 0usize;
        let mut profiles_deleted = 0usize;

        // Phase 0: Merge alias profiles into their canonical forms.
        // E.g., "mel" → "melanie", "mell" → "melanie" (via fuzzy matching).
        {
            let mut all_names = self.storage.list_entity_profile_names()?;
            // Sort by length (shortest first) so short aliases resolve to longer canonicals
            all_names.sort_by_key(|n| n.len());

            // Names already merged into a canonical — skip them on later iterations.
            let mut merged_away: std::collections::HashSet<String> =
                std::collections::HashSet::new();

            for i in 0..all_names.len() {
                let short_name = &all_names[i];
                if merged_away.contains(short_name) {
                    continue;
                }
                if let Some(canonical) = resolve_entity_alias(short_name, &all_names) {
                    // Don't merge into a profile that itself was merged away.
                    if merged_away.contains(&canonical) {
                        continue;
                    }
                    // Load both profiles
                    let short_profile = match self.storage.get_entity_profile(short_name)? {
                        Some(p) => p,
                        None => continue,
                    };
                    let mut canon_profile = match self.storage.get_entity_profile(&canonical)? {
                        Some(p) => p,
                        None => continue,
                    };

                    // Move all facts from short → canonical (add_fact handles dedup)
                    for facts in short_profile.facts.values() {
                        for fact in facts {
                            canon_profile.add_fact(fact.clone());
                        }
                    }

                    // Move all source_memories
                    for mem_id in &short_profile.source_memories {
                        canon_profile.add_source_memory(mem_id.clone());
                    }

                    // Save canonical, delete alias
                    self.storage.store_entity_profile(&canon_profile)?;
                    self.storage.delete_entity_profile(short_name)?;
                    merged_away.insert(short_name.clone());
                    profiles_deleted += 1;

                    tracing::info!(
                        "Merged alias profile '{}' into canonical '{}'",
                        short_name,
                        canonical,
                    );
                }
            }
        }

        // Re-list after Phase 0 so merged profiles are seen in their final form.
        let profiles = self.storage.list_entity_profiles()?;

        // Values treated as "no information" — compared after trim + lowercase.
        const NULL_INDICATORS: &[&str] = &[
            "none",
            "n/a",
            "na",
            "not specified",
            "not mentioned",
            "unknown",
            "unspecified",
            "not provided",
            "no information",
        ];

        for mut profile in profiles {
            let mut facts_removed_in_profile = 0usize;

            // Phase 1 & 2: Remove null and long values
            for (_fact_type, facts) in profile.facts.iter_mut() {
                let before = facts.len();
                facts.retain(|f| {
                    let trimmed = f.value.trim();
                    let lower = trimmed.to_lowercase();
                    // Keep if NOT a null indicator AND NOT too long
                    !NULL_INDICATORS.contains(&lower.as_str()) && trimmed.len() <= 100
                });
                facts_removed_in_profile += before - facts.len();
            }

            // Phase 3: Semantic dedup within same fact_type (requires embedding fn)
            if let Some(ref embed_fn) = embed_fn {
                for (fact_type, facts) in profile.facts.iter_mut() {
                    if facts.len() <= 1 {
                        continue;
                    }

                    // Sort by confidence descending (highest confidence kept first)
                    facts.sort_by(|a, b| {
                        b.confidence
                            .partial_cmp(&a.confidence)
                            .unwrap_or(std::cmp::Ordering::Equal)
                    });

                    // Collect embeddings for all facts
                    let embeddings: Vec<Vec<f32>> = facts
                        .iter()
                        .map(|f| {
                            // Try stored embedding first, compute on the fly if missing
                            // (on-the-fly embeddings are not persisted here)
                            let key = fact_embedding_key(&profile.name, fact_type, &f.value);
                            self.storage
                                .get_fact_embedding(&key)
                                .ok()
                                .flatten()
                                .unwrap_or_else(|| {
                                    let text =
                                        format!("{} {}", fact_type.replace('_', " "), f.value);
                                    embed_fn(&text)
                                })
                        })
                        .collect();

                    // Greedy dedup: keep first (highest confidence), skip near-duplicates
                    let mut keep_indices: Vec<usize> = Vec::new();
                    for i in 0..facts.len() {
                        let mut is_dup = false;
                        for &kept_idx in &keep_indices {
                            let sim = cosine_similarity(&embeddings[i], &embeddings[kept_idx]);
                            if sim > 0.85 {
                                is_dup = true;
                                break;
                            }
                        }
                        if !is_dup {
                            keep_indices.push(i);
                        }
                    }

                    let before = facts.len();
                    let kept_facts: Vec<_> =
                        keep_indices.into_iter().map(|i| facts[i].clone()).collect();
                    *facts = kept_facts;
                    facts_removed_in_profile += before - facts.len();
                }
            }

            // Remove empty fact type entries
            profile.facts.retain(|_, v| !v.is_empty());

            total_facts_removed += facts_removed_in_profile;

            // Phase 4: Delete garbage profiles (non-person with ≤2 facts)
            let total_facts = profile.total_facts();
            if profile.entity_type != "person" && total_facts <= 2 {
                self.storage.delete_entity_profile(&profile.name)?;
                profiles_deleted += 1;
                continue;
            }

            // Save updated profile if any facts were removed
            if facts_removed_in_profile > 0 {
                self.storage.store_entity_profile(&profile)?;
            }
        }

        Ok((total_facts_removed, profiles_deleted))
    }
910
911 /// Repair entity profiles by re-processing llm_extraction metadata stored in memories.
912 ///
913 /// This is a recovery function for databases where entity profiles are missing or
914 /// incomplete due to extraction failures, consolidation over-pruning, or ingestion bugs.
915 ///
916 /// For every memory in the DB:
917 /// 1. Parse the `llm_extraction` JSON from metadata (if present)
918 /// 2. For each entity_fact: create/update the entity profile with the fact
919 /// and add the memory as a source_memory
920 /// 3. For the `speaker` metadata field: ensure the speaker entity's profile
921 /// includes this memory as a source_memory (handles first-person statements
922 /// where the speaker name isn't in the content text)
923 ///
924 /// Respects the pipeline's `profile_entity_types` filter and type allowlist.
925 /// Skips entities whose names appear to be pronouns or generic placeholders.
926 ///
927 /// Returns (profiles_created, source_memories_added).
928 pub fn repair_profiles_from_metadata(&self) -> Result<(usize, usize)> {
929 use crate::query::profile_search::resolve_entity_alias;
930 use crate::types::{EntityFact, EntityId};
931
932 let junk_names: &[&str] = &[
933 "i",
934 "me",
935 "my",
936 "we",
937 "our",
938 "you",
939 "your",
940 "he",
941 "she",
942 "it",
943 "they",
944 "them",
945 "his",
946 "her",
947 "their",
948 "him",
949 "this",
950 "that",
951 "unknown",
952 "unspecified",
953 "someone",
954 "somebody",
955 "anyone",
956 ];
957
958 let allowed_types: &[&str] = &["person", "organization", "location"];
959
960 let mut profiles_created = 0usize;
961 let mut source_memories_added = 0usize;
962
963 let all_ids = self.storage.list_memory_ids()?;
964 let total = all_ids.len();
965 tracing::info!("repair_profiles_from_metadata: scanning {} memories", total);
966
967 for (idx, mem_id) in all_ids.iter().enumerate() {
968 if idx % 500 == 0 {
969 tracing::info!(" {}/{}", idx, total);
970 }
971
972 let memory = match self.storage.get_memory(mem_id)? {
973 Some(m) => m,
974 None => continue,
975 };
976
977 // Load fresh known_names once per memory (profiles may have been added)
978 let known_names = self.storage.list_entity_profile_names()?;
979
980 // ── Step A: re-process llm_extraction entity_facts ──────────────────
981 if let Some(json_str) = memory.metadata.get("llm_extraction") {
982 if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
983 // Build entity_type lookup from "entities" array
984 let mut entity_types: HashMap<String, String> = HashMap::new();
985 if let Some(ents) = v["entities"].as_array() {
986 for e in ents {
987 if let (Some(name), Some(etype)) =
988 (e["name"].as_str(), e["type"].as_str())
989 {
990 entity_types.insert(name.to_lowercase(), etype.to_lowercase());
991 }
992 }
993 }
994
995 if let Some(facts_arr) = v["entity_facts"].as_array() {
996 for fact_val in facts_arr {
997 let entity_raw = match fact_val["entity"].as_str() {
998 Some(e) => e,
999 None => continue,
1000 };
1001 let entity_lower = entity_raw.to_lowercase();
1002
1003 // Skip junk names and single-char names
1004 if entity_lower.len() < 2 || junk_names.contains(&entity_lower.as_str())
1005 {
1006 continue;
1007 }
1008
1009 let etype = entity_types
1010 .get(&entity_lower)
1011 .map(|s| s.as_str())
1012 .unwrap_or("person");
1013
1014 // Only create profiles for allowed entity types
1015 if !allowed_types.contains(&etype) {
1016 continue;
1017 }
1018
1019 // Canonicalize via alias resolution
1020 let canonical = resolve_entity_alias(&entity_lower, &known_names)
1021 .unwrap_or_else(|| entity_lower.clone());
1022
1023 let fact_type = fact_val["fact_type"]
1024 .as_str()
1025 .unwrap_or("unknown")
1026 .to_string();
1027 let value = fact_val["value"].as_str().unwrap_or("").to_string();
1028 let confidence = fact_val["confidence"].as_f64().unwrap_or(0.8) as f32;
1029
1030 if value.is_empty() || value.len() > 100 {
1031 continue;
1032 }
1033
1034 let is_new = self.storage.get_entity_profile(&canonical)?.is_none();
1035
1036 let mut profile = self
1037 .storage
1038 .get_entity_profile(&canonical)?
1039 .unwrap_or_else(|| {
1040 EntityProfile::new(
1041 EntityId::new(),
1042 canonical.clone(),
1043 etype.to_string(),
1044 )
1045 });
1046
1047 profile.add_fact(EntityFact {
1048 fact_type,
1049 value,
1050 confidence,
1051 source_memory: mem_id.clone(),
1052 extracted_at: Timestamp::now(),
1053 });
1054 profile.add_source_memory(mem_id.clone());
1055
1056 self.storage.store_entity_profile(&profile)?;
1057
1058 if is_new {
1059 profiles_created += 1;
1060 }
1061 source_memories_added += 1;
1062 }
1063 }
1064 }
1065 }
1066
1067 // ── Step B: speaker → source_memory attribution ─────────────────────
1068 // When the speaker says "I joined a gym", the entity name ("Maria") isn't
1069 // in the content. If an entity profile exists for the speaker, add this
1070 // memory as a source_memory so query-time entity injection can find it.
1071 if let Some(speaker_raw) = memory.metadata.get("speaker") {
1072 let speaker_lower = speaker_raw.trim().to_lowercase();
1073 if speaker_lower.len() < 2 || junk_names.contains(&speaker_lower.as_str()) {
1074 continue;
1075 }
1076
1077 // Re-load known_names (may have been updated by Step A above)
1078 let known_names2 = self.storage.list_entity_profile_names()?;
1079 let canonical = resolve_entity_alias(&speaker_lower, &known_names2)
1080 .unwrap_or_else(|| speaker_lower.clone());
1081
1082 if let Ok(Some(mut profile)) = self.storage.get_entity_profile(&canonical) {
1083 if !profile.source_memories.contains(mem_id) {
1084 profile.add_source_memory(mem_id.clone());
1085 self.storage.store_entity_profile(&profile)?;
1086 source_memories_added += 1;
1087 }
1088 }
1089 }
1090 }
1091
1092 tracing::info!(
1093 "repair_profiles_from_metadata: created {} profiles, added {} source_memory links",
1094 profiles_created,
1095 source_memories_added
1096 );
1097 Ok((profiles_created, source_memories_added))
1098 }
1099
1100 /// Add a new memory to the database
1101 ///
1102 /// This will automatically index the memory across all dimensions:
1103 /// - Semantic (vector similarity)
1104 /// - Temporal (time-based)
1105 /// - Entity (if auto-extraction enabled)
1106 ///
1107 /// # Arguments
1108 ///
1109 /// * `content` - The text content to store
1110 /// * `embedding` - Vector embedding (must match configured dimension)
1111 /// * `metadata` - Optional key-value metadata
1112 /// * `timestamp` - Optional custom timestamp (defaults to now)
1113 /// * `source` - Optional provenance/source tracking information
1114 ///
1115 /// # Returns
1116 ///
1117 /// The ID of the created memory
1118 ///
1119 /// # Errors
1120 ///
1121 /// Returns an error if:
1122 /// - Embedding dimension doesn't match configuration
1123 /// - Storage operation fails
1124 /// - Source serialization fails
1125 ///
1126 /// # Example
1127 ///
1128 /// ```no_run
1129 /// # use mnemefusion_core::{MemoryEngine, Config};
1130 /// # use mnemefusion_core::types::{Source, SourceType};
1131 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1132 /// let embedding = vec![0.1; 384];
1133 ///
1134 /// // Add memory with source tracking
1135 /// let source = Source::new(SourceType::Conversation)
1136 /// .with_id("conv_123")
1137 /// .with_confidence(0.95);
1138 ///
1139 /// let id = engine.add(
1140 /// "Meeting scheduled for next week".to_string(),
1141 /// embedding,
1142 /// None,
1143 /// None,
1144 /// Some(source),
1145 /// None,
1146 /// ).unwrap();
1147 /// ```
1148 pub fn add(
1149 &self,
1150 content: String,
1151 embedding: impl Into<Option<Vec<f32>>>,
1152 metadata: Option<HashMap<String, String>>,
1153 timestamp: Option<Timestamp>,
1154 source: Option<Source>,
1155 namespace: Option<&str>,
1156 ) -> Result<MemoryId> {
1157 // Resolve embedding: use provided value or auto-compute from content.
1158 // When auto-computing and a speaker is known, compute on the third-person form
1159 // ("Alice joined a gym") rather than the raw first-person form ("I joined a gym").
1160 // This yields ~+0.25 cosine similarity improvement with entity-centric queries.
1161 // The original first-person content is stored unchanged.
1162 let embedding = match embedding.into() {
1163 Some(e) => e,
1164 None => {
1165 let text_for_embedding = metadata
1166 .as_ref()
1167 .and_then(|m| m.get("speaker"))
1168 .filter(|s| !s.is_empty())
1169 .map(|speaker| {
1170 let subst = first_person_to_third(&content, speaker);
1171 if subst != content {
1172 subst
1173 } else {
1174 content.clone()
1175 }
1176 })
1177 .unwrap_or_else(|| content.clone());
1178 self.auto_embed(&text_for_embedding)?
1179 }
1180 };
1181
1182 // Validate embedding dimension
1183 if embedding.len() != self.config.embedding_dim {
1184 return Err(Error::InvalidEmbeddingDimension {
1185 expected: self.config.embedding_dim,
1186 got: embedding.len(),
1187 });
1188 }
1189
1190 // Apply default namespace if caller didn't supply one
1191 let effective_ns = namespace.or(self.default_namespace.as_deref());
1192
1193 // Create memory
1194 let mut memory = if let Some(ts) = timestamp {
1195 let mut mem = Memory::new_with_timestamp(content, embedding, ts);
1196 if let Some(meta) = metadata {
1197 mem.metadata = meta;
1198 }
1199 mem
1200 } else {
1201 let mut mem = Memory::new(content, embedding);
1202 if let Some(meta) = metadata {
1203 mem.metadata = meta;
1204 }
1205 mem
1206 };
1207
1208 // Add source if provided
1209 if let Some(src) = source {
1210 memory.set_source(src)?;
1211 }
1212
1213 // Set namespace (defaults to empty string)
1214 memory.set_namespace(effective_ns.unwrap_or(""));
1215
1216 // Delegate to ingestion pipeline for atomic indexing
1217 self.pipeline.add(memory)
1218 }
1219
    /// Retrieve a memory by ID
    ///
    /// # Arguments
    ///
    /// * `id` - The memory ID to retrieve
    ///
    /// # Returns
    ///
    /// The memory record if found, or None
    ///
    /// # Errors
    ///
    /// Propagates any underlying storage error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use mnemefusion_core::{MemoryEngine, Config};
    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// # let id = engine.add("test".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
    /// let memory = engine.get(&id).unwrap();
    /// if let Some(mem) = memory {
    ///     println!("Content: {}", mem.content);
    /// }
    /// ```
    pub fn get(&self, id: &MemoryId) -> Result<Option<Memory>> {
        // Thin read-through to the storage layer; no index access required.
        self.storage.get_memory(id)
    }
1244
1245 /// Delete a memory by ID
1246 ///
1247 /// This will remove the memory from all indexes.
1248 ///
1249 /// # Arguments
1250 ///
1251 /// * `id` - The memory ID to delete
1252 /// * `namespace` - Optional namespace. If provided, verifies the memory is in this namespace before deleting
1253 ///
1254 /// # Returns
1255 ///
1256 /// true if the memory was deleted, false if it didn't exist
1257 ///
1258 /// # Errors
1259 ///
1260 /// Returns `Error::NamespaceMismatch` if namespace is provided and doesn't match
1261 ///
1262 /// # Example
1263 ///
1264 /// ```no_run
1265 /// # use mnemefusion_core::{MemoryEngine, Config};
1266 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1267 /// # let id = engine.add("test".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
1268 /// let deleted = engine.delete(&id, None).unwrap();
1269 /// assert!(deleted);
1270 /// ```
1271 pub fn delete(&self, id: &MemoryId, namespace: Option<&str>) -> Result<bool> {
1272 // If namespace is provided, verify it matches before deleting
1273 if let Some(expected_ns) = namespace {
1274 if let Some(memory) = self.storage.get_memory(id)? {
1275 let found_ns = memory.get_namespace();
1276 if found_ns != expected_ns {
1277 return Err(Error::NamespaceMismatch {
1278 expected: expected_ns.to_string(),
1279 found: found_ns,
1280 });
1281 }
1282 } else {
1283 // Memory doesn't exist
1284 return Ok(false);
1285 }
1286 }
1287
1288 // Delegate to ingestion pipeline for atomic cleanup
1289 self.pipeline.delete(id)
1290 }
1291
    /// Add multiple memories in a batch operation
    ///
    /// This is significantly faster than calling `add()` multiple times (10x+ improvement)
    /// because it uses:
    /// - Single transaction for all storage operations
    /// - Vector index locked once for all additions
    /// - Batched entity extraction with deduplication
    ///
    /// # Arguments
    ///
    /// * `inputs` - Vector of MemoryInput to add
    /// * `namespace` - Optional namespace applied to every input
    ///
    /// # Returns
    ///
    /// BatchResult containing IDs of created memories and any errors
    ///
    /// # Performance
    ///
    /// Target: 1,000 memories in <500ms
    ///
    /// # Example
    ///
    /// ```no_run
    /// use mnemefusion_core::{MemoryEngine, Config};
    /// use mnemefusion_core::types::MemoryInput;
    ///
    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// let inputs = vec![
    ///     MemoryInput::new("content 1".to_string(), vec![0.1; 384]),
    ///     MemoryInput::new("content 2".to_string(), vec![0.2; 384]),
    /// ];
    ///
    /// let result = engine.add_batch(inputs, None).unwrap();
    /// println!("Created {} memories", result.created_count);
    /// if result.has_errors() {
    ///     println!("Encountered {} errors", result.errors.len());
    /// }
    /// ```
    pub fn add_batch(
        &self,
        inputs: Vec<MemoryInput>,
        namespace: Option<&str>,
    ) -> Result<BatchResult> {
        // Convenience wrapper: same behavior as add_batch_with_progress,
        // with progress reporting disabled.
        self.add_batch_with_progress(inputs, namespace, None)
    }
1337
1338 /// Add multiple memories in a single batch operation with progress reporting.
1339 ///
1340 /// Like `add_batch()`, but calls `progress_callback(current, total)` after each
1341 /// memory is processed. Useful for long ingestion runs.
1342 ///
1343 /// # Example
1344 ///
1345 /// ```no_run
1346 /// # use mnemefusion_core::{MemoryEngine, Config, MemoryInput};
1347 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1348 /// let inputs: Vec<MemoryInput> = vec![]; // ...
1349 /// let result = engine.add_batch_with_progress(
1350 /// inputs,
1351 /// None,
1352 /// Some(Box::new(|current, total| {
1353 /// println!("Progress: {}/{}", current, total);
1354 /// })),
1355 /// ).unwrap();
1356 /// ```
1357 pub fn add_batch_with_progress(
1358 &self,
1359 inputs: Vec<MemoryInput>,
1360 namespace: Option<&str>,
1361 progress_callback: Option<Box<dyn Fn(usize, usize)>>,
1362 ) -> Result<BatchResult> {
1363 // Validate all embeddings upfront
1364 for (index, input) in inputs.iter().enumerate() {
1365 if input.embedding.len() != self.config.embedding_dim {
1366 let mut result = BatchResult::new();
1367 result.errors.push(crate::types::BatchError::new(
1368 index,
1369 format!(
1370 "Invalid embedding dimension: expected {}, got {}",
1371 self.config.embedding_dim,
1372 input.embedding.len()
1373 ),
1374 ));
1375 return Ok(result);
1376 }
1377 }
1378
1379 // Set namespace on all inputs if provided
1380 let mut inputs_with_ns = inputs;
1381 if let Some(ns) = namespace {
1382 for input in &mut inputs_with_ns {
1383 input.namespace = Some(ns.to_string());
1384 }
1385 }
1386
1387 // Delegate to ingestion pipeline
1388 self.pipeline.add_batch(inputs_with_ns, progress_callback)
1389 }
1390
1391 /// Delete multiple memories in a batch operation
1392 ///
1393 /// This is faster than calling `delete()` multiple times because it uses:
1394 /// - Single transaction for all storage operations
1395 /// - Batched entity cleanup
1396 ///
1397 /// # Arguments
1398 ///
1399 /// * `ids` - Vector of MemoryIds to delete
1400 /// * `namespace` - Optional namespace. If provided, only deletes memories in this namespace
1401 ///
1402 /// # Returns
1403 ///
1404 /// Number of memories actually deleted (may be less than input if some don't exist or are in wrong namespace)
1405 ///
1406 /// # Example
1407 ///
1408 /// ```no_run
1409 /// use mnemefusion_core::{MemoryEngine, Config};
1410 ///
1411 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1412 /// # let id1 = engine.add("test1".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
1413 /// # let id2 = engine.add("test2".to_string(), vec![0.2; 384], None, None, None, None).unwrap();
1414 /// let ids = vec![id1, id2];
1415 /// let deleted_count = engine.delete_batch(ids, None).unwrap();
1416 /// println!("Deleted {} memories", deleted_count);
1417 /// ```
1418 pub fn delete_batch(&self, ids: Vec<MemoryId>, namespace: Option<&str>) -> Result<usize> {
1419 // If namespace is provided, filter IDs to only those in the namespace
1420 let ids_to_delete = if let Some(expected_ns) = namespace {
1421 let mut filtered_ids = Vec::new();
1422 for id in ids {
1423 if let Some(memory) = self.storage.get_memory(&id)? {
1424 if memory.get_namespace() == expected_ns {
1425 filtered_ids.push(id);
1426 }
1427 }
1428 }
1429 filtered_ids
1430 } else {
1431 ids
1432 };
1433
1434 // Delegate to ingestion pipeline
1435 self.pipeline.delete_batch(ids_to_delete)
1436 }
1437
1438 /// Add a memory with automatic deduplication
1439 ///
1440 /// Uses content hash to detect duplicates. If identical content already exists,
1441 /// returns the existing memory ID without creating a duplicate.
1442 ///
1443 /// # Arguments
1444 ///
1445 /// * `content` - Text content
1446 /// * `embedding` - Vector embedding
1447 /// * `metadata` - Optional metadata
1448 /// * `timestamp` - Optional custom timestamp
1449 /// * `source` - Optional source/provenance
1450 ///
1451 /// # Returns
1452 ///
1453 /// AddResult with created flag and ID (either new or existing)
1454 ///
1455 /// # Example
1456 ///
1457 /// ```no_run
1458 /// use mnemefusion_core::{MemoryEngine, Config};
1459 ///
1460 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1461 /// let embedding = vec![0.1; 384];
1462 ///
1463 /// // First add
1464 /// let result1 = engine.add_with_dedup(
1465 /// "Meeting notes".to_string(),
1466 /// embedding.clone(),
1467 /// None,
1468 /// None,
1469 /// None,
1470 /// None,
1471 /// ).unwrap();
1472 /// assert!(result1.created);
1473 ///
1474 /// // Second add with same content
1475 /// let result2 = engine.add_with_dedup(
1476 /// "Meeting notes".to_string(),
1477 /// embedding.clone(),
1478 /// None,
1479 /// None,
1480 /// None,
1481 /// None,
1482 /// ).unwrap();
1483 /// assert!(!result2.created); // Duplicate detected
1484 /// assert_eq!(result1.id, result2.id); // Same ID returned
1485 /// ```
1486 pub fn add_with_dedup(
1487 &self,
1488 content: String,
1489 embedding: Vec<f32>,
1490 metadata: Option<HashMap<String, String>>,
1491 timestamp: Option<Timestamp>,
1492 source: Option<Source>,
1493 namespace: Option<&str>,
1494 ) -> Result<AddResult> {
1495 // Validate embedding dimension
1496 if embedding.len() != self.config.embedding_dim {
1497 return Err(Error::InvalidEmbeddingDimension {
1498 expected: self.config.embedding_dim,
1499 got: embedding.len(),
1500 });
1501 }
1502
1503 // Create memory
1504 let mut memory = if let Some(ts) = timestamp {
1505 Memory::new_with_timestamp(content, embedding, ts)
1506 } else {
1507 Memory::new(content, embedding)
1508 };
1509
1510 // Add metadata
1511 if let Some(meta) = metadata {
1512 for (key, value) in meta {
1513 memory.set_metadata(key, value);
1514 }
1515 }
1516
1517 // Add source
1518 if let Some(src) = source {
1519 memory.set_source(src)?;
1520 }
1521
1522 // Set namespace if provided
1523 memory.set_namespace(namespace.unwrap_or(""));
1524
1525 // Delegate to pipeline with deduplication
1526 self.pipeline.add_with_dedup(memory)
1527 }
1528
1529 /// Upsert a memory by logical key
1530 ///
1531 /// If key exists: replaces content, embedding, and metadata
1532 /// If key doesn't exist: creates new memory and associates with key
1533 ///
1534 /// This is useful for updating facts that may change over time.
1535 ///
1536 /// # Arguments
1537 ///
1538 /// * `key` - Logical key (e.g., "user_profile:123", "doc:readme")
1539 /// * `content` - Text content
1540 /// * `embedding` - Vector embedding
1541 /// * `metadata` - Optional metadata
1542 /// * `timestamp` - Optional custom timestamp
1543 /// * `source` - Optional source/provenance
1544 ///
1545 /// # Returns
1546 ///
1547 /// UpsertResult indicating whether memory was created or updated
1548 ///
1549 /// # Example
1550 ///
1551 /// ```no_run
1552 /// use mnemefusion_core::{MemoryEngine, Config};
1553 ///
1554 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1555 /// let embedding = vec![0.1; 384];
1556 ///
1557 /// // First upsert - creates new
1558 /// let result1 = engine.upsert(
1559 /// "user:profile",
1560 /// "Alice likes hiking".to_string(),
1561 /// embedding.clone(),
1562 /// None,
1563 /// None,
1564 /// None,
1565 /// None,
1566 /// ).unwrap();
1567 /// assert!(result1.created);
1568 ///
1569 /// // Second upsert - updates existing
1570 /// let result2 = engine.upsert(
1571 /// "user:profile",
1572 /// "Alice likes hiking and photography".to_string(),
1573 /// vec![0.2; 384],
1574 /// None,
1575 /// None,
1576 /// None,
1577 /// None,
1578 /// ).unwrap();
1579 /// assert!(result2.updated);
1580 /// assert_eq!(result2.previous_content, Some("Alice likes hiking".to_string()));
1581 /// ```
1582 pub fn upsert(
1583 &self,
1584 key: &str,
1585 content: String,
1586 embedding: Vec<f32>,
1587 metadata: Option<HashMap<String, String>>,
1588 timestamp: Option<Timestamp>,
1589 source: Option<Source>,
1590 namespace: Option<&str>,
1591 ) -> Result<UpsertResult> {
1592 // Validate embedding dimension
1593 if embedding.len() != self.config.embedding_dim {
1594 return Err(Error::InvalidEmbeddingDimension {
1595 expected: self.config.embedding_dim,
1596 got: embedding.len(),
1597 });
1598 }
1599
1600 // Create memory
1601 let mut memory = if let Some(ts) = timestamp {
1602 Memory::new_with_timestamp(content, embedding, ts)
1603 } else {
1604 Memory::new(content, embedding)
1605 };
1606
1607 // Add metadata
1608 if let Some(meta) = metadata {
1609 for (key, value) in meta {
1610 memory.set_metadata(key, value);
1611 }
1612 }
1613
1614 // Add source
1615 if let Some(src) = source {
1616 memory.set_source(src)?;
1617 }
1618
1619 // Set namespace if provided
1620 memory.set_namespace(namespace.unwrap_or(""));
1621
1622 // Delegate to pipeline
1623 self.pipeline.upsert(key, memory)
1624 }
1625
    /// Get the number of memories in the database
    ///
    /// # Errors
    ///
    /// Propagates any underlying storage error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use mnemefusion_core::{MemoryEngine, Config};
    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// let count = engine.count().unwrap();
    /// println!("Total memories: {}", count);
    /// ```
    pub fn count(&self) -> Result<usize> {
        // Counts across ALL namespaces; there is no per-namespace filter here.
        self.storage.count_memories()
    }
1639
    /// List all memory IDs (for debugging/testing)
    ///
    /// # Warning
    ///
    /// This loads all memory IDs into memory. Use with caution on large databases.
    pub fn list_ids(&self) -> Result<Vec<MemoryId>> {
        // Direct passthrough to storage; no ordering guarantee is documented here.
        self.storage.list_memory_ids()
    }
1648
1649 /// Update the embedding vector for an existing memory.
1650 ///
1651 /// This updates both the stored memory record (used by MMR diversity) and
1652 /// the HNSW vector index (used by semantic search). The memory content,
1653 /// metadata, and all other fields are preserved.
1654 ///
1655 /// # Arguments
1656 ///
1657 /// * `id` - The memory ID to update
1658 /// * `new_embedding` - The new embedding vector (must match configured dimension)
1659 ///
1660 /// # Errors
1661 ///
1662 /// Returns error if the memory doesn't exist or the embedding dimension is wrong.
1663 pub fn update_embedding(&self, id: &MemoryId, new_embedding: Vec<f32>) -> Result<()> {
1664 // Load existing memory
1665 let mut memory = self
1666 .storage
1667 .get_memory(id)?
1668 .ok_or_else(|| Error::MemoryNotFound(id.to_string()))?;
1669
1670 // Validate dimension
1671 let expected = self.config.embedding_dim;
1672 if new_embedding.len() != expected {
1673 return Err(Error::InvalidEmbeddingDimension {
1674 expected,
1675 got: new_embedding.len(),
1676 });
1677 }
1678
1679 // Update embedding in memory record
1680 memory.embedding = new_embedding.clone();
1681
1682 // Update storage (redb)
1683 self.storage.store_memory(&memory)?;
1684
1685 // Update vector index (usearch HNSW)
1686 let mut vi = self.vector_index.write().unwrap();
1687 // Remove old vector, then add new one
1688 let _ = vi.remove(id); // ignore error if not found (fresh index)
1689 vi.add(id.clone(), &new_embedding)?;
1690
1691 Ok(())
1692 }
1693
    /// Get a reference to the engine configuration this instance was opened with.
    pub fn config(&self) -> &Config {
        &self.config
    }
1698
    /// Reserve capacity in the vector index for future insertions
    ///
    /// This is useful when you know you'll be adding many memories
    /// and want to avoid repeated reallocations, improving performance.
    ///
    /// # Arguments
    ///
    /// * `capacity` - Number of vectors to reserve space for
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use mnemefusion_core::{MemoryEngine, Config};
    /// # let mut engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// // Reserve space for 10,000 memories before bulk insertion
    /// engine.reserve_capacity(10_000).unwrap();
    /// ```
    pub fn reserve_capacity(&self, capacity: usize) -> Result<()> {
        // Delegated: the pipeline owns the write path to the vector index.
        self.pipeline.reserve_capacity(capacity)
    }
1719
1720 /// Search for memories by semantic similarity
1721 ///
1722 /// # Arguments
1723 ///
1724 /// * `query_embedding` - The query vector to search for
1725 /// * `top_k` - Maximum number of results to return
1726 /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
1727 ///
1728 /// # Returns
1729 ///
1730 /// A vector of (Memory, similarity_score) tuples, sorted by similarity (highest first)
1731 ///
1732 /// # Example
1733 ///
1734 /// ```no_run
1735 /// # use mnemefusion_core::{MemoryEngine, Config};
1736 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1737 /// # let query_embedding = vec![0.1; 384];
1738 /// let results = engine.search(&query_embedding, 10, None, None).unwrap();
1739 /// for (memory, score) in results {
1740 /// println!("Similarity: {:.3} - {}", score, memory.content);
1741 /// }
1742 /// ```
1743 pub fn search(
1744 &self,
1745 query_embedding: &[f32],
1746 top_k: usize,
1747 namespace: Option<&str>,
1748 filters: Option<&[MetadataFilter]>,
1749 ) -> Result<Vec<(Memory, f32)>> {
1750 // If filtering is needed, fetch more results (5x) and filter
1751 let needs_filtering =
1752 namespace.is_some() || (filters.is_some() && !filters.unwrap().is_empty());
1753 let fetch_k = if needs_filtering { top_k * 5 } else { top_k };
1754
1755 // Search vector index
1756 let vector_results = {
1757 let index = self.vector_index.read().unwrap();
1758 index.search(query_embedding, fetch_k)?
1759 };
1760
1761 // Retrieve full memory records using u64 lookup
1762 let mut results = Vec::with_capacity(vector_results.len());
1763
1764 for vector_result in vector_results {
1765 // Look up memory using the u64 key from vector index
1766 let key = vector_result.id.to_u64();
1767 if let Some(memory) = self.storage.get_memory_by_u64(key)? {
1768 // Filter by namespace if provided
1769 if let Some(ns) = namespace {
1770 if memory.get_namespace() != ns {
1771 continue;
1772 }
1773 }
1774
1775 // Filter by metadata if provided
1776 if let Some(filter_list) = filters {
1777 if !Self::memory_matches_filters(&memory, filter_list) {
1778 continue;
1779 }
1780 }
1781
1782 results.push((memory, vector_result.similarity));
1783
1784 // Stop if we have enough results after filtering
1785 if results.len() >= top_k {
1786 break;
1787 }
1788 }
1789 }
1790
1791 Ok(results)
1792 }
1793
1794 /// Check if a memory matches all metadata filters
1795 fn memory_matches_filters(memory: &Memory, filters: &[MetadataFilter]) -> bool {
1796 for filter in filters {
1797 let value = memory.metadata.get(&filter.field).map(|s| s.as_str());
1798 if !filter.matches(value) {
1799 return false;
1800 }
1801 }
1802 true
1803 }
1804
    /// Intelligent multi-dimensional query with intent classification
    ///
    /// This method performs intent-aware retrieval across all dimensions:
    /// - Classifies the query intent (temporal, causal, entity, factual)
    /// - Retrieves results from relevant dimensions
    /// - Fuses results with adaptive weights based on intent
    ///
    /// # Arguments
    ///
    /// * `query_text` - Natural language query text
    /// * `query_embedding` - Vector embedding of the query; pass `None` to auto-compute
    /// * `limit` - Maximum number of results to return
    /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
    /// * `filters` - Optional metadata filters forwarded to the query planner
    ///
    /// # Returns
    ///
    /// Tuple of (intent classification, fused results with full memory records,
    /// profile-context strings derived from matched entity facts)
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use mnemefusion_core::{MemoryEngine, Config};
    /// # let engine = MemoryEngine::open("test.mfdb", Config::default()).unwrap();
    /// # let query_embedding = vec![0.1; 384];
    /// let (intent, results, profile_context) = engine.query(
    ///     "Why was the meeting cancelled?",
    ///     &query_embedding,
    ///     10,
    ///     None,
    ///     None
    /// ).unwrap();
    ///
    /// println!("Query intent: {:?}", intent.intent);
    /// println!("Profile context: {} entries", profile_context.len());
    /// for result in results {
    ///     println!("Score: {:.3} - {}", result.1.fused_score, result.0.content);
    /// }
    /// ```
    pub fn query(
        &self,
        query_text: &str,
        query_embedding: impl Into<Option<Vec<f32>>>,
        limit: usize,
        namespace: Option<&str>,
        filters: Option<&[MetadataFilter]>,
    ) -> Result<(
        IntentClassification,
        Vec<(Memory, FusedResult)>,
        Vec<String>,
    )> {
        // Resolve query embedding: use provided value or auto-compute from query text
        let embedding_vec: Vec<f32> = match query_embedding.into() {
            Some(e) => e,
            None => self.auto_embed(query_text)?,
        };

        // Apply default namespace if caller didn't supply one
        let effective_ns = namespace.or(self.default_namespace.as_deref());

        // Create trace recorder if tracing is enabled
        let mut trace_recorder = if self.enable_trace {
            Some(TraceRecorder::new("query"))
        } else {
            None
        };

        // Execute query using query planner.
        // Pass user_entity for first-person pronoun resolution:
        // when user says "I like hiking", the system maps "I" to their entity profile,
        // ensuring their memories get the Step 2.1 entity boost.
        let (intent, fused_results, matched_facts) = self.query_planner.query(
            query_text,
            &embedding_vec,
            limit,
            effective_ns,
            filters,
            self.user_entity.as_deref(),
            trace_recorder.as_mut(),
        )?;

        // Build profile context as SEPARATE strings (not mixed into results).
        // Profile facts contain entity knowledge ("Caroline's hobby: painting") but
        // lack dates, speaker context, and conversational detail. Mixing them into
        // the results Vec with high scores pushes real memories out of top-K context.
        let mut profile_context = Vec::new();

        // Group matched facts by entity name
        let mut facts_by_entity: HashMap<String, Vec<&crate::query::MatchedProfileFact>> =
            HashMap::new();
        for fact in &matched_facts {
            facts_by_entity
                .entry(fact.entity_name.clone())
                .or_default()
                .push(fact);
        }

        // NOTE(review): HashMap iteration order is unspecified, so the ordering of
        // `profile_context` entries may vary run-to-run — confirm no caller relies
        // on a stable order.
        for (entity_name, facts) in &facts_by_entity {
            // If profile has a pre-computed summary, use it as ONE context item
            // Otherwise, fall back to individual fact format
            let profile_summary = self
                .storage
                .get_entity_profile(entity_name)
                .ok()
                .flatten()
                .and_then(|p| p.summary.clone());

            if let Some(summary) = profile_summary {
                profile_context.push(summary);
            } else {
                // No summary — format individual facts
                for fact in facts {
                    let content = format!(
                        "{}'s {}: {}",
                        fact.entity_name,
                        fact.fact_type.replace('_', " "),
                        fact.value
                    );
                    profile_context.push(content);
                }
            }
        }

        // Retrieve full memory records using u64 key lookup
        // Note: Vector index returns partial MemoryIds (first 8 bytes only),
        // so we use get_memory_by_u64 which looks up the full UUID from the index table
        let mut results = Vec::new();
        for fused_result in fused_results {
            let key = fused_result.id.to_u64();
            if let Some(memory) = self.storage.get_memory_by_u64(key)? {
                results.push((memory, fused_result));
            }
        }

        // Store trace if recording
        if let Some(rec) = trace_recorder {
            *self.last_query_trace.borrow_mut() = Some(rec.finish());
        }

        Ok((intent, results, profile_context))
    }
1945
1946 /// Returns the trace from the most recent `query()` call, if tracing is enabled.
1947 pub fn last_query_trace(&self) -> Option<Trace> {
1948 self.last_query_trace.borrow().clone()
1949 }
1950
1951 /// Query memories within a time range
1952 ///
1953 /// Returns memories whose timestamps fall within the specified range,
1954 /// sorted by timestamp (newest first).
1955 ///
1956 /// # Arguments
1957 ///
1958 /// * `start` - Start of the time range (inclusive)
1959 /// * `end` - End of the time range (inclusive)
1960 /// * `limit` - Maximum number of results to return
1961 /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
1962 ///
1963 /// # Returns
1964 ///
1965 /// A vector of (Memory, Timestamp) tuples, sorted newest first
1966 ///
1967 /// # Example
1968 ///
1969 /// ```no_run
1970 /// # use mnemefusion_core::{MemoryEngine, Config, Timestamp};
1971 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1972 /// let now = Timestamp::now();
1973 /// let week_ago = now.subtract_days(7);
1974 ///
1975 /// let results = engine.get_range(week_ago, now, 100, None).unwrap();
1976 /// for (memory, timestamp) in results {
1977 /// println!("{}: {}", timestamp.as_unix_secs(), memory.content);
1978 /// }
1979 /// ```
1980 pub fn get_range(
1981 &self,
1982 start: Timestamp,
1983 end: Timestamp,
1984 limit: usize,
1985 namespace: Option<&str>,
1986 ) -> Result<Vec<(Memory, Timestamp)>> {
1987 // Fetch more results if filtering by namespace
1988 let fetch_limit = if namespace.is_some() {
1989 limit * 3
1990 } else {
1991 limit
1992 };
1993 let temporal_results = self.temporal_index.range_query(start, end, fetch_limit)?;
1994
1995 // Retrieve and filter full memory records
1996 let mut results = Vec::with_capacity(temporal_results.len());
1997
1998 for temp_result in temporal_results {
1999 if let Some(memory) = self.storage.get_memory(&temp_result.id)? {
2000 // Filter by namespace if provided
2001 if let Some(ns) = namespace {
2002 if memory.get_namespace() != ns {
2003 continue;
2004 }
2005 }
2006 results.push((memory, temp_result.timestamp));
2007
2008 // Stop if we have enough results after filtering
2009 if results.len() >= limit {
2010 break;
2011 }
2012 }
2013 }
2014
2015 Ok(results)
2016 }
2017
2018 /// Get the N most recent memories
2019 ///
2020 /// Returns the most recent memories, sorted by timestamp (newest first).
2021 ///
2022 /// # Arguments
2023 ///
2024 /// * `n` - Number of recent memories to retrieve
2025 /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
2026 ///
2027 /// # Returns
2028 ///
2029 /// A vector of (Memory, Timestamp) tuples, sorted newest first
2030 ///
2031 /// # Example
2032 ///
2033 /// ```no_run
2034 /// # use mnemefusion_core::{MemoryEngine, Config};
2035 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2036 /// let recent = engine.get_recent(10, None).unwrap();
2037 /// println!("10 most recent memories:");
2038 /// for (memory, timestamp) in recent {
2039 /// println!(" {} - {}", timestamp.as_unix_secs(), memory.content);
2040 /// }
2041 /// ```
2042 pub fn get_recent(
2043 &self,
2044 n: usize,
2045 namespace: Option<&str>,
2046 ) -> Result<Vec<(Memory, Timestamp)>> {
2047 // Fetch more results if filtering by namespace
2048 let fetch_n = if namespace.is_some() { n * 3 } else { n };
2049 let temporal_results = self.temporal_index.recent(fetch_n)?;
2050
2051 // Retrieve and filter full memory records
2052 let mut results = Vec::with_capacity(temporal_results.len());
2053
2054 for temp_result in temporal_results {
2055 if let Some(memory) = self.storage.get_memory(&temp_result.id)? {
2056 // Filter by namespace if provided
2057 if let Some(ns) = namespace {
2058 if memory.get_namespace() != ns {
2059 continue;
2060 }
2061 }
2062 results.push((memory, temp_result.timestamp));
2063
2064 // Stop if we have enough results after filtering
2065 if results.len() >= n {
2066 break;
2067 }
2068 }
2069 }
2070
2071 Ok(results)
2072 }
2073
2074 /// Add a causal link between two memories
2075 ///
2076 /// Links a cause memory to an effect memory with a confidence score.
2077 ///
2078 /// # Arguments
2079 ///
2080 /// * `cause` - The MemoryId of the cause
2081 /// * `effect` - The MemoryId of the effect
2082 /// * `confidence` - Confidence score (0.0 to 1.0)
2083 /// * `evidence` - Evidence text explaining the causal relationship
2084 ///
2085 /// # Errors
2086 ///
2087 /// Returns error if confidence is not in range [0.0, 1.0]
2088 ///
2089 /// # Example
2090 ///
2091 /// ```no_run
2092 /// # use mnemefusion_core::{MemoryEngine, Config};
2093 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2094 /// # let id1 = engine.add("Cause".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2095 /// # let id2 = engine.add("Effect".to_string(), vec![0.2; 384], None, None, None, None).unwrap();
2096 /// engine.add_causal_link(&id1, &id2, 0.9, "id1 caused id2".to_string()).unwrap();
2097 /// ```
2098 pub fn add_causal_link(
2099 &self,
2100 cause: &MemoryId,
2101 effect: &MemoryId,
2102 confidence: f32,
2103 evidence: String,
2104 ) -> Result<()> {
2105 // Add the causal link to the graph
2106 {
2107 let mut graph = self.graph_manager.write().unwrap();
2108 graph.add_causal_link(cause, effect, confidence, evidence)?;
2109 }
2110
2111 // Persist graph immediately for crash recovery
2112 // This ensures causal links are durable
2113 {
2114 let graph = self.graph_manager.read().unwrap();
2115 crate::graph::persist::save_graph(&graph, &self.storage)?;
2116 }
2117
2118 Ok(())
2119 }
2120
2121 /// Get causes of a memory (backward traversal)
2122 ///
2123 /// Finds all memories that causally precede the given memory, up to max_hops.
2124 ///
2125 /// # Arguments
2126 ///
2127 /// * `memory_id` - The memory to find causes for
2128 /// * `max_hops` - Maximum traversal depth
2129 ///
2130 /// # Returns
2131 ///
2132 /// CausalTraversalResult with all paths found
2133 ///
2134 /// # Example
2135 ///
2136 /// ```no_run
2137 /// # use mnemefusion_core::{MemoryEngine, Config};
2138 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2139 /// # let id = engine.add("Memory".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2140 /// let causes = engine.get_causes(&id, 3).unwrap();
2141 /// for path in causes.paths {
2142 /// println!("Found causal path with {} steps (confidence: {})",
2143 /// path.memories.len(), path.confidence);
2144 /// }
2145 /// ```
2146 pub fn get_causes(
2147 &self,
2148 memory_id: &MemoryId,
2149 max_hops: usize,
2150 ) -> Result<CausalTraversalResult> {
2151 let graph = self.graph_manager.read().unwrap();
2152 graph.get_causes(memory_id, max_hops)
2153 }
2154
2155 /// Get effects of a memory (forward traversal)
2156 ///
2157 /// Finds all memories that causally follow the given memory, up to max_hops.
2158 ///
2159 /// # Arguments
2160 ///
2161 /// * `memory_id` - The memory to find effects for
2162 /// * `max_hops` - Maximum traversal depth
2163 ///
2164 /// # Returns
2165 ///
2166 /// CausalTraversalResult with all paths found
2167 ///
2168 /// # Example
2169 ///
2170 /// ```no_run
2171 /// # use mnemefusion_core::{MemoryEngine, Config};
2172 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2173 /// # let id = engine.add("Memory".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2174 /// let effects = engine.get_effects(&id, 3).unwrap();
2175 /// for path in effects.paths {
2176 /// println!("Found effect chain with {} steps (confidence: {})",
2177 /// path.memories.len(), path.confidence);
2178 /// }
2179 /// ```
2180 pub fn get_effects(
2181 &self,
2182 memory_id: &MemoryId,
2183 max_hops: usize,
2184 ) -> Result<CausalTraversalResult> {
2185 let graph = self.graph_manager.read().unwrap();
2186 graph.get_effects(memory_id, max_hops)
2187 }
2188
2189 // ========== Namespace Operations ==========
2190
2191 /// List all namespaces in the database
2192 ///
2193 /// Returns a sorted list of all unique namespace strings, excluding the default namespace ("").
2194 ///
2195 /// # Performance
2196 ///
2197 /// O(n) where n = total memories. This scans all memories to extract namespaces.
2198 ///
2199 /// # Example
2200 ///
2201 /// ```no_run
2202 /// # use mnemefusion_core::{MemoryEngine, Config};
2203 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2204 /// let namespaces = engine.list_namespaces().unwrap();
2205 /// for ns in namespaces {
2206 /// println!("Namespace: {}", ns);
2207 /// }
2208 /// ```
2209 pub fn list_namespaces(&self) -> Result<Vec<String>> {
2210 self.storage.list_namespaces()
2211 }
2212
2213 /// Count memories in a specific namespace
2214 ///
2215 /// # Arguments
2216 ///
2217 /// * `namespace` - The namespace to count (empty string "" for default namespace)
2218 ///
2219 /// # Returns
2220 ///
2221 /// Number of memories in the namespace
2222 ///
2223 /// # Example
2224 ///
2225 /// ```no_run
2226 /// # use mnemefusion_core::{MemoryEngine, Config};
2227 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2228 /// let count = engine.count_namespace("user_123").unwrap();
2229 /// println!("User has {} memories", count);
2230 /// ```
2231 pub fn count_namespace(&self, namespace: &str) -> Result<usize> {
2232 self.storage.count_namespace(namespace)
2233 }
2234
2235 /// Delete all memories in a namespace
2236 ///
2237 /// This is a convenience method that lists all memory IDs in the namespace
2238 /// and deletes them via the ingestion pipeline (ensuring proper cleanup of indexes).
2239 ///
2240 /// # Arguments
2241 ///
2242 /// * `namespace` - The namespace to delete (empty string "" for default namespace)
2243 ///
2244 /// # Returns
2245 ///
2246 /// Number of memories deleted
2247 ///
2248 /// # Warning
2249 ///
2250 /// This operation cannot be undone. Use with caution.
2251 ///
2252 /// # Example
2253 ///
2254 /// ```no_run
2255 /// # use mnemefusion_core::{MemoryEngine, Config};
2256 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2257 /// let deleted = engine.delete_namespace("old_user").unwrap();
2258 /// println!("Deleted {} memories from namespace", deleted);
2259 /// ```
2260 pub fn delete_namespace(&self, namespace: &str) -> Result<usize> {
2261 // Get all memory IDs in this namespace
2262 let ids = self.storage.list_namespace_ids(namespace)?;
2263
2264 // Delete via pipeline for proper cleanup
2265 self.pipeline.delete_batch(ids)
2266 }
2267
2268 // ========== Entity Operations ==========
2269
2270 /// Get all memories that mention a specific entity
2271 ///
2272 /// # Arguments
2273 ///
2274 /// * `entity_name` - The name of the entity to query (case-insensitive)
2275 ///
2276 /// # Returns
2277 ///
2278 /// A vector of Memory objects that mention this entity
2279 ///
2280 /// # Example
2281 ///
2282 /// ```no_run
2283 /// # use mnemefusion_core::{MemoryEngine, Config};
2284 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2285 /// let memories = engine.get_entity_memories("Project Alpha").unwrap();
2286 /// for memory in memories {
2287 /// println!("{}", memory.content);
2288 /// }
2289 /// ```
2290 pub fn get_entity_memories(&self, entity_name: &str) -> Result<Vec<Memory>> {
2291 // Find the entity by name
2292 let entity = self.storage.find_entity_by_name(entity_name)?;
2293
2294 match entity {
2295 Some(entity) => {
2296 // Query entity graph
2297 let graph = self.graph_manager.read().unwrap();
2298 let result = graph.get_entity_memories(&entity.id);
2299
2300 // Retrieve full memory records
2301 let mut memories = Vec::with_capacity(result.memories.len());
2302 for memory_id in result.memories {
2303 if let Some(memory) = self.storage.get_memory(&memory_id)? {
2304 memories.push(memory);
2305 }
2306 }
2307
2308 Ok(memories)
2309 }
2310 None => Ok(Vec::new()), // Entity not found, return empty list
2311 }
2312 }
2313
2314 /// Get all entities mentioned in a specific memory
2315 ///
2316 /// # Arguments
2317 ///
2318 /// * `memory_id` - The memory to query
2319 ///
2320 /// # Returns
2321 ///
2322 /// A vector of Entity objects mentioned in this memory
2323 ///
2324 /// # Example
2325 ///
2326 /// ```no_run
2327 /// # use mnemefusion_core::{MemoryEngine, Config};
2328 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2329 /// # let id = engine.add("Alice met Bob".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2330 /// let entities = engine.get_memory_entities(&id).unwrap();
2331 /// for entity in entities {
2332 /// println!("Entity: {}", entity.name);
2333 /// }
2334 /// ```
2335 pub fn get_memory_entities(&self, memory_id: &MemoryId) -> Result<Vec<Entity>> {
2336 // Query entity graph
2337 let graph = self.graph_manager.read().unwrap();
2338 let entity_ids = graph.get_memory_entities(memory_id);
2339
2340 // Retrieve full entity records
2341 let mut entities = Vec::with_capacity(entity_ids.len());
2342 for entity_id in entity_ids {
2343 if let Some(entity) = self.storage.get_entity(&entity_id)? {
2344 entities.push(entity);
2345 }
2346 }
2347
2348 Ok(entities)
2349 }
2350
2351 /// List all entities in the database
2352 ///
2353 /// # Returns
2354 ///
2355 /// A vector of all Entity objects
2356 ///
2357 /// # Example
2358 ///
2359 /// ```no_run
2360 /// # use mnemefusion_core::{MemoryEngine, Config};
2361 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2362 /// let all_entities = engine.list_entities().unwrap();
2363 /// for entity in all_entities {
2364 /// println!("{}: {} mentions", entity.name, entity.mention_count);
2365 /// }
2366 /// ```
2367 pub fn list_entities(&self) -> Result<Vec<Entity>> {
2368 self.storage.list_entities()
2369 }
2370
2371 // ========== Entity Profile Operations ==========
2372
2373 /// Get the profile for an entity by name
2374 ///
2375 /// Entity profiles aggregate facts about entities across all memories.
2376 /// They are automatically built during ingestion when SLM metadata extraction
2377 /// is enabled.
2378 ///
2379 /// # Arguments
2380 ///
2381 /// * `name` - The entity name (case-insensitive)
2382 ///
2383 /// # Returns
2384 ///
2385 /// The EntityProfile if found, or None
2386 ///
2387 /// # Example
2388 ///
2389 /// ```no_run
2390 /// # use mnemefusion_core::{MemoryEngine, Config};
2391 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2392 /// if let Some(profile) = engine.get_entity_profile("Alice").unwrap() {
2393 /// println!("Entity: {} ({})", profile.name, profile.entity_type);
2394 ///
2395 /// // Get facts about Alice's occupation
2396 /// for fact in profile.get_facts("occupation") {
2397 /// println!(" Occupation: {} (confidence: {})", fact.value, fact.confidence);
2398 /// }
2399 ///
2400 /// // Get facts about Alice's research
2401 /// for fact in profile.get_facts("research_topic") {
2402 /// println!(" Research: {} (confidence: {})", fact.value, fact.confidence);
2403 /// }
2404 /// }
2405 /// ```
2406 pub fn get_entity_profile(&self, name: &str) -> Result<Option<EntityProfile>> {
2407 self.storage.get_entity_profile(name)
2408 }
2409
2410 /// List all entity profiles in the database
2411 ///
2412 /// # Returns
2413 ///
2414 /// A vector of all EntityProfile objects
2415 ///
2416 /// # Example
2417 ///
2418 /// ```no_run
2419 /// # use mnemefusion_core::{MemoryEngine, Config};
2420 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2421 /// let profiles = engine.list_entity_profiles().unwrap();
2422 /// for profile in profiles {
2423 /// println!("{} ({}) - {} facts from {} memories",
2424 /// profile.name,
2425 /// profile.entity_type,
2426 /// profile.total_facts(),
2427 /// profile.source_memories.len()
2428 /// );
2429 /// }
2430 /// ```
2431 pub fn list_entity_profiles(&self) -> Result<Vec<EntityProfile>> {
2432 self.storage.list_entity_profiles()
2433 }
2434
2435 /// Count entity profiles in the database
2436 ///
2437 /// # Returns
2438 ///
2439 /// The number of entity profiles
2440 ///
2441 /// # Example
2442 ///
2443 /// ```no_run
2444 /// # use mnemefusion_core::{MemoryEngine, Config};
2445 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2446 /// let count = engine.count_entity_profiles().unwrap();
2447 /// println!("Total entity profiles: {}", count);
2448 /// ```
2449 pub fn count_entity_profiles(&self) -> Result<usize> {
2450 self.storage.count_entity_profiles()
2451 }
2452
2453 /// Create a scoped view for namespace-specific operations
2454 ///
2455 /// Returns a ScopedMemory that automatically applies the namespace to all operations.
2456 /// This provides a more ergonomic API when working with a single namespace.
2457 ///
2458 /// # Arguments
2459 ///
2460 /// * `namespace` - The namespace to scope to (empty string "" for default namespace)
2461 ///
2462 /// # Returns
2463 ///
2464 /// A ScopedMemory view bound to this namespace
2465 ///
2466 /// # Example
2467 ///
2468 /// ```no_run
2469 /// # use mnemefusion_core::{MemoryEngine, Config};
2470 /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2471 /// // Create scoped view for a user
2472 /// let user_memory = engine.scope("user_123");
2473 ///
2474 /// // All operations automatically use the namespace
2475 /// let id = user_memory.add("User note".to_string(), vec![0.1; 384], None, None, None).unwrap();
2476 /// let results = user_memory.search(&vec![0.1; 384], 10, None).unwrap();
2477 /// let count = user_memory.count().unwrap();
2478 /// user_memory.delete_all().unwrap();
2479 /// ```
2480 pub fn scope<S: Into<String>>(&self, namespace: S) -> ScopedMemory<'_> {
2481 ScopedMemory {
2482 engine: self,
2483 namespace: namespace.into(),
2484 }
2485 }
2486
2487 /// Close the database
2488 ///
2489 /// This saves all indexes and ensures all data is flushed to disk.
2490 /// While not strictly necessary (redb handles persistence automatically),
2491 /// it's good practice to call this explicitly when you're done.
2492 ///
2493 /// # Example
2494 ///
2495 /// ```no_run
2496 /// # use mnemefusion_core::{MemoryEngine, Config};
2497 /// let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2498 /// // ... use engine ...
2499 /// engine.close().unwrap();
2500 /// ```
2501 pub fn close(self) -> Result<()> {
2502 // Save vector index
2503 {
2504 let index = self.vector_index.read().unwrap();
2505 index.save()?;
2506 }
2507
2508 // Save BM25 index
2509 self.bm25_index.save()?;
2510
2511 // Save causal graph
2512 {
2513 let graph = self.graph_manager.read().unwrap();
2514 crate::graph::persist::save_graph(&graph, &self.storage)?;
2515 }
2516
2517 // Storage (redb) handles persistence automatically
2518 Ok(())
2519 }
2520}
2521
2522/// Scoped memory view for namespace-specific operations
2523///
2524/// This wrapper provides an ergonomic API for working with a single namespace.
2525/// All operations automatically apply the namespace, eliminating the need to pass
2526/// it to every method call.
2527///
2528/// Created via `MemoryEngine::scope()`.
2529///
2530/// # Example
2531///
2532/// ```no_run
2533/// use mnemefusion_core::{MemoryEngine, Config};
2534///
2535/// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2536/// // Create scoped view for a specific user
2537/// let user_memory = engine.scope("user_123");
2538///
2539/// // Add memory (automatically in user_123 namespace)
2540/// let id = user_memory.add(
2541/// "User note".to_string(),
2542/// vec![0.1; 384],
2543/// None,
2544/// None,
2545/// None,
2546/// ).unwrap();
2547///
2548/// // Search within namespace
2549/// let results = user_memory.search(&vec![0.1; 384], 10, None).unwrap();
2550///
2551/// // Count memories in namespace
2552/// println!("User has {} memories", user_memory.count().unwrap());
2553///
2554/// // Delete all memories in namespace
2555/// user_memory.delete_all().unwrap();
2556/// ```
pub struct ScopedMemory<'a> {
    // Engine every call is forwarded to; the view borrows, never owns, it.
    engine: &'a MemoryEngine,
    // Namespace automatically applied to all operations on this view.
    namespace: String,
}
2561
2562impl<'a> ScopedMemory<'a> {
2563 /// Add a memory to this namespace
2564 ///
2565 /// Equivalent to calling `engine.add(..., Some(namespace))`
2566 ///
2567 /// # Arguments
2568 ///
2569 /// * `content` - Text content
2570 /// * `embedding` - Vector embedding
2571 /// * `metadata` - Optional metadata
2572 /// * `timestamp` - Optional custom timestamp
2573 /// * `source` - Optional source/provenance
2574 ///
2575 /// # Returns
2576 ///
2577 /// The ID of the created memory
2578 pub fn add(
2579 &self,
2580 content: String,
2581 embedding: Vec<f32>,
2582 metadata: Option<HashMap<String, String>>,
2583 timestamp: Option<Timestamp>,
2584 source: Option<Source>,
2585 ) -> Result<MemoryId> {
2586 self.engine.add(
2587 content,
2588 embedding,
2589 metadata,
2590 timestamp,
2591 source,
2592 Some(&self.namespace),
2593 )
2594 }
2595
2596 /// Search for memories in this namespace
2597 ///
2598 /// Equivalent to calling `engine.search(..., Some(namespace), filters)`
2599 ///
2600 /// # Arguments
2601 ///
2602 /// * `query_embedding` - Query vector
2603 /// * `top_k` - Maximum number of results
2604 /// * `filters` - Optional metadata filters
2605 ///
2606 /// # Returns
2607 ///
2608 /// Vector of (Memory, similarity_score) tuples
2609 pub fn search(
2610 &self,
2611 query_embedding: &[f32],
2612 top_k: usize,
2613 filters: Option<&[MetadataFilter]>,
2614 ) -> Result<Vec<(Memory, f32)>> {
2615 self.engine
2616 .search(query_embedding, top_k, Some(&self.namespace), filters)
2617 }
2618
2619 /// Delete a memory from this namespace
2620 ///
2621 /// Equivalent to calling `engine.delete(..., Some(namespace))`
2622 ///
2623 /// # Arguments
2624 ///
2625 /// * `id` - The memory ID to delete
2626 ///
2627 /// # Returns
2628 ///
2629 /// true if deleted, false if not found
2630 ///
2631 /// # Errors
2632 ///
2633 /// Returns `Error::NamespaceMismatch` if the memory exists but is in a different namespace
2634 pub fn delete(&self, id: &MemoryId) -> Result<bool> {
2635 self.engine.delete(id, Some(&self.namespace))
2636 }
2637
2638 /// Add multiple memories to this namespace in a batch
2639 ///
2640 /// Equivalent to calling `engine.add_batch(..., Some(namespace))`
2641 ///
2642 /// # Arguments
2643 ///
2644 /// * `inputs` - Vector of MemoryInput
2645 ///
2646 /// # Returns
2647 ///
2648 /// BatchResult with IDs and error information
2649 pub fn add_batch(&self, inputs: Vec<MemoryInput>) -> Result<BatchResult> {
2650 self.engine.add_batch(inputs, Some(&self.namespace))
2651 }
2652
2653 /// Delete multiple memories from this namespace
2654 ///
2655 /// Equivalent to calling `engine.delete_batch(..., Some(namespace))`
2656 ///
2657 /// # Arguments
2658 ///
2659 /// * `ids` - Vector of memory IDs
2660 ///
2661 /// # Returns
2662 ///
2663 /// Number of memories deleted
2664 pub fn delete_batch(&self, ids: Vec<MemoryId>) -> Result<usize> {
2665 self.engine.delete_batch(ids, Some(&self.namespace))
2666 }
2667
2668 /// Add a memory with deduplication in this namespace
2669 ///
2670 /// Equivalent to calling `engine.add_with_dedup(..., Some(namespace))`
2671 pub fn add_with_dedup(
2672 &self,
2673 content: String,
2674 embedding: Vec<f32>,
2675 metadata: Option<HashMap<String, String>>,
2676 timestamp: Option<Timestamp>,
2677 source: Option<Source>,
2678 ) -> Result<AddResult> {
2679 self.engine.add_with_dedup(
2680 content,
2681 embedding,
2682 metadata,
2683 timestamp,
2684 source,
2685 Some(&self.namespace),
2686 )
2687 }
2688
2689 /// Upsert a memory in this namespace
2690 ///
2691 /// Equivalent to calling `engine.upsert(..., Some(namespace))`
2692 pub fn upsert(
2693 &self,
2694 key: &str,
2695 content: String,
2696 embedding: Vec<f32>,
2697 metadata: Option<HashMap<String, String>>,
2698 timestamp: Option<Timestamp>,
2699 source: Option<Source>,
2700 ) -> Result<UpsertResult> {
2701 self.engine.upsert(
2702 key,
2703 content,
2704 embedding,
2705 metadata,
2706 timestamp,
2707 source,
2708 Some(&self.namespace),
2709 )
2710 }
2711
2712 /// Count memories in this namespace
2713 ///
2714 /// Equivalent to calling `engine.count_namespace(namespace)`
2715 ///
2716 /// # Returns
2717 ///
2718 /// Number of memories in the namespace
2719 pub fn count(&self) -> Result<usize> {
2720 self.engine.count_namespace(&self.namespace)
2721 }
2722
2723 /// Delete all memories in this namespace
2724 ///
2725 /// Equivalent to calling `engine.delete_namespace(namespace)`
2726 ///
2727 /// # Returns
2728 ///
2729 /// Number of memories deleted
2730 ///
2731 /// # Warning
2732 ///
2733 /// This operation cannot be undone. Use with caution.
2734 pub fn delete_all(&self) -> Result<usize> {
2735 self.engine.delete_namespace(&self.namespace)
2736 }
2737
2738 /// Multi-dimensional query within this namespace
2739 ///
2740 /// Equivalent to calling `engine.query(..., Some(namespace), filters)`
2741 ///
2742 /// # Arguments
2743 ///
2744 /// * `query_text` - Natural language query
2745 /// * `query_embedding` - Query vector
2746 /// * `limit` - Maximum number of results
2747 /// * `filters` - Optional metadata filters
2748 ///
2749 /// # Returns
2750 ///
2751 /// Tuple of (intent classification, results, profile context)
2752 pub fn query(
2753 &self,
2754 query_text: &str,
2755 query_embedding: &[f32],
2756 limit: usize,
2757 filters: Option<&[MetadataFilter]>,
2758 ) -> Result<(
2759 IntentClassification,
2760 Vec<(Memory, FusedResult)>,
2761 Vec<String>,
2762 )> {
2763 self.engine.query(
2764 query_text,
2765 query_embedding.to_vec(),
2766 limit,
2767 Some(&self.namespace),
2768 filters,
2769 )
2770 }
2771
2772 /// Get memories in time range within this namespace
2773 ///
2774 /// Equivalent to calling `engine.get_range(..., Some(namespace))`
2775 pub fn get_range(
2776 &self,
2777 start: Timestamp,
2778 end: Timestamp,
2779 limit: usize,
2780 ) -> Result<Vec<(Memory, Timestamp)>> {
2781 self.engine
2782 .get_range(start, end, limit, Some(&self.namespace))
2783 }
2784
2785 /// Get recent memories within this namespace
2786 ///
2787 /// Equivalent to calling `engine.get_recent(..., Some(namespace))`
2788 pub fn get_recent(&self, n: usize) -> Result<Vec<(Memory, Timestamp)>> {
2789 self.engine.get_recent(n, Some(&self.namespace))
2790 }
2791
2792 /// Get the namespace this view is scoped to
2793 pub fn namespace(&self) -> &str {
2794 &self.namespace
2795 }
2796}
2797
2798#[cfg(test)]
2799mod tests {
2800 use super::*;
2801 use tempfile::tempdir;
2802
2803 #[test]
2804 fn test_memory_engine_open() {
2805 let dir = tempdir().unwrap();
2806 let path = dir.path().join("test.mfdb");
2807
2808 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2809 assert_eq!(engine.config().embedding_dim, 384);
2810 }
2811
2812 #[test]
2813 fn test_memory_engine_invalid_config() {
2814 let dir = tempdir().unwrap();
2815 let path = dir.path().join("test.mfdb");
2816
2817 let mut config = Config::default();
2818 config.embedding_dim = 0;
2819
2820 let result = MemoryEngine::open(&path, config);
2821 assert!(result.is_err());
2822 }
2823
2824 #[test]
2825 fn test_memory_engine_add_and_get() {
2826 let dir = tempdir().unwrap();
2827 let path = dir.path().join("test.mfdb");
2828 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2829
2830 let content = "Test memory content".to_string();
2831 let embedding = vec![0.1; 384];
2832
2833 let id = engine
2834 .add(content.clone(), embedding.clone(), None, None, None, None)
2835 .unwrap();
2836
2837 let memory = engine.get(&id).unwrap();
2838 assert!(memory.is_some());
2839
2840 let memory = memory.unwrap();
2841 assert_eq!(memory.content, content);
2842 assert_eq!(memory.embedding, embedding);
2843 }
2844
2845 #[test]
2846 fn test_memory_engine_invalid_dimension() {
2847 let dir = tempdir().unwrap();
2848 let path = dir.path().join("test.mfdb");
2849 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2850
2851 let result = engine.add(
2852 "test".to_string(),
2853 vec![0.1; 512], // Wrong dimension
2854 None,
2855 None,
2856 None,
2857 None,
2858 );
2859 assert!(result.is_err());
2860 }
2861
2862 #[test]
2863 fn test_memory_engine_with_metadata() {
2864 let dir = tempdir().unwrap();
2865 let path = dir.path().join("test.mfdb");
2866 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2867
2868 let mut metadata = HashMap::new();
2869 metadata.insert("source".to_string(), "test".to_string());
2870
2871 let id = engine
2872 .add(
2873 "test".to_string(),
2874 vec![0.1; 384],
2875 Some(metadata),
2876 None,
2877 None,
2878 None,
2879 )
2880 .unwrap();
2881
2882 let memory = engine.get(&id).unwrap().unwrap();
2883 assert_eq!(memory.metadata.get("source"), Some(&"test".to_string()));
2884 }
2885
2886 #[test]
2887 fn test_memory_engine_with_custom_timestamp() {
2888 let dir = tempdir().unwrap();
2889 let path = dir.path().join("test.mfdb");
2890 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2891
2892 let ts = Timestamp::from_unix_secs(1609459200.0); // 2021-01-01
2893 let id = engine
2894 .add(
2895 "test".to_string(),
2896 vec![0.1; 384],
2897 None,
2898 Some(ts),
2899 None,
2900 None,
2901 )
2902 .unwrap();
2903
2904 let memory = engine.get(&id).unwrap().unwrap();
2905 assert_eq!(memory.created_at, ts);
2906 }
2907
2908 #[test]
2909 fn test_memory_engine_delete() {
2910 let dir = tempdir().unwrap();
2911 let path = dir.path().join("test.mfdb");
2912 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2913
2914 let id = engine
2915 .add("test".to_string(), vec![0.1; 384], None, None, None, None)
2916 .unwrap();
2917
2918 let deleted = engine.delete(&id, None).unwrap();
2919 assert!(deleted);
2920
2921 let memory = engine.get(&id).unwrap();
2922 assert!(memory.is_none());
2923 }
2924
2925 #[test]
2926 fn test_memory_engine_count() {
2927 let dir = tempdir().unwrap();
2928 let path = dir.path().join("test.mfdb");
2929 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2930
2931 assert_eq!(engine.count().unwrap(), 0);
2932
2933 engine
2934 .add("test1".to_string(), vec![0.1; 384], None, None, None, None)
2935 .unwrap();
2936 assert_eq!(engine.count().unwrap(), 1);
2937
2938 engine
2939 .add("test2".to_string(), vec![0.2; 384], None, None, None, None)
2940 .unwrap();
2941 assert_eq!(engine.count().unwrap(), 2);
2942 }
2943
2944 #[test]
2945 fn test_memory_engine_list_ids() {
2946 let dir = tempdir().unwrap();
2947 let path = dir.path().join("test.mfdb");
2948 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2949
2950 let id1 = engine
2951 .add("test1".to_string(), vec![0.1; 384], None, None, None, None)
2952 .unwrap();
2953 let id2 = engine
2954 .add("test2".to_string(), vec![0.2; 384], None, None, None, None)
2955 .unwrap();
2956
2957 let ids = engine.list_ids().unwrap();
2958 assert_eq!(ids.len(), 2);
2959 assert!(ids.contains(&id1));
2960 assert!(ids.contains(&id2));
2961 }
2962
2963 #[test]
2964 fn test_memory_engine_persistence() {
2965 let dir = tempdir().unwrap();
2966 let path = dir.path().join("test.mfdb");
2967
2968 let id = {
2969 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2970 let id = engine
2971 .add(
2972 "persistent".to_string(),
2973 vec![0.5; 384],
2974 None,
2975 None,
2976 None,
2977 None,
2978 )
2979 .unwrap();
2980 engine.close().unwrap();
2981 id
2982 };
2983
2984 // Reopen and verify
2985 {
2986 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2987 let memory = engine.get(&id).unwrap();
2988 assert!(memory.is_some());
2989 assert_eq!(memory.unwrap().content, "persistent");
2990 }
2991 }
2992
    #[test]
    fn test_namespace_add_and_filter() {
        // End-to-end namespace check: memories added under different
        // namespaces carry the right tag, and search() honors the namespace
        // argument — Some(ns) restricts to that namespace, Some("") is the
        // default namespace, and None returns results from all namespaces.
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");
        let engine = MemoryEngine::open(&path, Config::default()).unwrap();

        // Add memories to different namespaces
        let id1 = engine
            .add(
                "User 1 memory".to_string(),
                vec![0.1; 384],
                None,
                None,
                None,
                Some("user_1"),
            )
            .unwrap();

        let id2 = engine
            .add(
                "User 2 memory".to_string(),
                vec![0.2; 384],
                None,
                None,
                None,
                Some("user_2"),
            )
            .unwrap();

        // No namespace argument → default ("") namespace.
        let id3 = engine
            .add(
                "Default memory".to_string(),
                vec![0.3; 384],
                None,
                None,
                None,
                None,
            )
            .unwrap();

        // Verify memories are in correct namespaces
        let mem1 = engine.get(&id1).unwrap().unwrap();
        assert_eq!(mem1.get_namespace(), "user_1");

        let mem2 = engine.get(&id2).unwrap().unwrap();
        assert_eq!(mem2.get_namespace(), "user_2");

        let mem3 = engine.get(&id3).unwrap().unwrap();
        assert_eq!(mem3.get_namespace(), "");

        // Test search with namespace filtering; the query embedding is close
        // to all three stored embeddings so only the namespace filter decides
        // which memories come back.
        let query_embedding = vec![0.15; 384];

        // Search in user_1 namespace
        let results = engine
            .search(&query_embedding, 10, Some("user_1"), None)
            .unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.id, id1);

        // Search in user_2 namespace
        let results = engine
            .search(&query_embedding, 10, Some("user_2"), None)
            .unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.id, id2);

        // Search in default namespace
        let results = engine.search(&query_embedding, 10, Some(""), None).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.id, id3);

        // Search without namespace filter (should get all)
        let results = engine.search(&query_embedding, 10, None, None).unwrap();
        assert_eq!(results.len(), 3);
    }
3069
3070 #[test]
3071 fn test_namespace_delete_with_verification() {
3072 let dir = tempdir().unwrap();
3073 let path = dir.path().join("test.mfdb");
3074 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3075
3076 // Add memory to namespace
3077 let id = engine
3078 .add(
3079 "User memory".to_string(),
3080 vec![0.1; 384],
3081 None,
3082 None,
3083 None,
3084 Some("user_1"),
3085 )
3086 .unwrap();
3087
3088 // Try to delete with wrong namespace - should fail
3089 let result = engine.delete(&id, Some("user_2"));
3090 assert!(result.is_err());
3091 assert!(matches!(
3092 result.unwrap_err(),
3093 crate::Error::NamespaceMismatch { .. }
3094 ));
3095
3096 // Verify memory still exists
3097 assert!(engine.get(&id).unwrap().is_some());
3098
3099 // Delete with correct namespace - should succeed
3100 let deleted = engine.delete(&id, Some("user_1")).unwrap();
3101 assert!(deleted);
3102
3103 // Verify memory is gone
3104 assert!(engine.get(&id).unwrap().is_none());
3105 }
3106
    #[test]
    fn test_namespace_management_methods() {
        // Covers the namespace admin surface: list_namespaces() (which omits
        // the default "" namespace), count_namespace(), and delete_namespace()
        // (bulk removal of everything in one namespace).
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");
        let engine = MemoryEngine::open(&path, Config::default()).unwrap();

        // Add memories to different namespaces: 2 in ns1, 1 in ns2,
        // 1 in the default namespace.
        engine
            .add(
                "Memory 1".to_string(),
                vec![0.1; 384],
                None,
                None,
                None,
                Some("ns1"),
            )
            .unwrap();
        engine
            .add(
                "Memory 2".to_string(),
                vec![0.2; 384],
                None,
                None,
                None,
                Some("ns1"),
            )
            .unwrap();
        engine
            .add(
                "Memory 3".to_string(),
                vec![0.3; 384],
                None,
                None,
                None,
                Some("ns2"),
            )
            .unwrap();
        engine
            .add(
                "Memory 4".to_string(),
                vec![0.4; 384],
                None,
                None,
                None,
                None,
            )
            .unwrap();

        // List namespaces — only the named ones appear, not the default "".
        let namespaces = engine.list_namespaces().unwrap();
        assert_eq!(namespaces.len(), 2);
        assert!(namespaces.contains(&"ns1".to_string()));
        assert!(namespaces.contains(&"ns2".to_string()));

        // Count in namespace
        assert_eq!(engine.count_namespace("ns1").unwrap(), 2);
        assert_eq!(engine.count_namespace("ns2").unwrap(), 1);
        assert_eq!(engine.count_namespace("").unwrap(), 1); // Default namespace

        // Delete entire namespace
        let deleted = engine.delete_namespace("ns1").unwrap();
        assert_eq!(deleted, 2);

        // Verify namespace is gone (count 0 and no longer listed)
        assert_eq!(engine.count_namespace("ns1").unwrap(), 0);
        let namespaces = engine.list_namespaces().unwrap();
        assert_eq!(namespaces.len(), 1);
        assert!(namespaces.contains(&"ns2".to_string()));

        // Total count should be 2 now (ns2 memory + default-namespace memory)
        assert_eq!(engine.count().unwrap(), 2);
    }
3179
3180 #[test]
3181 fn test_namespace_batch_operations() {
3182 use crate::types::MemoryInput;
3183
3184 let dir = tempdir().unwrap();
3185 let path = dir.path().join("test.mfdb");
3186 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3187
3188 // Create batch inputs
3189 let inputs = vec![
3190 MemoryInput::new("Memory 1".to_string(), vec![0.1; 384]),
3191 MemoryInput::new("Memory 2".to_string(), vec![0.2; 384]),
3192 MemoryInput::new("Memory 3".to_string(), vec![0.3; 384]),
3193 ];
3194
3195 // Add batch with namespace
3196 let result = engine.add_batch(inputs, Some("batch_ns")).unwrap();
3197 assert_eq!(result.created_count, 3);
3198 assert!(result.is_success());
3199
3200 // Verify all are in the namespace
3201 assert_eq!(engine.count_namespace("batch_ns").unwrap(), 3);
3202
3203 // Batch delete with namespace filter
3204 let deleted = engine
3205 .delete_batch(result.ids.clone(), Some("batch_ns"))
3206 .unwrap();
3207 assert_eq!(deleted, 3);
3208
3209 // Verify namespace is empty
3210 assert_eq!(engine.count_namespace("batch_ns").unwrap(), 0);
3211 }
3212
3213 // ========== ScopedMemory Tests ==========
3214
3215 #[test]
3216 fn test_scoped_memory_add_and_search() {
3217 let dir = tempdir().unwrap();
3218 let path = dir.path().join("test.mfdb");
3219 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3220
3221 // Create scoped view
3222 let scoped = engine.scope("user_123");
3223
3224 // Add memory via scoped view
3225 let id = scoped
3226 .add(
3227 "Scoped memory".to_string(),
3228 vec![0.5; 384],
3229 None,
3230 None,
3231 None,
3232 )
3233 .unwrap();
3234
3235 // Verify memory is in the namespace
3236 let memory = engine.get(&id).unwrap().unwrap();
3237 assert_eq!(memory.get_namespace(), "user_123");
3238 assert_eq!(memory.content, "Scoped memory");
3239
3240 // Search via scoped view
3241 let results = scoped.search(&vec![0.5; 384], 10, None).unwrap();
3242 assert_eq!(results.len(), 1);
3243 assert_eq!(results[0].0.id, id);
3244
3245 // Verify namespace isolation
3246 assert_eq!(scoped.namespace(), "user_123");
3247 }
3248
3249 #[test]
3250 fn test_scoped_memory_count_and_delete_all() {
3251 let dir = tempdir().unwrap();
3252 let path = dir.path().join("test.mfdb");
3253 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3254
3255 let scoped = engine.scope("user_456");
3256
3257 // Add multiple memories
3258 scoped
3259 .add("Memory 1".to_string(), vec![0.1; 384], None, None, None)
3260 .unwrap();
3261 scoped
3262 .add("Memory 2".to_string(), vec![0.2; 384], None, None, None)
3263 .unwrap();
3264 scoped
3265 .add("Memory 3".to_string(), vec![0.3; 384], None, None, None)
3266 .unwrap();
3267
3268 // Count via scoped view
3269 assert_eq!(scoped.count().unwrap(), 3);
3270
3271 // Total engine count should also be 3
3272 assert_eq!(engine.count().unwrap(), 3);
3273
3274 // Delete all via scoped view
3275 let deleted = scoped.delete_all().unwrap();
3276 assert_eq!(deleted, 3);
3277
3278 // Verify namespace is empty
3279 assert_eq!(scoped.count().unwrap(), 0);
3280 assert_eq!(engine.count().unwrap(), 0);
3281 }
3282
3283 #[test]
3284 fn test_scoped_memory_isolation() {
3285 let dir = tempdir().unwrap();
3286 let path = dir.path().join("test.mfdb");
3287 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3288
3289 // Create two scoped views
3290 let scope1 = engine.scope("ns1");
3291 let scope2 = engine.scope("ns2");
3292
3293 // Add memories to each namespace
3294 let id1 = scope1
3295 .add("NS1 memory".to_string(), vec![0.1; 384], None, None, None)
3296 .unwrap();
3297 let id2 = scope2
3298 .add("NS2 memory".to_string(), vec![0.2; 384], None, None, None)
3299 .unwrap();
3300
3301 // Each scope should only see its own memories
3302 assert_eq!(scope1.count().unwrap(), 1);
3303 assert_eq!(scope2.count().unwrap(), 1);
3304
3305 // Search should be isolated
3306 let results1 = scope1.search(&vec![0.1; 384], 10, None).unwrap();
3307 assert_eq!(results1.len(), 1);
3308 assert_eq!(results1[0].0.id, id1);
3309
3310 let results2 = scope2.search(&vec![0.2; 384], 10, None).unwrap();
3311 assert_eq!(results2.len(), 1);
3312 assert_eq!(results2[0].0.id, id2);
3313
3314 // Delete from scope1 shouldn't affect scope2
3315 scope1.delete_all().unwrap();
3316 assert_eq!(scope1.count().unwrap(), 0);
3317 assert_eq!(scope2.count().unwrap(), 1);
3318 }
3319
3320 #[test]
3321 fn test_scoped_memory_delete_with_verification() {
3322 let dir = tempdir().unwrap();
3323 let path = dir.path().join("test.mfdb");
3324 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3325
3326 let scope1 = engine.scope("ns1");
3327 let scope2 = engine.scope("ns2");
3328
3329 // Add memory to ns1
3330 let id = scope1
3331 .add("NS1 memory".to_string(), vec![0.1; 384], None, None, None)
3332 .unwrap();
3333
3334 // Try to delete from wrong namespace - should fail
3335 let result = scope2.delete(&id);
3336 assert!(result.is_err());
3337 assert!(matches!(
3338 result.unwrap_err(),
3339 crate::Error::NamespaceMismatch { .. }
3340 ));
3341
3342 // Verify memory still exists
3343 assert_eq!(scope1.count().unwrap(), 1);
3344
3345 // Delete from correct namespace - should succeed
3346 let deleted = scope1.delete(&id).unwrap();
3347 assert!(deleted);
3348 assert_eq!(scope1.count().unwrap(), 0);
3349 }
3350
3351 #[test]
3352 fn test_scoped_memory_batch_operations() {
3353 use crate::types::MemoryInput;
3354
3355 let dir = tempdir().unwrap();
3356 let path = dir.path().join("test.mfdb");
3357 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3358
3359 let scoped = engine.scope("batch_scope");
3360
3361 // Add batch via scoped view
3362 let inputs = vec![
3363 MemoryInput::new("Batch 1".to_string(), vec![0.1; 384]),
3364 MemoryInput::new("Batch 2".to_string(), vec![0.2; 384]),
3365 MemoryInput::new("Batch 3".to_string(), vec![0.3; 384]),
3366 ];
3367
3368 let result = scoped.add_batch(inputs).unwrap();
3369 assert_eq!(result.created_count, 3);
3370 assert!(result.is_success());
3371
3372 // Verify count
3373 assert_eq!(scoped.count().unwrap(), 3);
3374
3375 // Delete batch via scoped view
3376 let deleted = scoped.delete_batch(result.ids).unwrap();
3377 assert_eq!(deleted, 3);
3378
3379 // Verify empty
3380 assert_eq!(scoped.count().unwrap(), 0);
3381 }
3382
    #[test]
    fn test_search_with_metadata_filters() {
        // search() must intersect vector results with the given metadata
        // filters; multiple filters combine with AND semantics.
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");
        let engine = MemoryEngine::open(&path, Config::default()).unwrap();

        // Add memories with different metadata. Memory::new is used only as
        // a convenient staging struct; the data actually enters the engine
        // through the add() calls below.
        let mut mem1 = Memory::new("Event 1".to_string(), vec![0.1; 384]);
        mem1.metadata
            .insert("type".to_string(), "event".to_string());
        mem1.metadata
            .insert("priority".to_string(), "high".to_string());
        engine
            .add(
                mem1.content.clone(),
                mem1.embedding.clone(),
                Some(mem1.metadata.clone()),
                None,
                None,
                None,
            )
            .unwrap();

        let mut mem2 = Memory::new("Event 2".to_string(), vec![0.11; 384]);
        mem2.metadata
            .insert("type".to_string(), "event".to_string());
        mem2.metadata
            .insert("priority".to_string(), "low".to_string());
        engine
            .add(
                mem2.content.clone(),
                mem2.embedding.clone(),
                Some(mem2.metadata.clone()),
                None,
                None,
                None,
            )
            .unwrap();

        let mut mem3 = Memory::new("Task 1".to_string(), vec![0.12; 384]);
        mem3.metadata.insert("type".to_string(), "task".to_string());
        mem3.metadata
            .insert("priority".to_string(), "high".to_string());
        engine
            .add(
                mem3.content.clone(),
                mem3.embedding.clone(),
                Some(mem3.metadata.clone()),
                None,
                None,
                None,
            )
            .unwrap();

        // Search with filter: type=event (matches mem1 and mem2)
        let filters = vec![MetadataFilter::eq("type", "event")];
        let results = engine
            .search(&vec![0.1; 384], 10, None, Some(&filters))
            .unwrap();
        assert_eq!(results.len(), 2);
        assert!(results
            .iter()
            .all(|(m, _)| m.metadata.get("type").unwrap() == "event"));

        // Search with filter: priority=high (matches mem1 and mem3)
        let filters = vec![MetadataFilter::eq("priority", "high")];
        let results = engine
            .search(&vec![0.1; 384], 10, None, Some(&filters))
            .unwrap();
        assert_eq!(results.len(), 2);
        assert!(results
            .iter()
            .all(|(m, _)| m.metadata.get("priority").unwrap() == "high"));

        // Search with multiple filters: type=event AND priority=high —
        // only "Event 1" satisfies both conditions.
        let filters = vec![
            MetadataFilter::eq("type", "event"),
            MetadataFilter::eq("priority", "high"),
        ];
        let results = engine
            .search(&vec![0.1; 384], 10, None, Some(&filters))
            .unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.content, "Event 1");
    }
3468
    #[test]
    fn test_query_with_metadata_filters() {
        // query() (fused retrieval path) must also honor metadata filters.
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");
        // Disable semantic threshold for test (simple test embeddings);
        // otherwise fusion could prune results before the filter applies.
        let config = Config::default().with_fusion_semantic_threshold(0.0);
        let engine = MemoryEngine::open(&path, config).unwrap();

        // Add memories with different metadata
        let mut mem1 = Memory::new("Important meeting".to_string(), vec![0.1; 384]);
        mem1.metadata
            .insert("type".to_string(), "event".to_string());
        mem1.metadata
            .insert("priority".to_string(), "high".to_string());
        engine
            .add(
                mem1.content.clone(),
                mem1.embedding.clone(),
                Some(mem1.metadata.clone()),
                None,
                None,
                None,
            )
            .unwrap();

        let mut mem2 = Memory::new("Casual meeting".to_string(), vec![0.11; 384]);
        mem2.metadata
            .insert("type".to_string(), "event".to_string());
        mem2.metadata
            .insert("priority".to_string(), "low".to_string());
        engine
            .add(
                mem2.content.clone(),
                mem2.embedding.clone(),
                Some(mem2.metadata.clone()),
                None,
                None,
                None,
            )
            .unwrap();

        // Query with filter — both memories match "meeting", but only the
        // high-priority one survives the metadata filter.
        let filters = vec![MetadataFilter::eq("priority", "high")];
        let (_intent, results, _profile_ctx) = engine
            .query("meeting", vec![0.1f32; 384], 10, None, Some(&filters))
            .unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.content, "Important meeting");
    }
3519
3520 #[test]
3521 fn test_scoped_memory_with_filters() {
3522 let dir = tempdir().unwrap();
3523 let path = dir.path().join("test.mfdb");
3524 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3525
3526 let scoped = engine.scope("user_123");
3527
3528 // Add memories with different metadata to the namespace
3529 let mut mem1 = Memory::new("Event 1".to_string(), vec![0.1; 384]);
3530 mem1.metadata
3531 .insert("type".to_string(), "event".to_string());
3532 scoped
3533 .add(
3534 mem1.content.clone(),
3535 mem1.embedding.clone(),
3536 Some(mem1.metadata.clone()),
3537 None,
3538 None,
3539 )
3540 .unwrap();
3541
3542 let mut mem2 = Memory::new("Task 1".to_string(), vec![0.11; 384]);
3543 mem2.metadata.insert("type".to_string(), "task".to_string());
3544 scoped
3545 .add(
3546 mem2.content.clone(),
3547 mem2.embedding.clone(),
3548 Some(mem2.metadata.clone()),
3549 None,
3550 None,
3551 )
3552 .unwrap();
3553
3554 // Search with filter in scoped view
3555 let filters = vec![MetadataFilter::eq("type", "event")];
3556 let results = scoped.search(&vec![0.1; 384], 10, Some(&filters)).unwrap();
3557
3558 assert_eq!(results.len(), 1);
3559 assert_eq!(results[0].0.content, "Event 1");
3560 }
3561
    #[test]
    fn test_consolidate_merges_aliases() {
        use crate::types::{EntityFact, EntityId, EntityProfile};

        // consolidate_profiles() should recognize "mel" as an alias of the
        // canonical "melanie" profile, merge the orphan's facts and source
        // memories into it, and delete the orphan. Profiles are seeded
        // directly through engine.storage to control the starting state.
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");
        let engine = MemoryEngine::open(&path, Config::default()).unwrap();

        let mem1 = MemoryId::new();
        let mem2 = MemoryId::new();

        // Create orphan "mel" profile with 1 fact
        let mut mel_profile =
            EntityProfile::new(EntityId::new(), "mel".to_string(), "person".to_string());
        mel_profile.add_fact(EntityFact::new("hobby", "hiking", 0.9, mem1.clone()));
        mel_profile.add_source_memory(mem1.clone());
        engine.storage.store_entity_profile(&mel_profile).unwrap();

        // Create canonical "melanie" profile with 2 facts
        let mut melanie_profile =
            EntityProfile::new(EntityId::new(), "melanie".to_string(), "person".to_string());
        melanie_profile.add_fact(EntityFact::new("instrument", "guitar", 0.9, mem2.clone()));
        melanie_profile.add_fact(EntityFact::new("occupation", "teacher", 0.8, mem2.clone()));
        melanie_profile.add_source_memory(mem2.clone());
        engine
            .storage
            .store_entity_profile(&melanie_profile)
            .unwrap();

        // Verify both exist before consolidation
        assert!(engine.storage.get_entity_profile("mel").unwrap().is_some());
        assert!(engine
            .storage
            .get_entity_profile("melanie")
            .unwrap()
            .is_some());

        // Returns (facts_removed, profiles_deleted); only the deletion count
        // matters for this test.
        let (_facts_removed, profiles_deleted) = engine.consolidate_profiles().unwrap();

        // "mel" should be merged into "melanie" and deleted
        assert!(
            profiles_deleted >= 1,
            "At least 1 profile should be deleted (mel)"
        );
        assert!(
            engine.storage.get_entity_profile("mel").unwrap().is_none(),
            "mel profile should be deleted after merge"
        );

        // "melanie" should have all facts merged
        let melanie = engine
            .storage
            .get_entity_profile("melanie")
            .unwrap()
            .expect("melanie should still exist");
        assert!(
            melanie.total_facts() >= 3,
            "melanie should have merged facts (hiking + guitar + teacher), got {}",
            melanie.total_facts()
        );
        assert!(
            melanie.source_memories.contains(&mem1),
            "melanie should have mem1 from merged mel"
        );
        assert!(
            melanie.source_memories.contains(&mem2),
            "melanie should have mem2 from original melanie"
        );
    }
3631
3632 // ======= Embedding-auto tests =======
3633
3634 #[test]
3635 fn test_add_explicit_embedding_still_works() {
3636 // Existing callers that pass Vec<f32> directly should compile and work
3637 let dir = tempdir().unwrap();
3638 let path = dir.path().join("test.mfdb");
3639 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3640 let embedding = vec![0.1f32; 384];
3641 // Vec<f32> implements Into<Option<Vec<f32>>> — this must compile
3642 let id = engine
3643 .add(
3644 "Alice loves hiking".to_string(),
3645 embedding,
3646 None,
3647 None,
3648 None,
3649 None,
3650 )
3651 .unwrap();
3652 assert!(engine.get(&id).unwrap().is_some());
3653 }
3654
3655 #[test]
3656 fn test_add_some_embedding_works() {
3657 // Some(Vec<f32>) should also compile
3658 let dir = tempdir().unwrap();
3659 let path = dir.path().join("test.mfdb");
3660 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3661 let id = engine
3662 .add(
3663 "Test content".to_string(),
3664 Some(vec![0.2f32; 384]),
3665 None,
3666 None,
3667 None,
3668 None,
3669 )
3670 .unwrap();
3671 assert!(engine.get(&id).unwrap().is_some());
3672 }
3673
3674 #[test]
3675 fn test_add_none_embedding_without_engine_errors() {
3676 // None embedding without embedding engine configured → Error::NoEmbeddingEngine
3677 let dir = tempdir().unwrap();
3678 let path = dir.path().join("test.mfdb");
3679 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3680 let result = engine.add("Test".to_string(), None::<Vec<f32>>, None, None, None, None);
3681 assert!(
3682 result.is_err(),
3683 "Expected error when no embedding engine configured"
3684 );
3685 assert!(matches!(result.unwrap_err(), Error::NoEmbeddingEngine));
3686 }
3687
3688 #[test]
3689 fn test_with_user_sets_default_namespace() {
3690 let dir = tempdir().unwrap();
3691 let path = dir.path().join("test.mfdb");
3692 let engine = MemoryEngine::open(&path, Config::default())
3693 .unwrap()
3694 .with_user("alice");
3695 assert_eq!(engine.default_namespace.as_deref(), Some("alice"));
3696
3697 // add() with no explicit namespace should use "alice"
3698 let id = engine
3699 .add(
3700 "Alice's memory".to_string(),
3701 vec![0.1f32; 384],
3702 None,
3703 None,
3704 None,
3705 None,
3706 )
3707 .unwrap();
3708 let mem = engine.get(&id).unwrap().unwrap();
3709 assert_eq!(mem.get_namespace(), "alice");
3710 }
3711
3712 #[test]
3713 fn test_query_none_embedding_without_engine_errors() {
3714 let dir = tempdir().unwrap();
3715 let path = dir.path().join("test.mfdb");
3716 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3717 let result = engine.query("test query", None::<Vec<f32>>, 10, None, None);
3718 assert!(result.is_err());
3719 assert!(matches!(result.unwrap_err(), Error::NoEmbeddingEngine));
3720 }
3721
3722 // ======= first_person_to_third tests =======
3723
3724 #[test]
3725 fn test_first_person_to_third_basic() {
3726 assert_eq!(
3727 first_person_to_third("I joined a gym", "Alice"),
3728 "Alice joined a gym"
3729 );
3730 assert_eq!(
3731 first_person_to_third("I love hiking in the mountains", "Alice"),
3732 "Alice love hiking in the mountains"
3733 );
3734 }
3735
3736 #[test]
3737 fn test_first_person_to_third_contractions() {
3738 assert_eq!(
3739 first_person_to_third("I'm learning guitar", "Bob"),
3740 "Bob is learning guitar"
3741 );
3742 assert_eq!(
3743 first_person_to_third("I've started a new job", "Bob"),
3744 "Bob has started a new job"
3745 );
3746 assert_eq!(
3747 first_person_to_third("I'll be there tomorrow", "Bob"),
3748 "Bob will be there tomorrow"
3749 );
3750 assert_eq!(
3751 first_person_to_third("I'd love to visit Paris", "Bob"),
3752 "Bob would love to visit Paris"
3753 );
3754 }
3755
3756 #[test]
3757 fn test_first_person_to_third_possessive() {
3758 assert_eq!(
3759 first_person_to_third("My hobby is hiking", "Alice"),
3760 "Alice's hobby is hiking"
3761 );
3762 assert_eq!(
3763 first_person_to_third("That book is mine", "Alice"),
3764 "That book is Alice's"
3765 );
3766 // "myself" before "my" prevents double-substitution
3767 assert_eq!(
3768 first_person_to_third("I hurt myself at the gym", "Alice"),
3769 "Alice hurt Alice at the gym"
3770 );
3771 }
3772
3773 #[test]
3774 fn test_first_person_to_third_object_pronoun() {
3775 assert_eq!(
3776 first_person_to_third("He gave me the book", "Alice"),
3777 "He gave Alice the book"
3778 );
3779 }
3780
3781 #[test]
3782 fn test_first_person_to_third_no_pronouns() {
3783 // Content without first-person pronouns should be returned unchanged
3784 let content = "She likes hiking in the mountains";
3785 assert_eq!(first_person_to_third(content, "Alice"), content);
3786 }
3787
3788 #[test]
3789 fn test_first_person_to_third_no_word_boundary_false_positive() {
3790 // "me" inside "intermediate" should not match \bme\b
3791 let content = "The intermediate step";
3792 assert_eq!(first_person_to_third(content, "Alice"), content);
3793 }
3794
    #[test]
    fn test_auto_detect_embedding_dim_from_existing_db() {
        // When reopening an existing DB, the persisted embedding dimension
        // (768 here) must win over the configured default (384).
        // Create a DB with 768-dim embeddings
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.mfdb");

        let mut config = Config::default();
        config.embedding_dim = 768;
        let engine = MemoryEngine::open(&path, config).unwrap();

        // Add a vector so the index has data to persist
        let embedding = vec![0.1; 768];
        engine
            .add(
                "test content".to_string(),
                embedding,
                None,
                None,
                None,
                None,
            )
            .unwrap();
        // Drop flushes the engine so the reopen below sees the data on disk.
        drop(engine);

        // Re-open with default config (384) — should auto-detect 768
        let engine2 = MemoryEngine::open(&path, Config::default()).unwrap();
        assert_eq!(
            engine2.config().embedding_dim,
            768,
            "Should auto-detect embedding dim from existing DB"
        );

        // Verify we can add more vectors with the detected dimension
        let embedding2 = vec![0.2; 768];
        engine2
            .add(
                "another memory".to_string(),
                embedding2,
                None,
                None,
                None,
                None,
            )
            .unwrap();
    }
3840
3841 #[test]
3842 fn test_add_batch_with_progress_callback() {
3843 let dir = tempdir().unwrap();
3844 let path = dir.path().join("test.mfdb");
3845 let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3846
3847 let inputs: Vec<MemoryInput> = (0..5)
3848 .map(|i| MemoryInput::new(format!("memory {}", i), vec![0.1 * (i as f32 + 1.0); 384]))
3849 .collect();
3850
3851 let progress = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
3852 let progress_clone = progress.clone();
3853
3854 let result = engine
3855 .add_batch_with_progress(
3856 inputs,
3857 None,
3858 Some(Box::new(move |current, total| {
3859 progress_clone.lock().unwrap().push((current, total));
3860 })),
3861 )
3862 .unwrap();
3863
3864 assert_eq!(result.created_count, 5);
3865 let recorded = progress.lock().unwrap();
3866 assert_eq!(recorded.len(), 5);
3867 assert_eq!(recorded[0], (1, 5));
3868 assert_eq!(recorded[4], (5, 5));
3869 }
3870}