// mnemefusion_core/memory.rs
1//! Memory engine - main API entry point
2//!
3//! The MemoryEngine struct provides the primary interface for interacting
4//! with a MnemeFusion database.
5
6use crate::{
7    config::Config,
8    error::{Error, Result},
9    graph::{CausalTraversalResult, GraphManager},
10    index::{BM25Config, BM25Index, TemporalIndex, VectorIndex, VectorIndexConfig},
11    ingest::IngestionPipeline,
12    query::{profile_search::fact_embedding_key, FusedResult, IntentClassification, QueryPlanner},
13    storage::StorageEngine,
14    trace::{Trace, TraceRecorder},
15    types::{
16        AddResult, BatchResult, Entity, EntityProfile, Memory, MemoryId, MemoryInput,
17        MetadataFilter, Source, Timestamp, UpsertResult,
18    },
19};
20use std::cell::RefCell;
21use std::collections::HashMap;
22use std::path::Path;
23use std::sync::{Arc, RwLock};
24
25#[cfg(feature = "slm")]
26use crate::ingest::SlmMetadataExtractor;
27#[cfg(feature = "slm")]
28use std::sync::Mutex;
29
30#[cfg(feature = "entity-extraction")]
31use crate::extraction::{LlmEntityExtractor, ModelTier};
32#[cfg(all(feature = "entity-extraction", not(feature = "slm")))]
33use std::sync::Mutex;
34
/// Callback type for computing embeddings at ingestion time.
///
/// Provided by the caller (e.g., Python's `SentenceTransformer.encode()`).
/// Called for each fact text during ingestion to compute fact embeddings.
///
/// `Send + Sync` is required because the callback is shared (`Arc`) with the
/// ingestion pipeline, which may be used from multiple threads.
pub type EmbeddingFn = Arc<dyn Fn(&str) -> Vec<f32> + Send + Sync>;
40
/// Build contextual text for embedding by prepending speaker metadata.
///
/// Prefixing the speaker name lets the embedding model tell apart identical
/// statements made by different people. The stored content is never modified;
/// only the text fed to the embedding model carries the speaker prefix.
///
/// Returns the content unchanged when metadata has no non-empty "speaker" key.
pub fn contextualize_for_embedding(content: &str, metadata: &HashMap<String, String>) -> String {
    match metadata.get("speaker").filter(|s| !s.is_empty()) {
        Some(speaker) => format!("{}: {}", speaker, content),
        None => content.to_string(),
    }
}
56
57/// Convert first-person pronouns in content to third-person using the speaker's name.
58///
59/// Used at embedding time: "I joined a gym" → "Alice joined a gym" produces ~+0.25 better
60/// cosine similarity with queries like "What fitness activities does Alice do?" when using
61/// symmetric bi-encoders (BGE-base-en-v1.5).
62///
63/// Substitution order (specific before general, to avoid double-matching):
64/// contractions/phrases → standalone "I" → reflexive → possessive → object.
65///
66/// The original content is **not** modified — only the embedding text changes.
67/// Returns the original string unchanged when no first-person pronouns are found
68/// (i.e., `result == content` signals a no-op).
69pub fn first_person_to_third(content: &str, speaker: &str) -> String {
70    use regex::{NoExpand, Regex};
71
72    // Ordered rules: longer/more-specific patterns first to prevent double-substitution.
73    // Each entry is (regex_pattern, replacement_factory).
74    let rules: &[(&str, fn(&str) -> String)] = &[
75        // Contractions (case-insensitive: "I'm" and "i'm" both match)
76        (r"(?i)\bI'm\b", |s| format!("{} is", s)),
77        (r"(?i)\bI've\b", |s| format!("{} has", s)),
78        (r"(?i)\bI'll\b", |s| format!("{} will", s)),
79        (r"(?i)\bI'd\b", |s| format!("{} would", s)),
80        // 2-word phrases (always capitalized in English)
81        (r"\bI am\b", |s| format!("{} is", s)),
82        (r"\bI have\b", |s| format!("{} has", s)),
83        (r"\bI will\b", |s| format!("{} will", s)),
84        (r"\bI would\b", |s| format!("{} would", s)),
85        // Standalone "I" (always capitalized in English; case-sensitive is fine)
86        (r"\bI\b", |s| s.to_string()),
87        // "myself" / "Myself" before "my" to prevent double-substitution
88        (r"(?i)\bmyself\b", |s| s.to_string()),
89        // Possessive (case-insensitive: "My" at sentence start, "my" mid-sentence)
90        (r"(?i)\bmy\b", |s| format!("{}'s", s)),
91        (r"(?i)\bmine\b", |s| format!("{}'s", s)),
92        // Object pronoun (case-insensitive: "Me and Alice went..." edge case)
93        (r"(?i)\bme\b", |s| s.to_string()),
94    ];
95
96    let mut result = content.to_string();
97    for (pattern, make_repl) in rules {
98        let repl = make_repl(speaker);
99        // NoExpand: treat replacement literally (safe when speaker contains '$')
100        result = Regex::new(pattern)
101            .expect("valid first_person_to_third pattern")
102            .replace_all(&result, NoExpand(repl.as_str()))
103            .into_owned();
104    }
105    result
106}
107
/// Main memory engine interface
///
/// This is the primary entry point for all MnemeFusion operations.
/// It coordinates storage, indexing, and retrieval across all dimensions.
pub struct MemoryEngine {
    /// Durable storage backend; shared (`Arc`) with every index, the pipeline,
    /// and the query planner (see `open()`).
    storage: Arc<StorageEngine>,
    /// Embedding-similarity (HNSW) index; `RwLock` because it is mutated
    /// during ingestion while also being read by queries.
    vector_index: Arc<RwLock<VectorIndex>>,
    /// Lexical BM25 index.
    bm25_index: Arc<BM25Index>,
    /// Temporal index over memories.
    temporal_index: Arc<TemporalIndex>,
    /// Entity/causal graph; `RwLock` for shared mutation (repaired/loaded in `open()`).
    graph_manager: Arc<RwLock<GraphManager>>,
    /// Ingestion pipeline; owns the extraction wiring configured in `open()`.
    pipeline: IngestionPipeline,
    /// Query planner fusing results across the indexes above.
    query_planner: QueryPlanner,
    /// Effective configuration. NOTE: `embedding_dim` may be auto-corrected in
    /// `open()` to match the dimension detected from an existing database.
    config: Config,
    /// Auto-embedding engine. Set via `Config::embedding_model` or `with_embedding_engine()`.
    #[cfg(feature = "embedding-onnx")]
    embedding_engine: Option<std::sync::Arc<crate::embedding::EmbeddingEngine>>,
    /// Default namespace applied to all add/query calls when `namespace` arg is None.
    /// Set via `with_user(user_name)`.
    default_namespace: Option<String>,
    /// User entity name for first-person pronoun resolution in queries.
    /// When set, queries containing "I", "me", "my" automatically include this
    /// entity in profile injection. Separate from `default_namespace` so it can
    /// be used without enabling namespace filtering.
    /// Set via `set_user_entity(name)`.
    user_entity: Option<String>,
    /// Last query trace (diagnostic side-channel, RefCell because query() takes &self).
    last_query_trace: RefCell<Option<Trace>>,
    /// Whether pipeline tracing is enabled.
    enable_trace: bool,
}
138
139impl MemoryEngine {
    /// Open or create a memory database
    ///
    /// # Arguments
    ///
    /// * `path` - Path to the .mfdb file
    /// * `config` - Configuration options
    ///
    /// # Returns
    ///
    /// A new MemoryEngine instance
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The database file cannot be created or opened
    /// - The file format is invalid
    /// - The configuration is invalid
    ///
    /// # Example
    ///
    /// ```no_run
    /// use mnemefusion_core::{MemoryEngine, Config};
    ///
    /// let engine = MemoryEngine::open("./brain.mfdb", Config::default()).unwrap();
    /// ```
    pub fn open<P: AsRef<Path>>(path: P, config: Config) -> Result<Self> {
        // Validate configuration
        config.validate()?;
        // Rebind mutably: embedding_dim may be overwritten below once the true
        // dimension is detected from an existing database.
        let mut config = config;

        // Open storage
        let storage = Arc::new(StorageEngine::open(path)?);

        // Create vector index configuration from main config
        let vector_config = VectorIndexConfig {
            dimension: config.embedding_dim,
            connectivity: config.hnsw_m,
            expansion_add: config.hnsw_ef_construction,
            expansion_search: config.hnsw_ef_search,
        };

        // Create and load vector index
        let mut vector_index = VectorIndex::new(vector_config, Arc::clone(&storage))?;
        vector_index.load()?;

        // Auto-detect embedding dimension from existing DB.
        // After load(), the vector index knows the true dimension from the serialized data.
        // Override config to match, so all subsequent add()/query() validations use the
        // correct dimension. This eliminates the need for users to manually specify
        // embedding_dim when opening an existing DB.
        let detected_dim = vector_index.config().dimension;
        if detected_dim != config.embedding_dim {
            config.embedding_dim = detected_dim;
        }

        let vector_index = Arc::new(RwLock::new(vector_index));

        // Create and load BM25 index
        let bm25_config = BM25Config::default();
        let bm25_index = Arc::new(BM25Index::new(Arc::clone(&storage), bm25_config));
        bm25_index.load()?;

        // Create temporal index
        let temporal_index = Arc::new(TemporalIndex::new(Arc::clone(&storage)));

        // Create and load graph manager
        let mut graph_manager = GraphManager::new();
        crate::graph::persist::load_graph(&mut graph_manager, &storage)?;

        // One-time migration: repair Entity→Entity edges from profile facts
        // (older DBs stored relationship facts but lost graph edges on save)
        crate::graph::persist::repair_relationship_edges(&mut graph_manager, &storage)?;

        let graph_manager = Arc::new(RwLock::new(graph_manager));

        // Create ingestion pipeline
        let mut pipeline = IngestionPipeline::new(
            Arc::clone(&storage),
            Arc::clone(&vector_index),
            Arc::clone(&bm25_index),
            Arc::clone(&temporal_index),
            Arc::clone(&graph_manager),
            config.entity_extraction_enabled,
        );

        // Attach SLM metadata extractor if enabled.
        // Initialization failure is non-fatal: ingestion falls back to
        // pattern-based extraction rather than refusing to open the DB.
        #[cfg(feature = "slm")]
        if config.slm_metadata_extraction_enabled {
            if let Some(ref slm_config) = config.slm_config {
                tracing::info!("Initializing SLM metadata extractor for ingestion...");
                match SlmMetadataExtractor::new(slm_config.clone()) {
                    Ok(extractor) => {
                        pipeline = pipeline.with_slm_extractor(Arc::new(Mutex::new(extractor)));
                        tracing::info!("SLM metadata extractor attached to pipeline");
                    }
                    Err(e) => {
                        tracing::warn!(
                            "Failed to initialize SLM metadata extractor, using pattern-based extraction: {}",
                            e
                        );
                    }
                }
            } else {
                tracing::debug!("SLM metadata extraction enabled but no slm_config provided");
            }
        }

        // Wire extraction_passes from config to pipeline
        // (1 is the pipeline default, so only values above 1 need forwarding)
        #[cfg(feature = "entity-extraction")]
        if config.extraction_passes > 1 {
            pipeline = pipeline.with_extraction_passes(config.extraction_passes);
        }

        // Wire profile entity type filter from config to pipeline
        pipeline.set_profile_entity_types(config.profile_entity_types.clone());

        // Wire async extraction threshold from config to pipeline
        // (0 means "always extract synchronously"; see flush_extraction_queue())
        if config.async_extraction_threshold > 0 {
            pipeline.set_async_extraction_threshold(config.async_extraction_threshold);
        }

        // Create query planner
        let query_planner = QueryPlanner::new(
            Arc::clone(&storage),
            Arc::clone(&vector_index),
            Arc::clone(&bm25_index),
            Arc::clone(&temporal_index),
            Arc::clone(&graph_manager),
            config.fusion_semantic_threshold,
            config.semantic_prefilter_threshold,
            config.fusion_strategy,
            config.rrf_k,
            #[cfg(feature = "slm")]
            config.slm_config.clone(),
            config.slm_query_classification_enabled,
            config.adaptive_k_threshold,
        )?;

        // Initialize embedding engine if configured.
        // Like the SLM extractor above, load failure is non-fatal: callers can
        // still supply embeddings explicitly.
        #[cfg(feature = "embedding-onnx")]
        let embedding_engine = if let Some(ref model_path) = config.embedding_model {
            tracing::info!("Initializing embedding engine from '{}'...", model_path);
            match crate::embedding::EmbeddingEngine::from_path(model_path) {
                Ok(engine) => {
                    tracing::info!("Embedding engine ready (dim={})", engine.dim);
                    Some(std::sync::Arc::new(engine))
                }
                Err(e) => {
                    tracing::warn!(
                        "Failed to load embedding model from '{}': {}. \
                         Embeddings must be supplied explicitly.",
                        model_path,
                        e
                    );
                    None
                }
            }
        } else {
            None
        };

        let enable_trace = config.enable_trace;
        Ok(Self {
            storage,
            vector_index,
            bm25_index,
            temporal_index,
            graph_manager,
            pipeline,
            query_planner,
            config,
            #[cfg(feature = "embedding-onnx")]
            embedding_engine,
            default_namespace: None,
            user_entity: None,
            last_query_trace: RefCell::new(None),
            enable_trace,
        })
    }
319
320    /// Enable native LLM entity extraction with the specified model tier
321    ///
322    /// This enables automatic entity and fact extraction during memory ingestion
323    /// using a locally-running LLM via llama.cpp. Extraction results are stored
324    /// in entity profiles for fast retrieval.
325    ///
326    /// # Arguments
327    ///
328    /// * `tier` - Model tier to use (Balanced = 4B, Quality = 7B)
329    ///
330    /// # Returns
331    ///
332    /// Self for method chaining
333    ///
334    /// # Errors
335    ///
336    /// Returns an error if the model cannot be loaded.
337    ///
338    /// # Example
339    ///
340    /// ```no_run
341    /// use mnemefusion_core::{MemoryEngine, Config, ModelTier};
342    ///
343    /// let engine = MemoryEngine::open("./brain.mfdb", Config::default())?
344    ///     .with_llm_entity_extraction(ModelTier::Balanced)?;
345    /// # Ok::<(), mnemefusion_core::Error>(())
346    /// ```
347    #[cfg(feature = "entity-extraction")]
348    pub fn with_llm_entity_extraction(mut self, tier: ModelTier) -> Result<Self> {
349        tracing::info!("Initializing LLM entity extractor ({:?})...", tier);
350
351        let extractor = LlmEntityExtractor::load(tier)?;
352        self.pipeline = self
353            .pipeline
354            .with_llm_extractor(Arc::new(Mutex::new(extractor)));
355
356        tracing::info!("LLM entity extractor attached to pipeline");
357        Ok(self)
358    }
359
360    /// Enable native LLM entity extraction with a custom model path
361    ///
362    /// This enables automatic entity and fact extraction using a model
363    /// at the specified path.
364    ///
365    /// # Arguments
366    ///
367    /// * `model_path` - Path to the GGUF model file
368    /// * `tier` - Model tier (affects generation parameters)
369    ///
370    /// # Returns
371    ///
372    /// Self for method chaining
373    ///
374    /// # Errors
375    ///
376    /// Returns an error if the model cannot be loaded.
377    #[cfg(feature = "entity-extraction")]
378    pub fn with_llm_entity_extraction_from_path(
379        mut self,
380        model_path: impl Into<std::path::PathBuf>,
381        tier: ModelTier,
382    ) -> Result<Self> {
383        tracing::info!("Initializing LLM entity extractor from custom path...");
384
385        let extractor = LlmEntityExtractor::load_from_path(model_path, tier)?;
386        self.pipeline = self
387            .pipeline
388            .with_llm_extractor(Arc::new(Mutex::new(extractor)));
389
390        tracing::info!("LLM entity extractor attached to pipeline");
391        Ok(self)
392    }
393
394    /// Enable Triplex KG extraction for clean entity-to-entity relationships.
395    ///
396    /// Loads the SciPhi Triplex model (Phi-3 3.8B fine-tune) as a second
397    /// extraction model. During ingestion, Triplex runs after Phi-4 to produce
398    /// clean (subject, predicate, object) triples with constrained entity types.
399    ///
400    /// This is the "Full" ingestion tier — requires 8GB+ GPU VRAM for both models.
401    ///
402    /// # Arguments
403    ///
404    /// * `model_path` - Path to the Triplex GGUF model file
405    ///
406    /// # Example
407    ///
408    /// ```rust,ignore
409    /// let engine = MemoryEngine::open("./brain.mfdb", Config::default())?
410    ///     .with_llm_entity_extraction(ModelTier::Balanced)?
411    ///     .with_kg_extraction("models/triplex/Triplex-Q4_K_M.gguf")?;
412    /// ```
413    #[cfg(feature = "entity-extraction")]
414    pub fn with_kg_extraction(mut self, model_path: impl AsRef<std::path::Path>) -> Result<Self> {
415        tracing::info!("Initializing Triplex KG extractor...");
416
417        let extractor = crate::extraction::TriplexExtractor::load(model_path)?;
418        self.pipeline = self
419            .pipeline
420            .with_triplex_extractor(Arc::new(Mutex::new(extractor)));
421
422        tracing::info!("Triplex KG extractor attached to pipeline (Full tier)");
423        Ok(self)
424    }
425
426    /// Set the number of LLM extraction passes per document.
427    ///
428    /// This must be called after `with_llm_entity_extraction*()` to take effect.
429    /// Multiple passes capture different facts, producing richer profiles.
430    #[cfg(feature = "entity-extraction")]
431    pub fn set_extraction_passes(&mut self, passes: usize) {
432        self.pipeline.set_extraction_passes(passes);
433    }
434
435    /// Backfill KG triples for all existing memories using Triplex.
436    ///
437    /// Runs Triplex extraction on every memory in the database and stores
438    /// the resulting entity-to-entity relationship triples. This adds KG
439    /// edges to a DB that was originally ingested without Triplex.
440    ///
441    /// Requires `with_kg_extraction()` to have been called first.
442    /// Returns the number of memories that produced triples.
443    #[cfg(feature = "entity-extraction")]
444    pub fn backfill_kg(&self) -> Result<usize> {
445        self.pipeline.backfill_kg(None)
446    }
447
448    /// Like `backfill_kg()` but with a progress callback.
449    ///
450    /// Calls `progress_callback(current, total)` after each memory is processed.
451    #[cfg(feature = "entity-extraction")]
452    pub fn backfill_kg_with_progress(
453        &self,
454        progress_callback: Option<Box<dyn Fn(usize, usize)>>,
455    ) -> Result<usize> {
456        self.pipeline.backfill_kg(progress_callback)
457    }
458
459    /// Process all deferred LLM extractions queued by `add()` in async mode.
460    ///
461    /// When `async_extraction_threshold > 0` (set via config or
462    /// `with_async_extraction_threshold()`), `add()` stores large memories
463    /// immediately and defers LLM extraction here. Call this periodically
464    /// (e.g., every N messages, or before querying) to build entity profiles.
465    ///
466    /// Returns the number of memories whose extraction was processed.
467    /// Safe to call when the queue is empty (returns `Ok(0)`).
468    pub fn flush_extraction_queue(&self) -> Result<usize> {
469        self.pipeline.flush_extraction_queue()
470    }
471
472    /// Returns the number of memories with deferred LLM extractions pending.
473    ///
474    /// Non-zero only when `async_extraction_threshold > 0` and large `add()` calls
475    /// have been made since the last `flush_extraction_queue()`.
476    pub fn pending_extraction_count(&self) -> usize {
477        self.pipeline.pending_extraction_count()
478    }
479
480    /// Set a default namespace (user identity) for all add/query operations.
481    ///
482    /// When set, any call to `add()` or `query()` that does not supply an explicit
483    /// `namespace` argument will use this value automatically. Equivalent to always
484    /// passing `namespace = Some(user)` — enables "Memory is per-user" semantics
485    /// without changing every call site.
486    ///
487    /// # Example
488    ///
489    /// ```no_run
490    /// # use mnemefusion_core::{MemoryEngine, Config};
491    /// let engine = MemoryEngine::open("./brain.mfdb", Config::default()).unwrap()
492    ///     .with_user("alice");
493    /// // All subsequent add/query calls default to namespace="alice"
494    /// ```
495    pub fn with_user(mut self, user: impl Into<String>) -> Self {
496        self.default_namespace = Some(user.into());
497        self
498    }
499
500    /// Set the user entity name for first-person pronoun resolution.
501    ///
502    /// When set, queries containing "I", "me", "my", etc. automatically include
503    /// this entity in the profile injection step (Step 2.1), ensuring the user's
504    /// own memories get the entity score boost.
505    ///
506    /// Unlike `with_user()`, this does NOT enable namespace filtering — it only
507    /// affects entity detection at query time. Use this when memories are stored
508    /// without namespace but you want pronoun resolution.
509    pub fn set_user_entity(&mut self, name: impl Into<String>) {
510        self.user_entity = Some(name.into());
511    }
512
513    /// Attach an embedding engine for automatic text vectorization.
514    ///
515    /// After this call, `add()` and `query()` can be called without supplying
516    /// explicit embedding vectors.
517    ///
518    /// Requires the `embedding-onnx` feature at compile time.
519    #[cfg(feature = "embedding-onnx")]
520    pub fn with_embedding_engine(mut self, engine: crate::embedding::EmbeddingEngine) -> Self {
521        self.embedding_engine = Some(std::sync::Arc::new(engine));
522        self
523    }
524
525    /// Auto-compute an embedding using the configured engine or embedding_fn.
526    ///
527    /// Returns `Err(Error::NoEmbeddingEngine)` if neither is configured.
528    #[cfg(feature = "embedding-onnx")]
529    fn auto_embed(&self, text: &str) -> Result<Vec<f32>> {
530        if let Some(engine) = self.embedding_engine.as_ref() {
531            return engine.embed(text);
532        }
533        if let Some(f) = self.pipeline.embedding_fn() {
534            return Ok(f(text));
535        }
536        Err(Error::NoEmbeddingEngine)
537    }
538
539    /// Auto-compute an embedding (no ONNX engine — fall back to embedding_fn).
540    #[cfg(not(feature = "embedding-onnx"))]
541    fn auto_embed(&self, text: &str) -> Result<Vec<f32>> {
542        if let Some(f) = self.pipeline.embedding_fn() {
543            Ok(f(text))
544        } else {
545            Err(Error::NoEmbeddingEngine)
546        }
547    }
548
549    /// Set the embedding function for computing fact embeddings at ingestion time.
550    ///
551    /// When set, the pipeline will compute and store embeddings for each extracted
552    /// entity fact during ingestion. These embeddings enable semantic matching in
553    /// ProfileSearch (cosine similarity vs word-overlap).
554    ///
555    /// The function should return an embedding vector for the given text input.
556    /// Typically this wraps the same embedding model used for memory embeddings
557    /// (e.g., `SentenceTransformer.encode()`).
558    ///
559    /// # Arguments
560    ///
561    /// * `f` - Embedding function: `Fn(&str) -> Vec<f32>`
562    pub fn set_embedding_fn(&mut self, f: EmbeddingFn) {
563        self.pipeline.set_embedding_fn(f);
564    }
565
566    /// Precompute missing fact embeddings for all entity profiles.
567    ///
568    /// Iterates all stored profiles, checks each fact for a stored embedding,
569    /// and computes + stores any missing ones using the registered EmbeddingFn.
570    /// This is a one-time backfill operation — "pay the cost once."
571    ///
572    /// Returns the number of fact embeddings computed.
573    pub fn precompute_fact_embeddings(&self) -> Result<usize> {
574        let embed_fn = self.pipeline.embedding_fn().ok_or_else(|| {
575            Error::Configuration("No embedding function set. Call set_embedding_fn() first.".into())
576        })?;
577
578        let profiles = self.storage.list_entity_profiles()?;
579        let mut computed = 0;
580
581        for profile in &profiles {
582            for (fact_type, facts) in &profile.facts {
583                for fact in facts {
584                    let key = fact_embedding_key(&profile.name, fact_type, &fact.value);
585                    if self.storage.get_fact_embedding(&key)?.is_none() {
586                        let fact_text = format!("{} {}", fact_type.replace('_', " "), fact.value);
587                        let embedding = embed_fn(&fact_text);
588                        self.storage.store_fact_embedding(&key, &embedding)?;
589                        computed += 1;
590                    }
591                }
592            }
593        }
594
595        Ok(computed)
596    }
597
598    /// Rebuild embeddings for memories with first-person content using speaker-aware
599    /// pronoun substitution.
600    ///
601    /// For each memory that has a `"speaker"` in its metadata and first-person content
602    /// (e.g., `"I joined a gym"`), recomputes the embedding on the third-person form
603    /// (`"Alice joined a gym"`) to improve semantic similarity with entity-centric queries.
604    ///
605    /// This is a one-time backfill for databases ingested before this feature was added.
606    /// Safe to call multiple times — only updates memories where pronoun substitution
607    /// changes the text (i.e., skips memories without first-person pronouns).
608    ///
609    /// Uses the registered `EmbeddingFn` (set via `set_embedding_fn()`) when available,
610    /// falling back to the internal `auto_embed()` engine otherwise.
611    ///
612    /// Returns the number of memory embeddings updated.
613    pub fn rebuild_speaker_embeddings(&self) -> Result<usize> {
614        let embed_fn = self.pipeline.embedding_fn();
615        let ids = self.storage.list_memory_ids()?;
616        let mut updated = 0;
617
618        for id in &ids {
619            let memory = match self.storage.get_memory(id)? {
620                Some(m) => m,
621                None => continue,
622            };
623            let speaker = memory
624                .metadata
625                .get("speaker")
626                .map(String::as_str)
627                .unwrap_or("");
628            if speaker.is_empty() {
629                continue;
630            }
631
632            let substituted = first_person_to_third(&memory.content, speaker);
633            if substituted == memory.content {
634                continue; // no first-person pronouns found — nothing to do
635            }
636
637            let new_embedding = match embed_fn.as_ref() {
638                Some(ef) => ef(&substituted),
639                None => self.auto_embed(&substituted)?,
640            };
641
642            self.update_embedding(id, new_embedding)?;
643            updated += 1;
644        }
645
646        tracing::info!(
647            "rebuild_speaker_embeddings: updated {} memory embeddings",
648            updated
649        );
650        Ok(updated)
651    }
652
653    /// Run entity extraction on text without adding to the database.
654    ///
655    /// Useful for testing extraction quality or comparing model outputs.
656    /// Requires `with_llm_entity_extraction*()` to have been called first.
657    ///
658    /// # Arguments
659    /// * `content` - The text to extract entities from
660    /// * `speaker` - Optional speaker name for first-person attribution
661    ///
662    /// # Returns
663    /// The extraction result with entities, facts, records, and relationships.
664    #[cfg(feature = "entity-extraction")]
665    pub fn extract_text(
666        &self,
667        content: &str,
668        speaker: Option<&str>,
669    ) -> Result<crate::extraction::ExtractionResult> {
670        self.pipeline.extract_text(content, speaker)
671    }
672
673    /// Apply an externally-produced extraction result to a memory's entity profiles.
674    ///
675    /// This enables API-based extraction backends (e.g., NScale cloud inference)
676    /// to inject entity profiles without requiring a local LLM. The extraction
677    /// result must match the same JSON schema as the local Qwen3 extractor.
678    ///
679    /// # Arguments
680    /// * `memory_id` - The memory ID to associate the extraction with
681    /// * `extraction` - The extraction result from an external source
682    #[cfg(feature = "entity-extraction")]
683    pub fn apply_extraction(
684        &self,
685        memory_id: &MemoryId,
686        extraction: &crate::extraction::ExtractionResult,
687    ) -> Result<()> {
688        // Update entity profiles from facts
689        self.pipeline
690            .update_entity_profiles_from_llm(memory_id, extraction)?;
691
692        // Store entity-to-entity relationships
693        if !extraction.relationships.is_empty() {
694            self.pipeline
695                .store_relationships(memory_id, &extraction.relationships)?;
696        }
697
698        // Annotate parent memory with typed record metadata (record_type, event_date).
699        // We do NOT create child memories — they flood the vector index and degrade
700        // recall. Instead, typed decomposition is stored as metadata on the parent
701        // for type-aware retrieval balancing.
702        if !extraction.records.is_empty() {
703            self.pipeline
704                .annotate_parent_with_types(memory_id, &extraction.records);
705        }
706
707        Ok(())
708    }
709
710    /// Generate summaries for all entity profiles.
711    ///
712    /// For each profile with facts, generates a dense summary paragraph that
713    /// condenses the profile's facts into one text block. When present, query()
714    /// injects summaries as single context items instead of N individual facts,
715    /// addressing RANK failures where evidence is present but buried.
716    ///
717    /// Returns the number of profiles summarized.
718    pub fn summarize_profiles(&self) -> Result<usize> {
719        let profiles = self.storage.list_entity_profiles()?;
720        let mut summarized = 0;
721        for mut profile in profiles {
722            if profile.generate_summary().is_some() {
723                self.storage.store_entity_profile(&profile)?;
724                summarized += 1;
725            }
726        }
727        Ok(summarized)
728    }
729
    /// Consolidate entity profiles by removing noise and deduplicating facts.
    ///
    /// Performs the following cleanup operations:
    /// 1. Remove null-indicator values ("none", "N/A", etc.)
    /// 2. Remove overly verbose values (>100 chars)
    /// 3. Semantic dedup within same fact_type using embedding similarity (threshold: 0.85)
    ///    — keeps fact with higher confidence, or first encountered on tie
    /// 4. Delete garbage entity profiles (non-person entities with ≤2 facts)
    ///
    /// A preliminary phase 0 merges alias profiles (e.g., "mel") into their
    /// canonical forms (e.g., "melanie"); each merged alias counts toward
    /// `profiles_deleted`.
    ///
    /// Returns (facts_removed, profiles_deleted).
    pub fn consolidate_profiles(&self) -> Result<(usize, usize)> {
        use crate::query::profile_search::{cosine_similarity, resolve_entity_alias};

        // Phase 3 (semantic dedup) is skipped entirely when the pipeline has
        // no embedding function configured.
        let embed_fn = self.pipeline.embedding_fn();

        let mut total_facts_removed = 0usize;
        let mut profiles_deleted = 0usize;

        // Phase 0: Merge alias profiles into their canonical forms.
        // E.g., "mel" → "melanie", "mell" → "melanie" (via fuzzy matching).
        //
        // NOTE(review): `all_names` is snapshotted once and not refreshed after
        // merges; an alias whose canonical was itself merged away earlier is
        // skipped (left unmerged) rather than re-resolved — confirm this is the
        // intended behavior.
        {
            let mut all_names = self.storage.list_entity_profile_names()?;
            // Sort by length (shortest first) so short aliases resolve to longer canonicals
            all_names.sort_by_key(|n| n.len());

            // Tracks alias names already deleted so we neither re-merge them
            // nor merge anything INTO a deleted name.
            let mut merged_away: std::collections::HashSet<String> =
                std::collections::HashSet::new();

            for i in 0..all_names.len() {
                let short_name = &all_names[i];
                if merged_away.contains(short_name) {
                    continue;
                }
                if let Some(canonical) = resolve_entity_alias(short_name, &all_names) {
                    if merged_away.contains(&canonical) {
                        continue;
                    }
                    // Load both profiles; either may have been deleted by an
                    // earlier operation, in which case we skip the pair.
                    let short_profile = match self.storage.get_entity_profile(short_name)? {
                        Some(p) => p,
                        None => continue,
                    };
                    let mut canon_profile = match self.storage.get_entity_profile(&canonical)? {
                        Some(p) => p,
                        None => continue,
                    };

                    // Move all facts from short → canonical (add_fact handles dedup)
                    for facts in short_profile.facts.values() {
                        for fact in facts {
                            canon_profile.add_fact(fact.clone());
                        }
                    }

                    // Move all source_memories
                    for mem_id in &short_profile.source_memories {
                        canon_profile.add_source_memory(mem_id.clone());
                    }

                    // Save canonical, delete alias
                    self.storage.store_entity_profile(&canon_profile)?;
                    self.storage.delete_entity_profile(short_name)?;
                    merged_away.insert(short_name.clone());
                    profiles_deleted += 1;

                    tracing::info!(
                        "Merged alias profile '{}' into canonical '{}'",
                        short_name,
                        canonical,
                    );
                }
            }
        }

        // Re-list after phase 0 so merged-away aliases are excluded.
        let profiles = self.storage.list_entity_profiles()?;

        // Lowercased values treated as "no information"; compared against the
        // trimmed, lowercased fact value in phase 1.
        const NULL_INDICATORS: &[&str] = &[
            "none",
            "n/a",
            "na",
            "not specified",
            "not mentioned",
            "unknown",
            "unspecified",
            "not provided",
            "no information",
        ];

        for mut profile in profiles {
            let mut facts_removed_in_profile = 0usize;

            // Phase 1 & 2: Remove null and long values
            for (_fact_type, facts) in profile.facts.iter_mut() {
                let before = facts.len();
                facts.retain(|f| {
                    let trimmed = f.value.trim();
                    let lower = trimmed.to_lowercase();
                    // Keep if NOT a null indicator AND NOT too long
                    !NULL_INDICATORS.contains(&lower.as_str()) && trimmed.len() <= 100
                });
                facts_removed_in_profile += before - facts.len();
            }

            // Phase 3: Semantic dedup within same fact_type (requires embedding fn)
            if let Some(ref embed_fn) = embed_fn {
                for (fact_type, facts) in profile.facts.iter_mut() {
                    if facts.len() <= 1 {
                        continue;
                    }

                    // Sort by confidence descending (highest confidence kept first)
                    facts.sort_by(|a, b| {
                        b.confidence
                            .partial_cmp(&a.confidence)
                            .unwrap_or(std::cmp::Ordering::Equal)
                    });

                    // Collect embeddings for all facts
                    let embeddings: Vec<Vec<f32>> = facts
                        .iter()
                        .map(|f| {
                            // Try stored embedding first, compute on the fly if missing
                            let key = fact_embedding_key(&profile.name, fact_type, &f.value);
                            self.storage
                                .get_fact_embedding(&key)
                                .ok()
                                .flatten()
                                .unwrap_or_else(|| {
                                    let text =
                                        format!("{} {}", fact_type.replace('_', " "), f.value);
                                    embed_fn(&text)
                                })
                        })
                        .collect();

                    // Greedy dedup: keep first (highest confidence), skip near-duplicates
                    let mut keep_indices: Vec<usize> = Vec::new();
                    for i in 0..facts.len() {
                        let mut is_dup = false;
                        for &kept_idx in &keep_indices {
                            let sim = cosine_similarity(&embeddings[i], &embeddings[kept_idx]);
                            if sim > 0.85 {
                                is_dup = true;
                                break;
                            }
                        }
                        if !is_dup {
                            keep_indices.push(i);
                        }
                    }

                    let before = facts.len();
                    let kept_facts: Vec<_> =
                        keep_indices.into_iter().map(|i| facts[i].clone()).collect();
                    *facts = kept_facts;
                    facts_removed_in_profile += before - facts.len();
                }
            }

            // Remove empty fact type entries
            profile.facts.retain(|_, v| !v.is_empty());

            total_facts_removed += facts_removed_in_profile;

            // Phase 4: Delete garbage profiles (non-person with ≤2 facts)
            let total_facts = profile.total_facts();
            if profile.entity_type != "person" && total_facts <= 2 {
                self.storage.delete_entity_profile(&profile.name)?;
                profiles_deleted += 1;
                continue;
            }

            // Save updated profile if any facts were removed
            if facts_removed_in_profile > 0 {
                self.storage.store_entity_profile(&profile)?;
            }
        }

        Ok((total_facts_removed, profiles_deleted))
    }
910
911    /// Repair entity profiles by re-processing llm_extraction metadata stored in memories.
912    ///
913    /// This is a recovery function for databases where entity profiles are missing or
914    /// incomplete due to extraction failures, consolidation over-pruning, or ingestion bugs.
915    ///
916    /// For every memory in the DB:
917    /// 1. Parse the `llm_extraction` JSON from metadata (if present)
918    /// 2. For each entity_fact: create/update the entity profile with the fact
919    ///    and add the memory as a source_memory
920    /// 3. For the `speaker` metadata field: ensure the speaker entity's profile
921    ///    includes this memory as a source_memory (handles first-person statements
922    ///    where the speaker name isn't in the content text)
923    ///
924    /// Respects the pipeline's `profile_entity_types` filter and type allowlist.
925    /// Skips entities whose names appear to be pronouns or generic placeholders.
926    ///
927    /// Returns (profiles_created, source_memories_added).
928    pub fn repair_profiles_from_metadata(&self) -> Result<(usize, usize)> {
929        use crate::query::profile_search::resolve_entity_alias;
930        use crate::types::{EntityFact, EntityId};
931
932        let junk_names: &[&str] = &[
933            "i",
934            "me",
935            "my",
936            "we",
937            "our",
938            "you",
939            "your",
940            "he",
941            "she",
942            "it",
943            "they",
944            "them",
945            "his",
946            "her",
947            "their",
948            "him",
949            "this",
950            "that",
951            "unknown",
952            "unspecified",
953            "someone",
954            "somebody",
955            "anyone",
956        ];
957
958        let allowed_types: &[&str] = &["person", "organization", "location"];
959
960        let mut profiles_created = 0usize;
961        let mut source_memories_added = 0usize;
962
963        let all_ids = self.storage.list_memory_ids()?;
964        let total = all_ids.len();
965        tracing::info!("repair_profiles_from_metadata: scanning {} memories", total);
966
967        for (idx, mem_id) in all_ids.iter().enumerate() {
968            if idx % 500 == 0 {
969                tracing::info!("  {}/{}", idx, total);
970            }
971
972            let memory = match self.storage.get_memory(mem_id)? {
973                Some(m) => m,
974                None => continue,
975            };
976
977            // Load fresh known_names once per memory (profiles may have been added)
978            let known_names = self.storage.list_entity_profile_names()?;
979
980            // ── Step A: re-process llm_extraction entity_facts ──────────────────
981            if let Some(json_str) = memory.metadata.get("llm_extraction") {
982                if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
983                    // Build entity_type lookup from "entities" array
984                    let mut entity_types: HashMap<String, String> = HashMap::new();
985                    if let Some(ents) = v["entities"].as_array() {
986                        for e in ents {
987                            if let (Some(name), Some(etype)) =
988                                (e["name"].as_str(), e["type"].as_str())
989                            {
990                                entity_types.insert(name.to_lowercase(), etype.to_lowercase());
991                            }
992                        }
993                    }
994
995                    if let Some(facts_arr) = v["entity_facts"].as_array() {
996                        for fact_val in facts_arr {
997                            let entity_raw = match fact_val["entity"].as_str() {
998                                Some(e) => e,
999                                None => continue,
1000                            };
1001                            let entity_lower = entity_raw.to_lowercase();
1002
1003                            // Skip junk names and single-char names
1004                            if entity_lower.len() < 2 || junk_names.contains(&entity_lower.as_str())
1005                            {
1006                                continue;
1007                            }
1008
1009                            let etype = entity_types
1010                                .get(&entity_lower)
1011                                .map(|s| s.as_str())
1012                                .unwrap_or("person");
1013
1014                            // Only create profiles for allowed entity types
1015                            if !allowed_types.contains(&etype) {
1016                                continue;
1017                            }
1018
1019                            // Canonicalize via alias resolution
1020                            let canonical = resolve_entity_alias(&entity_lower, &known_names)
1021                                .unwrap_or_else(|| entity_lower.clone());
1022
1023                            let fact_type = fact_val["fact_type"]
1024                                .as_str()
1025                                .unwrap_or("unknown")
1026                                .to_string();
1027                            let value = fact_val["value"].as_str().unwrap_or("").to_string();
1028                            let confidence = fact_val["confidence"].as_f64().unwrap_or(0.8) as f32;
1029
1030                            if value.is_empty() || value.len() > 100 {
1031                                continue;
1032                            }
1033
1034                            let is_new = self.storage.get_entity_profile(&canonical)?.is_none();
1035
1036                            let mut profile = self
1037                                .storage
1038                                .get_entity_profile(&canonical)?
1039                                .unwrap_or_else(|| {
1040                                    EntityProfile::new(
1041                                        EntityId::new(),
1042                                        canonical.clone(),
1043                                        etype.to_string(),
1044                                    )
1045                                });
1046
1047                            profile.add_fact(EntityFact {
1048                                fact_type,
1049                                value,
1050                                confidence,
1051                                source_memory: mem_id.clone(),
1052                                extracted_at: Timestamp::now(),
1053                            });
1054                            profile.add_source_memory(mem_id.clone());
1055
1056                            self.storage.store_entity_profile(&profile)?;
1057
1058                            if is_new {
1059                                profiles_created += 1;
1060                            }
1061                            source_memories_added += 1;
1062                        }
1063                    }
1064                }
1065            }
1066
1067            // ── Step B: speaker → source_memory attribution ─────────────────────
1068            // When the speaker says "I joined a gym", the entity name ("Maria") isn't
1069            // in the content. If an entity profile exists for the speaker, add this
1070            // memory as a source_memory so query-time entity injection can find it.
1071            if let Some(speaker_raw) = memory.metadata.get("speaker") {
1072                let speaker_lower = speaker_raw.trim().to_lowercase();
1073                if speaker_lower.len() < 2 || junk_names.contains(&speaker_lower.as_str()) {
1074                    continue;
1075                }
1076
1077                // Re-load known_names (may have been updated by Step A above)
1078                let known_names2 = self.storage.list_entity_profile_names()?;
1079                let canonical = resolve_entity_alias(&speaker_lower, &known_names2)
1080                    .unwrap_or_else(|| speaker_lower.clone());
1081
1082                if let Ok(Some(mut profile)) = self.storage.get_entity_profile(&canonical) {
1083                    if !profile.source_memories.contains(mem_id) {
1084                        profile.add_source_memory(mem_id.clone());
1085                        self.storage.store_entity_profile(&profile)?;
1086                        source_memories_added += 1;
1087                    }
1088                }
1089            }
1090        }
1091
1092        tracing::info!(
1093            "repair_profiles_from_metadata: created {} profiles, added {} source_memory links",
1094            profiles_created,
1095            source_memories_added
1096        );
1097        Ok((profiles_created, source_memories_added))
1098    }
1099
1100    /// Add a new memory to the database
1101    ///
1102    /// This will automatically index the memory across all dimensions:
1103    /// - Semantic (vector similarity)
1104    /// - Temporal (time-based)
1105    /// - Entity (if auto-extraction enabled)
1106    ///
1107    /// # Arguments
1108    ///
1109    /// * `content` - The text content to store
1110    /// * `embedding` - Vector embedding (must match configured dimension)
1111    /// * `metadata` - Optional key-value metadata
1112    /// * `timestamp` - Optional custom timestamp (defaults to now)
1113    /// * `source` - Optional provenance/source tracking information
1114    ///
1115    /// # Returns
1116    ///
1117    /// The ID of the created memory
1118    ///
1119    /// # Errors
1120    ///
1121    /// Returns an error if:
1122    /// - Embedding dimension doesn't match configuration
1123    /// - Storage operation fails
1124    /// - Source serialization fails
1125    ///
1126    /// # Example
1127    ///
1128    /// ```no_run
1129    /// # use mnemefusion_core::{MemoryEngine, Config};
1130    /// # use mnemefusion_core::types::{Source, SourceType};
1131    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1132    /// let embedding = vec![0.1; 384];
1133    ///
1134    /// // Add memory with source tracking
1135    /// let source = Source::new(SourceType::Conversation)
1136    ///     .with_id("conv_123")
1137    ///     .with_confidence(0.95);
1138    ///
1139    /// let id = engine.add(
1140    ///     "Meeting scheduled for next week".to_string(),
1141    ///     embedding,
1142    ///     None,
1143    ///     None,
1144    ///     Some(source),
1145    ///     None,
1146    /// ).unwrap();
1147    /// ```
1148    pub fn add(
1149        &self,
1150        content: String,
1151        embedding: impl Into<Option<Vec<f32>>>,
1152        metadata: Option<HashMap<String, String>>,
1153        timestamp: Option<Timestamp>,
1154        source: Option<Source>,
1155        namespace: Option<&str>,
1156    ) -> Result<MemoryId> {
1157        // Resolve embedding: use provided value or auto-compute from content.
1158        // When auto-computing and a speaker is known, compute on the third-person form
1159        // ("Alice joined a gym") rather than the raw first-person form ("I joined a gym").
1160        // This yields ~+0.25 cosine similarity improvement with entity-centric queries.
1161        // The original first-person content is stored unchanged.
1162        let embedding = match embedding.into() {
1163            Some(e) => e,
1164            None => {
1165                let text_for_embedding = metadata
1166                    .as_ref()
1167                    .and_then(|m| m.get("speaker"))
1168                    .filter(|s| !s.is_empty())
1169                    .map(|speaker| {
1170                        let subst = first_person_to_third(&content, speaker);
1171                        if subst != content {
1172                            subst
1173                        } else {
1174                            content.clone()
1175                        }
1176                    })
1177                    .unwrap_or_else(|| content.clone());
1178                self.auto_embed(&text_for_embedding)?
1179            }
1180        };
1181
1182        // Validate embedding dimension
1183        if embedding.len() != self.config.embedding_dim {
1184            return Err(Error::InvalidEmbeddingDimension {
1185                expected: self.config.embedding_dim,
1186                got: embedding.len(),
1187            });
1188        }
1189
1190        // Apply default namespace if caller didn't supply one
1191        let effective_ns = namespace.or(self.default_namespace.as_deref());
1192
1193        // Create memory
1194        let mut memory = if let Some(ts) = timestamp {
1195            let mut mem = Memory::new_with_timestamp(content, embedding, ts);
1196            if let Some(meta) = metadata {
1197                mem.metadata = meta;
1198            }
1199            mem
1200        } else {
1201            let mut mem = Memory::new(content, embedding);
1202            if let Some(meta) = metadata {
1203                mem.metadata = meta;
1204            }
1205            mem
1206        };
1207
1208        // Add source if provided
1209        if let Some(src) = source {
1210            memory.set_source(src)?;
1211        }
1212
1213        // Set namespace (defaults to empty string)
1214        memory.set_namespace(effective_ns.unwrap_or(""));
1215
1216        // Delegate to ingestion pipeline for atomic indexing
1217        self.pipeline.add(memory)
1218    }
1219
    /// Retrieve a memory by ID
    ///
    /// Thin read-only delegation to the storage engine; no indexes are touched.
    ///
    /// # Arguments
    ///
    /// * `id` - The memory ID to retrieve
    ///
    /// # Returns
    ///
    /// The memory record if found, or None if no memory has this ID
    ///
    /// # Example
    ///
    /// ```no_run
    /// # use mnemefusion_core::{MemoryEngine, Config};
    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// # let id = engine.add("test".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
    /// let memory = engine.get(&id).unwrap();
    /// if let Some(mem) = memory {
    ///     println!("Content: {}", mem.content);
    /// }
    /// ```
    pub fn get(&self, id: &MemoryId) -> Result<Option<Memory>> {
        self.storage.get_memory(id)
    }
1244
1245    /// Delete a memory by ID
1246    ///
1247    /// This will remove the memory from all indexes.
1248    ///
1249    /// # Arguments
1250    ///
1251    /// * `id` - The memory ID to delete
1252    /// * `namespace` - Optional namespace. If provided, verifies the memory is in this namespace before deleting
1253    ///
1254    /// # Returns
1255    ///
1256    /// true if the memory was deleted, false if it didn't exist
1257    ///
1258    /// # Errors
1259    ///
1260    /// Returns `Error::NamespaceMismatch` if namespace is provided and doesn't match
1261    ///
1262    /// # Example
1263    ///
1264    /// ```no_run
1265    /// # use mnemefusion_core::{MemoryEngine, Config};
1266    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1267    /// # let id = engine.add("test".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
1268    /// let deleted = engine.delete(&id, None).unwrap();
1269    /// assert!(deleted);
1270    /// ```
1271    pub fn delete(&self, id: &MemoryId, namespace: Option<&str>) -> Result<bool> {
1272        // If namespace is provided, verify it matches before deleting
1273        if let Some(expected_ns) = namespace {
1274            if let Some(memory) = self.storage.get_memory(id)? {
1275                let found_ns = memory.get_namespace();
1276                if found_ns != expected_ns {
1277                    return Err(Error::NamespaceMismatch {
1278                        expected: expected_ns.to_string(),
1279                        found: found_ns,
1280                    });
1281                }
1282            } else {
1283                // Memory doesn't exist
1284                return Ok(false);
1285            }
1286        }
1287
1288        // Delegate to ingestion pipeline for atomic cleanup
1289        self.pipeline.delete(id)
1290    }
1291
    /// Add multiple memories in a batch operation
    ///
    /// This is significantly faster than calling `add()` multiple times (10x+ improvement)
    /// because it uses:
    /// - Single transaction for all storage operations
    /// - Vector index locked once for all additions
    /// - Batched entity extraction with deduplication
    ///
    /// Convenience wrapper: equivalent to `add_batch_with_progress()` with no
    /// progress callback.
    ///
    /// # Arguments
    ///
    /// * `inputs` - Vector of MemoryInput to add
    /// * `namespace` - Optional namespace applied to every input
    ///
    /// # Returns
    ///
    /// BatchResult containing IDs of created memories and any errors
    ///
    /// # Performance
    ///
    /// Target: 1,000 memories in <500ms
    ///
    /// # Example
    ///
    /// ```no_run
    /// use mnemefusion_core::{MemoryEngine, Config};
    /// use mnemefusion_core::types::MemoryInput;
    ///
    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
    /// let inputs = vec![
    ///     MemoryInput::new("content 1".to_string(), vec![0.1; 384]),
    ///     MemoryInput::new("content 2".to_string(), vec![0.2; 384]),
    /// ];
    ///
    /// let result = engine.add_batch(inputs, None).unwrap();
    /// println!("Created {} memories", result.created_count);
    /// if result.has_errors() {
    ///     println!("Encountered {} errors", result.errors.len());
    /// }
    /// ```
    pub fn add_batch(
        &self,
        inputs: Vec<MemoryInput>,
        namespace: Option<&str>,
    ) -> Result<BatchResult> {
        self.add_batch_with_progress(inputs, namespace, None)
    }
1337
1338    /// Add multiple memories in a single batch operation with progress reporting.
1339    ///
1340    /// Like `add_batch()`, but calls `progress_callback(current, total)` after each
1341    /// memory is processed. Useful for long ingestion runs.
1342    ///
1343    /// # Example
1344    ///
1345    /// ```no_run
1346    /// # use mnemefusion_core::{MemoryEngine, Config, MemoryInput};
1347    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1348    /// let inputs: Vec<MemoryInput> = vec![]; // ...
1349    /// let result = engine.add_batch_with_progress(
1350    ///     inputs,
1351    ///     None,
1352    ///     Some(Box::new(|current, total| {
1353    ///         println!("Progress: {}/{}", current, total);
1354    ///     })),
1355    /// ).unwrap();
1356    /// ```
1357    pub fn add_batch_with_progress(
1358        &self,
1359        inputs: Vec<MemoryInput>,
1360        namespace: Option<&str>,
1361        progress_callback: Option<Box<dyn Fn(usize, usize)>>,
1362    ) -> Result<BatchResult> {
1363        // Validate all embeddings upfront
1364        for (index, input) in inputs.iter().enumerate() {
1365            if input.embedding.len() != self.config.embedding_dim {
1366                let mut result = BatchResult::new();
1367                result.errors.push(crate::types::BatchError::new(
1368                    index,
1369                    format!(
1370                        "Invalid embedding dimension: expected {}, got {}",
1371                        self.config.embedding_dim,
1372                        input.embedding.len()
1373                    ),
1374                ));
1375                return Ok(result);
1376            }
1377        }
1378
1379        // Set namespace on all inputs if provided
1380        let mut inputs_with_ns = inputs;
1381        if let Some(ns) = namespace {
1382            for input in &mut inputs_with_ns {
1383                input.namespace = Some(ns.to_string());
1384            }
1385        }
1386
1387        // Delegate to ingestion pipeline
1388        self.pipeline.add_batch(inputs_with_ns, progress_callback)
1389    }
1390
1391    /// Delete multiple memories in a batch operation
1392    ///
1393    /// This is faster than calling `delete()` multiple times because it uses:
1394    /// - Single transaction for all storage operations
1395    /// - Batched entity cleanup
1396    ///
1397    /// # Arguments
1398    ///
1399    /// * `ids` - Vector of MemoryIds to delete
1400    /// * `namespace` - Optional namespace. If provided, only deletes memories in this namespace
1401    ///
1402    /// # Returns
1403    ///
1404    /// Number of memories actually deleted (may be less than input if some don't exist or are in wrong namespace)
1405    ///
1406    /// # Example
1407    ///
1408    /// ```no_run
1409    /// use mnemefusion_core::{MemoryEngine, Config};
1410    ///
1411    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1412    /// # let id1 = engine.add("test1".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
1413    /// # let id2 = engine.add("test2".to_string(), vec![0.2; 384], None, None, None, None).unwrap();
1414    /// let ids = vec![id1, id2];
1415    /// let deleted_count = engine.delete_batch(ids, None).unwrap();
1416    /// println!("Deleted {} memories", deleted_count);
1417    /// ```
1418    pub fn delete_batch(&self, ids: Vec<MemoryId>, namespace: Option<&str>) -> Result<usize> {
1419        // If namespace is provided, filter IDs to only those in the namespace
1420        let ids_to_delete = if let Some(expected_ns) = namespace {
1421            let mut filtered_ids = Vec::new();
1422            for id in ids {
1423                if let Some(memory) = self.storage.get_memory(&id)? {
1424                    if memory.get_namespace() == expected_ns {
1425                        filtered_ids.push(id);
1426                    }
1427                }
1428            }
1429            filtered_ids
1430        } else {
1431            ids
1432        };
1433
1434        // Delegate to ingestion pipeline
1435        self.pipeline.delete_batch(ids_to_delete)
1436    }
1437
1438    /// Add a memory with automatic deduplication
1439    ///
1440    /// Uses content hash to detect duplicates. If identical content already exists,
1441    /// returns the existing memory ID without creating a duplicate.
1442    ///
1443    /// # Arguments
1444    ///
1445    /// * `content` - Text content
1446    /// * `embedding` - Vector embedding
1447    /// * `metadata` - Optional metadata
1448    /// * `timestamp` - Optional custom timestamp
1449    /// * `source` - Optional source/provenance
1450    ///
1451    /// # Returns
1452    ///
1453    /// AddResult with created flag and ID (either new or existing)
1454    ///
1455    /// # Example
1456    ///
1457    /// ```no_run
1458    /// use mnemefusion_core::{MemoryEngine, Config};
1459    ///
1460    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1461    /// let embedding = vec![0.1; 384];
1462    ///
1463    /// // First add
1464    /// let result1 = engine.add_with_dedup(
1465    ///     "Meeting notes".to_string(),
1466    ///     embedding.clone(),
1467    ///     None,
1468    ///     None,
1469    ///     None,
1470    ///     None,
1471    /// ).unwrap();
1472    /// assert!(result1.created);
1473    ///
1474    /// // Second add with same content
1475    /// let result2 = engine.add_with_dedup(
1476    ///     "Meeting notes".to_string(),
1477    ///     embedding.clone(),
1478    ///     None,
1479    ///     None,
1480    ///     None,
1481    ///     None,
1482    /// ).unwrap();
1483    /// assert!(!result2.created); // Duplicate detected
1484    /// assert_eq!(result1.id, result2.id); // Same ID returned
1485    /// ```
1486    pub fn add_with_dedup(
1487        &self,
1488        content: String,
1489        embedding: Vec<f32>,
1490        metadata: Option<HashMap<String, String>>,
1491        timestamp: Option<Timestamp>,
1492        source: Option<Source>,
1493        namespace: Option<&str>,
1494    ) -> Result<AddResult> {
1495        // Validate embedding dimension
1496        if embedding.len() != self.config.embedding_dim {
1497            return Err(Error::InvalidEmbeddingDimension {
1498                expected: self.config.embedding_dim,
1499                got: embedding.len(),
1500            });
1501        }
1502
1503        // Create memory
1504        let mut memory = if let Some(ts) = timestamp {
1505            Memory::new_with_timestamp(content, embedding, ts)
1506        } else {
1507            Memory::new(content, embedding)
1508        };
1509
1510        // Add metadata
1511        if let Some(meta) = metadata {
1512            for (key, value) in meta {
1513                memory.set_metadata(key, value);
1514            }
1515        }
1516
1517        // Add source
1518        if let Some(src) = source {
1519            memory.set_source(src)?;
1520        }
1521
1522        // Set namespace if provided
1523        memory.set_namespace(namespace.unwrap_or(""));
1524
1525        // Delegate to pipeline with deduplication
1526        self.pipeline.add_with_dedup(memory)
1527    }
1528
1529    /// Upsert a memory by logical key
1530    ///
1531    /// If key exists: replaces content, embedding, and metadata
1532    /// If key doesn't exist: creates new memory and associates with key
1533    ///
1534    /// This is useful for updating facts that may change over time.
1535    ///
1536    /// # Arguments
1537    ///
1538    /// * `key` - Logical key (e.g., "user_profile:123", "doc:readme")
1539    /// * `content` - Text content
1540    /// * `embedding` - Vector embedding
1541    /// * `metadata` - Optional metadata
1542    /// * `timestamp` - Optional custom timestamp
1543    /// * `source` - Optional source/provenance
1544    ///
1545    /// # Returns
1546    ///
1547    /// UpsertResult indicating whether memory was created or updated
1548    ///
1549    /// # Example
1550    ///
1551    /// ```no_run
1552    /// use mnemefusion_core::{MemoryEngine, Config};
1553    ///
1554    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1555    /// let embedding = vec![0.1; 384];
1556    ///
1557    /// // First upsert - creates new
1558    /// let result1 = engine.upsert(
1559    ///     "user:profile",
1560    ///     "Alice likes hiking".to_string(),
1561    ///     embedding.clone(),
1562    ///     None,
1563    ///     None,
1564    ///     None,
1565    ///     None,
1566    /// ).unwrap();
1567    /// assert!(result1.created);
1568    ///
1569    /// // Second upsert - updates existing
1570    /// let result2 = engine.upsert(
1571    ///     "user:profile",
1572    ///     "Alice likes hiking and photography".to_string(),
1573    ///     vec![0.2; 384],
1574    ///     None,
1575    ///     None,
1576    ///     None,
1577    ///     None,
1578    /// ).unwrap();
1579    /// assert!(result2.updated);
1580    /// assert_eq!(result2.previous_content, Some("Alice likes hiking".to_string()));
1581    /// ```
1582    pub fn upsert(
1583        &self,
1584        key: &str,
1585        content: String,
1586        embedding: Vec<f32>,
1587        metadata: Option<HashMap<String, String>>,
1588        timestamp: Option<Timestamp>,
1589        source: Option<Source>,
1590        namespace: Option<&str>,
1591    ) -> Result<UpsertResult> {
1592        // Validate embedding dimension
1593        if embedding.len() != self.config.embedding_dim {
1594            return Err(Error::InvalidEmbeddingDimension {
1595                expected: self.config.embedding_dim,
1596                got: embedding.len(),
1597            });
1598        }
1599
1600        // Create memory
1601        let mut memory = if let Some(ts) = timestamp {
1602            Memory::new_with_timestamp(content, embedding, ts)
1603        } else {
1604            Memory::new(content, embedding)
1605        };
1606
1607        // Add metadata
1608        if let Some(meta) = metadata {
1609            for (key, value) in meta {
1610                memory.set_metadata(key, value);
1611            }
1612        }
1613
1614        // Add source
1615        if let Some(src) = source {
1616            memory.set_source(src)?;
1617        }
1618
1619        // Set namespace if provided
1620        memory.set_namespace(namespace.unwrap_or(""));
1621
1622        // Delegate to pipeline
1623        self.pipeline.upsert(key, memory)
1624    }
1625
1626    /// Get the number of memories in the database
1627    ///
1628    /// # Example
1629    ///
1630    /// ```no_run
1631    /// # use mnemefusion_core::{MemoryEngine, Config};
1632    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1633    /// let count = engine.count().unwrap();
1634    /// println!("Total memories: {}", count);
1635    /// ```
1636    pub fn count(&self) -> Result<usize> {
1637        self.storage.count_memories()
1638    }
1639
1640    /// List all memory IDs (for debugging/testing)
1641    ///
1642    /// # Warning
1643    ///
1644    /// This loads all memory IDs into memory. Use with caution on large databases.
1645    pub fn list_ids(&self) -> Result<Vec<MemoryId>> {
1646        self.storage.list_memory_ids()
1647    }
1648
1649    /// Update the embedding vector for an existing memory.
1650    ///
1651    /// This updates both the stored memory record (used by MMR diversity) and
1652    /// the HNSW vector index (used by semantic search). The memory content,
1653    /// metadata, and all other fields are preserved.
1654    ///
1655    /// # Arguments
1656    ///
1657    /// * `id` - The memory ID to update
1658    /// * `new_embedding` - The new embedding vector (must match configured dimension)
1659    ///
1660    /// # Errors
1661    ///
1662    /// Returns error if the memory doesn't exist or the embedding dimension is wrong.
1663    pub fn update_embedding(&self, id: &MemoryId, new_embedding: Vec<f32>) -> Result<()> {
1664        // Load existing memory
1665        let mut memory = self
1666            .storage
1667            .get_memory(id)?
1668            .ok_or_else(|| Error::MemoryNotFound(id.to_string()))?;
1669
1670        // Validate dimension
1671        let expected = self.config.embedding_dim;
1672        if new_embedding.len() != expected {
1673            return Err(Error::InvalidEmbeddingDimension {
1674                expected,
1675                got: new_embedding.len(),
1676            });
1677        }
1678
1679        // Update embedding in memory record
1680        memory.embedding = new_embedding.clone();
1681
1682        // Update storage (redb)
1683        self.storage.store_memory(&memory)?;
1684
1685        // Update vector index (usearch HNSW)
1686        let mut vi = self.vector_index.write().unwrap();
1687        // Remove old vector, then add new one
1688        let _ = vi.remove(id); // ignore error if not found (fresh index)
1689        vi.add(id.clone(), &new_embedding)?;
1690
1691        Ok(())
1692    }
1693
1694    /// Get the configuration
1695    pub fn config(&self) -> &Config {
1696        &self.config
1697    }
1698
1699    /// Reserve capacity in the vector index for future insertions
1700    ///
1701    /// This is useful when you know you'll be adding many memories
1702    /// and want to avoid repeated reallocations, improving performance.
1703    ///
1704    /// # Arguments
1705    ///
1706    /// * `capacity` - Number of vectors to reserve space for
1707    ///
1708    /// # Example
1709    ///
1710    /// ```no_run
1711    /// # use mnemefusion_core::{MemoryEngine, Config};
1712    /// # let mut engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1713    /// // Reserve space for 10,000 memories before bulk insertion
1714    /// engine.reserve_capacity(10_000).unwrap();
1715    /// ```
1716    pub fn reserve_capacity(&self, capacity: usize) -> Result<()> {
1717        self.pipeline.reserve_capacity(capacity)
1718    }
1719
1720    /// Search for memories by semantic similarity
1721    ///
1722    /// # Arguments
1723    ///
1724    /// * `query_embedding` - The query vector to search for
1725    /// * `top_k` - Maximum number of results to return
1726    /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
1727    ///
1728    /// # Returns
1729    ///
1730    /// A vector of (Memory, similarity_score) tuples, sorted by similarity (highest first)
1731    ///
1732    /// # Example
1733    ///
1734    /// ```no_run
1735    /// # use mnemefusion_core::{MemoryEngine, Config};
1736    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1737    /// # let query_embedding = vec![0.1; 384];
1738    /// let results = engine.search(&query_embedding, 10, None, None).unwrap();
1739    /// for (memory, score) in results {
1740    ///     println!("Similarity: {:.3} - {}", score, memory.content);
1741    /// }
1742    /// ```
1743    pub fn search(
1744        &self,
1745        query_embedding: &[f32],
1746        top_k: usize,
1747        namespace: Option<&str>,
1748        filters: Option<&[MetadataFilter]>,
1749    ) -> Result<Vec<(Memory, f32)>> {
1750        // If filtering is needed, fetch more results (5x) and filter
1751        let needs_filtering =
1752            namespace.is_some() || (filters.is_some() && !filters.unwrap().is_empty());
1753        let fetch_k = if needs_filtering { top_k * 5 } else { top_k };
1754
1755        // Search vector index
1756        let vector_results = {
1757            let index = self.vector_index.read().unwrap();
1758            index.search(query_embedding, fetch_k)?
1759        };
1760
1761        // Retrieve full memory records using u64 lookup
1762        let mut results = Vec::with_capacity(vector_results.len());
1763
1764        for vector_result in vector_results {
1765            // Look up memory using the u64 key from vector index
1766            let key = vector_result.id.to_u64();
1767            if let Some(memory) = self.storage.get_memory_by_u64(key)? {
1768                // Filter by namespace if provided
1769                if let Some(ns) = namespace {
1770                    if memory.get_namespace() != ns {
1771                        continue;
1772                    }
1773                }
1774
1775                // Filter by metadata if provided
1776                if let Some(filter_list) = filters {
1777                    if !Self::memory_matches_filters(&memory, filter_list) {
1778                        continue;
1779                    }
1780                }
1781
1782                results.push((memory, vector_result.similarity));
1783
1784                // Stop if we have enough results after filtering
1785                if results.len() >= top_k {
1786                    break;
1787                }
1788            }
1789        }
1790
1791        Ok(results)
1792    }
1793
1794    /// Check if a memory matches all metadata filters
1795    fn memory_matches_filters(memory: &Memory, filters: &[MetadataFilter]) -> bool {
1796        for filter in filters {
1797            let value = memory.metadata.get(&filter.field).map(|s| s.as_str());
1798            if !filter.matches(value) {
1799                return false;
1800            }
1801        }
1802        true
1803    }
1804
1805    /// Intelligent multi-dimensional query with intent classification
1806    ///
1807    /// This method performs intent-aware retrieval across all dimensions:
1808    /// - Classifies the query intent (temporal, causal, entity, factual)
1809    /// - Retrieves results from relevant dimensions
1810    /// - Fuses results with adaptive weights based on intent
1811    ///
1812    /// # Arguments
1813    ///
1814    /// * `query_text` - Natural language query text
1815    /// * `query_embedding` - Vector embedding of the query
1816    /// * `limit` - Maximum number of results to return
1817    /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
1818    ///
1819    /// # Returns
1820    ///
1821    /// Tuple of (intent classification, fused results with full memory records)
1822    ///
1823    /// # Example
1824    ///
1825    /// ```no_run
1826    /// # use mnemefusion_core::{MemoryEngine, Config};
1827    /// # let engine = MemoryEngine::open("test.mfdb", Config::default()).unwrap();
1828    /// # let query_embedding = vec![0.1; 384];
1829    /// let (intent, results, profile_context) = engine.query(
1830    ///     "Why was the meeting cancelled?",
1831    ///     &query_embedding,
1832    ///     10,
1833    ///     None,
1834    ///     None
1835    /// ).unwrap();
1836    ///
1837    /// println!("Query intent: {:?}", intent.intent);
1838    /// println!("Profile context: {} entries", profile_context.len());
1839    /// for result in results {
1840    ///     println!("Score: {:.3} - {}", result.1.fused_score, result.0.content);
1841    /// }
1842    /// ```
1843    pub fn query(
1844        &self,
1845        query_text: &str,
1846        query_embedding: impl Into<Option<Vec<f32>>>,
1847        limit: usize,
1848        namespace: Option<&str>,
1849        filters: Option<&[MetadataFilter]>,
1850    ) -> Result<(
1851        IntentClassification,
1852        Vec<(Memory, FusedResult)>,
1853        Vec<String>,
1854    )> {
1855        // Resolve query embedding: use provided value or auto-compute from query text
1856        let embedding_vec: Vec<f32> = match query_embedding.into() {
1857            Some(e) => e,
1858            None => self.auto_embed(query_text)?,
1859        };
1860
1861        // Apply default namespace if caller didn't supply one
1862        let effective_ns = namespace.or(self.default_namespace.as_deref());
1863
1864        // Create trace recorder if tracing is enabled
1865        let mut trace_recorder = if self.enable_trace {
1866            Some(TraceRecorder::new("query"))
1867        } else {
1868            None
1869        };
1870
1871        // Execute query using query planner.
1872        // Pass user_entity for first-person pronoun resolution:
1873        // when user says "I like hiking", the system maps "I" to their entity profile,
1874        // ensuring their memories get the Step 2.1 entity boost.
1875        let (intent, fused_results, matched_facts) = self.query_planner.query(
1876            query_text,
1877            &embedding_vec,
1878            limit,
1879            effective_ns,
1880            filters,
1881            self.user_entity.as_deref(),
1882            trace_recorder.as_mut(),
1883        )?;
1884
1885        // Build profile context as SEPARATE strings (not mixed into results).
1886        // Profile facts contain entity knowledge ("Caroline's hobby: painting") but
1887        // lack dates, speaker context, and conversational detail. Mixing them into
1888        // the results Vec with high scores pushes real memories out of top-K context.
1889        let mut profile_context = Vec::new();
1890
1891        // Group matched facts by entity name
1892        let mut facts_by_entity: HashMap<String, Vec<&crate::query::MatchedProfileFact>> =
1893            HashMap::new();
1894        for fact in &matched_facts {
1895            facts_by_entity
1896                .entry(fact.entity_name.clone())
1897                .or_default()
1898                .push(fact);
1899        }
1900
1901        for (entity_name, facts) in &facts_by_entity {
1902            // If profile has a pre-computed summary, use it as ONE context item
1903            // Otherwise, fall back to individual fact format
1904            let profile_summary = self
1905                .storage
1906                .get_entity_profile(entity_name)
1907                .ok()
1908                .flatten()
1909                .and_then(|p| p.summary.clone());
1910
1911            if let Some(summary) = profile_summary {
1912                profile_context.push(summary);
1913            } else {
1914                // No summary — format individual facts
1915                for fact in facts {
1916                    let content = format!(
1917                        "{}'s {}: {}",
1918                        fact.entity_name,
1919                        fact.fact_type.replace('_', " "),
1920                        fact.value
1921                    );
1922                    profile_context.push(content);
1923                }
1924            }
1925        }
1926
1927        // Retrieve full memory records using u64 key lookup
1928        // Note: Vector index returns partial MemoryIds (first 8 bytes only),
1929        // so we use get_memory_by_u64 which looks up the full UUID from the index table
1930        let mut results = Vec::new();
1931        for fused_result in fused_results {
1932            let key = fused_result.id.to_u64();
1933            if let Some(memory) = self.storage.get_memory_by_u64(key)? {
1934                results.push((memory, fused_result));
1935            }
1936        }
1937
1938        // Store trace if recording
1939        if let Some(rec) = trace_recorder {
1940            *self.last_query_trace.borrow_mut() = Some(rec.finish());
1941        }
1942
1943        Ok((intent, results, profile_context))
1944    }
1945
1946    /// Returns the trace from the most recent `query()` call, if tracing is enabled.
1947    pub fn last_query_trace(&self) -> Option<Trace> {
1948        self.last_query_trace.borrow().clone()
1949    }
1950
1951    /// Query memories within a time range
1952    ///
1953    /// Returns memories whose timestamps fall within the specified range,
1954    /// sorted by timestamp (newest first).
1955    ///
1956    /// # Arguments
1957    ///
1958    /// * `start` - Start of the time range (inclusive)
1959    /// * `end` - End of the time range (inclusive)
1960    /// * `limit` - Maximum number of results to return
1961    /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
1962    ///
1963    /// # Returns
1964    ///
1965    /// A vector of (Memory, Timestamp) tuples, sorted newest first
1966    ///
1967    /// # Example
1968    ///
1969    /// ```no_run
1970    /// # use mnemefusion_core::{MemoryEngine, Config, Timestamp};
1971    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
1972    /// let now = Timestamp::now();
1973    /// let week_ago = now.subtract_days(7);
1974    ///
1975    /// let results = engine.get_range(week_ago, now, 100, None).unwrap();
1976    /// for (memory, timestamp) in results {
1977    ///     println!("{}: {}", timestamp.as_unix_secs(), memory.content);
1978    /// }
1979    /// ```
1980    pub fn get_range(
1981        &self,
1982        start: Timestamp,
1983        end: Timestamp,
1984        limit: usize,
1985        namespace: Option<&str>,
1986    ) -> Result<Vec<(Memory, Timestamp)>> {
1987        // Fetch more results if filtering by namespace
1988        let fetch_limit = if namespace.is_some() {
1989            limit * 3
1990        } else {
1991            limit
1992        };
1993        let temporal_results = self.temporal_index.range_query(start, end, fetch_limit)?;
1994
1995        // Retrieve and filter full memory records
1996        let mut results = Vec::with_capacity(temporal_results.len());
1997
1998        for temp_result in temporal_results {
1999            if let Some(memory) = self.storage.get_memory(&temp_result.id)? {
2000                // Filter by namespace if provided
2001                if let Some(ns) = namespace {
2002                    if memory.get_namespace() != ns {
2003                        continue;
2004                    }
2005                }
2006                results.push((memory, temp_result.timestamp));
2007
2008                // Stop if we have enough results after filtering
2009                if results.len() >= limit {
2010                    break;
2011                }
2012            }
2013        }
2014
2015        Ok(results)
2016    }
2017
2018    /// Get the N most recent memories
2019    ///
2020    /// Returns the most recent memories, sorted by timestamp (newest first).
2021    ///
2022    /// # Arguments
2023    ///
2024    /// * `n` - Number of recent memories to retrieve
2025    /// * `namespace` - Optional namespace filter. If provided, only returns memories in this namespace
2026    ///
2027    /// # Returns
2028    ///
2029    /// A vector of (Memory, Timestamp) tuples, sorted newest first
2030    ///
2031    /// # Example
2032    ///
2033    /// ```no_run
2034    /// # use mnemefusion_core::{MemoryEngine, Config};
2035    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2036    /// let recent = engine.get_recent(10, None).unwrap();
2037    /// println!("10 most recent memories:");
2038    /// for (memory, timestamp) in recent {
2039    ///     println!("  {} - {}", timestamp.as_unix_secs(), memory.content);
2040    /// }
2041    /// ```
2042    pub fn get_recent(
2043        &self,
2044        n: usize,
2045        namespace: Option<&str>,
2046    ) -> Result<Vec<(Memory, Timestamp)>> {
2047        // Fetch more results if filtering by namespace
2048        let fetch_n = if namespace.is_some() { n * 3 } else { n };
2049        let temporal_results = self.temporal_index.recent(fetch_n)?;
2050
2051        // Retrieve and filter full memory records
2052        let mut results = Vec::with_capacity(temporal_results.len());
2053
2054        for temp_result in temporal_results {
2055            if let Some(memory) = self.storage.get_memory(&temp_result.id)? {
2056                // Filter by namespace if provided
2057                if let Some(ns) = namespace {
2058                    if memory.get_namespace() != ns {
2059                        continue;
2060                    }
2061                }
2062                results.push((memory, temp_result.timestamp));
2063
2064                // Stop if we have enough results after filtering
2065                if results.len() >= n {
2066                    break;
2067                }
2068            }
2069        }
2070
2071        Ok(results)
2072    }
2073
2074    /// Add a causal link between two memories
2075    ///
2076    /// Links a cause memory to an effect memory with a confidence score.
2077    ///
2078    /// # Arguments
2079    ///
2080    /// * `cause` - The MemoryId of the cause
2081    /// * `effect` - The MemoryId of the effect
2082    /// * `confidence` - Confidence score (0.0 to 1.0)
2083    /// * `evidence` - Evidence text explaining the causal relationship
2084    ///
2085    /// # Errors
2086    ///
2087    /// Returns error if confidence is not in range [0.0, 1.0]
2088    ///
2089    /// # Example
2090    ///
2091    /// ```no_run
2092    /// # use mnemefusion_core::{MemoryEngine, Config};
2093    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2094    /// # let id1 = engine.add("Cause".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2095    /// # let id2 = engine.add("Effect".to_string(), vec![0.2; 384], None, None, None, None).unwrap();
2096    /// engine.add_causal_link(&id1, &id2, 0.9, "id1 caused id2".to_string()).unwrap();
2097    /// ```
2098    pub fn add_causal_link(
2099        &self,
2100        cause: &MemoryId,
2101        effect: &MemoryId,
2102        confidence: f32,
2103        evidence: String,
2104    ) -> Result<()> {
2105        // Add the causal link to the graph
2106        {
2107            let mut graph = self.graph_manager.write().unwrap();
2108            graph.add_causal_link(cause, effect, confidence, evidence)?;
2109        }
2110
2111        // Persist graph immediately for crash recovery
2112        // This ensures causal links are durable
2113        {
2114            let graph = self.graph_manager.read().unwrap();
2115            crate::graph::persist::save_graph(&graph, &self.storage)?;
2116        }
2117
2118        Ok(())
2119    }
2120
2121    /// Get causes of a memory (backward traversal)
2122    ///
2123    /// Finds all memories that causally precede the given memory, up to max_hops.
2124    ///
2125    /// # Arguments
2126    ///
2127    /// * `memory_id` - The memory to find causes for
2128    /// * `max_hops` - Maximum traversal depth
2129    ///
2130    /// # Returns
2131    ///
2132    /// CausalTraversalResult with all paths found
2133    ///
2134    /// # Example
2135    ///
2136    /// ```no_run
2137    /// # use mnemefusion_core::{MemoryEngine, Config};
2138    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2139    /// # let id = engine.add("Memory".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2140    /// let causes = engine.get_causes(&id, 3).unwrap();
2141    /// for path in causes.paths {
2142    ///     println!("Found causal path with {} steps (confidence: {})",
2143    ///              path.memories.len(), path.confidence);
2144    /// }
2145    /// ```
2146    pub fn get_causes(
2147        &self,
2148        memory_id: &MemoryId,
2149        max_hops: usize,
2150    ) -> Result<CausalTraversalResult> {
2151        let graph = self.graph_manager.read().unwrap();
2152        graph.get_causes(memory_id, max_hops)
2153    }
2154
2155    /// Get effects of a memory (forward traversal)
2156    ///
2157    /// Finds all memories that causally follow the given memory, up to max_hops.
2158    ///
2159    /// # Arguments
2160    ///
2161    /// * `memory_id` - The memory to find effects for
2162    /// * `max_hops` - Maximum traversal depth
2163    ///
2164    /// # Returns
2165    ///
2166    /// CausalTraversalResult with all paths found
2167    ///
2168    /// # Example
2169    ///
2170    /// ```no_run
2171    /// # use mnemefusion_core::{MemoryEngine, Config};
2172    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2173    /// # let id = engine.add("Memory".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2174    /// let effects = engine.get_effects(&id, 3).unwrap();
2175    /// for path in effects.paths {
2176    ///     println!("Found effect chain with {} steps (confidence: {})",
2177    ///              path.memories.len(), path.confidence);
2178    /// }
2179    /// ```
2180    pub fn get_effects(
2181        &self,
2182        memory_id: &MemoryId,
2183        max_hops: usize,
2184    ) -> Result<CausalTraversalResult> {
2185        let graph = self.graph_manager.read().unwrap();
2186        graph.get_effects(memory_id, max_hops)
2187    }
2188
2189    // ========== Namespace Operations ==========
2190
2191    /// List all namespaces in the database
2192    ///
2193    /// Returns a sorted list of all unique namespace strings, excluding the default namespace ("").
2194    ///
2195    /// # Performance
2196    ///
2197    /// O(n) where n = total memories. This scans all memories to extract namespaces.
2198    ///
2199    /// # Example
2200    ///
2201    /// ```no_run
2202    /// # use mnemefusion_core::{MemoryEngine, Config};
2203    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2204    /// let namespaces = engine.list_namespaces().unwrap();
2205    /// for ns in namespaces {
2206    ///     println!("Namespace: {}", ns);
2207    /// }
2208    /// ```
2209    pub fn list_namespaces(&self) -> Result<Vec<String>> {
2210        self.storage.list_namespaces()
2211    }
2212
2213    /// Count memories in a specific namespace
2214    ///
2215    /// # Arguments
2216    ///
2217    /// * `namespace` - The namespace to count (empty string "" for default namespace)
2218    ///
2219    /// # Returns
2220    ///
2221    /// Number of memories in the namespace
2222    ///
2223    /// # Example
2224    ///
2225    /// ```no_run
2226    /// # use mnemefusion_core::{MemoryEngine, Config};
2227    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2228    /// let count = engine.count_namespace("user_123").unwrap();
2229    /// println!("User has {} memories", count);
2230    /// ```
2231    pub fn count_namespace(&self, namespace: &str) -> Result<usize> {
2232        self.storage.count_namespace(namespace)
2233    }
2234
2235    /// Delete all memories in a namespace
2236    ///
2237    /// This is a convenience method that lists all memory IDs in the namespace
2238    /// and deletes them via the ingestion pipeline (ensuring proper cleanup of indexes).
2239    ///
2240    /// # Arguments
2241    ///
2242    /// * `namespace` - The namespace to delete (empty string "" for default namespace)
2243    ///
2244    /// # Returns
2245    ///
2246    /// Number of memories deleted
2247    ///
2248    /// # Warning
2249    ///
2250    /// This operation cannot be undone. Use with caution.
2251    ///
2252    /// # Example
2253    ///
2254    /// ```no_run
2255    /// # use mnemefusion_core::{MemoryEngine, Config};
2256    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2257    /// let deleted = engine.delete_namespace("old_user").unwrap();
2258    /// println!("Deleted {} memories from namespace", deleted);
2259    /// ```
2260    pub fn delete_namespace(&self, namespace: &str) -> Result<usize> {
2261        // Get all memory IDs in this namespace
2262        let ids = self.storage.list_namespace_ids(namespace)?;
2263
2264        // Delete via pipeline for proper cleanup
2265        self.pipeline.delete_batch(ids)
2266    }
2267
2268    // ========== Entity Operations ==========
2269
2270    /// Get all memories that mention a specific entity
2271    ///
2272    /// # Arguments
2273    ///
2274    /// * `entity_name` - The name of the entity to query (case-insensitive)
2275    ///
2276    /// # Returns
2277    ///
2278    /// A vector of Memory objects that mention this entity
2279    ///
2280    /// # Example
2281    ///
2282    /// ```no_run
2283    /// # use mnemefusion_core::{MemoryEngine, Config};
2284    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2285    /// let memories = engine.get_entity_memories("Project Alpha").unwrap();
2286    /// for memory in memories {
2287    ///     println!("{}", memory.content);
2288    /// }
2289    /// ```
2290    pub fn get_entity_memories(&self, entity_name: &str) -> Result<Vec<Memory>> {
2291        // Find the entity by name
2292        let entity = self.storage.find_entity_by_name(entity_name)?;
2293
2294        match entity {
2295            Some(entity) => {
2296                // Query entity graph
2297                let graph = self.graph_manager.read().unwrap();
2298                let result = graph.get_entity_memories(&entity.id);
2299
2300                // Retrieve full memory records
2301                let mut memories = Vec::with_capacity(result.memories.len());
2302                for memory_id in result.memories {
2303                    if let Some(memory) = self.storage.get_memory(&memory_id)? {
2304                        memories.push(memory);
2305                    }
2306                }
2307
2308                Ok(memories)
2309            }
2310            None => Ok(Vec::new()), // Entity not found, return empty list
2311        }
2312    }
2313
2314    /// Get all entities mentioned in a specific memory
2315    ///
2316    /// # Arguments
2317    ///
2318    /// * `memory_id` - The memory to query
2319    ///
2320    /// # Returns
2321    ///
2322    /// A vector of Entity objects mentioned in this memory
2323    ///
2324    /// # Example
2325    ///
2326    /// ```no_run
2327    /// # use mnemefusion_core::{MemoryEngine, Config};
2328    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2329    /// # let id = engine.add("Alice met Bob".to_string(), vec![0.1; 384], None, None, None, None).unwrap();
2330    /// let entities = engine.get_memory_entities(&id).unwrap();
2331    /// for entity in entities {
2332    ///     println!("Entity: {}", entity.name);
2333    /// }
2334    /// ```
2335    pub fn get_memory_entities(&self, memory_id: &MemoryId) -> Result<Vec<Entity>> {
2336        // Query entity graph
2337        let graph = self.graph_manager.read().unwrap();
2338        let entity_ids = graph.get_memory_entities(memory_id);
2339
2340        // Retrieve full entity records
2341        let mut entities = Vec::with_capacity(entity_ids.len());
2342        for entity_id in entity_ids {
2343            if let Some(entity) = self.storage.get_entity(&entity_id)? {
2344                entities.push(entity);
2345            }
2346        }
2347
2348        Ok(entities)
2349    }
2350
2351    /// List all entities in the database
2352    ///
2353    /// # Returns
2354    ///
2355    /// A vector of all Entity objects
2356    ///
2357    /// # Example
2358    ///
2359    /// ```no_run
2360    /// # use mnemefusion_core::{MemoryEngine, Config};
2361    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2362    /// let all_entities = engine.list_entities().unwrap();
2363    /// for entity in all_entities {
2364    ///     println!("{}: {} mentions", entity.name, entity.mention_count);
2365    /// }
2366    /// ```
2367    pub fn list_entities(&self) -> Result<Vec<Entity>> {
2368        self.storage.list_entities()
2369    }
2370
2371    // ========== Entity Profile Operations ==========
2372
2373    /// Get the profile for an entity by name
2374    ///
2375    /// Entity profiles aggregate facts about entities across all memories.
2376    /// They are automatically built during ingestion when SLM metadata extraction
2377    /// is enabled.
2378    ///
2379    /// # Arguments
2380    ///
2381    /// * `name` - The entity name (case-insensitive)
2382    ///
2383    /// # Returns
2384    ///
2385    /// The EntityProfile if found, or None
2386    ///
2387    /// # Example
2388    ///
2389    /// ```no_run
2390    /// # use mnemefusion_core::{MemoryEngine, Config};
2391    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2392    /// if let Some(profile) = engine.get_entity_profile("Alice").unwrap() {
2393    ///     println!("Entity: {} ({})", profile.name, profile.entity_type);
2394    ///
2395    ///     // Get facts about Alice's occupation
2396    ///     for fact in profile.get_facts("occupation") {
2397    ///         println!("  Occupation: {} (confidence: {})", fact.value, fact.confidence);
2398    ///     }
2399    ///
2400    ///     // Get facts about Alice's research
2401    ///     for fact in profile.get_facts("research_topic") {
2402    ///         println!("  Research: {} (confidence: {})", fact.value, fact.confidence);
2403    ///     }
2404    /// }
2405    /// ```
2406    pub fn get_entity_profile(&self, name: &str) -> Result<Option<EntityProfile>> {
2407        self.storage.get_entity_profile(name)
2408    }
2409
2410    /// List all entity profiles in the database
2411    ///
2412    /// # Returns
2413    ///
2414    /// A vector of all EntityProfile objects
2415    ///
2416    /// # Example
2417    ///
2418    /// ```no_run
2419    /// # use mnemefusion_core::{MemoryEngine, Config};
2420    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2421    /// let profiles = engine.list_entity_profiles().unwrap();
2422    /// for profile in profiles {
2423    ///     println!("{} ({}) - {} facts from {} memories",
2424    ///         profile.name,
2425    ///         profile.entity_type,
2426    ///         profile.total_facts(),
2427    ///         profile.source_memories.len()
2428    ///     );
2429    /// }
2430    /// ```
2431    pub fn list_entity_profiles(&self) -> Result<Vec<EntityProfile>> {
2432        self.storage.list_entity_profiles()
2433    }
2434
2435    /// Count entity profiles in the database
2436    ///
2437    /// # Returns
2438    ///
2439    /// The number of entity profiles
2440    ///
2441    /// # Example
2442    ///
2443    /// ```no_run
2444    /// # use mnemefusion_core::{MemoryEngine, Config};
2445    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2446    /// let count = engine.count_entity_profiles().unwrap();
2447    /// println!("Total entity profiles: {}", count);
2448    /// ```
2449    pub fn count_entity_profiles(&self) -> Result<usize> {
2450        self.storage.count_entity_profiles()
2451    }
2452
2453    /// Create a scoped view for namespace-specific operations
2454    ///
2455    /// Returns a ScopedMemory that automatically applies the namespace to all operations.
2456    /// This provides a more ergonomic API when working with a single namespace.
2457    ///
2458    /// # Arguments
2459    ///
2460    /// * `namespace` - The namespace to scope to (empty string "" for default namespace)
2461    ///
2462    /// # Returns
2463    ///
2464    /// A ScopedMemory view bound to this namespace
2465    ///
2466    /// # Example
2467    ///
2468    /// ```no_run
2469    /// # use mnemefusion_core::{MemoryEngine, Config};
2470    /// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2471    /// // Create scoped view for a user
2472    /// let user_memory = engine.scope("user_123");
2473    ///
2474    /// // All operations automatically use the namespace
2475    /// let id = user_memory.add("User note".to_string(), vec![0.1; 384], None, None, None).unwrap();
2476    /// let results = user_memory.search(&vec![0.1; 384], 10, None).unwrap();
2477    /// let count = user_memory.count().unwrap();
2478    /// user_memory.delete_all().unwrap();
2479    /// ```
2480    pub fn scope<S: Into<String>>(&self, namespace: S) -> ScopedMemory<'_> {
2481        ScopedMemory {
2482            engine: self,
2483            namespace: namespace.into(),
2484        }
2485    }
2486
2487    /// Close the database
2488    ///
2489    /// This saves all indexes and ensures all data is flushed to disk.
2490    /// While not strictly necessary (redb handles persistence automatically),
2491    /// it's good practice to call this explicitly when you're done.
2492    ///
2493    /// # Example
2494    ///
2495    /// ```no_run
2496    /// # use mnemefusion_core::{MemoryEngine, Config};
2497    /// let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
2498    /// // ... use engine ...
2499    /// engine.close().unwrap();
2500    /// ```
2501    pub fn close(self) -> Result<()> {
2502        // Save vector index
2503        {
2504            let index = self.vector_index.read().unwrap();
2505            index.save()?;
2506        }
2507
2508        // Save BM25 index
2509        self.bm25_index.save()?;
2510
2511        // Save causal graph
2512        {
2513            let graph = self.graph_manager.read().unwrap();
2514            crate::graph::persist::save_graph(&graph, &self.storage)?;
2515        }
2516
2517        // Storage (redb) handles persistence automatically
2518        Ok(())
2519    }
2520}
2521
/// Scoped memory view for namespace-specific operations
///
/// This wrapper provides an ergonomic API for working with a single namespace.
/// All operations automatically apply the namespace, eliminating the need to pass
/// it to every method call.
///
/// Created via `MemoryEngine::scope()`.
///
/// # Example
///
/// ```no_run
/// use mnemefusion_core::{MemoryEngine, Config};
///
/// # let engine = MemoryEngine::open("./test.mfdb", Config::default()).unwrap();
/// // Create scoped view for a specific user
/// let user_memory = engine.scope("user_123");
///
/// // Add memory (automatically in user_123 namespace)
/// let id = user_memory.add(
///     "User note".to_string(),
///     vec![0.1; 384],
///     None,
///     None,
///     None,
/// ).unwrap();
///
/// // Search within namespace
/// let results = user_memory.search(&vec![0.1; 384], 10, None).unwrap();
///
/// // Count memories in namespace
/// println!("User has {} memories", user_memory.count().unwrap());
///
/// // Delete all memories in namespace
/// user_memory.delete_all().unwrap();
/// ```
pub struct ScopedMemory<'a> {
    /// The engine all scoped operations delegate to; the view borrows it,
    /// so the engine must outlive every scoped view created from it.
    engine: &'a MemoryEngine,
    /// Namespace applied to every operation ("" is the default namespace).
    namespace: String,
}
2561
2562impl<'a> ScopedMemory<'a> {
2563    /// Add a memory to this namespace
2564    ///
2565    /// Equivalent to calling `engine.add(..., Some(namespace))`
2566    ///
2567    /// # Arguments
2568    ///
2569    /// * `content` - Text content
2570    /// * `embedding` - Vector embedding
2571    /// * `metadata` - Optional metadata
2572    /// * `timestamp` - Optional custom timestamp
2573    /// * `source` - Optional source/provenance
2574    ///
2575    /// # Returns
2576    ///
2577    /// The ID of the created memory
2578    pub fn add(
2579        &self,
2580        content: String,
2581        embedding: Vec<f32>,
2582        metadata: Option<HashMap<String, String>>,
2583        timestamp: Option<Timestamp>,
2584        source: Option<Source>,
2585    ) -> Result<MemoryId> {
2586        self.engine.add(
2587            content,
2588            embedding,
2589            metadata,
2590            timestamp,
2591            source,
2592            Some(&self.namespace),
2593        )
2594    }
2595
2596    /// Search for memories in this namespace
2597    ///
2598    /// Equivalent to calling `engine.search(..., Some(namespace), filters)`
2599    ///
2600    /// # Arguments
2601    ///
2602    /// * `query_embedding` - Query vector
2603    /// * `top_k` - Maximum number of results
2604    /// * `filters` - Optional metadata filters
2605    ///
2606    /// # Returns
2607    ///
2608    /// Vector of (Memory, similarity_score) tuples
2609    pub fn search(
2610        &self,
2611        query_embedding: &[f32],
2612        top_k: usize,
2613        filters: Option<&[MetadataFilter]>,
2614    ) -> Result<Vec<(Memory, f32)>> {
2615        self.engine
2616            .search(query_embedding, top_k, Some(&self.namespace), filters)
2617    }
2618
2619    /// Delete a memory from this namespace
2620    ///
2621    /// Equivalent to calling `engine.delete(..., Some(namespace))`
2622    ///
2623    /// # Arguments
2624    ///
2625    /// * `id` - The memory ID to delete
2626    ///
2627    /// # Returns
2628    ///
2629    /// true if deleted, false if not found
2630    ///
2631    /// # Errors
2632    ///
2633    /// Returns `Error::NamespaceMismatch` if the memory exists but is in a different namespace
2634    pub fn delete(&self, id: &MemoryId) -> Result<bool> {
2635        self.engine.delete(id, Some(&self.namespace))
2636    }
2637
2638    /// Add multiple memories to this namespace in a batch
2639    ///
2640    /// Equivalent to calling `engine.add_batch(..., Some(namespace))`
2641    ///
2642    /// # Arguments
2643    ///
2644    /// * `inputs` - Vector of MemoryInput
2645    ///
2646    /// # Returns
2647    ///
2648    /// BatchResult with IDs and error information
2649    pub fn add_batch(&self, inputs: Vec<MemoryInput>) -> Result<BatchResult> {
2650        self.engine.add_batch(inputs, Some(&self.namespace))
2651    }
2652
2653    /// Delete multiple memories from this namespace
2654    ///
2655    /// Equivalent to calling `engine.delete_batch(..., Some(namespace))`
2656    ///
2657    /// # Arguments
2658    ///
2659    /// * `ids` - Vector of memory IDs
2660    ///
2661    /// # Returns
2662    ///
2663    /// Number of memories deleted
2664    pub fn delete_batch(&self, ids: Vec<MemoryId>) -> Result<usize> {
2665        self.engine.delete_batch(ids, Some(&self.namespace))
2666    }
2667
2668    /// Add a memory with deduplication in this namespace
2669    ///
2670    /// Equivalent to calling `engine.add_with_dedup(..., Some(namespace))`
2671    pub fn add_with_dedup(
2672        &self,
2673        content: String,
2674        embedding: Vec<f32>,
2675        metadata: Option<HashMap<String, String>>,
2676        timestamp: Option<Timestamp>,
2677        source: Option<Source>,
2678    ) -> Result<AddResult> {
2679        self.engine.add_with_dedup(
2680            content,
2681            embedding,
2682            metadata,
2683            timestamp,
2684            source,
2685            Some(&self.namespace),
2686        )
2687    }
2688
2689    /// Upsert a memory in this namespace
2690    ///
2691    /// Equivalent to calling `engine.upsert(..., Some(namespace))`
2692    pub fn upsert(
2693        &self,
2694        key: &str,
2695        content: String,
2696        embedding: Vec<f32>,
2697        metadata: Option<HashMap<String, String>>,
2698        timestamp: Option<Timestamp>,
2699        source: Option<Source>,
2700    ) -> Result<UpsertResult> {
2701        self.engine.upsert(
2702            key,
2703            content,
2704            embedding,
2705            metadata,
2706            timestamp,
2707            source,
2708            Some(&self.namespace),
2709        )
2710    }
2711
2712    /// Count memories in this namespace
2713    ///
2714    /// Equivalent to calling `engine.count_namespace(namespace)`
2715    ///
2716    /// # Returns
2717    ///
2718    /// Number of memories in the namespace
2719    pub fn count(&self) -> Result<usize> {
2720        self.engine.count_namespace(&self.namespace)
2721    }
2722
2723    /// Delete all memories in this namespace
2724    ///
2725    /// Equivalent to calling `engine.delete_namespace(namespace)`
2726    ///
2727    /// # Returns
2728    ///
2729    /// Number of memories deleted
2730    ///
2731    /// # Warning
2732    ///
2733    /// This operation cannot be undone. Use with caution.
2734    pub fn delete_all(&self) -> Result<usize> {
2735        self.engine.delete_namespace(&self.namespace)
2736    }
2737
2738    /// Multi-dimensional query within this namespace
2739    ///
2740    /// Equivalent to calling `engine.query(..., Some(namespace), filters)`
2741    ///
2742    /// # Arguments
2743    ///
2744    /// * `query_text` - Natural language query
2745    /// * `query_embedding` - Query vector
2746    /// * `limit` - Maximum number of results
2747    /// * `filters` - Optional metadata filters
2748    ///
2749    /// # Returns
2750    ///
2751    /// Tuple of (intent classification, results, profile context)
2752    pub fn query(
2753        &self,
2754        query_text: &str,
2755        query_embedding: &[f32],
2756        limit: usize,
2757        filters: Option<&[MetadataFilter]>,
2758    ) -> Result<(
2759        IntentClassification,
2760        Vec<(Memory, FusedResult)>,
2761        Vec<String>,
2762    )> {
2763        self.engine.query(
2764            query_text,
2765            query_embedding.to_vec(),
2766            limit,
2767            Some(&self.namespace),
2768            filters,
2769        )
2770    }
2771
2772    /// Get memories in time range within this namespace
2773    ///
2774    /// Equivalent to calling `engine.get_range(..., Some(namespace))`
2775    pub fn get_range(
2776        &self,
2777        start: Timestamp,
2778        end: Timestamp,
2779        limit: usize,
2780    ) -> Result<Vec<(Memory, Timestamp)>> {
2781        self.engine
2782            .get_range(start, end, limit, Some(&self.namespace))
2783    }
2784
2785    /// Get recent memories within this namespace
2786    ///
2787    /// Equivalent to calling `engine.get_recent(..., Some(namespace))`
2788    pub fn get_recent(&self, n: usize) -> Result<Vec<(Memory, Timestamp)>> {
2789        self.engine.get_recent(n, Some(&self.namespace))
2790    }
2791
2792    /// Get the namespace this view is scoped to
2793    pub fn namespace(&self) -> &str {
2794        &self.namespace
2795    }
2796}
2797
2798#[cfg(test)]
2799mod tests {
2800    use super::*;
2801    use tempfile::tempdir;
2802
2803    #[test]
2804    fn test_memory_engine_open() {
2805        let dir = tempdir().unwrap();
2806        let path = dir.path().join("test.mfdb");
2807
2808        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2809        assert_eq!(engine.config().embedding_dim, 384);
2810    }
2811
2812    #[test]
2813    fn test_memory_engine_invalid_config() {
2814        let dir = tempdir().unwrap();
2815        let path = dir.path().join("test.mfdb");
2816
2817        let mut config = Config::default();
2818        config.embedding_dim = 0;
2819
2820        let result = MemoryEngine::open(&path, config);
2821        assert!(result.is_err());
2822    }
2823
2824    #[test]
2825    fn test_memory_engine_add_and_get() {
2826        let dir = tempdir().unwrap();
2827        let path = dir.path().join("test.mfdb");
2828        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2829
2830        let content = "Test memory content".to_string();
2831        let embedding = vec![0.1; 384];
2832
2833        let id = engine
2834            .add(content.clone(), embedding.clone(), None, None, None, None)
2835            .unwrap();
2836
2837        let memory = engine.get(&id).unwrap();
2838        assert!(memory.is_some());
2839
2840        let memory = memory.unwrap();
2841        assert_eq!(memory.content, content);
2842        assert_eq!(memory.embedding, embedding);
2843    }
2844
2845    #[test]
2846    fn test_memory_engine_invalid_dimension() {
2847        let dir = tempdir().unwrap();
2848        let path = dir.path().join("test.mfdb");
2849        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2850
2851        let result = engine.add(
2852            "test".to_string(),
2853            vec![0.1; 512], // Wrong dimension
2854            None,
2855            None,
2856            None,
2857            None,
2858        );
2859        assert!(result.is_err());
2860    }
2861
2862    #[test]
2863    fn test_memory_engine_with_metadata() {
2864        let dir = tempdir().unwrap();
2865        let path = dir.path().join("test.mfdb");
2866        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2867
2868        let mut metadata = HashMap::new();
2869        metadata.insert("source".to_string(), "test".to_string());
2870
2871        let id = engine
2872            .add(
2873                "test".to_string(),
2874                vec![0.1; 384],
2875                Some(metadata),
2876                None,
2877                None,
2878                None,
2879            )
2880            .unwrap();
2881
2882        let memory = engine.get(&id).unwrap().unwrap();
2883        assert_eq!(memory.metadata.get("source"), Some(&"test".to_string()));
2884    }
2885
2886    #[test]
2887    fn test_memory_engine_with_custom_timestamp() {
2888        let dir = tempdir().unwrap();
2889        let path = dir.path().join("test.mfdb");
2890        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2891
2892        let ts = Timestamp::from_unix_secs(1609459200.0); // 2021-01-01
2893        let id = engine
2894            .add(
2895                "test".to_string(),
2896                vec![0.1; 384],
2897                None,
2898                Some(ts),
2899                None,
2900                None,
2901            )
2902            .unwrap();
2903
2904        let memory = engine.get(&id).unwrap().unwrap();
2905        assert_eq!(memory.created_at, ts);
2906    }
2907
2908    #[test]
2909    fn test_memory_engine_delete() {
2910        let dir = tempdir().unwrap();
2911        let path = dir.path().join("test.mfdb");
2912        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2913
2914        let id = engine
2915            .add("test".to_string(), vec![0.1; 384], None, None, None, None)
2916            .unwrap();
2917
2918        let deleted = engine.delete(&id, None).unwrap();
2919        assert!(deleted);
2920
2921        let memory = engine.get(&id).unwrap();
2922        assert!(memory.is_none());
2923    }
2924
2925    #[test]
2926    fn test_memory_engine_count() {
2927        let dir = tempdir().unwrap();
2928        let path = dir.path().join("test.mfdb");
2929        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2930
2931        assert_eq!(engine.count().unwrap(), 0);
2932
2933        engine
2934            .add("test1".to_string(), vec![0.1; 384], None, None, None, None)
2935            .unwrap();
2936        assert_eq!(engine.count().unwrap(), 1);
2937
2938        engine
2939            .add("test2".to_string(), vec![0.2; 384], None, None, None, None)
2940            .unwrap();
2941        assert_eq!(engine.count().unwrap(), 2);
2942    }
2943
2944    #[test]
2945    fn test_memory_engine_list_ids() {
2946        let dir = tempdir().unwrap();
2947        let path = dir.path().join("test.mfdb");
2948        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2949
2950        let id1 = engine
2951            .add("test1".to_string(), vec![0.1; 384], None, None, None, None)
2952            .unwrap();
2953        let id2 = engine
2954            .add("test2".to_string(), vec![0.2; 384], None, None, None, None)
2955            .unwrap();
2956
2957        let ids = engine.list_ids().unwrap();
2958        assert_eq!(ids.len(), 2);
2959        assert!(ids.contains(&id1));
2960        assert!(ids.contains(&id2));
2961    }
2962
2963    #[test]
2964    fn test_memory_engine_persistence() {
2965        let dir = tempdir().unwrap();
2966        let path = dir.path().join("test.mfdb");
2967
2968        let id = {
2969            let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2970            let id = engine
2971                .add(
2972                    "persistent".to_string(),
2973                    vec![0.5; 384],
2974                    None,
2975                    None,
2976                    None,
2977                    None,
2978                )
2979                .unwrap();
2980            engine.close().unwrap();
2981            id
2982        };
2983
2984        // Reopen and verify
2985        {
2986            let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2987            let memory = engine.get(&id).unwrap();
2988            assert!(memory.is_some());
2989            assert_eq!(memory.unwrap().content, "persistent");
2990        }
2991    }
2992
2993    #[test]
2994    fn test_namespace_add_and_filter() {
2995        let dir = tempdir().unwrap();
2996        let path = dir.path().join("test.mfdb");
2997        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
2998
2999        // Add memories to different namespaces
3000        let id1 = engine
3001            .add(
3002                "User 1 memory".to_string(),
3003                vec![0.1; 384],
3004                None,
3005                None,
3006                None,
3007                Some("user_1"),
3008            )
3009            .unwrap();
3010
3011        let id2 = engine
3012            .add(
3013                "User 2 memory".to_string(),
3014                vec![0.2; 384],
3015                None,
3016                None,
3017                None,
3018                Some("user_2"),
3019            )
3020            .unwrap();
3021
3022        let id3 = engine
3023            .add(
3024                "Default memory".to_string(),
3025                vec![0.3; 384],
3026                None,
3027                None,
3028                None,
3029                None,
3030            )
3031            .unwrap();
3032
3033        // Verify memories are in correct namespaces
3034        let mem1 = engine.get(&id1).unwrap().unwrap();
3035        assert_eq!(mem1.get_namespace(), "user_1");
3036
3037        let mem2 = engine.get(&id2).unwrap().unwrap();
3038        assert_eq!(mem2.get_namespace(), "user_2");
3039
3040        let mem3 = engine.get(&id3).unwrap().unwrap();
3041        assert_eq!(mem3.get_namespace(), "");
3042
3043        // Test search with namespace filtering
3044        let query_embedding = vec![0.15; 384];
3045
3046        // Search in user_1 namespace
3047        let results = engine
3048            .search(&query_embedding, 10, Some("user_1"), None)
3049            .unwrap();
3050        assert_eq!(results.len(), 1);
3051        assert_eq!(results[0].0.id, id1);
3052
3053        // Search in user_2 namespace
3054        let results = engine
3055            .search(&query_embedding, 10, Some("user_2"), None)
3056            .unwrap();
3057        assert_eq!(results.len(), 1);
3058        assert_eq!(results[0].0.id, id2);
3059
3060        // Search in default namespace
3061        let results = engine.search(&query_embedding, 10, Some(""), None).unwrap();
3062        assert_eq!(results.len(), 1);
3063        assert_eq!(results[0].0.id, id3);
3064
3065        // Search without namespace filter (should get all)
3066        let results = engine.search(&query_embedding, 10, None, None).unwrap();
3067        assert_eq!(results.len(), 3);
3068    }
3069
3070    #[test]
3071    fn test_namespace_delete_with_verification() {
3072        let dir = tempdir().unwrap();
3073        let path = dir.path().join("test.mfdb");
3074        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3075
3076        // Add memory to namespace
3077        let id = engine
3078            .add(
3079                "User memory".to_string(),
3080                vec![0.1; 384],
3081                None,
3082                None,
3083                None,
3084                Some("user_1"),
3085            )
3086            .unwrap();
3087
3088        // Try to delete with wrong namespace - should fail
3089        let result = engine.delete(&id, Some("user_2"));
3090        assert!(result.is_err());
3091        assert!(matches!(
3092            result.unwrap_err(),
3093            crate::Error::NamespaceMismatch { .. }
3094        ));
3095
3096        // Verify memory still exists
3097        assert!(engine.get(&id).unwrap().is_some());
3098
3099        // Delete with correct namespace - should succeed
3100        let deleted = engine.delete(&id, Some("user_1")).unwrap();
3101        assert!(deleted);
3102
3103        // Verify memory is gone
3104        assert!(engine.get(&id).unwrap().is_none());
3105    }
3106
3107    #[test]
3108    fn test_namespace_management_methods() {
3109        let dir = tempdir().unwrap();
3110        let path = dir.path().join("test.mfdb");
3111        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3112
3113        // Add memories to different namespaces
3114        engine
3115            .add(
3116                "Memory 1".to_string(),
3117                vec![0.1; 384],
3118                None,
3119                None,
3120                None,
3121                Some("ns1"),
3122            )
3123            .unwrap();
3124        engine
3125            .add(
3126                "Memory 2".to_string(),
3127                vec![0.2; 384],
3128                None,
3129                None,
3130                None,
3131                Some("ns1"),
3132            )
3133            .unwrap();
3134        engine
3135            .add(
3136                "Memory 3".to_string(),
3137                vec![0.3; 384],
3138                None,
3139                None,
3140                None,
3141                Some("ns2"),
3142            )
3143            .unwrap();
3144        engine
3145            .add(
3146                "Memory 4".to_string(),
3147                vec![0.4; 384],
3148                None,
3149                None,
3150                None,
3151                None,
3152            )
3153            .unwrap();
3154
3155        // List namespaces
3156        let namespaces = engine.list_namespaces().unwrap();
3157        assert_eq!(namespaces.len(), 2);
3158        assert!(namespaces.contains(&"ns1".to_string()));
3159        assert!(namespaces.contains(&"ns2".to_string()));
3160
3161        // Count in namespace
3162        assert_eq!(engine.count_namespace("ns1").unwrap(), 2);
3163        assert_eq!(engine.count_namespace("ns2").unwrap(), 1);
3164        assert_eq!(engine.count_namespace("").unwrap(), 1); // Default namespace
3165
3166        // Delete entire namespace
3167        let deleted = engine.delete_namespace("ns1").unwrap();
3168        assert_eq!(deleted, 2);
3169
3170        // Verify namespace is gone
3171        assert_eq!(engine.count_namespace("ns1").unwrap(), 0);
3172        let namespaces = engine.list_namespaces().unwrap();
3173        assert_eq!(namespaces.len(), 1);
3174        assert!(namespaces.contains(&"ns2".to_string()));
3175
3176        // Total count should be 2 now
3177        assert_eq!(engine.count().unwrap(), 2);
3178    }
3179
3180    #[test]
3181    fn test_namespace_batch_operations() {
3182        use crate::types::MemoryInput;
3183
3184        let dir = tempdir().unwrap();
3185        let path = dir.path().join("test.mfdb");
3186        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3187
3188        // Create batch inputs
3189        let inputs = vec![
3190            MemoryInput::new("Memory 1".to_string(), vec![0.1; 384]),
3191            MemoryInput::new("Memory 2".to_string(), vec![0.2; 384]),
3192            MemoryInput::new("Memory 3".to_string(), vec![0.3; 384]),
3193        ];
3194
3195        // Add batch with namespace
3196        let result = engine.add_batch(inputs, Some("batch_ns")).unwrap();
3197        assert_eq!(result.created_count, 3);
3198        assert!(result.is_success());
3199
3200        // Verify all are in the namespace
3201        assert_eq!(engine.count_namespace("batch_ns").unwrap(), 3);
3202
3203        // Batch delete with namespace filter
3204        let deleted = engine
3205            .delete_batch(result.ids.clone(), Some("batch_ns"))
3206            .unwrap();
3207        assert_eq!(deleted, 3);
3208
3209        // Verify namespace is empty
3210        assert_eq!(engine.count_namespace("batch_ns").unwrap(), 0);
3211    }
3212
3213    // ========== ScopedMemory Tests ==========
3214
3215    #[test]
3216    fn test_scoped_memory_add_and_search() {
3217        let dir = tempdir().unwrap();
3218        let path = dir.path().join("test.mfdb");
3219        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3220
3221        // Create scoped view
3222        let scoped = engine.scope("user_123");
3223
3224        // Add memory via scoped view
3225        let id = scoped
3226            .add(
3227                "Scoped memory".to_string(),
3228                vec![0.5; 384],
3229                None,
3230                None,
3231                None,
3232            )
3233            .unwrap();
3234
3235        // Verify memory is in the namespace
3236        let memory = engine.get(&id).unwrap().unwrap();
3237        assert_eq!(memory.get_namespace(), "user_123");
3238        assert_eq!(memory.content, "Scoped memory");
3239
3240        // Search via scoped view
3241        let results = scoped.search(&vec![0.5; 384], 10, None).unwrap();
3242        assert_eq!(results.len(), 1);
3243        assert_eq!(results[0].0.id, id);
3244
3245        // Verify namespace isolation
3246        assert_eq!(scoped.namespace(), "user_123");
3247    }
3248
3249    #[test]
3250    fn test_scoped_memory_count_and_delete_all() {
3251        let dir = tempdir().unwrap();
3252        let path = dir.path().join("test.mfdb");
3253        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3254
3255        let scoped = engine.scope("user_456");
3256
3257        // Add multiple memories
3258        scoped
3259            .add("Memory 1".to_string(), vec![0.1; 384], None, None, None)
3260            .unwrap();
3261        scoped
3262            .add("Memory 2".to_string(), vec![0.2; 384], None, None, None)
3263            .unwrap();
3264        scoped
3265            .add("Memory 3".to_string(), vec![0.3; 384], None, None, None)
3266            .unwrap();
3267
3268        // Count via scoped view
3269        assert_eq!(scoped.count().unwrap(), 3);
3270
3271        // Total engine count should also be 3
3272        assert_eq!(engine.count().unwrap(), 3);
3273
3274        // Delete all via scoped view
3275        let deleted = scoped.delete_all().unwrap();
3276        assert_eq!(deleted, 3);
3277
3278        // Verify namespace is empty
3279        assert_eq!(scoped.count().unwrap(), 0);
3280        assert_eq!(engine.count().unwrap(), 0);
3281    }
3282
3283    #[test]
3284    fn test_scoped_memory_isolation() {
3285        let dir = tempdir().unwrap();
3286        let path = dir.path().join("test.mfdb");
3287        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3288
3289        // Create two scoped views
3290        let scope1 = engine.scope("ns1");
3291        let scope2 = engine.scope("ns2");
3292
3293        // Add memories to each namespace
3294        let id1 = scope1
3295            .add("NS1 memory".to_string(), vec![0.1; 384], None, None, None)
3296            .unwrap();
3297        let id2 = scope2
3298            .add("NS2 memory".to_string(), vec![0.2; 384], None, None, None)
3299            .unwrap();
3300
3301        // Each scope should only see its own memories
3302        assert_eq!(scope1.count().unwrap(), 1);
3303        assert_eq!(scope2.count().unwrap(), 1);
3304
3305        // Search should be isolated
3306        let results1 = scope1.search(&vec![0.1; 384], 10, None).unwrap();
3307        assert_eq!(results1.len(), 1);
3308        assert_eq!(results1[0].0.id, id1);
3309
3310        let results2 = scope2.search(&vec![0.2; 384], 10, None).unwrap();
3311        assert_eq!(results2.len(), 1);
3312        assert_eq!(results2[0].0.id, id2);
3313
3314        // Delete from scope1 shouldn't affect scope2
3315        scope1.delete_all().unwrap();
3316        assert_eq!(scope1.count().unwrap(), 0);
3317        assert_eq!(scope2.count().unwrap(), 1);
3318    }
3319
3320    #[test]
3321    fn test_scoped_memory_delete_with_verification() {
3322        let dir = tempdir().unwrap();
3323        let path = dir.path().join("test.mfdb");
3324        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3325
3326        let scope1 = engine.scope("ns1");
3327        let scope2 = engine.scope("ns2");
3328
3329        // Add memory to ns1
3330        let id = scope1
3331            .add("NS1 memory".to_string(), vec![0.1; 384], None, None, None)
3332            .unwrap();
3333
3334        // Try to delete from wrong namespace - should fail
3335        let result = scope2.delete(&id);
3336        assert!(result.is_err());
3337        assert!(matches!(
3338            result.unwrap_err(),
3339            crate::Error::NamespaceMismatch { .. }
3340        ));
3341
3342        // Verify memory still exists
3343        assert_eq!(scope1.count().unwrap(), 1);
3344
3345        // Delete from correct namespace - should succeed
3346        let deleted = scope1.delete(&id).unwrap();
3347        assert!(deleted);
3348        assert_eq!(scope1.count().unwrap(), 0);
3349    }
3350
3351    #[test]
3352    fn test_scoped_memory_batch_operations() {
3353        use crate::types::MemoryInput;
3354
3355        let dir = tempdir().unwrap();
3356        let path = dir.path().join("test.mfdb");
3357        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3358
3359        let scoped = engine.scope("batch_scope");
3360
3361        // Add batch via scoped view
3362        let inputs = vec![
3363            MemoryInput::new("Batch 1".to_string(), vec![0.1; 384]),
3364            MemoryInput::new("Batch 2".to_string(), vec![0.2; 384]),
3365            MemoryInput::new("Batch 3".to_string(), vec![0.3; 384]),
3366        ];
3367
3368        let result = scoped.add_batch(inputs).unwrap();
3369        assert_eq!(result.created_count, 3);
3370        assert!(result.is_success());
3371
3372        // Verify count
3373        assert_eq!(scoped.count().unwrap(), 3);
3374
3375        // Delete batch via scoped view
3376        let deleted = scoped.delete_batch(result.ids).unwrap();
3377        assert_eq!(deleted, 3);
3378
3379        // Verify empty
3380        assert_eq!(scoped.count().unwrap(), 0);
3381    }
3382
3383    #[test]
3384    fn test_search_with_metadata_filters() {
3385        let dir = tempdir().unwrap();
3386        let path = dir.path().join("test.mfdb");
3387        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3388
3389        // Add memories with different metadata
3390        let mut mem1 = Memory::new("Event 1".to_string(), vec![0.1; 384]);
3391        mem1.metadata
3392            .insert("type".to_string(), "event".to_string());
3393        mem1.metadata
3394            .insert("priority".to_string(), "high".to_string());
3395        engine
3396            .add(
3397                mem1.content.clone(),
3398                mem1.embedding.clone(),
3399                Some(mem1.metadata.clone()),
3400                None,
3401                None,
3402                None,
3403            )
3404            .unwrap();
3405
3406        let mut mem2 = Memory::new("Event 2".to_string(), vec![0.11; 384]);
3407        mem2.metadata
3408            .insert("type".to_string(), "event".to_string());
3409        mem2.metadata
3410            .insert("priority".to_string(), "low".to_string());
3411        engine
3412            .add(
3413                mem2.content.clone(),
3414                mem2.embedding.clone(),
3415                Some(mem2.metadata.clone()),
3416                None,
3417                None,
3418                None,
3419            )
3420            .unwrap();
3421
3422        let mut mem3 = Memory::new("Task 1".to_string(), vec![0.12; 384]);
3423        mem3.metadata.insert("type".to_string(), "task".to_string());
3424        mem3.metadata
3425            .insert("priority".to_string(), "high".to_string());
3426        engine
3427            .add(
3428                mem3.content.clone(),
3429                mem3.embedding.clone(),
3430                Some(mem3.metadata.clone()),
3431                None,
3432                None,
3433                None,
3434            )
3435            .unwrap();
3436
3437        // Search with filter: type=event
3438        let filters = vec![MetadataFilter::eq("type", "event")];
3439        let results = engine
3440            .search(&vec![0.1; 384], 10, None, Some(&filters))
3441            .unwrap();
3442        assert_eq!(results.len(), 2);
3443        assert!(results
3444            .iter()
3445            .all(|(m, _)| m.metadata.get("type").unwrap() == "event"));
3446
3447        // Search with filter: priority=high
3448        let filters = vec![MetadataFilter::eq("priority", "high")];
3449        let results = engine
3450            .search(&vec![0.1; 384], 10, None, Some(&filters))
3451            .unwrap();
3452        assert_eq!(results.len(), 2);
3453        assert!(results
3454            .iter()
3455            .all(|(m, _)| m.metadata.get("priority").unwrap() == "high"));
3456
3457        // Search with multiple filters: type=event AND priority=high
3458        let filters = vec![
3459            MetadataFilter::eq("type", "event"),
3460            MetadataFilter::eq("priority", "high"),
3461        ];
3462        let results = engine
3463            .search(&vec![0.1; 384], 10, None, Some(&filters))
3464            .unwrap();
3465        assert_eq!(results.len(), 1);
3466        assert_eq!(results[0].0.content, "Event 1");
3467    }
3468
3469    #[test]
3470    fn test_query_with_metadata_filters() {
3471        let dir = tempdir().unwrap();
3472        let path = dir.path().join("test.mfdb");
3473        // Disable semantic threshold for test (simple test embeddings)
3474        let config = Config::default().with_fusion_semantic_threshold(0.0);
3475        let engine = MemoryEngine::open(&path, config).unwrap();
3476
3477        // Add memories with different metadata
3478        let mut mem1 = Memory::new("Important meeting".to_string(), vec![0.1; 384]);
3479        mem1.metadata
3480            .insert("type".to_string(), "event".to_string());
3481        mem1.metadata
3482            .insert("priority".to_string(), "high".to_string());
3483        engine
3484            .add(
3485                mem1.content.clone(),
3486                mem1.embedding.clone(),
3487                Some(mem1.metadata.clone()),
3488                None,
3489                None,
3490                None,
3491            )
3492            .unwrap();
3493
3494        let mut mem2 = Memory::new("Casual meeting".to_string(), vec![0.11; 384]);
3495        mem2.metadata
3496            .insert("type".to_string(), "event".to_string());
3497        mem2.metadata
3498            .insert("priority".to_string(), "low".to_string());
3499        engine
3500            .add(
3501                mem2.content.clone(),
3502                mem2.embedding.clone(),
3503                Some(mem2.metadata.clone()),
3504                None,
3505                None,
3506                None,
3507            )
3508            .unwrap();
3509
3510        // Query with filter
3511        let filters = vec![MetadataFilter::eq("priority", "high")];
3512        let (_intent, results, _profile_ctx) = engine
3513            .query("meeting", vec![0.1f32; 384], 10, None, Some(&filters))
3514            .unwrap();
3515
3516        assert_eq!(results.len(), 1);
3517        assert_eq!(results[0].0.content, "Important meeting");
3518    }
3519
3520    #[test]
3521    fn test_scoped_memory_with_filters() {
3522        let dir = tempdir().unwrap();
3523        let path = dir.path().join("test.mfdb");
3524        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3525
3526        let scoped = engine.scope("user_123");
3527
3528        // Add memories with different metadata to the namespace
3529        let mut mem1 = Memory::new("Event 1".to_string(), vec![0.1; 384]);
3530        mem1.metadata
3531            .insert("type".to_string(), "event".to_string());
3532        scoped
3533            .add(
3534                mem1.content.clone(),
3535                mem1.embedding.clone(),
3536                Some(mem1.metadata.clone()),
3537                None,
3538                None,
3539            )
3540            .unwrap();
3541
3542        let mut mem2 = Memory::new("Task 1".to_string(), vec![0.11; 384]);
3543        mem2.metadata.insert("type".to_string(), "task".to_string());
3544        scoped
3545            .add(
3546                mem2.content.clone(),
3547                mem2.embedding.clone(),
3548                Some(mem2.metadata.clone()),
3549                None,
3550                None,
3551            )
3552            .unwrap();
3553
3554        // Search with filter in scoped view
3555        let filters = vec![MetadataFilter::eq("type", "event")];
3556        let results = scoped.search(&vec![0.1; 384], 10, Some(&filters)).unwrap();
3557
3558        assert_eq!(results.len(), 1);
3559        assert_eq!(results[0].0.content, "Event 1");
3560    }
3561
3562    #[test]
3563    fn test_consolidate_merges_aliases() {
3564        use crate::types::{EntityFact, EntityId, EntityProfile};
3565
3566        let dir = tempdir().unwrap();
3567        let path = dir.path().join("test.mfdb");
3568        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3569
3570        let mem1 = MemoryId::new();
3571        let mem2 = MemoryId::new();
3572
3573        // Create orphan "mel" profile with 1 fact
3574        let mut mel_profile =
3575            EntityProfile::new(EntityId::new(), "mel".to_string(), "person".to_string());
3576        mel_profile.add_fact(EntityFact::new("hobby", "hiking", 0.9, mem1.clone()));
3577        mel_profile.add_source_memory(mem1.clone());
3578        engine.storage.store_entity_profile(&mel_profile).unwrap();
3579
3580        // Create canonical "melanie" profile with 2 facts
3581        let mut melanie_profile =
3582            EntityProfile::new(EntityId::new(), "melanie".to_string(), "person".to_string());
3583        melanie_profile.add_fact(EntityFact::new("instrument", "guitar", 0.9, mem2.clone()));
3584        melanie_profile.add_fact(EntityFact::new("occupation", "teacher", 0.8, mem2.clone()));
3585        melanie_profile.add_source_memory(mem2.clone());
3586        engine
3587            .storage
3588            .store_entity_profile(&melanie_profile)
3589            .unwrap();
3590
3591        // Verify both exist before consolidation
3592        assert!(engine.storage.get_entity_profile("mel").unwrap().is_some());
3593        assert!(engine
3594            .storage
3595            .get_entity_profile("melanie")
3596            .unwrap()
3597            .is_some());
3598
3599        let (_facts_removed, profiles_deleted) = engine.consolidate_profiles().unwrap();
3600
3601        // "mel" should be merged into "melanie" and deleted
3602        assert!(
3603            profiles_deleted >= 1,
3604            "At least 1 profile should be deleted (mel)"
3605        );
3606        assert!(
3607            engine.storage.get_entity_profile("mel").unwrap().is_none(),
3608            "mel profile should be deleted after merge"
3609        );
3610
3611        // "melanie" should have all facts merged
3612        let melanie = engine
3613            .storage
3614            .get_entity_profile("melanie")
3615            .unwrap()
3616            .expect("melanie should still exist");
3617        assert!(
3618            melanie.total_facts() >= 3,
3619            "melanie should have merged facts (hiking + guitar + teacher), got {}",
3620            melanie.total_facts()
3621        );
3622        assert!(
3623            melanie.source_memories.contains(&mem1),
3624            "melanie should have mem1 from merged mel"
3625        );
3626        assert!(
3627            melanie.source_memories.contains(&mem2),
3628            "melanie should have mem2 from original melanie"
3629        );
3630    }
3631
3632    // ======= Embedding-auto tests =======
3633
3634    #[test]
3635    fn test_add_explicit_embedding_still_works() {
3636        // Existing callers that pass Vec<f32> directly should compile and work
3637        let dir = tempdir().unwrap();
3638        let path = dir.path().join("test.mfdb");
3639        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3640        let embedding = vec![0.1f32; 384];
3641        // Vec<f32> implements Into<Option<Vec<f32>>> — this must compile
3642        let id = engine
3643            .add(
3644                "Alice loves hiking".to_string(),
3645                embedding,
3646                None,
3647                None,
3648                None,
3649                None,
3650            )
3651            .unwrap();
3652        assert!(engine.get(&id).unwrap().is_some());
3653    }
3654
3655    #[test]
3656    fn test_add_some_embedding_works() {
3657        // Some(Vec<f32>) should also compile
3658        let dir = tempdir().unwrap();
3659        let path = dir.path().join("test.mfdb");
3660        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3661        let id = engine
3662            .add(
3663                "Test content".to_string(),
3664                Some(vec![0.2f32; 384]),
3665                None,
3666                None,
3667                None,
3668                None,
3669            )
3670            .unwrap();
3671        assert!(engine.get(&id).unwrap().is_some());
3672    }
3673
3674    #[test]
3675    fn test_add_none_embedding_without_engine_errors() {
3676        // None embedding without embedding engine configured → Error::NoEmbeddingEngine
3677        let dir = tempdir().unwrap();
3678        let path = dir.path().join("test.mfdb");
3679        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3680        let result = engine.add("Test".to_string(), None::<Vec<f32>>, None, None, None, None);
3681        assert!(
3682            result.is_err(),
3683            "Expected error when no embedding engine configured"
3684        );
3685        assert!(matches!(result.unwrap_err(), Error::NoEmbeddingEngine));
3686    }
3687
3688    #[test]
3689    fn test_with_user_sets_default_namespace() {
3690        let dir = tempdir().unwrap();
3691        let path = dir.path().join("test.mfdb");
3692        let engine = MemoryEngine::open(&path, Config::default())
3693            .unwrap()
3694            .with_user("alice");
3695        assert_eq!(engine.default_namespace.as_deref(), Some("alice"));
3696
3697        // add() with no explicit namespace should use "alice"
3698        let id = engine
3699            .add(
3700                "Alice's memory".to_string(),
3701                vec![0.1f32; 384],
3702                None,
3703                None,
3704                None,
3705                None,
3706            )
3707            .unwrap();
3708        let mem = engine.get(&id).unwrap().unwrap();
3709        assert_eq!(mem.get_namespace(), "alice");
3710    }
3711
3712    #[test]
3713    fn test_query_none_embedding_without_engine_errors() {
3714        let dir = tempdir().unwrap();
3715        let path = dir.path().join("test.mfdb");
3716        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3717        let result = engine.query("test query", None::<Vec<f32>>, 10, None, None);
3718        assert!(result.is_err());
3719        assert!(matches!(result.unwrap_err(), Error::NoEmbeddingEngine));
3720    }
3721
3722    // ======= first_person_to_third tests =======
3723
3724    #[test]
3725    fn test_first_person_to_third_basic() {
3726        assert_eq!(
3727            first_person_to_third("I joined a gym", "Alice"),
3728            "Alice joined a gym"
3729        );
3730        assert_eq!(
3731            first_person_to_third("I love hiking in the mountains", "Alice"),
3732            "Alice love hiking in the mountains"
3733        );
3734    }
3735
3736    #[test]
3737    fn test_first_person_to_third_contractions() {
3738        assert_eq!(
3739            first_person_to_third("I'm learning guitar", "Bob"),
3740            "Bob is learning guitar"
3741        );
3742        assert_eq!(
3743            first_person_to_third("I've started a new job", "Bob"),
3744            "Bob has started a new job"
3745        );
3746        assert_eq!(
3747            first_person_to_third("I'll be there tomorrow", "Bob"),
3748            "Bob will be there tomorrow"
3749        );
3750        assert_eq!(
3751            first_person_to_third("I'd love to visit Paris", "Bob"),
3752            "Bob would love to visit Paris"
3753        );
3754    }
3755
3756    #[test]
3757    fn test_first_person_to_third_possessive() {
3758        assert_eq!(
3759            first_person_to_third("My hobby is hiking", "Alice"),
3760            "Alice's hobby is hiking"
3761        );
3762        assert_eq!(
3763            first_person_to_third("That book is mine", "Alice"),
3764            "That book is Alice's"
3765        );
3766        // "myself" before "my" prevents double-substitution
3767        assert_eq!(
3768            first_person_to_third("I hurt myself at the gym", "Alice"),
3769            "Alice hurt Alice at the gym"
3770        );
3771    }
3772
3773    #[test]
3774    fn test_first_person_to_third_object_pronoun() {
3775        assert_eq!(
3776            first_person_to_third("He gave me the book", "Alice"),
3777            "He gave Alice the book"
3778        );
3779    }
3780
3781    #[test]
3782    fn test_first_person_to_third_no_pronouns() {
3783        // Content without first-person pronouns should be returned unchanged
3784        let content = "She likes hiking in the mountains";
3785        assert_eq!(first_person_to_third(content, "Alice"), content);
3786    }
3787
3788    #[test]
3789    fn test_first_person_to_third_no_word_boundary_false_positive() {
3790        // "me" inside "intermediate" should not match \bme\b
3791        let content = "The intermediate step";
3792        assert_eq!(first_person_to_third(content, "Alice"), content);
3793    }
3794
3795    #[test]
3796    fn test_auto_detect_embedding_dim_from_existing_db() {
3797        // Create a DB with 768-dim embeddings
3798        let dir = tempdir().unwrap();
3799        let path = dir.path().join("test.mfdb");
3800
3801        let mut config = Config::default();
3802        config.embedding_dim = 768;
3803        let engine = MemoryEngine::open(&path, config).unwrap();
3804
3805        // Add a vector so the index has data to persist
3806        let embedding = vec![0.1; 768];
3807        engine
3808            .add(
3809                "test content".to_string(),
3810                embedding,
3811                None,
3812                None,
3813                None,
3814                None,
3815            )
3816            .unwrap();
3817        drop(engine);
3818
3819        // Re-open with default config (384) — should auto-detect 768
3820        let engine2 = MemoryEngine::open(&path, Config::default()).unwrap();
3821        assert_eq!(
3822            engine2.config().embedding_dim,
3823            768,
3824            "Should auto-detect embedding dim from existing DB"
3825        );
3826
3827        // Verify we can add more vectors with the detected dimension
3828        let embedding2 = vec![0.2; 768];
3829        engine2
3830            .add(
3831                "another memory".to_string(),
3832                embedding2,
3833                None,
3834                None,
3835                None,
3836                None,
3837            )
3838            .unwrap();
3839    }
3840
3841    #[test]
3842    fn test_add_batch_with_progress_callback() {
3843        let dir = tempdir().unwrap();
3844        let path = dir.path().join("test.mfdb");
3845        let engine = MemoryEngine::open(&path, Config::default()).unwrap();
3846
3847        let inputs: Vec<MemoryInput> = (0..5)
3848            .map(|i| MemoryInput::new(format!("memory {}", i), vec![0.1 * (i as f32 + 1.0); 384]))
3849            .collect();
3850
3851        let progress = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
3852        let progress_clone = progress.clone();
3853
3854        let result = engine
3855            .add_batch_with_progress(
3856                inputs,
3857                None,
3858                Some(Box::new(move |current, total| {
3859                    progress_clone.lock().unwrap().push((current, total));
3860                })),
3861            )
3862            .unwrap();
3863
3864        assert_eq!(result.created_count, 5);
3865        let recorded = progress.lock().unwrap();
3866        assert_eq!(recorded.len(), 5);
3867        assert_eq!(recorded[0], (1, 5));
3868        assert_eq!(recorded[4], (5, 5));
3869    }
3870}