Skip to main content

mnemefusion_core/storage/
engine.rs

1//! Storage engine implementation
2//!
3//! Wraps redb and provides CRUD operations for memories and indexes.
4
5use crate::{
6    types::{Entity, EntityId, EntityProfile, Memory, MemoryId, Timestamp},
7    Error, Result,
8};
9use redb::{Database, ReadableTable, ReadableTableMetadata, TableDefinition};
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12
13use super::FileHeader;
14
15// Table definitions
16const MEMORIES: TableDefinition<&[u8], &[u8]> = TableDefinition::new("memories");
17pub(crate) const TEMPORAL_INDEX: TableDefinition<u64, &[u8]> =
18    TableDefinition::new("temporal_index");
19const METADATA_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("metadata");
20const MEMORY_ID_INDEX: TableDefinition<u64, &[u8]> = TableDefinition::new("memory_id_index");
21const CAUSAL_GRAPH: TableDefinition<&str, &[u8]> = TableDefinition::new("causal_graph");
22const ENTITIES: TableDefinition<&[u8], &[u8]> = TableDefinition::new("entities");
23const ENTITY_NAMES: TableDefinition<&str, &[u8]> = TableDefinition::new("entity_names");
24const CONTENT_HASH_INDEX: TableDefinition<&str, &[u8]> = TableDefinition::new("content_hash_index");
25const LOGICAL_KEY_INDEX: TableDefinition<&str, &[u8]> = TableDefinition::new("logical_key_index");
26const METADATA_INDEX: TableDefinition<&str, &[u8]> = TableDefinition::new("metadata_index");
27const ENTITY_PROFILES: TableDefinition<&str, &[u8]> = TableDefinition::new("entity_profiles");
28const FACT_EMBEDDINGS: TableDefinition<&[u8], &[u8]> = TableDefinition::new("fact_embeddings");
29
30/// Storage engine wrapper around redb
31///
32/// Provides ACID transactions and persistent storage for all MnemeFusion data.
33pub struct StorageEngine {
34    db: Database,
35    path: PathBuf,
36}
37
38impl StorageEngine {
39    /// Open or create a database at the specified path
40    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
41        let path = path.as_ref();
42
43        // Check if file exists and validate minimum size
44        if path.exists() {
45            let metadata = std::fs::metadata(path)?;
46            let file_size = metadata.len();
47
48            // Minimum valid file size (redb has minimum overhead + 64 byte header)
49            // redb files are typically at least a few KB even when empty
50            const MIN_FILE_SIZE: u64 = 512; // Conservative minimum
51
52            if file_size < MIN_FILE_SIZE {
53                return Err(Error::FileTruncated(format!(
54                    "File size ({} bytes) is too small to be a valid database",
55                    file_size
56                )));
57            }
58        }
59
60        let db = Database::create(path)?;
61
62        let mut engine = Self {
63            db,
64            path: path.to_path_buf(),
65        };
66
67        // Initialize tables
68        engine.init_tables()?;
69
70        // Store or validate header
71        engine.init_header()?;
72
73        // Validate database integrity (for existing databases)
74        if path.exists() {
75            engine.validate_database()?;
76        }
77
78        Ok(engine)
79    }
80
81    /// Initialize required tables
82    fn init_tables(&self) -> Result<()> {
83        let write_txn = self.db.begin_write()?;
84        {
85            let _ = write_txn.open_table(MEMORIES)?;
86            let _ = write_txn.open_table(TEMPORAL_INDEX)?;
87            let _ = write_txn.open_table(METADATA_TABLE)?;
88            let _ = write_txn.open_table(MEMORY_ID_INDEX)?;
89            let _ = write_txn.open_table(CAUSAL_GRAPH)?;
90            let _ = write_txn.open_table(ENTITIES)?;
91            let _ = write_txn.open_table(ENTITY_NAMES)?;
92            let _ = write_txn.open_table(CONTENT_HASH_INDEX)?;
93            let _ = write_txn.open_table(LOGICAL_KEY_INDEX)?;
94            let _ = write_txn.open_table(METADATA_INDEX)?;
95            let _ = write_txn.open_table(ENTITY_PROFILES)?;
96            let _ = write_txn.open_table(FACT_EMBEDDINGS)?;
97        }
98        write_txn.commit()?;
99        Ok(())
100    }
101
102    /// Initialize or validate file header
103    fn init_header(&mut self) -> Result<()> {
104        let write_txn = self.db.begin_write()?;
105        {
106            let mut table = write_txn.open_table(METADATA_TABLE)?;
107
108            // Check if header exists
109            let header_exists = table.get("file_header")?.is_some();
110
111            if header_exists {
112                // Read and validate existing header
113                let existing = table.get("file_header")?.unwrap();
114                let existing_bytes = existing.value().to_vec();
115                let header = FileHeader::from_bytes(&existing_bytes)?;
116                header.validate()?;
117            } else {
118                // Create new header
119                let header = FileHeader::new();
120                table.insert("file_header", header.to_bytes().as_slice())?;
121            }
122        }
123        write_txn.commit()?;
124        Ok(())
125    }
126
127    /// Validate database integrity
128    ///
129    /// Verifies that all required tables exist and are accessible.
130    /// This helps detect corrupted or incomplete database files.
131    fn validate_database(&self) -> Result<()> {
132        let read_txn = self.db.begin_read()?;
133
134        // Check that all required tables exist and are accessible
135        // We open each table individually since they have different type parameters
136
137        if let Err(e) = read_txn.open_table(MEMORIES) {
138            return Err(Error::DatabaseCorruption(format!(
139                "Required table 'memories' is missing or corrupt: {}",
140                e
141            )));
142        }
143
144        if let Err(e) = read_txn.open_table(TEMPORAL_INDEX) {
145            return Err(Error::DatabaseCorruption(format!(
146                "Required table 'temporal_index' is missing or corrupt: {}",
147                e
148            )));
149        }
150
151        if let Err(e) = read_txn.open_table(METADATA_TABLE) {
152            return Err(Error::DatabaseCorruption(format!(
153                "Required table 'metadata' is missing or corrupt: {}",
154                e
155            )));
156        }
157
158        if let Err(e) = read_txn.open_table(MEMORY_ID_INDEX) {
159            return Err(Error::DatabaseCorruption(format!(
160                "Required table 'memory_id_index' is missing or corrupt: {}",
161                e
162            )));
163        }
164
165        if let Err(e) = read_txn.open_table(CAUSAL_GRAPH) {
166            return Err(Error::DatabaseCorruption(format!(
167                "Required table 'causal_graph' is missing or corrupt: {}",
168                e
169            )));
170        }
171
172        if let Err(e) = read_txn.open_table(ENTITIES) {
173            return Err(Error::DatabaseCorruption(format!(
174                "Required table 'entities' is missing or corrupt: {}",
175                e
176            )));
177        }
178
179        if let Err(e) = read_txn.open_table(ENTITY_NAMES) {
180            return Err(Error::DatabaseCorruption(format!(
181                "Required table 'entity_names' is missing or corrupt: {}",
182                e
183            )));
184        }
185
186        if let Err(e) = read_txn.open_table(CONTENT_HASH_INDEX) {
187            return Err(Error::DatabaseCorruption(format!(
188                "Required table 'content_hash_index' is missing or corrupt: {}",
189                e
190            )));
191        }
192
193        if let Err(e) = read_txn.open_table(LOGICAL_KEY_INDEX) {
194            return Err(Error::DatabaseCorruption(format!(
195                "Required table 'logical_key_index' is missing or corrupt: {}",
196                e
197            )));
198        }
199
200        if let Err(e) = read_txn.open_table(METADATA_INDEX) {
201            return Err(Error::DatabaseCorruption(format!(
202                "Required table 'metadata_index' is missing or corrupt: {}",
203                e
204            )));
205        }
206
207        if let Err(e) = read_txn.open_table(ENTITY_PROFILES) {
208            return Err(Error::DatabaseCorruption(format!(
209                "Required table 'entity_profiles' is missing or corrupt: {}",
210                e
211            )));
212        }
213
214        if let Err(e) = read_txn.open_table(FACT_EMBEDDINGS) {
215            return Err(Error::DatabaseCorruption(format!(
216                "Required table 'fact_embeddings' is missing or corrupt: {}",
217                e
218            )));
219        }
220
221        // Validate header exists
222        let metadata = read_txn.open_table(METADATA_TABLE)?;
223        match metadata.get("file_header")? {
224            Some(header_bytes) => {
225                // Validate header format
226                let header = FileHeader::from_bytes(header_bytes.value())?;
227                header.validate()?;
228            }
229            None => {
230                return Err(Error::DatabaseCorruption(
231                    "File header is missing".to_string(),
232                ));
233            }
234        }
235
236        Ok(())
237    }
238
239    /// Store a memory record
240    pub fn store_memory(&self, memory: &Memory) -> Result<()> {
241        let write_txn = self.db.begin_write()?;
242        {
243            let mut memories = write_txn.open_table(MEMORIES)?;
244            let mut temporal = write_txn.open_table(TEMPORAL_INDEX)?;
245            let mut id_index = write_txn.open_table(MEMORY_ID_INDEX)?;
246
247            // Serialize memory
248            let memory_data = self.serialize_memory(memory)?;
249
250            // Store memory
251            memories.insert(memory.id.as_bytes().as_slice(), memory_data.as_slice())?;
252
253            // Index by timestamp
254            temporal.insert(
255                memory.created_at.as_micros(),
256                memory.id.as_bytes().as_slice(),
257            )?;
258
259            // Index by u64 (for vector index lookups)
260            id_index.insert(memory.id.to_u64(), memory.id.as_bytes().as_slice())?;
261        }
262        write_txn.commit()?;
263        Ok(())
264    }
265
266    /// Retrieve a memory by ID
267    pub fn get_memory(&self, id: &MemoryId) -> Result<Option<Memory>> {
268        let read_txn = self.db.begin_read()?;
269        let table = read_txn.open_table(MEMORIES)?;
270
271        match table.get(id.as_bytes().as_slice())? {
272            Some(data) => {
273                let memory = self.deserialize_memory(data.value())?;
274                Ok(Some(memory))
275            }
276            None => Ok(None),
277        }
278    }
279
280    /// Retrieve a memory by its u64 key (used by vector index)
281    pub fn get_memory_by_u64(&self, key: u64) -> Result<Option<Memory>> {
282        let read_txn = self.db.begin_read()?;
283        let id_index = read_txn.open_table(MEMORY_ID_INDEX)?;
284        let memories = read_txn.open_table(MEMORIES)?;
285
286        // First lookup the full MemoryId from the u64 index
287        match id_index.get(key)? {
288            Some(id_bytes) => {
289                // Then fetch the memory using the full ID
290                match memories.get(id_bytes.value())? {
291                    Some(data) => {
292                        let memory = self.deserialize_memory(data.value())?;
293                        Ok(Some(memory))
294                    }
295                    None => Ok(None),
296                }
297            }
298            None => Ok(None),
299        }
300    }
301
302    /// Delete a memory by ID
303    pub fn delete_memory(&self, id: &MemoryId) -> Result<bool> {
304        let write_txn = self.db.begin_write()?;
305        let removed = {
306            let mut memories = write_txn.open_table(MEMORIES)?;
307            let mut id_index = write_txn.open_table(MEMORY_ID_INDEX)?;
308
309            let result = memories.remove(id.as_bytes().as_slice())?;
310
311            // Also remove from ID index
312            if result.is_some() {
313                id_index.remove(id.to_u64())?;
314            }
315
316            result.is_some()
317        };
318        write_txn.commit()?;
319        Ok(removed)
320    }
321
322    /// Get all memory IDs (for testing/debugging)
323    pub fn list_memory_ids(&self) -> Result<Vec<MemoryId>> {
324        let read_txn = self.db.begin_read()?;
325        let table = read_txn.open_table(MEMORIES)?;
326
327        let mut ids = Vec::new();
328        for item in table.iter()? {
329            let (key, _) = item?;
330            let id = MemoryId::from_bytes(key.value())?;
331            ids.push(id);
332        }
333        Ok(ids)
334    }
335
336    /// Get memory count
337    pub fn count_memories(&self) -> Result<usize> {
338        let read_txn = self.db.begin_read()?;
339        let table = read_txn.open_table(MEMORIES)?;
340        Ok(table.len()? as usize)
341    }
342
343    /// Find a memory by its dialog_id metadata value (O(n) scan).
344    ///
345    /// dialog_id is not stored in a separate index, so this iterates all memories.
346    /// With ~3643 memories in current DBs, this takes ~1-5ms — acceptable for the
347    /// bounded number of calls (≤18 per query for adjacent-turn bridging).
348    ///
349    /// Returns the first memory whose metadata["dialog_id"] == dialog_id.
350    pub fn find_memory_by_dialog_id(&self, dialog_id: &str) -> Result<Option<Memory>> {
351        let read_txn = self.db.begin_read()?;
352        let table = read_txn.open_table(MEMORIES)?;
353        for item in table.iter()? {
354            let (_, val) = item?;
355            let memory = self.deserialize_memory(val.value())?;
356            if memory.metadata.get("dialog_id").map(|s| s.as_str()) == Some(dialog_id) {
357                return Ok(Some(memory));
358            }
359        }
360        Ok(None)
361    }
362
363    /// Serialize memory to bytes
364    fn serialize_memory(&self, memory: &Memory) -> Result<Vec<u8>> {
365        // Simple serialization format:
366        // [id (16 bytes)][timestamp (8 bytes)][content_len (4 bytes)][content][embedding_len (4 bytes)][embedding][metadata_len (4 bytes)][metadata]
367
368        let mut bytes = Vec::new();
369
370        // ID
371        bytes.extend_from_slice(memory.id.as_bytes());
372
373        // Timestamp
374        bytes.extend_from_slice(&memory.created_at.to_bytes());
375
376        // Content
377        let content_bytes = memory.content.as_bytes();
378        bytes.extend_from_slice(&(content_bytes.len() as u32).to_le_bytes());
379        bytes.extend_from_slice(content_bytes);
380
381        // Embedding
382        bytes.extend_from_slice(&(memory.embedding.len() as u32).to_le_bytes());
383        for val in &memory.embedding {
384            bytes.extend_from_slice(&val.to_le_bytes());
385        }
386
387        // Metadata
388        let metadata_str = serde_json::to_string(&memory.metadata)
389            .map_err(|e| Error::Serialization(e.to_string()))?;
390        let metadata_bytes = metadata_str.as_bytes();
391        bytes.extend_from_slice(&(metadata_bytes.len() as u32).to_le_bytes());
392        bytes.extend_from_slice(metadata_bytes);
393
394        Ok(bytes)
395    }
396
397    /// Deserialize memory from bytes
398    fn deserialize_memory(&self, bytes: &[u8]) -> Result<Memory> {
399        let mut offset = 0;
400
401        // ID
402        if bytes.len() < offset + 16 {
403            return Err(Error::Deserialization("Incomplete memory data".to_string()));
404        }
405        let id = MemoryId::from_bytes(&bytes[offset..offset + 16])?;
406        offset += 16;
407
408        // Timestamp
409        if bytes.len() < offset + 8 {
410            return Err(Error::Deserialization(
411                "Incomplete timestamp data".to_string(),
412            ));
413        }
414        let created_at = Timestamp::from_bytes(&bytes[offset..offset + 8])?;
415        offset += 8;
416
417        // Content
418        if bytes.len() < offset + 4 {
419            return Err(Error::Deserialization(
420                "Incomplete content length".to_string(),
421            ));
422        }
423        let content_len = u32::from_le_bytes([
424            bytes[offset],
425            bytes[offset + 1],
426            bytes[offset + 2],
427            bytes[offset + 3],
428        ]) as usize;
429        offset += 4;
430
431        if bytes.len() < offset + content_len {
432            return Err(Error::Deserialization(
433                "Incomplete content data".to_string(),
434            ));
435        }
436        let content = String::from_utf8(bytes[offset..offset + content_len].to_vec())
437            .map_err(|e| Error::Deserialization(e.to_string()))?;
438        offset += content_len;
439
440        // Embedding
441        if bytes.len() < offset + 4 {
442            return Err(Error::Deserialization(
443                "Incomplete embedding length".to_string(),
444            ));
445        }
446        let embedding_len = u32::from_le_bytes([
447            bytes[offset],
448            bytes[offset + 1],
449            bytes[offset + 2],
450            bytes[offset + 3],
451        ]) as usize;
452        offset += 4;
453
454        if bytes.len() < offset + embedding_len * 4 {
455            return Err(Error::Deserialization(
456                "Incomplete embedding data".to_string(),
457            ));
458        }
459        let mut embedding = Vec::with_capacity(embedding_len);
460        for _ in 0..embedding_len {
461            let val = f32::from_le_bytes([
462                bytes[offset],
463                bytes[offset + 1],
464                bytes[offset + 2],
465                bytes[offset + 3],
466            ]);
467            embedding.push(val);
468            offset += 4;
469        }
470
471        // Metadata
472        if bytes.len() < offset + 4 {
473            return Err(Error::Deserialization(
474                "Incomplete metadata length".to_string(),
475            ));
476        }
477        let metadata_len = u32::from_le_bytes([
478            bytes[offset],
479            bytes[offset + 1],
480            bytes[offset + 2],
481            bytes[offset + 3],
482        ]) as usize;
483        offset += 4;
484
485        if bytes.len() < offset + metadata_len {
486            return Err(Error::Deserialization(
487                "Incomplete metadata data".to_string(),
488            ));
489        }
490        let metadata_str = String::from_utf8(bytes[offset..offset + metadata_len].to_vec())
491            .map_err(|e| Error::Deserialization(e.to_string()))?;
492        let metadata: HashMap<String, String> = serde_json::from_str(&metadata_str)
493            .map_err(|e| Error::Deserialization(e.to_string()))?;
494
495        Ok(Memory {
496            id,
497            content,
498            embedding,
499            created_at,
500            metadata,
501        })
502    }
503
504    /// Get the database path
505    pub fn path(&self) -> &Path {
506        &self.path
507    }
508
509    /// Get a reference to the underlying database
510    ///
511    /// This is used by index implementations (TemporalIndex, etc.) to access
512    /// tables directly for specialized queries.
513    pub(crate) fn db(&self) -> &Database {
514        &self.db
515    }
516
517    /// Store vector index data
518    pub fn store_vector_index(&self, buffer: &[u8]) -> Result<()> {
519        let write_txn = self.db.begin_write()?;
520        {
521            let mut table = write_txn.open_table(METADATA_TABLE)?;
522            table.insert("vector_index", buffer)?;
523        }
524        write_txn.commit()?;
525        Ok(())
526    }
527
528    /// Load vector index data
529    pub fn load_vector_index(&self) -> Result<Option<Vec<u8>>> {
530        let read_txn = self.db.begin_read()?;
531        let table = read_txn.open_table(METADATA_TABLE)?;
532
533        match table.get("vector_index")? {
534            Some(data) => Ok(Some(data.value().to_vec())),
535            None => Ok(None),
536        }
537    }
538
539    /// Store BM25 index data
540    pub fn store_bm25_index(&self, buffer: &[u8]) -> Result<()> {
541        let write_txn = self.db.begin_write()?;
542        {
543            let mut table = write_txn.open_table(METADATA_TABLE)?;
544            table.insert("bm25_index", buffer)?;
545        }
546        write_txn.commit()?;
547        Ok(())
548    }
549
550    /// Load BM25 index data
551    pub fn load_bm25_index(&self) -> Result<Option<Vec<u8>>> {
552        let read_txn = self.db.begin_read()?;
553        let table = read_txn.open_table(METADATA_TABLE)?;
554
555        match table.get("bm25_index")? {
556            Some(data) => Ok(Some(data.value().to_vec())),
557            None => Ok(None),
558        }
559    }
560
561    /// Store causal graph data
562    pub fn store_causal_graph(&self, data: &[u8]) -> Result<()> {
563        let write_txn = self.db.begin_write()?;
564        {
565            let mut table = write_txn.open_table(CAUSAL_GRAPH)?;
566            table.insert("graph", data)?;
567        }
568        write_txn.commit()?;
569        Ok(())
570    }
571
572    /// Load causal graph data
573    pub fn load_causal_graph(&self) -> Result<Option<Vec<u8>>> {
574        let read_txn = self.db.begin_read()?;
575        let table = read_txn.open_table(CAUSAL_GRAPH)?;
576
577        match table.get("graph")? {
578            Some(data) => Ok(Some(data.value().to_vec())),
579            None => Ok(None),
580        }
581    }
582
583    /// Store an entity
584    pub fn store_entity(&self, entity: &Entity) -> Result<()> {
585        let write_txn = self.db.begin_write()?;
586        {
587            let mut entities = write_txn.open_table(ENTITIES)?;
588            let mut names = write_txn.open_table(ENTITY_NAMES)?;
589
590            // Serialize entity to JSON
591            let entity_data =
592                serde_json::to_vec(entity).map_err(|e| Error::Serialization(e.to_string()))?;
593
594            // Store entity by ID
595            entities.insert(entity.id.as_bytes().as_slice(), entity_data.as_slice())?;
596
597            // Index by normalized name (case-insensitive)
598            let normalized_name = entity.normalized_name();
599            names.insert(normalized_name.as_str(), entity.id.as_bytes().as_slice())?;
600        }
601        write_txn.commit()?;
602        Ok(())
603    }
604
605    /// Get an entity by ID
606    pub fn get_entity(&self, id: &EntityId) -> Result<Option<Entity>> {
607        let read_txn = self.db.begin_read()?;
608        let table = read_txn.open_table(ENTITIES)?;
609
610        match table.get(id.as_bytes().as_slice())? {
611            Some(data) => {
612                let entity: Entity = serde_json::from_slice(data.value())
613                    .map_err(|e| Error::Deserialization(e.to_string()))?;
614                Ok(Some(entity))
615            }
616            None => Ok(None),
617        }
618    }
619
620    /// Find an entity by name (case-insensitive)
621    pub fn find_entity_by_name(&self, name: &str) -> Result<Option<Entity>> {
622        let read_txn = self.db.begin_read()?;
623        let names_table = read_txn.open_table(ENTITY_NAMES)?;
624        let entities_table = read_txn.open_table(ENTITIES)?;
625
626        // Normalize the search name
627        let normalized = name.to_lowercase();
628
629        // Look up entity ID by normalized name
630        match names_table.get(normalized.as_str())? {
631            Some(id_bytes) => {
632                let id_bytes = id_bytes.value().to_vec();
633                let entity_id = EntityId::from_bytes(&id_bytes)?;
634
635                // Get the entity
636                match entities_table.get(entity_id.as_bytes().as_slice())? {
637                    Some(data) => {
638                        let entity: Entity = serde_json::from_slice(data.value())
639                            .map_err(|e| Error::Deserialization(e.to_string()))?;
640                        Ok(Some(entity))
641                    }
642                    None => Ok(None),
643                }
644            }
645            None => Ok(None),
646        }
647    }
648
649    /// Delete an entity
650    pub fn delete_entity(&self, id: &EntityId) -> Result<bool> {
651        let write_txn = self.db.begin_write()?;
652        let deleted = {
653            let mut entities = write_txn.open_table(ENTITIES)?;
654            let mut names = write_txn.open_table(ENTITY_NAMES)?;
655
656            // Get entity to find its name for name index cleanup
657            // Store the normalized name before dropping the guard
658            let normalized_name = if let Some(data) = entities.get(id.as_bytes().as_slice())? {
659                let entity: Entity = serde_json::from_slice(data.value())
660                    .map_err(|e| Error::Deserialization(e.to_string()))?;
661                Some(entity.normalized_name())
662            } else {
663                None
664            };
665
666            // Now we can mutate
667            if let Some(name) = normalized_name {
668                // Remove from name index
669                names.remove(name.as_str())?;
670
671                // Remove entity
672                entities.remove(id.as_bytes().as_slice())?;
673                true
674            } else {
675                false
676            }
677        };
678        write_txn.commit()?;
679        Ok(deleted)
680    }
681
682    /// List all entities
683    pub fn list_entities(&self) -> Result<Vec<Entity>> {
684        let read_txn = self.db.begin_read()?;
685        let table = read_txn.open_table(ENTITIES)?;
686
687        let mut entities = Vec::new();
688        for result in table.iter()? {
689            let (_, value) = result?;
690            let entity: Entity = serde_json::from_slice(value.value())
691                .map_err(|e| Error::Deserialization(e.to_string()))?;
692            entities.push(entity);
693        }
694
695        Ok(entities)
696    }
697
698    /// Count entities
699    pub fn count_entities(&self) -> Result<usize> {
700        let read_txn = self.db.begin_read()?;
701        let table = read_txn.open_table(ENTITIES)?;
702        Ok(table.len()? as usize)
703    }
704
705    /// Store entity graph data
706    pub fn store_entity_graph(&self, data: &[u8]) -> Result<()> {
707        let write_txn = self.db.begin_write()?;
708        {
709            let mut table = write_txn.open_table(METADATA_TABLE)?;
710            table.insert("entity_graph", data)?;
711        }
712        write_txn.commit()?;
713        Ok(())
714    }
715
716    /// Load entity graph data
717    pub fn load_entity_graph(&self) -> Result<Option<Vec<u8>>> {
718        let read_txn = self.db.begin_read()?;
719        let table = read_txn.open_table(METADATA_TABLE)?;
720
721        match table.get("entity_graph")? {
722            Some(data) => Ok(Some(data.value().to_vec())),
723            None => Ok(None),
724        }
725    }
726
727    /// Store entity-to-entity relationship graph data
728    pub fn store_relationship_graph(&self, data: &[u8]) -> Result<()> {
729        let write_txn = self.db.begin_write()?;
730        {
731            let mut table = write_txn.open_table(METADATA_TABLE)?;
732            table.insert("relationship_graph", data)?;
733        }
734        write_txn.commit()?;
735        Ok(())
736    }
737
738    /// Load entity-to-entity relationship graph data
739    pub fn load_relationship_graph(&self) -> Result<Option<Vec<u8>>> {
740        let read_txn = self.db.begin_read()?;
741        let table = read_txn.open_table(METADATA_TABLE)?;
742
743        match table.get("relationship_graph")? {
744            Some(data) => Ok(Some(data.value().to_vec())),
745            None => Ok(None),
746        }
747    }
748
749    /// Check if the relationship graph key exists in storage
750    pub fn has_relationship_graph(&self) -> Result<bool> {
751        let read_txn = self.db.begin_read()?;
752        let table = read_txn.open_table(METADATA_TABLE)?;
753        Ok(table.get("relationship_graph")?.is_some())
754    }
755
756    // Content hash operations for deduplication
757
758    /// Store content hash → memory ID mapping
759    ///
760    /// Used for deduplication to quickly find if content already exists
761    pub fn store_content_hash(&self, hash: &str, memory_id: &MemoryId) -> Result<()> {
762        let write_txn = self.db.begin_write()?;
763        {
764            let mut table = write_txn.open_table(CONTENT_HASH_INDEX)?;
765            table.insert(hash, memory_id.as_bytes() as &[u8])?;
766        }
767        write_txn.commit()?;
768        Ok(())
769    }
770
771    /// Find memory ID by content hash
772    ///
773    /// Returns None if hash not found (content is unique)
774    pub fn find_by_content_hash(&self, hash: &str) -> Result<Option<MemoryId>> {
775        let read_txn = self.db.begin_read()?;
776        let table = read_txn.open_table(CONTENT_HASH_INDEX)?;
777
778        match table.get(hash)? {
779            Some(bytes) => {
780                let id = MemoryId::from_bytes(bytes.value())?;
781                Ok(Some(id))
782            }
783            None => Ok(None),
784        }
785    }
786
787    /// Delete content hash mapping
788    pub fn delete_content_hash(&self, hash: &str) -> Result<()> {
789        let write_txn = self.db.begin_write()?;
790        {
791            let mut table = write_txn.open_table(CONTENT_HASH_INDEX)?;
792            table.remove(hash)?;
793        }
794        write_txn.commit()?;
795        Ok(())
796    }
797
798    // Logical key operations for upsert
799
800    /// Store logical key → memory ID mapping
801    ///
802    /// Used for upsert operations with developer-defined keys
803    pub fn store_logical_key(&self, key: &str, memory_id: &MemoryId) -> Result<()> {
804        let write_txn = self.db.begin_write()?;
805        {
806            let mut table = write_txn.open_table(LOGICAL_KEY_INDEX)?;
807            table.insert(key, memory_id.as_bytes() as &[u8])?;
808        }
809        write_txn.commit()?;
810        Ok(())
811    }
812
813    /// Find memory ID by logical key
814    ///
815    /// Returns None if key not found
816    pub fn find_by_logical_key(&self, key: &str) -> Result<Option<MemoryId>> {
817        let read_txn = self.db.begin_read()?;
818        let table = read_txn.open_table(LOGICAL_KEY_INDEX)?;
819
820        match table.get(key)? {
821            Some(bytes) => {
822                let id = MemoryId::from_bytes(bytes.value())?;
823                Ok(Some(id))
824            }
825            None => Ok(None),
826        }
827    }
828
829    /// Delete logical key mapping
830    pub fn delete_logical_key(&self, key: &str) -> Result<()> {
831        let write_txn = self.db.begin_write()?;
832        {
833            let mut table = write_txn.open_table(LOGICAL_KEY_INDEX)?;
834            table.remove(key)?;
835        }
836        write_txn.commit()?;
837        Ok(())
838    }
839
840    /// Update logical key mapping to point to a new memory ID
841    ///
842    /// This is used when an upsert operation replaces an existing memory
843    pub fn update_logical_key(&self, key: &str, new_memory_id: &MemoryId) -> Result<()> {
844        // Just overwrite - same as store
845        self.store_logical_key(key, new_memory_id)
846    }
847
848    // Namespace operations
849
850    /// List all namespaces in the database
851    ///
852    /// Scans all memories and extracts unique namespace values.
853    /// Returns sorted list of namespace strings (excluding default namespace "").
854    ///
855    /// # Performance
856    ///
857    /// O(n) where n = total memories. Can be cached if needed.
858    pub fn list_namespaces(&self) -> Result<Vec<String>> {
859        let read_txn = self.db.begin_read()?;
860        let table = read_txn.open_table(MEMORIES)?;
861
862        let mut namespaces = std::collections::HashSet::new();
863
864        // Scan all memories
865        for entry in table.iter()? {
866            let (_, value) = entry?;
867            let memory_data = value.value();
868
869            // Deserialize memory to get namespace
870            if let Ok(memory) = self.deserialize_memory(memory_data) {
871                let ns = memory.get_namespace();
872                if !ns.is_empty() {
873                    namespaces.insert(ns);
874                }
875            }
876        }
877
878        let mut result: Vec<String> = namespaces.into_iter().collect();
879        result.sort();
880        Ok(result)
881    }
882
883    /// Count memories in a specific namespace
884    ///
885    /// # Arguments
886    ///
887    /// * `namespace` - The namespace to count (empty string "" for default)
888    ///
889    /// # Returns
890    ///
891    /// Number of memories in the namespace
892    pub fn count_namespace(&self, namespace: &str) -> Result<usize> {
893        let read_txn = self.db.begin_read()?;
894        let table = read_txn.open_table(MEMORIES)?;
895
896        let mut count = 0;
897
898        // Scan all memories
899        for entry in table.iter()? {
900            let (_, value) = entry?;
901            let memory_data = value.value();
902
903            // Deserialize memory to check namespace
904            if let Ok(memory) = self.deserialize_memory(memory_data) {
905                if memory.get_namespace() == namespace {
906                    count += 1;
907                }
908            }
909        }
910
911        Ok(count)
912    }
913
914    /// List all memory IDs in a specific namespace
915    ///
916    /// # Arguments
917    ///
918    /// * `namespace` - The namespace to list (empty string "" for default)
919    ///
920    /// # Returns
921    ///
922    /// Vector of MemoryIds in the namespace
923    pub fn list_namespace_ids(&self, namespace: &str) -> Result<Vec<MemoryId>> {
924        let read_txn = self.db.begin_read()?;
925        let table = read_txn.open_table(MEMORIES)?;
926
927        let mut ids = Vec::new();
928
929        // Scan all memories
930        for entry in table.iter()? {
931            let (key, value) = entry?;
932            let memory_data = value.value();
933
934            // Deserialize memory to check namespace
935            if let Ok(memory) = self.deserialize_memory(memory_data) {
936                if memory.get_namespace() == namespace {
937                    // Get memory ID from key
938                    let id = MemoryId::from_bytes(key.value())?;
939                    ids.push(id);
940                }
941            }
942        }
943
944        Ok(ids)
945    }
946
947    // Metadata index operations
948
949    /// Build a metadata index key
950    ///
951    /// Format: "{field}:{value}:{namespace}"
952    fn metadata_index_key(field: &str, value: &str, namespace: &str) -> String {
953        format!("{}:{}:{}", field, value, namespace)
954    }
955
956    /// Store a metadata field value in the index
957    ///
958    /// Associates a memory with a specific metadata field value in a namespace.
959    ///
960    /// # Arguments
961    ///
962    /// * `field` - The metadata field name
963    /// * `value` - The field value
964    /// * `namespace` - The namespace the memory belongs to
965    /// * `memory_id` - The memory ID to associate with this field value
966    pub fn add_to_metadata_index(
967        &self,
968        field: &str,
969        value: &str,
970        namespace: &str,
971        memory_id: &MemoryId,
972    ) -> Result<()> {
973        let write_txn = self.db.begin_write()?;
974        {
975            let mut table = write_txn.open_table(METADATA_INDEX)?;
976
977            let key = Self::metadata_index_key(field, value, namespace);
978
979            // Get existing memory IDs for this key
980            let mut ids: Vec<MemoryId> = match table.get(key.as_str())? {
981                Some(data) => serde_json::from_slice(data.value())?,
982                None => Vec::new(),
983            };
984
985            // Add new ID if not already present
986            if !ids.contains(memory_id) {
987                ids.push(memory_id.clone());
988
989                // Serialize and store
990                let data = serde_json::to_vec(&ids)?;
991                table.insert(key.as_str(), data.as_slice())?;
992            }
993        }
994        write_txn.commit()?;
995        Ok(())
996    }
997
998    /// Remove a memory from a metadata index entry
999    ///
1000    /// # Arguments
1001    ///
1002    /// * `field` - The metadata field name
1003    /// * `value` - The field value
1004    /// * `namespace` - The namespace
1005    /// * `memory_id` - The memory ID to remove
1006    pub fn remove_from_metadata_index(
1007        &self,
1008        field: &str,
1009        value: &str,
1010        namespace: &str,
1011        memory_id: &MemoryId,
1012    ) -> Result<()> {
1013        let write_txn = self.db.begin_write()?;
1014        {
1015            let mut table = write_txn.open_table(METADATA_INDEX)?;
1016
1017            let key = Self::metadata_index_key(field, value, namespace);
1018
1019            // Get existing memory IDs for this key and clone the data
1020            let ids_data = table.get(key.as_str())?.map(|data| data.value().to_vec());
1021
1022            if let Some(data_vec) = ids_data {
1023                let mut ids: Vec<MemoryId> = serde_json::from_slice(&data_vec)?;
1024
1025                // Remove the memory ID
1026                ids.retain(|id| id != memory_id);
1027
1028                if ids.is_empty() {
1029                    // Remove the index entry if no more memories
1030                    table.remove(key.as_str())?;
1031                } else {
1032                    // Update with remaining IDs
1033                    let data = serde_json::to_vec(&ids)?;
1034                    table.insert(key.as_str(), data.as_slice())?;
1035                }
1036            }
1037        }
1038        write_txn.commit()?;
1039        Ok(())
1040    }
1041
1042    /// Find all memory IDs matching a metadata field value
1043    ///
1044    /// # Arguments
1045    ///
1046    /// * `field` - The metadata field name
1047    /// * `value` - The field value to match
1048    /// * `namespace` - The namespace to search in
1049    ///
1050    /// # Returns
1051    ///
1052    /// Vector of memory IDs that have the specified metadata field value
1053    pub fn find_by_metadata(
1054        &self,
1055        field: &str,
1056        value: &str,
1057        namespace: &str,
1058    ) -> Result<Vec<MemoryId>> {
1059        let read_txn = self.db.begin_read()?;
1060        let table = read_txn.open_table(METADATA_INDEX)?;
1061
1062        let key = Self::metadata_index_key(field, value, namespace);
1063
1064        match table.get(key.as_str())? {
1065            Some(data) => {
1066                let ids: Vec<MemoryId> = serde_json::from_slice(data.value())?;
1067                Ok(ids)
1068            }
1069            None => Ok(Vec::new()),
1070        }
1071    }
1072
1073    /// Remove all metadata index entries for a memory
1074    ///
1075    /// Used when deleting a memory to clean up all its metadata indexes.
1076    ///
1077    /// # Arguments
1078    ///
1079    /// * `memory` - The memory being deleted
1080    /// * `indexed_fields` - List of fields that are indexed
1081    pub fn remove_metadata_indexes_for_memory(
1082        &self,
1083        memory: &Memory,
1084        indexed_fields: &[String],
1085    ) -> Result<()> {
1086        let namespace = memory.get_namespace();
1087
1088        // For each indexed field, remove this memory from the index
1089        for field in indexed_fields {
1090            if let Some(value) = memory.metadata.get(field) {
1091                self.remove_from_metadata_index(field, value, &namespace, &memory.id)?;
1092            }
1093        }
1094
1095        Ok(())
1096    }
1097
1098    // Entity Profile operations
1099
1100    /// Store an entity profile
1101    ///
1102    /// Profiles are keyed by the normalized (lowercase) entity name for
1103    /// case-insensitive lookups.
1104    ///
1105    /// # Arguments
1106    ///
1107    /// * `profile` - The entity profile to store
1108    ///
1109    /// # Example
1110    ///
1111    /// ```ignore
1112    /// let profile = EntityProfile::new(EntityId::new(), "Alice".into(), "person".into());
1113    /// storage.store_entity_profile(&profile)?;
1114    /// ```
1115    pub fn store_entity_profile(&self, profile: &EntityProfile) -> Result<()> {
1116        let write_txn = self.db.begin_write()?;
1117        {
1118            let mut table = write_txn.open_table(ENTITY_PROFILES)?;
1119            let key = profile.name.to_lowercase();
1120            let data =
1121                serde_json::to_vec(profile).map_err(|e| Error::Serialization(e.to_string()))?;
1122            table.insert(key.as_str(), data.as_slice())?;
1123        }
1124        write_txn.commit()?;
1125        Ok(())
1126    }
1127
1128    /// Get an entity profile by name (case-insensitive)
1129    ///
1130    /// # Arguments
1131    ///
1132    /// * `name` - The entity name to look up
1133    ///
1134    /// # Returns
1135    ///
1136    /// The entity profile if found, or None
1137    ///
1138    /// # Example
1139    ///
1140    /// ```ignore
1141    /// let profile = storage.get_entity_profile("Alice")?;
1142    /// if let Some(p) = profile {
1143    ///     println!("Found profile for {}", p.name);
1144    /// }
1145    /// ```
1146    pub fn get_entity_profile(&self, name: &str) -> Result<Option<EntityProfile>> {
1147        let read_txn = self.db.begin_read()?;
1148        let table = read_txn.open_table(ENTITY_PROFILES)?;
1149        let key = name.to_lowercase();
1150
1151        match table.get(key.as_str())? {
1152            Some(data) => {
1153                let profile: EntityProfile = serde_json::from_slice(data.value())
1154                    .map_err(|e| Error::Deserialization(e.to_string()))?;
1155                Ok(Some(profile))
1156            }
1157            None => Ok(None),
1158        }
1159    }
1160
1161    /// List all entity profiles
1162    ///
1163    /// # Returns
1164    ///
1165    /// Vector of all entity profiles in the database
1166    ///
1167    /// # Performance
1168    ///
1169    /// O(n) where n = number of profiles. Use with caution on large databases.
1170    pub fn list_entity_profiles(&self) -> Result<Vec<EntityProfile>> {
1171        let read_txn = self.db.begin_read()?;
1172        let table = read_txn.open_table(ENTITY_PROFILES)?;
1173
1174        let mut profiles = Vec::new();
1175        for result in table.iter()? {
1176            let (_, value) = result?;
1177            let profile: EntityProfile = serde_json::from_slice(value.value())
1178                .map_err(|e| Error::Deserialization(e.to_string()))?;
1179            profiles.push(profile);
1180        }
1181        Ok(profiles)
1182    }
1183
1184    /// Delete an entity profile by name (case-insensitive)
1185    ///
1186    /// # Arguments
1187    ///
1188    /// * `name` - The entity name to delete
1189    ///
1190    /// # Returns
1191    ///
1192    /// true if the profile was deleted, false if it didn't exist
1193    pub fn delete_entity_profile(&self, name: &str) -> Result<bool> {
1194        let write_txn = self.db.begin_write()?;
1195        let deleted = {
1196            let mut table = write_txn.open_table(ENTITY_PROFILES)?;
1197            let key = name.to_lowercase();
1198            let result = table.remove(key.as_str())?;
1199            result.is_some()
1200        };
1201        write_txn.commit()?;
1202        Ok(deleted)
1203    }
1204
1205    /// Count entity profiles
1206    ///
1207    /// # Returns
1208    ///
1209    /// Number of entity profiles in the database
1210    pub fn count_entity_profiles(&self) -> Result<usize> {
1211        let read_txn = self.db.begin_read()?;
1212        let table = read_txn.open_table(ENTITY_PROFILES)?;
1213        Ok(table.len()? as usize)
1214    }
1215
1216    /// List all entity profile names (lightweight, no full deserialization)
1217    ///
1218    /// Returns the table keys directly, which are the lowercased entity names.
1219    /// Much faster than `list_entity_profiles()` when you only need names.
1220    pub fn list_entity_profile_names(&self) -> Result<Vec<String>> {
1221        let read_txn = self.db.begin_read()?;
1222        let table = read_txn.open_table(ENTITY_PROFILES)?;
1223
1224        let mut names = Vec::new();
1225        for result in table.iter()? {
1226            let (key, _) = result?;
1227            names.push(key.value().to_string());
1228        }
1229        Ok(names)
1230    }
1231
1232    // Fact embedding operations
1233
1234    /// Store a fact embedding
1235    ///
1236    /// Key is a 16-byte hash of (entity_name, fact_type, value).
1237    /// Value is the embedding as raw little-endian f32 bytes.
1238    pub fn store_fact_embedding(&self, key: &[u8], embedding: &[f32]) -> Result<()> {
1239        let write_txn = self.db.begin_write()?;
1240        {
1241            let mut table = write_txn.open_table(FACT_EMBEDDINGS)?;
1242            let bytes: Vec<u8> = embedding.iter().flat_map(|f| f.to_le_bytes()).collect();
1243            table.insert(key, bytes.as_slice())?;
1244        }
1245        write_txn.commit()?;
1246        Ok(())
1247    }
1248
1249    /// Get a fact embedding by key
1250    ///
1251    /// Returns the embedding as Vec<f32>, or None if not found.
1252    pub fn get_fact_embedding(&self, key: &[u8]) -> Result<Option<Vec<f32>>> {
1253        let read_txn = self.db.begin_read()?;
1254        let table = read_txn.open_table(FACT_EMBEDDINGS)?;
1255
1256        match table.get(key)? {
1257            Some(data) => {
1258                let bytes = data.value();
1259                if bytes.len() % 4 != 0 {
1260                    return Err(Error::Deserialization(
1261                        "Invalid fact embedding data length".to_string(),
1262                    ));
1263                }
1264                let embedding: Vec<f32> = bytes
1265                    .chunks_exact(4)
1266                    .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
1267                    .collect();
1268                Ok(Some(embedding))
1269            }
1270            None => Ok(None),
1271        }
1272    }
1273}
1274
// NOTE: serde_json is currently used to serialize metadata index entries and entity profiles.
// This is a temporary solution - we'll use rkyv for zero-copy in later sprints
1277
1278#[cfg(test)]
1279mod tests {
1280    use super::*;
1281    use tempfile::tempdir;
1282
1283    #[test]
1284    fn test_storage_engine_open() {
1285        let dir = tempdir().unwrap();
1286        let path = dir.path().join("test.mfdb");
1287
1288        let engine = StorageEngine::open(&path).unwrap();
1289        assert_eq!(engine.path(), path);
1290    }
1291
1292    #[test]
1293    fn test_storage_engine_store_and_retrieve() {
1294        let dir = tempdir().unwrap();
1295        let path = dir.path().join("test.mfdb");
1296        let engine = StorageEngine::open(&path).unwrap();
1297
1298        let memory = Memory::new("test content".to_string(), vec![0.1, 0.2, 0.3]);
1299        let id = memory.id.clone();
1300
1301        engine.store_memory(&memory).unwrap();
1302
1303        let retrieved = engine.get_memory(&id).unwrap();
1304        assert!(retrieved.is_some());
1305
1306        let retrieved = retrieved.unwrap();
1307        assert_eq!(retrieved.id, id);
1308        assert_eq!(retrieved.content, "test content");
1309        assert_eq!(retrieved.embedding, vec![0.1, 0.2, 0.3]);
1310    }
1311
1312    #[test]
1313    fn test_storage_engine_delete() {
1314        let dir = tempdir().unwrap();
1315        let path = dir.path().join("test.mfdb");
1316        let engine = StorageEngine::open(&path).unwrap();
1317
1318        let memory = Memory::new("test".to_string(), vec![0.1]);
1319        let id = memory.id.clone();
1320
1321        engine.store_memory(&memory).unwrap();
1322        assert!(engine.get_memory(&id).unwrap().is_some());
1323
1324        let deleted = engine.delete_memory(&id).unwrap();
1325        assert!(deleted);
1326        assert!(engine.get_memory(&id).unwrap().is_none());
1327    }
1328
1329    #[test]
1330    fn test_storage_engine_not_found() {
1331        let dir = tempdir().unwrap();
1332        let path = dir.path().join("test.mfdb");
1333        let engine = StorageEngine::open(&path).unwrap();
1334
1335        let id = MemoryId::new();
1336        assert!(engine.get_memory(&id).unwrap().is_none());
1337    }
1338
1339    #[test]
1340    fn test_storage_engine_multiple_memories() {
1341        let dir = tempdir().unwrap();
1342        let path = dir.path().join("test.mfdb");
1343        let engine = StorageEngine::open(&path).unwrap();
1344
1345        let mem1 = Memory::new("first".to_string(), vec![0.1]);
1346        let mem2 = Memory::new("second".to_string(), vec![0.2]);
1347        let mem3 = Memory::new("third".to_string(), vec![0.3]);
1348
1349        engine.store_memory(&mem1).unwrap();
1350        engine.store_memory(&mem2).unwrap();
1351        engine.store_memory(&mem3).unwrap();
1352
1353        assert_eq!(engine.count_memories().unwrap(), 3);
1354
1355        let ids = engine.list_memory_ids().unwrap();
1356        assert_eq!(ids.len(), 3);
1357    }
1358
1359    #[test]
1360    fn test_storage_engine_with_metadata() {
1361        let dir = tempdir().unwrap();
1362        let path = dir.path().join("test.mfdb");
1363        let engine = StorageEngine::open(&path).unwrap();
1364
1365        let mut metadata = HashMap::new();
1366        metadata.insert("source".to_string(), "test".to_string());
1367        metadata.insert("category".to_string(), "example".to_string());
1368
1369        let memory = Memory::new_with_metadata("test".to_string(), vec![0.1], metadata);
1370        let id = memory.id.clone();
1371
1372        engine.store_memory(&memory).unwrap();
1373
1374        let retrieved = engine.get_memory(&id).unwrap().unwrap();
1375        assert_eq!(retrieved.metadata.len(), 2);
1376        assert_eq!(retrieved.metadata.get("source"), Some(&"test".to_string()));
1377        assert_eq!(
1378            retrieved.metadata.get("category"),
1379            Some(&"example".to_string())
1380        );
1381    }
1382
1383    #[test]
1384    fn test_storage_engine_reopen() {
1385        let dir = tempdir().unwrap();
1386        let path = dir.path().join("test.mfdb");
1387
1388        let memory = Memory::new("persistent".to_string(), vec![0.5]);
1389        let id = memory.id.clone();
1390
1391        // Store in first instance
1392        {
1393            let engine = StorageEngine::open(&path).unwrap();
1394            engine.store_memory(&memory).unwrap();
1395        }
1396
1397        // Retrieve in second instance
1398        {
1399            let engine = StorageEngine::open(&path).unwrap();
1400            let retrieved = engine.get_memory(&id).unwrap();
1401            assert!(retrieved.is_some());
1402            assert_eq!(retrieved.unwrap().content, "persistent");
1403        }
1404    }
1405
1406    #[test]
1407    fn test_storage_list_namespaces_empty() {
1408        let dir = tempdir().unwrap();
1409        let path = dir.path().join("test.mfdb");
1410        let engine = StorageEngine::open(&path).unwrap();
1411
1412        // No namespaces initially
1413        let namespaces = engine.list_namespaces().unwrap();
1414        assert!(namespaces.is_empty());
1415    }
1416
1417    #[test]
1418    fn test_storage_list_namespaces() {
1419        let dir = tempdir().unwrap();
1420        let path = dir.path().join("test.mfdb");
1421        let engine = StorageEngine::open(&path).unwrap();
1422
1423        // Add memories to different namespaces
1424        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1425        mem1.set_namespace("user_123");
1426        engine.store_memory(&mem1).unwrap();
1427
1428        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1429        mem2.set_namespace("user_456");
1430        engine.store_memory(&mem2).unwrap();
1431
1432        let mut mem3 = Memory::new("content 3".to_string(), vec![0.3; 384]);
1433        mem3.set_namespace("user_123"); // Duplicate namespace
1434        engine.store_memory(&mem3).unwrap();
1435
1436        // Default namespace (no set_namespace)
1437        let mem4 = Memory::new("content 4".to_string(), vec![0.4; 384]);
1438        engine.store_memory(&mem4).unwrap();
1439
1440        // Should return 2 unique non-default namespaces (sorted)
1441        let namespaces = engine.list_namespaces().unwrap();
1442        assert_eq!(namespaces.len(), 2);
1443        assert_eq!(namespaces[0], "user_123");
1444        assert_eq!(namespaces[1], "user_456");
1445    }
1446
1447    #[test]
1448    fn test_storage_count_namespace() {
1449        let dir = tempdir().unwrap();
1450        let path = dir.path().join("test.mfdb");
1451        let engine = StorageEngine::open(&path).unwrap();
1452
1453        // Add memories to different namespaces
1454        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1455        mem1.set_namespace("user_123");
1456        engine.store_memory(&mem1).unwrap();
1457
1458        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1459        mem2.set_namespace("user_123");
1460        engine.store_memory(&mem2).unwrap();
1461
1462        let mut mem3 = Memory::new("content 3".to_string(), vec![0.3; 384]);
1463        mem3.set_namespace("user_456");
1464        engine.store_memory(&mem3).unwrap();
1465
1466        let mem4 = Memory::new("content 4".to_string(), vec![0.4; 384]);
1467        engine.store_memory(&mem4).unwrap();
1468
1469        // Count by namespace
1470        assert_eq!(engine.count_namespace("user_123").unwrap(), 2);
1471        assert_eq!(engine.count_namespace("user_456").unwrap(), 1);
1472        assert_eq!(engine.count_namespace("").unwrap(), 1); // Default namespace
1473        assert_eq!(engine.count_namespace("nonexistent").unwrap(), 0);
1474    }
1475
1476    #[test]
1477    fn test_storage_list_namespace_ids() {
1478        let dir = tempdir().unwrap();
1479        let path = dir.path().join("test.mfdb");
1480        let engine = StorageEngine::open(&path).unwrap();
1481
1482        // Add memories to namespace
1483        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1484        mem1.set_namespace("user_123");
1485        let id1 = mem1.id.clone();
1486        engine.store_memory(&mem1).unwrap();
1487
1488        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1489        mem2.set_namespace("user_123");
1490        let id2 = mem2.id.clone();
1491        engine.store_memory(&mem2).unwrap();
1492
1493        let mut mem3 = Memory::new("content 3".to_string(), vec![0.3; 384]);
1494        mem3.set_namespace("user_456");
1495        engine.store_memory(&mem3).unwrap();
1496
1497        // List IDs by namespace
1498        let ids = engine.list_namespace_ids("user_123").unwrap();
1499        assert_eq!(ids.len(), 2);
1500        assert!(ids.contains(&id1));
1501        assert!(ids.contains(&id2));
1502
1503        let ids_456 = engine.list_namespace_ids("user_456").unwrap();
1504        assert_eq!(ids_456.len(), 1);
1505
1506        let ids_empty = engine.list_namespace_ids("nonexistent").unwrap();
1507        assert!(ids_empty.is_empty());
1508    }
1509
1510    #[test]
1511    fn test_storage_namespace_default() {
1512        let dir = tempdir().unwrap();
1513        let path = dir.path().join("test.mfdb");
1514        let engine = StorageEngine::open(&path).unwrap();
1515
1516        // Memory without explicit namespace
1517        let mem = Memory::new("default content".to_string(), vec![0.1; 384]);
1518        let id = mem.id.clone();
1519        engine.store_memory(&mem).unwrap();
1520
1521        // Should be in default namespace ""
1522        assert_eq!(engine.count_namespace("").unwrap(), 1);
1523
1524        let ids = engine.list_namespace_ids("").unwrap();
1525        assert_eq!(ids.len(), 1);
1526        assert_eq!(ids[0], id);
1527
1528        // Should not appear in list_namespaces (only non-default)
1529        let namespaces = engine.list_namespaces().unwrap();
1530        assert!(namespaces.is_empty());
1531    }
1532
1533    #[test]
1534    fn test_metadata_index_add_and_find() {
1535        let dir = tempdir().unwrap();
1536        let path = dir.path().join("test.mfdb");
1537        let engine = StorageEngine::open(&path).unwrap();
1538
1539        let mut mem = Memory::new("test content".to_string(), vec![0.1; 384]);
1540        mem.metadata.insert("type".to_string(), "event".to_string());
1541        mem.metadata
1542            .insert("priority".to_string(), "high".to_string());
1543        let id = mem.id.clone();
1544
1545        // Add to metadata index
1546        engine
1547            .add_to_metadata_index("type", "event", "", &id)
1548            .unwrap();
1549        engine
1550            .add_to_metadata_index("priority", "high", "", &id)
1551            .unwrap();
1552
1553        // Find by metadata
1554        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1555        assert_eq!(ids.len(), 1);
1556        assert_eq!(ids[0], id);
1557
1558        let ids = engine.find_by_metadata("priority", "high", "").unwrap();
1559        assert_eq!(ids.len(), 1);
1560        assert_eq!(ids[0], id);
1561
1562        // Non-existent value
1563        let ids = engine.find_by_metadata("type", "task", "").unwrap();
1564        assert!(ids.is_empty());
1565    }
1566
1567    #[test]
1568    fn test_metadata_index_multiple_memories() {
1569        let dir = tempdir().unwrap();
1570        let path = dir.path().join("test.mfdb");
1571        let engine = StorageEngine::open(&path).unwrap();
1572
1573        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1574        mem1.metadata
1575            .insert("type".to_string(), "event".to_string());
1576        let id1 = mem1.id.clone();
1577
1578        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1579        mem2.metadata
1580            .insert("type".to_string(), "event".to_string());
1581        let id2 = mem2.id.clone();
1582
1583        let mut mem3 = Memory::new("content 3".to_string(), vec![0.3; 384]);
1584        mem3.metadata.insert("type".to_string(), "task".to_string());
1585        let id3 = mem3.id.clone();
1586
1587        // Add to index
1588        engine
1589            .add_to_metadata_index("type", "event", "", &id1)
1590            .unwrap();
1591        engine
1592            .add_to_metadata_index("type", "event", "", &id2)
1593            .unwrap();
1594        engine
1595            .add_to_metadata_index("type", "task", "", &id3)
1596            .unwrap();
1597
1598        // Find all events
1599        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1600        assert_eq!(ids.len(), 2);
1601        assert!(ids.contains(&id1));
1602        assert!(ids.contains(&id2));
1603
1604        // Find all tasks
1605        let ids = engine.find_by_metadata("type", "task", "").unwrap();
1606        assert_eq!(ids.len(), 1);
1607        assert_eq!(ids[0], id3);
1608    }
1609
1610    #[test]
1611    fn test_metadata_index_with_namespace() {
1612        let dir = tempdir().unwrap();
1613        let path = dir.path().join("test.mfdb");
1614        let engine = StorageEngine::open(&path).unwrap();
1615
1616        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1617        mem1.set_namespace("user_123");
1618        mem1.metadata
1619            .insert("type".to_string(), "event".to_string());
1620        let id1 = mem1.id.clone();
1621
1622        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1623        mem2.set_namespace("user_456");
1624        mem2.metadata
1625            .insert("type".to_string(), "event".to_string());
1626        let id2 = mem2.id.clone();
1627
1628        // Add to index
1629        engine
1630            .add_to_metadata_index("type", "event", "user_123", &id1)
1631            .unwrap();
1632        engine
1633            .add_to_metadata_index("type", "event", "user_456", &id2)
1634            .unwrap();
1635
1636        // Find in each namespace
1637        let ids = engine
1638            .find_by_metadata("type", "event", "user_123")
1639            .unwrap();
1640        assert_eq!(ids.len(), 1);
1641        assert_eq!(ids[0], id1);
1642
1643        let ids = engine
1644            .find_by_metadata("type", "event", "user_456")
1645            .unwrap();
1646        assert_eq!(ids.len(), 1);
1647        assert_eq!(ids[0], id2);
1648
1649        // Not found in wrong namespace
1650        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1651        assert!(ids.is_empty());
1652    }
1653
1654    #[test]
1655    fn test_metadata_index_remove() {
1656        let dir = tempdir().unwrap();
1657        let path = dir.path().join("test.mfdb");
1658        let engine = StorageEngine::open(&path).unwrap();
1659
1660        let mut mem = Memory::new("test content".to_string(), vec![0.1; 384]);
1661        mem.metadata.insert("type".to_string(), "event".to_string());
1662        let id = mem.id.clone();
1663
1664        // Add to index
1665        engine
1666            .add_to_metadata_index("type", "event", "", &id)
1667            .unwrap();
1668
1669        // Verify it's there
1670        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1671        assert_eq!(ids.len(), 1);
1672
1673        // Remove from index
1674        engine
1675            .remove_from_metadata_index("type", "event", "", &id)
1676            .unwrap();
1677
1678        // Verify it's gone
1679        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1680        assert!(ids.is_empty());
1681    }
1682
1683    #[test]
1684    fn test_metadata_index_remove_one_of_many() {
1685        let dir = tempdir().unwrap();
1686        let path = dir.path().join("test.mfdb");
1687        let engine = StorageEngine::open(&path).unwrap();
1688
1689        let mut mem1 = Memory::new("content 1".to_string(), vec![0.1; 384]);
1690        mem1.metadata
1691            .insert("type".to_string(), "event".to_string());
1692        let id1 = mem1.id.clone();
1693
1694        let mut mem2 = Memory::new("content 2".to_string(), vec![0.2; 384]);
1695        mem2.metadata
1696            .insert("type".to_string(), "event".to_string());
1697        let id2 = mem2.id.clone();
1698
1699        // Add both to index
1700        engine
1701            .add_to_metadata_index("type", "event", "", &id1)
1702            .unwrap();
1703        engine
1704            .add_to_metadata_index("type", "event", "", &id2)
1705            .unwrap();
1706
1707        // Verify both are there
1708        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1709        assert_eq!(ids.len(), 2);
1710
1711        // Remove one
1712        engine
1713            .remove_from_metadata_index("type", "event", "", &id1)
1714            .unwrap();
1715
1716        // Verify only one remains
1717        let ids = engine.find_by_metadata("type", "event", "").unwrap();
1718        assert_eq!(ids.len(), 1);
1719        assert_eq!(ids[0], id2);
1720    }
1721
1722    #[test]
1723    fn test_metadata_index_remove_all_for_memory() {
1724        let dir = tempdir().unwrap();
1725        let path = dir.path().join("test.mfdb");
1726        let engine = StorageEngine::open(&path).unwrap();
1727
1728        let mut mem = Memory::new("test content".to_string(), vec![0.1; 384]);
1729        mem.metadata.insert("type".to_string(), "event".to_string());
1730        mem.metadata
1731            .insert("priority".to_string(), "high".to_string());
1732        mem.metadata
1733            .insert("category".to_string(), "work".to_string());
1734        let id = mem.id.clone();
1735
1736        // Add to indexes
1737        engine
1738            .add_to_metadata_index("type", "event", "", &id)
1739            .unwrap();
1740        engine
1741            .add_to_metadata_index("priority", "high", "", &id)
1742            .unwrap();
1743        engine
1744            .add_to_metadata_index("category", "work", "", &id)
1745            .unwrap();
1746
1747        // Verify all are indexed
1748        assert!(!engine
1749            .find_by_metadata("type", "event", "")
1750            .unwrap()
1751            .is_empty());
1752        assert!(!engine
1753            .find_by_metadata("priority", "high", "")
1754            .unwrap()
1755            .is_empty());
1756        assert!(!engine
1757            .find_by_metadata("category", "work", "")
1758            .unwrap()
1759            .is_empty());
1760
1761        // Remove all indexes for this memory
1762        let indexed_fields = vec![
1763            "type".to_string(),
1764            "priority".to_string(),
1765            "category".to_string(),
1766        ];
1767        engine
1768            .remove_metadata_indexes_for_memory(&mem, &indexed_fields)
1769            .unwrap();
1770
1771        // Verify all are removed
1772        assert!(engine
1773            .find_by_metadata("type", "event", "")
1774            .unwrap()
1775            .is_empty());
1776        assert!(engine
1777            .find_by_metadata("priority", "high", "")
1778            .unwrap()
1779            .is_empty());
1780        assert!(engine
1781            .find_by_metadata("category", "work", "")
1782            .unwrap()
1783            .is_empty());
1784    }
1785
1786    // Validation tests for Task 4: File header validation on open
1787
1788    #[test]
1789    fn test_truncated_file_detection() {
1790        let dir = tempdir().unwrap();
1791        let path = dir.path().join("truncated.mfdb");
1792
1793        // Create a tiny truncated file
1794        std::fs::write(&path, b"MF").unwrap();
1795
1796        // Should fail with FileTruncated error
1797        let result = StorageEngine::open(&path);
1798        assert!(result.is_err());
1799
1800        if let Err(err) = result {
1801            assert!(matches!(err, Error::FileTruncated(_)));
1802        }
1803    }
1804
1805    #[test]
1806    fn test_validate_database_integrity() {
1807        let dir = tempdir().unwrap();
1808        let path = dir.path().join("test.mfdb");
1809
1810        // Create a valid database
1811        let engine = StorageEngine::open(&path).unwrap();
1812
1813        // Validation should pass
1814        assert!(engine.validate_database().is_ok());
1815    }
1816
1817    #[test]
1818    fn test_validate_database_with_data() {
1819        let dir = tempdir().unwrap();
1820        let path = dir.path().join("test.mfdb");
1821
1822        // Create database with some data
1823        let engine = StorageEngine::open(&path).unwrap();
1824        let mem = Memory::new("test content".to_string(), vec![0.1; 384]);
1825        engine.store_memory(&mem).unwrap();
1826
1827        // Validation should still pass
1828        assert!(engine.validate_database().is_ok());
1829
1830        // Close and reopen
1831        drop(engine);
1832        let engine = StorageEngine::open(&path).unwrap();
1833
1834        // Validation should pass on reopen
1835        assert!(engine.validate_database().is_ok());
1836    }
1837
1838    #[test]
1839    fn test_open_validates_existing_database() {
1840        let dir = tempdir().unwrap();
1841        let path = dir.path().join("test.mfdb");
1842
1843        // Create a valid database
1844        {
1845            let _engine = StorageEngine::open(&path).unwrap();
1846        }
1847
1848        // Reopening should validate the database
1849        let engine = StorageEngine::open(&path).unwrap();
1850
1851        // Verify header is valid
1852        assert!(engine.validate_database().is_ok());
1853    }
1854
1855    // Entity Profile tests
1856
1857    #[test]
1858    fn test_entity_profile_store_and_retrieve() {
1859        use crate::types::{EntityFact, EntityId, EntityProfile};
1860
1861        let dir = tempdir().unwrap();
1862        let path = dir.path().join("test.mfdb");
1863        let engine = StorageEngine::open(&path).unwrap();
1864
1865        // Create a profile
1866        let entity_id = EntityId::new();
1867        let mut profile =
1868            EntityProfile::new(entity_id.clone(), "Alice".to_string(), "person".to_string());
1869
1870        // Add a fact
1871        let memory_id = MemoryId::new();
1872        profile.add_fact(EntityFact::new(
1873            "occupation",
1874            "engineer",
1875            0.9,
1876            memory_id.clone(),
1877        ));
1878        profile.add_source_memory(memory_id);
1879
1880        // Store
1881        engine.store_entity_profile(&profile).unwrap();
1882
1883        // Retrieve
1884        let retrieved = engine.get_entity_profile("Alice").unwrap();
1885        assert!(retrieved.is_some());
1886
1887        let retrieved = retrieved.unwrap();
1888        assert_eq!(retrieved.name, "Alice");
1889        assert_eq!(retrieved.entity_type, "person");
1890        assert_eq!(retrieved.facts.get("occupation").unwrap().len(), 1);
1891        assert_eq!(
1892            retrieved.facts.get("occupation").unwrap()[0].value,
1893            "engineer"
1894        );
1895    }
1896
1897    #[test]
1898    fn test_entity_profile_case_insensitive_lookup() {
1899        use crate::types::{EntityId, EntityProfile};
1900
1901        let dir = tempdir().unwrap();
1902        let path = dir.path().join("test.mfdb");
1903        let engine = StorageEngine::open(&path).unwrap();
1904
1905        let profile =
1906            EntityProfile::new(EntityId::new(), "Alice".to_string(), "person".to_string());
1907        engine.store_entity_profile(&profile).unwrap();
1908
1909        // Case-insensitive lookup
1910        assert!(engine.get_entity_profile("alice").unwrap().is_some());
1911        assert!(engine.get_entity_profile("ALICE").unwrap().is_some());
1912        assert!(engine.get_entity_profile("Alice").unwrap().is_some());
1913        assert!(engine.get_entity_profile("aLiCe").unwrap().is_some());
1914    }
1915
1916    #[test]
1917    fn test_entity_profile_not_found() {
1918        let dir = tempdir().unwrap();
1919        let path = dir.path().join("test.mfdb");
1920        let engine = StorageEngine::open(&path).unwrap();
1921
1922        let result = engine.get_entity_profile("Nonexistent").unwrap();
1923        assert!(result.is_none());
1924    }
1925
1926    #[test]
1927    fn test_entity_profile_list() {
1928        use crate::types::{EntityId, EntityProfile};
1929
1930        let dir = tempdir().unwrap();
1931        let path = dir.path().join("test.mfdb");
1932        let engine = StorageEngine::open(&path).unwrap();
1933
1934        // Create and store profiles
1935        let profile1 =
1936            EntityProfile::new(EntityId::new(), "Alice".to_string(), "person".to_string());
1937        let profile2 = EntityProfile::new(EntityId::new(), "Bob".to_string(), "person".to_string());
1938        let profile3 = EntityProfile::new(
1939            EntityId::new(),
1940            "Acme Corp".to_string(),
1941            "organization".to_string(),
1942        );
1943
1944        engine.store_entity_profile(&profile1).unwrap();
1945        engine.store_entity_profile(&profile2).unwrap();
1946        engine.store_entity_profile(&profile3).unwrap();
1947
1948        // List all
1949        let profiles = engine.list_entity_profiles().unwrap();
1950        assert_eq!(profiles.len(), 3);
1951
1952        let names: Vec<_> = profiles.iter().map(|p| p.name.as_str()).collect();
1953        assert!(names.contains(&"Alice"));
1954        assert!(names.contains(&"Bob"));
1955        assert!(names.contains(&"Acme Corp"));
1956    }
1957
1958    #[test]
1959    fn test_entity_profile_delete() {
1960        use crate::types::{EntityId, EntityProfile};
1961
1962        let dir = tempdir().unwrap();
1963        let path = dir.path().join("test.mfdb");
1964        let engine = StorageEngine::open(&path).unwrap();
1965
1966        let profile =
1967            EntityProfile::new(EntityId::new(), "Alice".to_string(), "person".to_string());
1968        engine.store_entity_profile(&profile).unwrap();
1969
1970        // Verify it exists
1971        assert!(engine.get_entity_profile("Alice").unwrap().is_some());
1972
1973        // Delete
1974        let deleted = engine.delete_entity_profile("Alice").unwrap();
1975        assert!(deleted);
1976
1977        // Verify it's gone
1978        assert!(engine.get_entity_profile("Alice").unwrap().is_none());
1979
1980        // Deleting non-existent returns false
1981        let deleted = engine.delete_entity_profile("Alice").unwrap();
1982        assert!(!deleted);
1983    }
1984
1985    #[test]
1986    fn test_entity_profile_count() {
1987        use crate::types::{EntityId, EntityProfile};
1988
1989        let dir = tempdir().unwrap();
1990        let path = dir.path().join("test.mfdb");
1991        let engine = StorageEngine::open(&path).unwrap();
1992
1993        assert_eq!(engine.count_entity_profiles().unwrap(), 0);
1994
1995        engine
1996            .store_entity_profile(&EntityProfile::new(
1997                EntityId::new(),
1998                "Alice".to_string(),
1999                "person".to_string(),
2000            ))
2001            .unwrap();
2002        assert_eq!(engine.count_entity_profiles().unwrap(), 1);
2003
2004        engine
2005            .store_entity_profile(&EntityProfile::new(
2006                EntityId::new(),
2007                "Bob".to_string(),
2008                "person".to_string(),
2009            ))
2010            .unwrap();
2011        assert_eq!(engine.count_entity_profiles().unwrap(), 2);
2012    }
2013
2014    #[test]
2015    fn test_entity_profile_update() {
2016        use crate::types::{EntityFact, EntityId, EntityProfile};
2017
2018        let dir = tempdir().unwrap();
2019        let path = dir.path().join("test.mfdb");
2020        let engine = StorageEngine::open(&path).unwrap();
2021
2022        // Create initial profile
2023        let entity_id = EntityId::new();
2024        let mut profile =
2025            EntityProfile::new(entity_id.clone(), "Alice".to_string(), "person".to_string());
2026        profile.add_fact(EntityFact::new(
2027            "occupation",
2028            "engineer",
2029            0.9,
2030            MemoryId::new(),
2031        ));
2032        engine.store_entity_profile(&profile).unwrap();
2033
2034        // Update profile with new fact
2035        profile.add_fact(EntityFact::new("skill", "Rust", 0.85, MemoryId::new()));
2036        engine.store_entity_profile(&profile).unwrap();
2037
2038        // Retrieve and verify update
2039        let retrieved = engine.get_entity_profile("Alice").unwrap().unwrap();
2040        assert_eq!(retrieved.facts.len(), 2);
2041        assert!(retrieved.facts.contains_key("occupation"));
2042        assert!(retrieved.facts.contains_key("skill"));
2043    }
2044
2045    #[test]
2046    fn test_entity_profile_persistence() {
2047        use crate::types::{EntityFact, EntityId, EntityProfile};
2048
2049        let dir = tempdir().unwrap();
2050        let path = dir.path().join("test.mfdb");
2051
2052        let entity_id = EntityId::new();
2053
2054        // Create and store profile
2055        {
2056            let engine = StorageEngine::open(&path).unwrap();
2057            let mut profile =
2058                EntityProfile::new(entity_id.clone(), "Alice".to_string(), "person".to_string());
2059            profile.add_fact(EntityFact::new(
2060                "occupation",
2061                "engineer",
2062                0.9,
2063                MemoryId::new(),
2064            ));
2065            engine.store_entity_profile(&profile).unwrap();
2066        }
2067
2068        // Reopen and verify
2069        {
2070            let engine = StorageEngine::open(&path).unwrap();
2071            let retrieved = engine.get_entity_profile("Alice").unwrap().unwrap();
2072            assert_eq!(retrieved.name, "Alice");
2073            assert_eq!(
2074                retrieved.facts.get("occupation").unwrap()[0].value,
2075                "engineer"
2076            );
2077        }
2078    }
2079}