1use crate::error::Result;
8use crate::memvid::lifecycle::Memvid;
9use crate::types::{
10 Cardinality, EntityKind, FrameId, MemoriesStats, MemoriesTrack, MemoryCard, MemoryCardId,
11 PredicateSchema, SchemaError, SchemaRegistry,
12};
13use serde::Serialize;
14
/// One row of the report produced by [`Memvid::schema_summary`], describing a
/// single predicate (card slot) observed in the stored memory cards.
#[derive(Debug, Clone, Serialize)]
pub struct SchemaSummaryEntry {
    /// Predicate identifier; filled from the inferred schema's `id`.
    pub predicate: String,
    /// Text returned by `description()` on the inferred schema's range.
    pub inferred_type: String,
    /// Cardinality carried by the inferred schema.
    pub cardinality: Cardinality,
    /// Number of distinct entities that have at least one card for this predicate.
    pub entity_count: usize,
    /// Total number of cards seen for this predicate (repeated values included).
    pub value_count: usize,
    /// Number of distinct card values seen for this predicate.
    pub unique_values: usize,
    /// `builtin` flag of the schema registered under the same id, or `false`
    /// when the registry has no schema with that id.
    pub is_builtin: bool,
}
33
/// Per-predicate accumulator used while building the schema summary.
///
/// `Default` yields an empty accumulator (zero counts, empty sets), so a fresh
/// value can be obtained with `PredicateStats::default()` or through a map
/// entry's `or_default()` instead of spelling out every field.
#[derive(Debug, Default)]
struct PredicateStats {
    /// Placeholder that is only ever initialised to zero; the distinct-entity
    /// count is derived from `entities.len()` instead.
    _entity_count: usize,
    /// Number of cards recorded for the predicate.
    value_count: usize,
    /// Distinct card values recorded for the predicate.
    unique_values: std::collections::HashSet<String>,
    /// Distinct entity names that carry the predicate.
    entities: std::collections::HashSet<String>,
}
41
42impl Memvid {
    /// Returns a shared reference to the memories track held by this instance.
    #[must_use]
    pub fn memories(&self) -> &MemoriesTrack {
        &self.memories_track
    }
51
    /// Returns a mutable reference to the memories track.
    ///
    /// The instance is flagged dirty before the reference is handed out,
    /// whether or not the caller goes on to modify the track.
    pub fn memories_mut(&mut self) -> &mut MemoriesTrack {
        // Flag eagerly: once the reference escapes, this method cannot observe
        // whether a change was actually made.
        self.dirty = true;
        &mut self.memories_track
    }
60
61 pub fn put_memory_card(&mut self, card: MemoryCard) -> Result<MemoryCardId> {
79 if let Err(e) = self.validate_card(&card) {
81 if self.schema_strict {
82 return Err(crate::error::MemvidError::SchemaValidation {
83 reason: e.to_string(),
84 });
85 }
86 tracing::warn!(
88 entity = %card.entity,
89 slot = %card.slot,
90 value = %card.value,
91 error = %e,
92 "Schema validation warning"
93 );
94 }
95
96 self.dirty = true;
97 let id = self.memories_track.add_card(card);
98 Ok(id)
99 }
100
101 pub fn put_memory_cards(&mut self, cards: Vec<MemoryCard>) -> Result<Vec<MemoryCardId>> {
119 let validation_errors = self.validate_cards(&cards);
121
122 if !validation_errors.is_empty() {
123 if self.schema_strict {
124 let errors: Vec<String> = validation_errors
126 .iter()
127 .map(|(i, e)| format!("Card {}: {}", i, e))
128 .collect();
129 return Err(crate::error::MemvidError::SchemaValidation {
130 reason: format!(
131 "{} cards failed validation: {}",
132 errors.len(),
133 errors.join("; ")
134 ),
135 });
136 }
137
138 for (i, e) in &validation_errors {
140 let card = &cards[*i];
141 tracing::warn!(
142 index = i,
143 entity = %card.entity,
144 slot = %card.slot,
145 value = %card.value,
146 error = %e,
147 "Schema validation warning"
148 );
149 }
150 }
151
152 self.dirty = true;
153 let ids = self.memories_track.add_cards(cards);
154 Ok(ids)
155 }
156
    /// Records that `frame_id` was processed by the engine identified by
    /// `engine_kind` / `engine_version`, together with the ids of the cards
    /// that run produced.
    ///
    /// Marks the instance dirty and forwards the record to the memories track.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok(())`.
    pub fn record_enrichment(
        &mut self,
        frame_id: FrameId,
        engine_kind: &str,
        engine_version: &str,
        card_ids: Vec<MemoryCardId>,
    ) -> Result<()> {
        self.dirty = true;
        self.memories_track
            .record_enrichment(frame_id, engine_kind, engine_version, card_ids);
        Ok(())
    }
179
180 #[must_use]
189 pub fn get_unenriched_frames(&self, engine_kind: &str, engine_version: &str) -> Vec<FrameId> {
190 (0..self.toc.frames.len() as FrameId)
191 .filter(|id| {
192 self.memories_track.enrichment_manifest().needs_enrichment(
193 *id,
194 engine_kind,
195 engine_version,
196 )
197 })
198 .collect()
199 }
200
    /// Reports whether the memories track considers `frame_id` already
    /// enriched by the engine identified by `engine_kind` / `engine_version`.
    ///
    /// Delegates to the track's `is_enriched_by`.
    #[must_use]
    pub fn is_frame_enriched(
        &self,
        frame_id: FrameId,
        engine_kind: &str,
        engine_version: &str,
    ) -> bool {
        self.memories_track
            .is_enriched_by(frame_id, engine_kind, engine_version)
    }
212
    /// Returns the card the memories track reports as current for `slot` on
    /// `entity`, if any.
    ///
    /// Delegates to the track's `get_current`; which card counts as "current"
    /// is decided there.
    #[must_use]
    pub fn get_current_memory(&self, entity: &str, slot: &str) -> Option<&MemoryCard> {
        self.memories_track.get_current(entity, slot)
    }
225
    /// Returns the card the memories track selects for `slot` on `entity` at
    /// `timestamp`, if any.
    ///
    /// Delegates to the track's `get_at_time`.
    /// NOTE(review): the unit and epoch of `timestamp` are not visible from
    /// this file — confirm against the track before relying on them.
    #[must_use]
    pub fn get_memory_at_time(
        &self,
        entity: &str,
        slot: &str,
        timestamp: i64,
    ) -> Option<&MemoryCard> {
        self.memories_track.get_at_time(entity, slot, timestamp)
    }
244
    /// Returns references to the cards the memories track holds for `entity`.
    ///
    /// Delegates to the track's `get_entity_cards`; ordering is whatever the
    /// track returns.
    #[must_use]
    pub fn get_entity_memories(&self, entity: &str) -> Vec<&MemoryCard> {
        self.memories_track.get_entity_cards(entity)
    }
256
    /// Returns the values the memories track aggregates for `slot` on `entity`.
    ///
    /// Delegates to the track's `aggregate_slot`; de-duplication and ordering
    /// rules, if any, are defined there.
    #[must_use]
    pub fn aggregate_memory_slot(&self, entity: &str, slot: &str) -> Vec<String> {
        self.memories_track.aggregate_slot(entity, slot)
    }
272
    /// Counts the occurrences the memories track reports for `slot` on
    /// `entity`, optionally narrowed by `value_filter`.
    ///
    /// Delegates to the track's `count_occurrences`; how `value_filter` is
    /// matched against card values is decided there.
    #[must_use]
    pub fn count_memory_occurrences(
        &self,
        entity: &str,
        slot: &str,
        value_filter: Option<&str>,
    ) -> usize {
        self.memories_track
            .count_occurrences(entity, slot, value_filter)
    }
294
    /// Returns the timeline of cards the memories track builds for `entity`.
    ///
    /// Delegates to the track's `get_timeline`; ordering is defined there.
    #[must_use]
    pub fn get_memory_timeline(&self, entity: &str) -> Vec<&MemoryCard> {
        self.memories_track.get_timeline(entity)
    }
308
    /// Returns the cards the memories track classifies as preferences of
    /// `entity`.
    ///
    /// Delegates to the track's `get_preferences`.
    #[must_use]
    pub fn get_preferences(&self, entity: &str) -> Vec<&MemoryCard> {
        self.memories_track.get_preferences(entity)
    }
314
    /// Returns the statistics snapshot computed by the memories track.
    #[must_use]
    pub fn memories_stats(&self) -> MemoriesStats {
        self.memories_track.stats()
    }
320
    /// Returns the number of cards reported by the memories track.
    #[must_use]
    pub fn memory_card_count(&self) -> usize {
        self.memories_track.card_count()
    }
326
    /// Returns the entity names known to the memories track, as owned strings.
    #[must_use]
    pub fn memory_entities(&self) -> Vec<String> {
        self.memories_track.entities()
    }
332
    /// Clears the memories track and marks the instance dirty.
    pub fn clear_memories(&mut self) {
        self.dirty = true;
        self.memories_track.clear();
    }
340
    /// Returns a shared reference to the schema registry used for card
    /// validation and schema inference.
    #[must_use]
    pub fn schema_registry(&self) -> &SchemaRegistry {
        &self.schema_registry
    }
350
    /// Returns a mutable reference to the schema registry.
    ///
    /// NOTE(review): unlike `memories_mut`, this does not set the dirty flag —
    /// confirm that is intended for registry changes.
    pub fn schema_registry_mut(&mut self) -> &mut SchemaRegistry {
        &mut self.schema_registry
    }
357
    /// Enables or disables strict schema mode.
    ///
    /// In strict mode `put_memory_card` and `put_memory_cards` return an error
    /// for cards that fail validation; otherwise they log a warning and store
    /// the cards anyway.
    pub fn set_schema_strict(&mut self, strict: bool) {
        self.schema_strict = strict;
    }
370
    /// Returns `true` when strict schema mode is enabled, i.e. when cards that
    /// fail validation are rejected instead of merely logged.
    #[must_use]
    pub fn is_schema_strict(&self) -> bool {
        self.schema_strict
    }
376
    /// Registers `schema` with the schema registry.
    ///
    /// Forwards to the registry's `register`; how an already-registered id is
    /// handled is decided there. The dirty flag is not touched.
    pub fn register_schema(&mut self, schema: PredicateSchema) {
        self.schema_registry.register(schema);
    }
384
385 pub fn validate_card(&self, card: &MemoryCard) -> std::result::Result<(), SchemaError> {
393 let entity_kind = match card.kind {
395 crate::types::MemoryKind::Fact
396 | crate::types::MemoryKind::Preference
397 | crate::types::MemoryKind::Profile
398 | crate::types::MemoryKind::Relationship => Some(EntityKind::Person),
399 crate::types::MemoryKind::Event
401 | crate::types::MemoryKind::Goal
402 | crate::types::MemoryKind::Other => None,
403 };
404
405 self.schema_registry
406 .validate(&card.slot, &card.value, entity_kind)
407 }
408
409 pub fn validate_cards(&self, cards: &[MemoryCard]) -> Vec<(usize, SchemaError)> {
417 cards
418 .iter()
419 .enumerate()
420 .filter_map(|(i, card)| self.validate_card(card).err().map(|e| (i, e)))
421 .collect()
422 }
423
424 #[must_use]
433 pub fn infer_schemas(&self) -> Vec<PredicateSchema> {
434 use std::collections::HashMap;
435
436 let mut predicate_values: HashMap<String, HashMap<String, Vec<String>>> = HashMap::new();
438
439 for entity in self.memories_track.entities() {
440 for card in self.memories_track.get_entity_cards(&entity) {
441 predicate_values
442 .entry(card.slot.clone())
443 .or_default()
444 .entry(card.entity.clone())
445 .or_default()
446 .push(card.value.clone());
447 }
448 }
449
450 let mut schemas: Vec<PredicateSchema> = Vec::new();
452
453 for (predicate, entity_values) in predicate_values {
454 let all_values: Vec<&str> = entity_values
456 .values()
457 .flatten()
458 .map(|s| s.as_str())
459 .collect();
460
461 let mut schema = self.schema_registry.infer_from_values(&predicate, &all_values);
463
464 let has_multiple = entity_values.values().any(|vals| vals.len() > 1);
466 if has_multiple {
467 schema.cardinality = crate::types::Cardinality::Multiple;
468 }
469
470 schemas.push(schema);
475 }
476
477 schemas.sort_by(|a, b| a.id.cmp(&b.id));
479 schemas
480 }
481
482 pub fn register_inferred_schemas(&mut self, overwrite: bool) -> usize {
493 let inferred = self.infer_schemas();
494 let mut count = 0;
495
496 for schema in inferred {
497 if overwrite || !self.schema_registry.contains(&schema.id) {
498 self.schema_registry.register(schema);
499 count += 1;
500 }
501 }
502
503 count
504 }
505
506 #[must_use]
510 pub fn schema_summary(&self) -> Vec<SchemaSummaryEntry> {
511 use std::collections::HashMap;
512
513 let mut predicate_stats: HashMap<String, PredicateStats> = HashMap::new();
515
516 for entity in self.memories_track.entities() {
517 for card in self.memories_track.get_entity_cards(&entity) {
518 let stats = predicate_stats
519 .entry(card.slot.clone())
520 .or_insert_with(|| PredicateStats {
521 _entity_count: 0,
522 value_count: 0,
523 unique_values: std::collections::HashSet::new(),
524 entities: std::collections::HashSet::new(),
525 });
526
527 stats.value_count += 1;
528 stats.unique_values.insert(card.value.clone());
529 stats.entities.insert(card.entity.clone());
530 }
531 }
532
533 let inferred = self.infer_schemas();
535 let mut entries: Vec<SchemaSummaryEntry> = inferred
536 .into_iter()
537 .map(|schema| {
538 let stats = predicate_stats.get(&schema.id);
539 let (entity_count, value_count, unique_values) = stats
540 .map(|s| (s.entities.len(), s.value_count, s.unique_values.len()))
541 .unwrap_or((0, 0, 0));
542
543 let is_builtin = self
545 .schema_registry
546 .get(&schema.id)
547 .map(|s| s.builtin)
548 .unwrap_or(false);
549
550 SchemaSummaryEntry {
551 predicate: schema.id.clone(),
552 inferred_type: schema.range.description(),
553 cardinality: schema.cardinality,
554 entity_count,
555 value_count,
556 unique_values,
557 is_builtin,
558 }
559 })
560 .collect();
561
562 entries.sort_by(|a, b| a.predicate.cmp(&b.predicate));
563 entries
564 }
565
566 pub fn run_enrichment(
580 &mut self,
581 engine: &dyn crate::enrich::EnrichmentEngine,
582 ) -> Result<(usize, usize)> {
583 use crate::enrich::EnrichmentContext;
584
585 let unenriched = self.get_unenriched_frames(engine.kind(), engine.version());
586 let mut frames_processed = 0;
587 let mut total_cards = 0;
588
589 for frame_id in unenriched {
590 let Some(frame) = self.toc.frames.get(frame_id as usize) else {
592 continue;
593 };
594 let frame = frame.clone();
595
596 let text = match self.frame_content(&frame) {
598 Ok(t) => t,
599 Err(_) => continue,
600 };
601
602 let uri = frame
604 .uri
605 .clone()
606 .unwrap_or_else(|| crate::default_uri(frame_id));
607 let metadata_json = frame
608 .metadata
609 .as_ref()
610 .and_then(|m| serde_json::to_string(m).ok());
611 let ctx = EnrichmentContext::new(
612 frame_id,
613 uri,
614 text,
615 frame.title.clone(),
616 frame.timestamp,
617 metadata_json,
618 );
619
620 let result = engine.enrich(&ctx);
622
623 if result.success {
624 let cards = result.cards;
625 let card_count = cards.len();
626
627 let card_ids = if !cards.is_empty() {
629 self.put_memory_cards(cards)?
630 } else {
631 Vec::new()
632 };
633
634 self.record_enrichment(frame_id, engine.kind(), engine.version(), card_ids)?;
636
637 total_cards += card_count;
638 }
639
640 frames_processed += 1;
641 }
642
643 Ok((frames_processed, total_cards))
644 }
645}
646
647#[cfg(test)]
648mod tests {
649 use super::*;
650 use crate::types::MemoryCardBuilder;
651 use tempfile::NamedTempFile;
652
653 #[test]
654 fn test_put_and_get_memory_card() {
655 let temp = NamedTempFile::new().unwrap();
656 let path = temp.path();
657 std::fs::remove_file(path).ok();
658
659 let mut memvid = Memvid::create(path).unwrap();
660
661 let card = MemoryCardBuilder::new()
662 .fact()
663 .entity("user")
664 .slot("employer")
665 .value("Anthropic")
666 .source(0, Some("mv2://test".to_string()))
667 .engine("test", "1.0.0")
668 .build(0)
669 .unwrap();
670
671 let id = memvid.put_memory_card(card).unwrap();
672
673 let current = memvid.get_current_memory("user", "employer");
674 assert!(current.is_some());
675 assert_eq!(current.unwrap().value, "Anthropic");
676 assert_eq!(current.unwrap().id, id);
677 }
678
679 #[test]
680 fn test_enrichment_tracking() {
681 let temp = NamedTempFile::new().unwrap();
682 let path = temp.path();
683 std::fs::remove_file(path).ok();
684
685 let mut memvid = Memvid::create(path).unwrap();
686
687 assert!(!memvid.is_frame_enriched(1, "rules-v1", "1.0.0"));
689
690 memvid
692 .record_enrichment(1, "rules-v1", "1.0.0", vec![0, 1])
693 .unwrap();
694
695 assert!(memvid.is_frame_enriched(1, "rules-v1", "1.0.0"));
697
698 assert!(!memvid.is_frame_enriched(1, "llm:phi-3.5-mini", "1.0.0"));
700 }
701
702 #[test]
703 fn test_memory_stats() {
704 let temp = NamedTempFile::new().unwrap();
705 let path = temp.path();
706 std::fs::remove_file(path).ok();
707
708 let mut memvid = Memvid::create(path).unwrap();
709
710 for slot in ["employer", "location", "hobby"] {
712 let card = MemoryCardBuilder::new()
713 .fact()
714 .entity("user")
715 .slot(slot)
716 .value("test")
717 .source(0, None)
718 .engine("test", "1.0.0")
719 .build(0)
720 .unwrap();
721 memvid.put_memory_card(card).unwrap();
722 }
723
724 let stats = memvid.memories_stats();
725 assert_eq!(stats.card_count, 3);
726 assert_eq!(stats.entity_count, 1);
727 }
728
729 #[test]
730 fn test_run_enrichment() {
731 use crate::enrich::RulesEngine;
732 use crate::PutOptions;
733
734 let temp = NamedTempFile::new().unwrap();
735 let path = temp.path();
736 std::fs::remove_file(path).ok();
737
738 let mut memvid = Memvid::create(path).unwrap();
739
740 let opts = PutOptions::builder().extract_triplets(false).build();
742 memvid
743 .put_bytes_with_options(b"Hello! I work at Anthropic.", opts.clone())
744 .unwrap();
745 memvid
746 .put_bytes_with_options(b"I live in San Francisco.", opts.clone())
747 .unwrap();
748 memvid
749 .put_bytes_with_options(b"The weather is nice today.", opts)
750 .unwrap();
751 memvid.commit().unwrap();
752
753 let engine = RulesEngine::new();
755 let (frames, cards) = memvid.run_enrichment(&engine).unwrap();
756
757 assert_eq!(frames, 3);
758 assert_eq!(cards, 2); let employer = memvid.get_current_memory("user", "employer");
762 assert!(employer.is_some());
763 assert_eq!(employer.unwrap().value, "Anthropic");
764
765 let location = memvid.get_current_memory("user", "location");
766 assert!(location.is_some());
767 assert_eq!(location.unwrap().value, "San Francisco");
768
769 let (frames2, cards2) = memvid.run_enrichment(&engine).unwrap();
771 assert_eq!(frames2, 0);
772 assert_eq!(cards2, 0);
773 }
774
775 #[test]
776 fn test_schema_validation_strict() {
777 let temp = NamedTempFile::new().unwrap();
778 let path = temp.path();
779 std::fs::remove_file(path).ok();
780
781 let mut memvid = Memvid::create(path).unwrap();
782
783 memvid.set_schema_strict(true);
785
786 let valid_card = MemoryCardBuilder::new()
788 .fact()
789 .entity("user")
790 .slot("age")
791 .value("25") .source(0, None)
793 .engine("test", "1.0.0")
794 .build(0)
795 .unwrap();
796
797 assert!(memvid.put_memory_card(valid_card).is_ok());
798
799 let invalid_card = MemoryCardBuilder::new()
801 .fact()
802 .entity("user")
803 .slot("age")
804 .value("twenty-five") .source(0, None)
806 .engine("test", "1.0.0")
807 .build(0)
808 .unwrap();
809
810 let result = memvid.put_memory_card(invalid_card);
811 assert!(result.is_err());
812 }
813
814 #[test]
815 fn test_schema_validation_non_strict() {
816 let temp = NamedTempFile::new().unwrap();
817 let path = temp.path();
818 std::fs::remove_file(path).ok();
819
820 let mut memvid = Memvid::create(path).unwrap();
821
822 assert!(!memvid.is_schema_strict());
824
825 let invalid_card = MemoryCardBuilder::new()
827 .fact()
828 .entity("user")
829 .slot("age")
830 .value("twenty-five") .source(0, None)
832 .engine("test", "1.0.0")
833 .build(0)
834 .unwrap();
835
836 let result = memvid.put_memory_card(invalid_card);
838 assert!(result.is_ok());
839
840 let cards = memvid.get_entity_memories("user");
842 assert_eq!(cards.len(), 1);
843 }
844
845 #[test]
846 fn test_schema_registry_custom() {
847 use crate::types::{PredicateSchema, ValueType};
848
849 let temp = NamedTempFile::new().unwrap();
850 let path = temp.path();
851 std::fs::remove_file(path).ok();
852
853 let mut memvid = Memvid::create(path).unwrap();
854 memvid.set_schema_strict(true);
855
856 let status_schema = PredicateSchema::new("status", "Status")
858 .with_range(ValueType::Enum {
859 values: vec!["active".to_string(), "inactive".to_string()],
860 });
861 memvid.register_schema(status_schema);
862
863 let valid_card = MemoryCardBuilder::new()
865 .fact()
866 .entity("user")
867 .slot("status")
868 .value("active")
869 .source(0, None)
870 .engine("test", "1.0.0")
871 .build(0)
872 .unwrap();
873
874 assert!(memvid.put_memory_card(valid_card).is_ok());
875
876 let invalid_card = MemoryCardBuilder::new()
878 .fact()
879 .entity("user")
880 .slot("status")
881 .value("pending") .source(0, None)
883 .engine("test", "1.0.0")
884 .build(0)
885 .unwrap();
886
887 assert!(memvid.put_memory_card(invalid_card).is_err());
888 }
889}