1use crate::error::Result;
8use crate::memvid::lifecycle::Memvid;
9use crate::types::{
10 Cardinality, EntityKind, FrameId, MemoriesStats, MemoriesTrack, MemoryCard, MemoryCardId,
11 PredicateSchema, SchemaError, SchemaRegistry,
12};
13use serde::Serialize;
14
/// One row of the report built by `Memvid::schema_summary`: usage statistics
/// and the inferred schema for a single predicate (memory-card slot).
#[derive(Debug, Clone, Serialize)]
pub struct SchemaSummaryEntry {
    /// Predicate identifier; `schema_summary` copies it from the inferred schema's `id`.
    pub predicate: String,
    /// Textual description of the inferred value range (`schema.range.description()`).
    pub inferred_type: String,
    /// Cardinality carried by the inferred schema.
    pub cardinality: Cardinality,
    /// Number of distinct entities with at least one card for this predicate.
    pub entity_count: usize,
    /// Number of cards visited for this predicate, repeated values included.
    pub value_count: usize,
    /// Number of distinct values seen for this predicate.
    pub unique_values: usize,
    /// `true` when the schema registered under this predicate id has its `builtin`
    /// flag set; `false` otherwise, including when nothing is registered.
    pub is_builtin: bool,
}
33
/// Per-predicate tallies accumulated while `Memvid::schema_summary` walks the cards.
struct PredicateStats {
    // Initialised to 0 and never read in this file; the reported entity count
    // is taken from `entities.len()` instead.
    _entity_count: usize,
    // Incremented once per visited card.
    value_count: usize,
    // Distinct card values seen for the predicate.
    unique_values: std::collections::HashSet<String>,
    // Distinct entity names that own a card for the predicate.
    entities: std::collections::HashSet<String>,
}
41
42impl Memvid {
/// Returns a shared reference to the memories track held by this instance.
#[must_use]
pub fn memories(&self) -> &MemoriesTrack {
    &self.memories_track
}
51
/// Returns a mutable reference to the memories track.
///
/// The `dirty` flag is set before the reference is handed out, whether or not
/// the caller ends up modifying the track.
pub fn memories_mut(&mut self) -> &mut MemoriesTrack {
    // Set unconditionally: what the caller does with the reference cannot be observed here.
    self.dirty = true;
    &mut self.memories_track
}
60
61 pub fn put_memory_card(&mut self, card: MemoryCard) -> Result<MemoryCardId> {
79 if let Err(e) = self.validate_card(&card) {
81 if self.schema_strict {
82 return Err(crate::error::MemvidError::SchemaValidation {
83 reason: e.to_string(),
84 });
85 }
86 tracing::warn!(
88 entity = %card.entity,
89 slot = %card.slot,
90 value = %card.value,
91 error = %e,
92 "Schema validation warning"
93 );
94 }
95
96 self.dirty = true;
97 let id = self.memories_track.add_card(card);
98 Ok(id)
99 }
100
101 pub fn put_memory_cards(&mut self, cards: Vec<MemoryCard>) -> Result<Vec<MemoryCardId>> {
119 let validation_errors = self.validate_cards(&cards);
121
122 if !validation_errors.is_empty() {
123 if self.schema_strict {
124 let errors: Vec<String> = validation_errors
126 .iter()
127 .map(|(i, e)| format!("Card {i}: {e}"))
128 .collect();
129 return Err(crate::error::MemvidError::SchemaValidation {
130 reason: format!(
131 "{} cards failed validation: {}",
132 errors.len(),
133 errors.join("; ")
134 ),
135 });
136 }
137
138 for (i, e) in &validation_errors {
140 let card = &cards[*i];
141 tracing::warn!(
142 index = i,
143 entity = %card.entity,
144 slot = %card.slot,
145 value = %card.value,
146 error = %e,
147 "Schema validation warning"
148 );
149 }
150 }
151
152 self.dirty = true;
153 let ids = self.memories_track.add_cards(cards);
154 Ok(ids)
155 }
156
157 pub fn record_enrichment(
168 &mut self,
169 frame_id: FrameId,
170 engine_kind: &str,
171 engine_version: &str,
172 card_ids: Vec<MemoryCardId>,
173 ) -> Result<()> {
174 self.dirty = true;
175 self.memories_track
176 .record_enrichment(frame_id, engine_kind, engine_version, card_ids);
177 Ok(())
178 }
179
180 #[must_use]
189 pub fn get_unenriched_frames(&self, engine_kind: &str, engine_version: &str) -> Vec<FrameId> {
190 (0..self.toc.frames.len() as FrameId)
191 .filter(|id| {
192 self.memories_track.enrichment_manifest().needs_enrichment(
193 *id,
194 engine_kind,
195 engine_version,
196 )
197 })
198 .collect()
199 }
200
201 #[must_use]
203 pub fn is_frame_enriched(
204 &self,
205 frame_id: FrameId,
206 engine_kind: &str,
207 engine_version: &str,
208 ) -> bool {
209 self.memories_track
210 .is_enriched_by(frame_id, engine_kind, engine_version)
211 }
212
213 #[must_use]
222 pub fn get_current_memory(&self, entity: &str, slot: &str) -> Option<&MemoryCard> {
223 self.memories_track.get_current(entity, slot)
224 }
225
226 #[must_use]
236 pub fn get_memory_at_time(
237 &self,
238 entity: &str,
239 slot: &str,
240 timestamp: i64,
241 ) -> Option<&MemoryCard> {
242 self.memories_track.get_at_time(entity, slot, timestamp)
243 }
244
245 #[must_use]
253 pub fn get_entity_memories(&self, entity: &str) -> Vec<&MemoryCard> {
254 self.memories_track.get_entity_cards(entity)
255 }
256
257 #[must_use]
269 pub fn aggregate_memory_slot(&self, entity: &str, slot: &str) -> Vec<String> {
270 self.memories_track.aggregate_slot(entity, slot)
271 }
272
273 #[must_use]
285 pub fn count_memory_occurrences(
286 &self,
287 entity: &str,
288 slot: &str,
289 value_filter: Option<&str>,
290 ) -> usize {
291 self.memories_track
292 .count_occurrences(entity, slot, value_filter)
293 }
294
295 #[must_use]
305 pub fn get_memory_timeline(&self, entity: &str) -> Vec<&MemoryCard> {
306 self.memories_track.get_timeline(entity)
307 }
308
309 #[must_use]
311 pub fn get_preferences(&self, entity: &str) -> Vec<&MemoryCard> {
312 self.memories_track.get_preferences(entity)
313 }
314
315 #[must_use]
317 pub fn memories_stats(&self) -> MemoriesStats {
318 self.memories_track.stats()
319 }
320
321 #[must_use]
323 pub fn memory_card_count(&self) -> usize {
324 self.memories_track.card_count()
325 }
326
327 #[must_use]
329 pub fn memory_entities(&self) -> Vec<String> {
330 self.memories_track.entities()
331 }
332
333 pub fn clear_memories(&mut self) {
337 self.dirty = true;
338 self.memories_track.clear();
339 }
340
/// Returns a shared reference to the schema registry used for card validation.
#[must_use]
pub fn schema_registry(&self) -> &SchemaRegistry {
    &self.schema_registry
}
350
/// Returns a mutable reference to the schema registry.
///
/// Unlike `memories_mut`, this does not touch the `dirty` flag.
pub fn schema_registry_mut(&mut self) -> &mut SchemaRegistry {
    &mut self.schema_registry
}
357
/// Enables or disables strict schema mode.
///
/// In strict mode `put_memory_card` and `put_memory_cards` return an error
/// when validation fails; otherwise they only log a warning.
pub fn set_schema_strict(&mut self, strict: bool) {
    self.schema_strict = strict;
}
370
/// Returns `true` when strict schema mode is enabled.
#[must_use]
pub fn is_schema_strict(&self) -> bool {
    self.schema_strict
}
376
377 pub fn register_schema(&mut self, schema: PredicateSchema) {
382 self.schema_registry.register(schema);
383 }
384
385 pub fn validate_card(&self, card: &MemoryCard) -> std::result::Result<(), SchemaError> {
393 let entity_kind = match card.kind {
395 crate::types::MemoryKind::Fact
396 | crate::types::MemoryKind::Preference
397 | crate::types::MemoryKind::Profile
398 | crate::types::MemoryKind::Relationship => Some(EntityKind::Person),
399 crate::types::MemoryKind::Event
401 | crate::types::MemoryKind::Goal
402 | crate::types::MemoryKind::Other => None,
403 };
404
405 self.schema_registry
406 .validate(&card.slot, &card.value, entity_kind)
407 }
408
409 #[must_use]
417 pub fn validate_cards(&self, cards: &[MemoryCard]) -> Vec<(usize, SchemaError)> {
418 cards
419 .iter()
420 .enumerate()
421 .filter_map(|(i, card)| self.validate_card(card).err().map(|e| (i, e)))
422 .collect()
423 }
424
425 #[must_use]
434 pub fn infer_schemas(&self) -> Vec<PredicateSchema> {
435 use std::collections::HashMap;
436
437 let mut predicate_values: HashMap<String, HashMap<String, Vec<String>>> = HashMap::new();
439
440 for entity in self.memories_track.entities() {
441 for card in self.memories_track.get_entity_cards(&entity) {
442 predicate_values
443 .entry(card.slot.clone())
444 .or_default()
445 .entry(card.entity.clone())
446 .or_default()
447 .push(card.value.clone());
448 }
449 }
450
451 let mut schemas: Vec<PredicateSchema> = Vec::new();
453
454 for (predicate, entity_values) in predicate_values {
455 let all_values: Vec<&str> = entity_values
457 .values()
458 .flatten()
459 .map(std::string::String::as_str)
460 .collect();
461
462 let mut schema = self
464 .schema_registry
465 .infer_from_values(&predicate, &all_values);
466
467 let has_multiple = entity_values.values().any(|vals| vals.len() > 1);
469 if has_multiple {
470 schema.cardinality = crate::types::Cardinality::Multiple;
471 }
472
473 schemas.push(schema);
478 }
479
480 schemas.sort_by(|a, b| a.id.cmp(&b.id));
482 schemas
483 }
484
485 pub fn register_inferred_schemas(&mut self, overwrite: bool) -> usize {
496 let inferred = self.infer_schemas();
497 let mut count = 0;
498
499 for schema in inferred {
500 if overwrite || !self.schema_registry.contains(&schema.id) {
501 self.schema_registry.register(schema);
502 count += 1;
503 }
504 }
505
506 count
507 }
508
509 #[must_use]
513 pub fn schema_summary(&self) -> Vec<SchemaSummaryEntry> {
514 use std::collections::HashMap;
515
516 let mut predicate_stats: HashMap<String, PredicateStats> = HashMap::new();
518
519 for entity in self.memories_track.entities() {
520 for card in self.memories_track.get_entity_cards(&entity) {
521 let stats =
522 predicate_stats
523 .entry(card.slot.clone())
524 .or_insert_with(|| PredicateStats {
525 _entity_count: 0,
526 value_count: 0,
527 unique_values: std::collections::HashSet::new(),
528 entities: std::collections::HashSet::new(),
529 });
530
531 stats.value_count += 1;
532 stats.unique_values.insert(card.value.clone());
533 stats.entities.insert(card.entity.clone());
534 }
535 }
536
537 let inferred = self.infer_schemas();
539 let mut entries: Vec<SchemaSummaryEntry> = inferred
540 .into_iter()
541 .map(|schema| {
542 let stats = predicate_stats.get(&schema.id);
543 let (entity_count, value_count, unique_values) = stats.map_or((0, 0, 0), |s| {
544 (s.entities.len(), s.value_count, s.unique_values.len())
545 });
546
547 let is_builtin = self
549 .schema_registry
550 .get(&schema.id)
551 .is_some_and(|s| s.builtin);
552
553 SchemaSummaryEntry {
554 predicate: schema.id.clone(),
555 inferred_type: schema.range.description(),
556 cardinality: schema.cardinality,
557 entity_count,
558 value_count,
559 unique_values,
560 is_builtin,
561 }
562 })
563 .collect();
564
565 entries.sort_by(|a, b| a.predicate.cmp(&b.predicate));
566 entries
567 }
568
569 pub fn run_enrichment(
583 &mut self,
584 engine: &dyn crate::enrich::EnrichmentEngine,
585 ) -> Result<(usize, usize)> {
586 use crate::enrich::EnrichmentContext;
587
588 let unenriched = self.get_unenriched_frames(engine.kind(), engine.version());
589 let mut frames_processed = 0;
590 let mut total_cards = 0;
591
592 for frame_id in unenriched {
593 let Ok(index) = usize::try_from(frame_id) else {
596 continue;
597 };
598 let Some(frame) = self.toc.frames.get(index) else {
599 continue;
600 };
601 let frame = frame.clone();
602
603 let text = match self.frame_content(&frame) {
605 Ok(t) => t,
606 Err(_) => continue,
607 };
608
609 let uri = frame
611 .uri
612 .clone()
613 .unwrap_or_else(|| crate::default_uri(frame_id));
614 let metadata_json = frame
615 .metadata
616 .as_ref()
617 .and_then(|m| serde_json::to_string(m).ok());
618 let ctx = EnrichmentContext::new(
619 frame_id,
620 uri,
621 text,
622 frame.title.clone(),
623 frame.timestamp,
624 metadata_json,
625 );
626
627 let result = engine.enrich(&ctx);
629
630 if result.success {
631 let cards = result.cards;
632 let card_count = cards.len();
633
634 let card_ids = if cards.is_empty() {
636 Vec::new()
637 } else {
638 self.put_memory_cards(cards)?
639 };
640
641 self.record_enrichment(frame_id, engine.kind(), engine.version(), card_ids)?;
643
644 total_cards += card_count;
645 }
646
647 frames_processed += 1;
648 }
649
650 Ok((frames_processed, total_cards))
651 }
652}
653
654#[cfg(test)]
655mod tests {
656 use super::*;
657 use crate::types::MemoryCardBuilder;
658 use tempfile::NamedTempFile;
659
// A stored card can be read back as the current value of its entity/slot pair.
#[test]
fn test_put_and_get_memory_card() {
    // Reserve a unique path, then delete the placeholder so `Memvid::create`
    // gets a path with no file behind it.
    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();

    let employer_card = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("employer")
        .value("Anthropic")
        .source(0, Some("mv2://test".to_string()))
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();

    let stored_id = store.put_memory_card(employer_card).unwrap();

    let current = store.get_current_memory("user", "employer");
    assert_eq!(current.map(|card| card.value.as_str()), Some("Anthropic"));
    assert_eq!(current.map(|card| card.id), Some(stored_id));
}
685
// Enrichment records are tracked per frame and per engine kind.
#[test]
fn test_enrichment_tracking() {
    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();

    let (engine_kind, engine_version) = ("rules-v1", "1.0.0");

    // Nothing recorded yet.
    assert!(!store.is_frame_enriched(1, engine_kind, engine_version));

    store
        .record_enrichment(1, engine_kind, engine_version, vec![0, 1])
        .unwrap();

    // The recorded engine now counts as done for frame 1.
    assert!(store.is_frame_enriched(1, engine_kind, engine_version));

    // A different engine kind is still outstanding for the same frame.
    assert!(!store.is_frame_enriched(1, "llm:phi-3.5-mini", engine_version));
}
708
// Statistics count every stored card, and each entity only once.
#[test]
fn test_memory_stats() {
    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();

    // Three cards in distinct slots, all for the same entity.
    let slots = ["employer", "location", "hobby"];
    for slot in slots {
        let card = MemoryCardBuilder::new()
            .fact()
            .entity("user")
            .slot(slot)
            .value("test")
            .source(0, None)
            .engine("test", "1.0.0")
            .build(0)
            .unwrap();
        store.put_memory_card(card).unwrap();
    }

    let stats = store.memories_stats();
    assert_eq!(stats.card_count, 3);
    assert_eq!(stats.entity_count, 1);
}
735
// End-to-end run of the rules engine over three frames, followed by a second
// run that must find nothing left to do.
#[test]
fn test_run_enrichment() {
    use crate::PutOptions;
    use crate::enrich::RulesEngine;

    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();

    // Ingest with `extract_triplets(false)` so the cards checked below come
    // from the explicit enrichment run.
    let opts = PutOptions::builder().extract_triplets(false).build();
    let documents: [&[u8]; 3] = [
        b"Hello! I work at Anthropic.",
        b"I live in San Francisco.",
        b"The weather is nice today.",
    ];
    for document in documents {
        store
            .put_bytes_with_options(document, opts.clone())
            .unwrap();
    }
    store.commit().unwrap();

    let engine = RulesEngine::new();
    let (frames, cards) = store.run_enrichment(&engine).unwrap();

    // All three frames are processed; two cards come out of them.
    assert_eq!(frames, 3);
    assert_eq!(cards, 2);

    let employer = store.get_current_memory("user", "employer");
    assert_eq!(employer.map(|card| card.value.as_str()), Some("Anthropic"));

    let location = store.get_current_memory("user", "location");
    assert_eq!(
        location.map(|card| card.value.as_str()),
        Some("San Francisco")
    );

    // Every frame is now recorded as enriched, so a second pass is a no-op.
    let second_pass = store.run_enrichment(&engine).unwrap();
    assert_eq!(second_pass, (0, 0));
}
781
// In strict mode a card that fails schema validation is rejected.
#[test]
fn test_schema_validation_strict() {
    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();
    store.set_schema_strict(true);

    // A numeric age is expected to pass validation.
    let numeric_age = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("age")
        .value("25")
        .source(0, None)
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();
    assert!(store.put_memory_card(numeric_age).is_ok());

    // A spelled-out age is expected to fail validation.
    let spelled_out_age = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("age")
        .value("twenty-five")
        .source(0, None)
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();
    let outcome = store.put_memory_card(spelled_out_age);
    assert!(outcome.is_err());
}
820
// Outside strict mode an invalid card only triggers a warning and is still stored.
#[test]
fn test_schema_validation_non_strict() {
    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();

    // A freshly created store is not in strict mode.
    assert!(!store.is_schema_strict());

    let spelled_out_age = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("age")
        .value("twenty-five")
        .source(0, None)
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();

    let outcome = store.put_memory_card(spelled_out_age);
    assert!(outcome.is_ok());

    // The card was kept despite the validation failure.
    let stored = store.get_entity_memories("user");
    assert_eq!(stored.len(), 1);
}
851
// A custom enum schema accepts listed values and rejects others in strict mode.
#[test]
fn test_schema_registry_custom() {
    use crate::types::{PredicateSchema, ValueType};

    let scratch = NamedTempFile::new().unwrap();
    let store_path = scratch.path();
    std::fs::remove_file(store_path).ok();

    let mut store = Memvid::create(store_path).unwrap();
    store.set_schema_strict(true);

    // Restrict the `status` predicate to two allowed values.
    let allowed = vec!["active".to_string(), "inactive".to_string()];
    let status_schema =
        PredicateSchema::new("status", "Status").with_range(ValueType::Enum { values: allowed });
    store.register_schema(status_schema);

    let listed_status = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("status")
        .value("active")
        .source(0, None)
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();
    assert!(store.put_memory_card(listed_status).is_ok());

    // "pending" is not one of the enum's values.
    let unlisted_status = MemoryCardBuilder::new()
        .fact()
        .entity("user")
        .slot("status")
        .value("pending")
        .source(0, None)
        .engine("test", "1.0.0")
        .build(0)
        .unwrap();
    assert!(store.put_memory_card(unlisted_status).is_err());
}
895}