1use std::collections::HashMap;
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use crate::constants::{ENTITY_CONTENT_BYTES_MAX, ENTITY_NAME_BYTES_MAX};
11
/// The category an [`Entity`] belongs to.
///
/// serde (de)serializes every variant under its `snake_case` name. `Self_`
/// carries an explicit rename so that it maps to `"self"` rather than the
/// `"self_"` the container-level rule would otherwise produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    /// Serialized as `"self"`; the trailing underscore avoids the `Self` keyword.
    #[serde(rename = "self")]
    Self_,
    /// Serialized as `"person"`.
    Person,
    /// Serialized as `"project"`.
    Project,
    /// Serialized as `"topic"`.
    Topic,
    /// Serialized as `"note"`.
    Note,
    /// Serialized as `"task"`.
    Task,
}
34
35impl EntityType {
36 #[must_use]
38 pub fn as_str(&self) -> &'static str {
39 match self {
40 Self::Self_ => "self",
41 Self::Person => "person",
42 Self::Project => "project",
43 Self::Topic => "topic",
44 Self::Note => "note",
45 Self::Task => "task",
46 }
47 }
48
49 #[must_use]
51 pub fn from_str(s: &str) -> Option<Self> {
52 match s.to_lowercase().as_str() {
53 "self" => Some(Self::Self_),
54 "person" => Some(Self::Person),
55 "project" => Some(Self::Project),
56 "topic" => Some(Self::Topic),
57 "note" => Some(Self::Note),
58 "task" => Some(Self::Task),
59 _ => None,
60 }
61 }
62
63 #[must_use]
65 pub fn all() -> &'static [EntityType] {
66 &[
67 Self::Self_,
68 Self::Person,
69 Self::Project,
70 Self::Topic,
71 Self::Note,
72 Self::Task,
73 ]
74 }
75}
76
77impl std::fmt::Display for EntityType {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 write!(f, "{}", self.as_str())
80 }
81}
82
/// A reference to the external resource an entity was derived from.
///
/// Only `uri` is required; the remaining fields are optional descriptive
/// data that can be attached with the `with_*` builder methods.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SourceRef {
    /// Location of the resource. The helper predicates recognise the
    /// `file://`, `http://`, `https://` and `s3://` prefixes.
    pub uri: String,
    /// MIME type of the resource, if known (the tests use values such as `application/pdf`).
    pub mime_type: Option<String>,
    /// Size of the resource in bytes, if known.
    pub size_bytes: Option<u64>,
    /// Checksum of the resource, if known.
    // NOTE(review): the tests use an `algorithm:digest` form ("sha256:abc123");
    // nothing in this file enforces a format — confirm with producers.
    pub checksum: Option<String>,
}
119
120impl SourceRef {
121 #[must_use]
123 pub fn new(uri: String) -> Self {
124 Self {
125 uri,
126 mime_type: None,
127 size_bytes: None,
128 checksum: None,
129 }
130 }
131
132 #[must_use]
134 pub fn with_mime_type(mut self, mime_type: String) -> Self {
135 self.mime_type = Some(mime_type);
136 self
137 }
138
139 #[must_use]
141 pub fn with_size_bytes(mut self, size_bytes: u64) -> Self {
142 self.size_bytes = Some(size_bytes);
143 self
144 }
145
146 #[must_use]
148 pub fn with_checksum(mut self, checksum: String) -> Self {
149 self.checksum = Some(checksum);
150 self
151 }
152
153 #[must_use]
155 pub fn is_local(&self) -> bool {
156 self.uri.starts_with("file://")
157 }
158
159 #[must_use]
161 pub fn is_remote(&self) -> bool {
162 self.uri.starts_with("http://") || self.uri.starts_with("https://")
163 }
164
165 #[must_use]
167 pub fn is_s3(&self) -> bool {
168 self.uri.starts_with("s3://")
169 }
170
171 #[must_use]
173 pub fn extension(&self) -> Option<&str> {
174 self.uri.rsplit('.').next().filter(|ext| !ext.contains('/'))
175 }
176}
177
178impl std::fmt::Display for SourceRef {
179 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180 write!(f, "{}", self.uri)
181 }
182}
183
/// A single stored item: a typed, named piece of content plus bookkeeping data.
///
/// All fields are public, so the size limits checked by [`Entity::new`],
/// [`Entity::update_content`] and [`EntityBuilder::build`] only hold for
/// values that go through those functions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    /// Identifier. A UUID v4 string unless an explicit id is supplied through the builder.
    pub id: String,
    /// Category of the entity.
    pub entity_type: EntityType,
    /// Name; the constructors reject values longer than `ENTITY_NAME_BYTES_MAX` bytes.
    pub name: String,
    /// Content; the constructors and `update_content` reject values longer
    /// than `ENTITY_CONTENT_BYTES_MAX` bytes.
    pub content: String,
    /// Free-form string key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Optional embedding vector.
    // NOTE(review): no dimensionality is enforced in this file — confirm with the embedding producer.
    pub embedding: Option<Vec<f32>>,
    /// Creation timestamp; defaults to the construction time.
    pub created_at: DateTime<Utc>,
    /// Last-modification timestamp; refreshed by the mutating methods of `Entity`.
    pub updated_at: DateTime<Utc>,
    /// Optional document timestamp.
    // NOTE(review): presumably when the source document was authored (the tests
    // use an e-mail's sent time) — confirm.
    pub document_time: Option<DateTime<Utc>>,
    /// Optional event timestamp.
    // NOTE(review): presumably when the described event happened — confirm.
    pub event_time: Option<DateTime<Utc>>,
    /// Optional reference to the external resource this entity came from.
    pub source_ref: Option<SourceRef>,
}
225
226impl Entity {
227 #[must_use]
232 pub fn new(entity_type: EntityType, name: String, content: String) -> Self {
233 assert!(
235 name.len() <= ENTITY_NAME_BYTES_MAX,
236 "name {} bytes exceeds max {}",
237 name.len(),
238 ENTITY_NAME_BYTES_MAX
239 );
240 assert!(
241 content.len() <= ENTITY_CONTENT_BYTES_MAX,
242 "content {} bytes exceeds max {}",
243 content.len(),
244 ENTITY_CONTENT_BYTES_MAX
245 );
246
247 let now = Utc::now();
248 Self {
249 id: uuid::Uuid::new_v4().to_string(),
250 entity_type,
251 name,
252 content,
253 metadata: HashMap::new(),
254 embedding: None,
255 created_at: now,
256 updated_at: now,
257 document_time: None,
258 event_time: None,
259 source_ref: None,
260 }
261 }
262
263 #[must_use]
265 pub fn builder(entity_type: EntityType, name: String, content: String) -> EntityBuilder {
266 EntityBuilder::new(entity_type, name, content)
267 }
268
269 #[must_use]
271 pub fn has_embedding(&self) -> bool {
272 self.embedding.is_some()
273 }
274
275 #[must_use]
277 pub fn get_metadata(&self, key: &str) -> Option<&str> {
278 self.metadata.get(key).map(String::as_str)
279 }
280
281 pub fn update_content(&mut self, content: String) {
283 assert!(
284 content.len() <= ENTITY_CONTENT_BYTES_MAX,
285 "content {} bytes exceeds max {}",
286 content.len(),
287 ENTITY_CONTENT_BYTES_MAX
288 );
289 self.content = content;
290 self.updated_at = Utc::now();
291 }
292
293 pub fn set_embedding(&mut self, embedding: Vec<f32>) {
295 self.embedding = Some(embedding);
296 self.updated_at = Utc::now();
297 }
298
299 pub fn set_document_time(&mut self, time: DateTime<Utc>) {
301 self.document_time = Some(time);
302 self.updated_at = Utc::now();
303 }
304
305 pub fn set_event_time(&mut self, time: DateTime<Utc>) {
307 self.event_time = Some(time);
308 self.updated_at = Utc::now();
309 }
310
311 #[must_use]
313 pub fn document_time(&self) -> Option<DateTime<Utc>> {
314 self.document_time
315 }
316
317 #[must_use]
319 pub fn event_time(&self) -> Option<DateTime<Utc>> {
320 self.event_time
321 }
322
323 #[must_use]
325 pub fn has_temporal_metadata(&self) -> bool {
326 self.document_time.is_some() || self.event_time.is_some()
327 }
328
329 pub fn set_source_ref(&mut self, source_ref: SourceRef) {
331 self.source_ref = Some(source_ref);
332 self.updated_at = Utc::now();
333 }
334
335 #[must_use]
337 pub fn source_ref(&self) -> Option<&SourceRef> {
338 self.source_ref.as_ref()
339 }
340
341 #[must_use]
343 pub fn has_source_ref(&self) -> bool {
344 self.source_ref.is_some()
345 }
346}
347
/// Step-by-step constructor for [`Entity`].
///
/// The entity type, name and content are required up front. Every other
/// field is optional and is given a default by `build` when left unset.
#[derive(Debug)]
pub struct EntityBuilder {
    /// Category of the entity being built.
    entity_type: EntityType,
    /// Name; its length is checked in `build`.
    name: String,
    /// Content; its length is checked in `build`.
    content: String,
    /// Explicit id; `build` generates a UUID v4 string when this is `None`.
    id: Option<String>,
    /// Metadata pairs collected so far.
    metadata: HashMap<String, String>,
    /// Optional embedding vector.
    embedding: Option<Vec<f32>>,
    /// Explicit creation time; `build` uses the current time when this is `None`.
    created_at: Option<DateTime<Utc>>,
    /// Explicit last-modification time; `build` uses the current time when this is `None`.
    updated_at: Option<DateTime<Utc>>,
    /// Optional document time, passed through unchanged.
    document_time: Option<DateTime<Utc>>,
    /// Optional event time, passed through unchanged.
    event_time: Option<DateTime<Utc>>,
    /// Optional source reference, passed through unchanged.
    source_ref: Option<SourceRef>,
}
367
368impl EntityBuilder {
369 #[must_use]
371 pub fn new(entity_type: EntityType, name: String, content: String) -> Self {
372 Self {
373 entity_type,
374 name,
375 content,
376 id: None,
377 metadata: HashMap::new(),
378 embedding: None,
379 created_at: None,
380 updated_at: None,
381 document_time: None,
382 event_time: None,
383 source_ref: None,
384 }
385 }
386
387 #[must_use]
389 pub fn with_id(mut self, id: String) -> Self {
390 self.id = Some(id);
391 self
392 }
393
394 #[must_use]
396 pub fn with_metadata(mut self, key: String, value: String) -> Self {
397 self.metadata.insert(key, value);
398 self
399 }
400
401 #[must_use]
403 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
404 self.embedding = Some(embedding);
405 self
406 }
407
408 #[must_use]
410 pub fn with_created_at(mut self, created_at: DateTime<Utc>) -> Self {
411 self.created_at = Some(created_at);
412 self
413 }
414
415 #[must_use]
417 pub fn with_updated_at(mut self, updated_at: DateTime<Utc>) -> Self {
418 self.updated_at = Some(updated_at);
419 self
420 }
421
422 #[must_use]
424 pub fn with_document_time(mut self, document_time: DateTime<Utc>) -> Self {
425 self.document_time = Some(document_time);
426 self
427 }
428
429 #[must_use]
431 pub fn with_event_time(mut self, event_time: DateTime<Utc>) -> Self {
432 self.event_time = Some(event_time);
433 self
434 }
435
436 #[must_use]
438 pub fn with_source_ref(mut self, source_ref: SourceRef) -> Self {
439 self.source_ref = Some(source_ref);
440 self
441 }
442
443 #[must_use]
448 pub fn build(self) -> Entity {
449 assert!(
451 self.name.len() <= ENTITY_NAME_BYTES_MAX,
452 "name {} bytes exceeds max {}",
453 self.name.len(),
454 ENTITY_NAME_BYTES_MAX
455 );
456 assert!(
457 self.content.len() <= ENTITY_CONTENT_BYTES_MAX,
458 "content {} bytes exceeds max {}",
459 self.content.len(),
460 ENTITY_CONTENT_BYTES_MAX
461 );
462
463 let now = Utc::now();
464 Entity {
465 id: self.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
466 entity_type: self.entity_type,
467 name: self.name,
468 content: self.content,
469 metadata: self.metadata,
470 embedding: self.embedding,
471 created_at: self.created_at.unwrap_or(now),
472 updated_at: self.updated_at.unwrap_or(now),
473 document_time: self.document_time,
474 event_time: self.event_time,
475 source_ref: self.source_ref,
476 }
477 }
478}
479
/// Unit tests for `EntityType`, `SourceRef`, `Entity` and `EntityBuilder`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // EntityType
    // ---------------------------------------------------------------------

    #[test]
    fn test_entity_type_as_str() {
        assert_eq!(EntityType::Self_.as_str(), "self");
        assert_eq!(EntityType::Person.as_str(), "person");
        assert_eq!(EntityType::Project.as_str(), "project");
        assert_eq!(EntityType::Topic.as_str(), "topic");
        assert_eq!(EntityType::Note.as_str(), "note");
        assert_eq!(EntityType::Task.as_str(), "task");
    }

    #[test]
    fn test_entity_type_from_str() {
        assert_eq!(EntityType::from_str("self"), Some(EntityType::Self_));
        assert_eq!(EntityType::from_str("PERSON"), Some(EntityType::Person));
        assert_eq!(EntityType::from_str("Project"), Some(EntityType::Project));
        assert_eq!(EntityType::from_str("unknown"), None);
    }

    /// Every variant must survive `as_str` -> `from_str` and display as its name.
    #[test]
    fn test_entity_type_round_trip_all_variants() {
        assert_eq!(EntityType::all().len(), 6);
        for entity_type in EntityType::all() {
            assert_eq!(
                EntityType::from_str(entity_type.as_str()),
                Some(*entity_type)
            );
            assert_eq!(entity_type.to_string(), entity_type.as_str());
        }
    }

    // ---------------------------------------------------------------------
    // Entity construction and mutation
    // ---------------------------------------------------------------------

    #[test]
    fn test_entity_new() {
        let entity = Entity::new(
            EntityType::Person,
            "Alice".to_string(),
            "My friend Alice".to_string(),
        );

        assert!(!entity.id.is_empty());
        assert_eq!(entity.entity_type, EntityType::Person);
        assert_eq!(entity.name, "Alice");
        assert_eq!(entity.content, "My friend Alice");
        assert!(entity.metadata.is_empty());
        assert!(entity.embedding.is_none());
    }

    #[test]
    fn test_entity_builder() {
        let entity = Entity::builder(
            EntityType::Project,
            "Umi".to_string(),
            "Memory system".to_string(),
        )
        .with_id("custom-id".to_string())
        .with_metadata("status".to_string(), "active".to_string())
        .with_embedding(vec![0.1, 0.2, 0.3])
        .build();

        assert_eq!(entity.id, "custom-id");
        assert_eq!(entity.entity_type, EntityType::Project);
        assert_eq!(entity.get_metadata("status"), Some("active"));
        assert!(entity.has_embedding());
    }

    #[test]
    fn test_entity_update_content() {
        let mut entity = Entity::new(
            EntityType::Note,
            "Test".to_string(),
            "Original content".to_string(),
        );
        let original_updated = entity.updated_at;

        // Give the clock time to advance so the strict comparison below is reliable.
        std::thread::sleep(std::time::Duration::from_millis(10));

        entity.update_content("New content".to_string());

        assert_eq!(entity.content, "New content");
        // Strict `>`: with `>=` this test would still pass if `updated_at`
        // were never refreshed.
        assert!(entity.updated_at > original_updated);
    }

    #[test]
    #[should_panic(expected = "name")]
    fn test_entity_name_too_long() {
        let long_name = "x".repeat(ENTITY_NAME_BYTES_MAX + 1);
        let _ = Entity::new(EntityType::Note, long_name, "content".to_string());
    }

    #[test]
    #[should_panic(expected = "content")]
    fn test_entity_content_too_long() {
        let long_content = "x".repeat(ENTITY_CONTENT_BYTES_MAX + 1);
        let _ = Entity::new(EntityType::Note, "name".to_string(), long_content);
    }

    #[test]
    #[should_panic(expected = "name")]
    fn test_entity_builder_name_too_long() {
        let long_name = "x".repeat(ENTITY_NAME_BYTES_MAX + 1);
        let _ = Entity::builder(EntityType::Note, long_name, "content".to_string()).build();
    }

    #[test]
    #[should_panic(expected = "content")]
    fn test_entity_builder_content_too_long() {
        let long_content = "x".repeat(ENTITY_CONTENT_BYTES_MAX + 1);
        let _ = Entity::builder(EntityType::Note, "name".to_string(), long_content).build();
    }

    #[test]
    #[should_panic(expected = "content")]
    fn test_entity_update_content_too_long() {
        let mut entity = Entity::new(EntityType::Note, "name".to_string(), "content".to_string());
        entity.update_content("x".repeat(ENTITY_CONTENT_BYTES_MAX + 1));
    }

    // ---------------------------------------------------------------------
    // Temporal metadata
    // ---------------------------------------------------------------------

    #[test]
    fn test_entity_new_has_no_temporal_metadata() {
        let entity = Entity::new(EntityType::Note, "Test".to_string(), "Content".to_string());

        assert!(entity.document_time.is_none());
        assert!(entity.event_time.is_none());
        assert!(!entity.has_temporal_metadata());
    }

    #[test]
    fn test_entity_set_document_time() {
        let mut entity = Entity::new(
            EntityType::Note,
            "Email".to_string(),
            "Content from email".to_string(),
        );

        let email_sent_time = Utc::now() - chrono::Duration::days(7);
        entity.set_document_time(email_sent_time);

        assert_eq!(entity.document_time(), Some(email_sent_time));
        assert!(entity.has_temporal_metadata());
        assert!(entity.event_time().is_none());
    }

    #[test]
    fn test_entity_set_event_time() {
        let mut entity = Entity::new(
            EntityType::Person,
            "Alice".to_string(),
            "Met at conference".to_string(),
        );

        let event_occurred = Utc::now() - chrono::Duration::days(5);
        entity.set_event_time(event_occurred);

        assert_eq!(entity.event_time(), Some(event_occurred));
        assert!(entity.has_temporal_metadata());
        assert!(entity.document_time().is_none());
    }

    #[test]
    fn test_entity_builder_with_temporal_metadata() {
        let doc_time = Utc::now() - chrono::Duration::days(10);
        let event_time = Utc::now() - chrono::Duration::days(14);

        let entity = Entity::builder(
            EntityType::Note,
            "Meeting Notes".to_string(),
            "Discussed project timeline".to_string(),
        )
        .with_document_time(doc_time)
        .with_event_time(event_time)
        .build();

        assert_eq!(entity.document_time(), Some(doc_time));
        assert_eq!(entity.event_time(), Some(event_time));
        assert!(entity.has_temporal_metadata());
    }

    #[test]
    fn test_temporal_metadata_bi_temporal_scenario() {
        let now = Utc::now();
        let last_month = now - chrono::Duration::days(30);

        let entity = Entity::builder(
            EntityType::Person,
            "Bob".to_string(),
            "Met at conference".to_string(),
        )
        .with_document_time(now)
        .with_event_time(last_month)
        .build();

        // The event happened before the document that records it.
        assert!(entity.document_time().unwrap() > entity.event_time().unwrap());

        // `now` was read before `build`, so it cannot be later than `created_at`.
        assert!(entity.document_time().unwrap() <= entity.created_at);
    }

    #[test]
    fn test_temporal_metadata_updates_timestamp() {
        let mut entity = Entity::new(EntityType::Note, "Test".to_string(), "Content".to_string());
        let original_updated = entity.updated_at;

        std::thread::sleep(std::time::Duration::from_millis(10));

        entity.set_event_time(Utc::now());

        assert!(entity.updated_at > original_updated);
    }

    // ---------------------------------------------------------------------
    // SourceRef
    // ---------------------------------------------------------------------

    #[test]
    fn test_source_ref_new() {
        let source_ref = SourceRef::new("file:///photos/meeting.jpg".to_string());

        assert_eq!(source_ref.uri, "file:///photos/meeting.jpg");
        assert!(source_ref.mime_type.is_none());
        assert!(source_ref.size_bytes.is_none());
        assert!(source_ref.checksum.is_none());
    }

    #[test]
    fn test_source_ref_builder_pattern() {
        let source_ref = SourceRef::new("s3://bucket/report.pdf".to_string())
            .with_mime_type("application/pdf".to_string())
            .with_size_bytes(1024 * 1024)
            .with_checksum("sha256:abc123".to_string());

        assert_eq!(source_ref.uri, "s3://bucket/report.pdf");
        assert_eq!(source_ref.mime_type, Some("application/pdf".to_string()));
        assert_eq!(source_ref.size_bytes, Some(1024 * 1024));
        assert_eq!(source_ref.checksum, Some("sha256:abc123".to_string()));
    }

    #[test]
    fn test_source_ref_is_local() {
        let local = SourceRef::new("file:///home/user/doc.pdf".to_string());
        let remote = SourceRef::new("https://example.com/doc.pdf".to_string());
        let s3 = SourceRef::new("s3://bucket/doc.pdf".to_string());

        assert!(local.is_local());
        assert!(!remote.is_local());
        assert!(!s3.is_local());
    }

    #[test]
    fn test_source_ref_is_remote() {
        let http = SourceRef::new("http://example.com/doc.pdf".to_string());
        let https = SourceRef::new("https://example.com/doc.pdf".to_string());
        let local = SourceRef::new("file:///home/user/doc.pdf".to_string());

        assert!(http.is_remote());
        assert!(https.is_remote());
        assert!(!local.is_remote());
    }

    #[test]
    fn test_source_ref_is_s3() {
        let s3 = SourceRef::new("s3://my-bucket/path/to/file.pdf".to_string());
        let local = SourceRef::new("file:///home/user/doc.pdf".to_string());

        assert!(s3.is_s3());
        assert!(!local.is_s3());
    }

    #[test]
    fn test_source_ref_extension() {
        let pdf = SourceRef::new("file:///docs/report.pdf".to_string());
        let jpg = SourceRef::new("https://example.com/image.jpg".to_string());
        let no_ext = SourceRef::new("s3://bucket/file".to_string());

        assert_eq!(pdf.extension(), Some("pdf"));
        assert_eq!(jpg.extension(), Some("jpg"));
        assert_eq!(no_ext.extension(), None);
    }

    #[test]
    fn test_source_ref_display() {
        let source_ref = SourceRef::new("file:///photos/image.png".to_string());
        assert_eq!(format!("{}", source_ref), "file:///photos/image.png");
    }

    // ---------------------------------------------------------------------
    // Entity + SourceRef
    // ---------------------------------------------------------------------

    #[test]
    fn test_entity_new_has_no_source_ref() {
        let entity = Entity::new(EntityType::Note, "Test".to_string(), "Content".to_string());

        assert!(entity.source_ref.is_none());
        assert!(!entity.has_source_ref());
    }

    #[test]
    fn test_entity_set_source_ref() {
        let mut entity = Entity::new(
            EntityType::Note,
            "Image Analysis".to_string(),
            "A photo of a whiteboard with meeting notes".to_string(),
        );

        let source_ref = SourceRef::new("file:///photos/whiteboard.jpg".to_string())
            .with_mime_type("image/jpeg".to_string());

        entity.set_source_ref(source_ref);

        assert!(entity.has_source_ref());
        assert_eq!(
            entity.source_ref().unwrap().uri,
            "file:///photos/whiteboard.jpg"
        );
        assert_eq!(
            entity.source_ref().unwrap().mime_type,
            Some("image/jpeg".to_string())
        );
    }

    #[test]
    fn test_entity_builder_with_source_ref() {
        let source_ref = SourceRef::new("https://storage.example.com/audio/memo.mp3".to_string())
            .with_mime_type("audio/mpeg".to_string())
            .with_size_bytes(5 * 1024 * 1024);

        let entity = Entity::builder(
            EntityType::Note,
            "Voice Memo".to_string(),
            "Discussed Q4 planning with the team".to_string(),
        )
        .with_source_ref(source_ref)
        .build();

        assert!(entity.has_source_ref());
        let ref_data = entity.source_ref().unwrap();
        assert_eq!(ref_data.uri, "https://storage.example.com/audio/memo.mp3");
        assert_eq!(ref_data.mime_type, Some("audio/mpeg".to_string()));
        assert_eq!(ref_data.size_bytes, Some(5 * 1024 * 1024));
    }

    #[test]
    fn test_source_ref_updates_timestamp() {
        let mut entity = Entity::new(EntityType::Note, "Test".to_string(), "Content".to_string());
        let original_updated = entity.updated_at;

        std::thread::sleep(std::time::Duration::from_millis(10));

        let source_ref = SourceRef::new("file:///test.pdf".to_string());
        entity.set_source_ref(source_ref);

        assert!(entity.updated_at > original_updated);
    }
}