1use serde::{Deserialize, Serialize};
78use serde_json;
79use std::collections::HashMap;
80use std::fmt;
81use std::io::Read;
82
83use crate::soch::SochValue;
84
85#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct ObjectId([u8; 32]);
104
105impl ObjectId {
106 pub fn from_bytes(bytes: [u8; 32]) -> Self {
108 Self(bytes)
109 }
110
111 pub fn from_content(content: &[u8]) -> Self {
113 let hash = blake3::hash(content);
114 Self(*hash.as_bytes())
115 }
116
117 pub fn as_bytes(&self) -> &[u8; 32] {
119 &self.0
120 }
121
122 pub fn to_hex(&self) -> String {
124 hex::encode(self.0)
125 }
126
127 pub fn from_hex(s: &str) -> Result<Self, ObjectIdError> {
129 let bytes = hex::decode(s).map_err(|_| ObjectIdError::InvalidHex)?;
130 if bytes.len() != 32 {
131 return Err(ObjectIdError::InvalidLength(bytes.len()));
132 }
133 let mut arr = [0u8; 32];
134 arr.copy_from_slice(&bytes);
135 Ok(Self(arr))
136 }
137
138 pub const NIL: Self = Self([0u8; 32]);
140
141 pub fn is_nil(&self) -> bool {
143 self.0 == [0u8; 32]
144 }
145}
146
147impl fmt::Debug for ObjectId {
148 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149 write!(f, "ObjectId({})", &self.to_hex()[..16]) }
151}
152
153impl fmt::Display for ObjectId {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 write!(f, "{}", self.to_hex())
156 }
157}
158
/// Errors returned when parsing an `ObjectId` from a hex string.
#[derive(Debug, Clone, thiserror::Error)]
pub enum ObjectIdError {
    /// The input could not be decoded as hex.
    #[error("invalid hex encoding")]
    InvalidHex,
    /// The input decoded successfully, but not to exactly 32 bytes.
    /// The payload is the number of bytes that were actually decoded.
    #[error("expected 32 bytes, got {0}")]
    InvalidLength(usize),
}
167
168#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
189pub struct BitemporalCoord {
190 pub valid_from: u64,
192
193 pub valid_to: u64,
196
197 pub system_time: u64,
200}
201
202impl BitemporalCoord {
203 pub fn new(valid_from: u64, system_time: u64) -> Self {
205 Self {
206 valid_from,
207 valid_to: u64::MAX,
208 system_time,
209 }
210 }
211
212 pub fn with_valid_range(valid_from: u64, valid_to: u64, system_time: u64) -> Self {
214 Self {
215 valid_from,
216 valid_to,
217 system_time,
218 }
219 }
220
221 pub fn valid_at(&self, valid_time: u64) -> bool {
223 self.valid_from <= valid_time && valid_time < self.valid_to
224 }
225
226 pub fn known_at(&self, system_time: u64) -> bool {
228 self.system_time <= system_time
229 }
230
231 pub fn visible_at(&self, system_time: u64, valid_time: u64) -> bool {
233 self.known_at(system_time) && self.valid_at(valid_time)
234 }
235
236 pub fn close_valid_time(&mut self, valid_to: u64) {
238 self.valid_to = valid_to;
239 }
240
241 pub fn is_current(&self) -> bool {
243 self.valid_to == u64::MAX
244 }
245
246 pub const ETERNAL: Self = Self {
248 valid_from: 0,
249 valid_to: u64::MAX,
250 system_time: 0,
251 };
252}
253
254impl Default for BitemporalCoord {
255 fn default() -> Self {
256 Self::ETERNAL
257 }
258}
259
260#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
269pub enum EdgeKind {
270 Typed(String),
272 Contains,
274 DerivedFrom,
276 References,
278 Succeeds,
280 SimilarTo,
282}
283
284impl EdgeKind {
285 pub fn typed(label: impl Into<String>) -> Self {
287 Self::Typed(label.into())
288 }
289
290 pub fn label(&self) -> &str {
292 match self {
293 EdgeKind::Typed(s) => s,
294 EdgeKind::Contains => "contains",
295 EdgeKind::DerivedFrom => "derived_from",
296 EdgeKind::References => "references",
297 EdgeKind::Succeeds => "succeeds",
298 EdgeKind::SimilarTo => "similar_to",
299 }
300 }
301}
302
303impl fmt::Display for EdgeKind {
304 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
305 write!(f, "{}", self.label())
306 }
307}
308
309#[derive(Debug, Clone, Serialize, Deserialize)]
330pub struct Edge {
331 pub target: ObjectId,
333
334 pub kind: EdgeKind,
336
337 pub weight: f32,
342
343 pub valid_from: u64,
346
347 pub valid_to: u64,
349
350 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
352 pub properties: HashMap<String, SochValue>,
353}
354
355impl Edge {
356 pub fn new(target: ObjectId, kind: EdgeKind, weight: f32) -> Self {
358 Self {
359 target,
360 kind,
361 weight,
362 valid_from: 0,
363 valid_to: u64::MAX,
364 properties: HashMap::new(),
365 }
366 }
367
368 pub fn with_validity(
370 target: ObjectId,
371 kind: EdgeKind,
372 weight: f32,
373 valid_from: u64,
374 valid_to: u64,
375 ) -> Self {
376 Self {
377 target,
378 kind,
379 weight,
380 valid_from,
381 valid_to,
382 properties: HashMap::new(),
383 }
384 }
385
386 pub fn with_property(mut self, key: impl Into<String>, value: SochValue) -> Self {
388 self.properties.insert(key.into(), value);
389 self
390 }
391
392 pub fn valid_at(&self, time: u64) -> bool {
394 self.valid_from <= time && time < self.valid_to
395 }
396
397 pub fn is_current(&self) -> bool {
399 self.valid_to == u64::MAX
400 }
401}
402
403impl PartialEq for Edge {
404 fn eq(&self, other: &Self) -> bool {
405 self.target == other.target && self.kind == other.kind
406 }
407}
408
409impl Eq for Edge {}
410
411#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
418pub enum ObjectKind {
419 Entity,
422
423 Event,
426
427 Episode,
430
431 Document,
434
435 Fact,
438
439 Artifact,
442
443 Custom(String),
445}
446
447impl ObjectKind {
448 pub fn label(&self) -> &str {
450 match self {
451 ObjectKind::Entity => "entity",
452 ObjectKind::Event => "event",
453 ObjectKind::Episode => "episode",
454 ObjectKind::Document => "document",
455 ObjectKind::Fact => "fact",
456 ObjectKind::Artifact => "artifact",
457 ObjectKind::Custom(s) => s,
458 }
459 }
460}
461
462impl fmt::Display for ObjectKind {
463 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
464 write!(f, "{}", self.label())
465 }
466}
467
468#[derive(Debug, Clone, Serialize, Deserialize)]
481pub struct Provenance {
482 pub parents: Vec<ObjectId>,
485
486 pub operation: String,
489
490 pub agent: String,
493
494 pub timestamp: u64,
496
497 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
499 pub metadata: HashMap<String, SochValue>,
500}
501
502impl Provenance {
503 pub fn root(agent: impl Into<String>, timestamp: u64) -> Self {
505 Self {
506 parents: Vec::new(),
507 operation: "create".to_string(),
508 agent: agent.into(),
509 timestamp,
510 metadata: HashMap::new(),
511 }
512 }
513
514 pub fn derived(
516 parents: Vec<ObjectId>,
517 operation: impl Into<String>,
518 agent: impl Into<String>,
519 timestamp: u64,
520 ) -> Self {
521 Self {
522 parents,
523 operation: operation.into(),
524 agent: agent.into(),
525 timestamp,
526 metadata: HashMap::new(),
527 }
528 }
529
530 pub fn with_metadata(mut self, key: impl Into<String>, value: SochValue) -> Self {
532 self.metadata.insert(key.into(), value);
533 self
534 }
535
536 pub fn is_root(&self) -> bool {
538 self.parents.is_empty()
539 }
540}
541
542#[derive(Debug, Clone, Serialize, Deserialize)]
556pub struct EmbeddingSpace {
557 pub vector: Vec<f32>,
559
560 pub dimensions: u32,
562
563 pub model: String,
566
567 pub generated_at: u64,
570}
571
572impl EmbeddingSpace {
573 pub fn new(vector: Vec<f32>, model: impl Into<String>, generated_at: u64) -> Self {
575 let dimensions = vector.len() as u32;
576 Self {
577 vector,
578 dimensions,
579 model: model.into(),
580 generated_at,
581 }
582 }
583
584 pub fn norm(&self) -> f32 {
586 self.vector.iter().map(|x| x * x).sum::<f32>().sqrt()
587 }
588
589 pub fn normalize(&mut self) {
591 let norm = self.norm();
592 if norm > f32::EPSILON {
593 for x in &mut self.vector {
594 *x /= norm;
595 }
596 }
597 }
598}
599
/// A content-addressed record combining a typed payload, outgoing edges,
/// named embeddings, bitemporal coordinates and provenance.
///
/// All fields are private; read access goes through the accessor methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeObject {
    // Identifier; `compute_oid` derives it from kind, payload, edges and embeddings.
    oid: ObjectId,

    // Category of the object.
    kind: ObjectKind,

    // The object's data; `attribute` looks keys up here when it is a `SochValue::Object`.
    payload: SochValue,

    // Outgoing edges to other objects.
    edges: Vec<Edge>,

    // Embeddings keyed by space name; `primary_embedding` reads the "semantic" entry.
    embeddings: HashMap<String, EmbeddingSpace>,

    // Validity interval and system time; not an input to the OID computation.
    temporal: BitemporalCoord,

    // Derivation record; not an input to the OID computation.
    provenance: Provenance,

    // Optional namespace; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    namespace: Option<String>,

    // Free-form tags; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tags: Vec<String>,
}
662
663impl KnowledgeObject {
664 pub fn oid(&self) -> ObjectId {
670 self.oid
671 }
672
673 pub fn kind(&self) -> &ObjectKind {
675 &self.kind
676 }
677
678 pub fn payload(&self) -> &SochValue {
680 &self.payload
681 }
682
683 pub fn payload_mut(&mut self) -> &mut SochValue {
685 &mut self.payload
686 }
687
688 pub fn edges(&self) -> &[Edge] {
690 &self.edges
691 }
692
693 pub fn edges_of_kind(&self, kind: &EdgeKind) -> Vec<&Edge> {
695 self.edges.iter().filter(|e| &e.kind == kind).collect()
696 }
697
698 pub fn edges_valid_at(&self, time: u64) -> Vec<&Edge> {
700 self.edges.iter().filter(|e| e.valid_at(time)).collect()
701 }
702
703 pub fn embedding(&self, space: &str) -> Option<&EmbeddingSpace> {
705 self.embeddings.get(space)
706 }
707
708 pub fn embeddings(&self) -> &HashMap<String, EmbeddingSpace> {
710 &self.embeddings
711 }
712
713 pub fn primary_embedding(&self) -> Option<&[f32]> {
715 self.embeddings.get("semantic").map(|e| e.vector.as_slice())
716 }
717
718 pub fn temporal(&self) -> &BitemporalCoord {
720 &self.temporal
721 }
722
723 pub fn set_temporal(&mut self, coord: BitemporalCoord) {
728 self.temporal = coord;
729 }
730
731 pub fn provenance(&self) -> &Provenance {
733 &self.provenance
734 }
735
736 pub fn namespace(&self) -> Option<&str> {
738 self.namespace.as_deref()
739 }
740
741 pub fn tags(&self) -> &[String] {
743 &self.tags
744 }
745
746 pub fn has_tag(&self, tag: &str) -> bool {
748 self.tags.iter().any(|t| t == tag)
749 }
750
751 pub fn valid_at(&self, valid_time: u64) -> bool {
757 self.temporal.valid_at(valid_time)
758 }
759
760 pub fn known_at(&self, system_time: u64) -> bool {
762 self.temporal.known_at(system_time)
763 }
764
765 pub fn visible_at(&self, system_time: u64, valid_time: u64) -> bool {
767 self.temporal.visible_at(system_time, valid_time)
768 }
769
770 pub fn is_current(&self) -> bool {
772 self.temporal.is_current()
773 }
774
775 pub fn attribute(&self, key: &str) -> Option<&SochValue> {
781 match &self.payload {
782 SochValue::Object(map) => map.get(key),
783 _ => None,
784 }
785 }
786
787 pub fn text_attribute(&self, key: &str) -> Option<&str> {
789 self.attribute(key).and_then(|v| v.as_text())
790 }
791
792 pub fn int_attribute(&self, key: &str) -> Option<i64> {
794 self.attribute(key).and_then(|v| v.as_int())
795 }
796
797 pub fn recompute_oid(&mut self) {
804 self.oid = Self::compute_oid(&self.kind, &self.payload, &self.edges, &self.embeddings);
805 }
806
807 pub fn verify_oid(&self) -> bool {
809 let computed = Self::compute_oid(&self.kind, &self.payload, &self.edges, &self.embeddings);
810 self.oid == computed
811 }
812
813 fn compute_oid(
815 kind: &ObjectKind,
816 payload: &SochValue,
817 edges: &[Edge],
818 embeddings: &HashMap<String, EmbeddingSpace>,
819 ) -> ObjectId {
820 let canonical = Self::canonical_bytes(kind, payload, edges, embeddings);
821 ObjectId::from_content(&canonical)
822 }
823
824 fn canonical_bytes(
831 kind: &ObjectKind,
832 payload: &SochValue,
833 edges: &[Edge],
834 embeddings: &HashMap<String, EmbeddingSpace>,
835 ) -> Vec<u8> {
836 let mut hasher_input = Vec::with_capacity(1024);
842
843 let kind_bytes = kind.label().as_bytes();
845 hasher_input.extend_from_slice(&(kind_bytes.len() as u32).to_le_bytes());
846 hasher_input.extend_from_slice(kind_bytes);
847
848 let payload_bytes = canonical_soch_value_bytes(payload);
851 hasher_input.extend_from_slice(&(payload_bytes.len() as u32).to_le_bytes());
852 hasher_input.extend_from_slice(&payload_bytes);
853
854 let mut sorted_edges: Vec<_> = edges.iter().collect();
856 sorted_edges.sort_by(|a, b| {
857 a.target
858 .as_bytes()
859 .cmp(b.target.as_bytes())
860 .then_with(|| a.kind.label().cmp(b.kind.label()))
861 });
862 hasher_input.extend_from_slice(&(sorted_edges.len() as u32).to_le_bytes());
863 for edge in &sorted_edges {
864 hasher_input.extend_from_slice(edge.target.as_bytes());
865 let kind_label = edge.kind.label().as_bytes();
866 hasher_input.extend_from_slice(&(kind_label.len() as u32).to_le_bytes());
867 hasher_input.extend_from_slice(kind_label);
868 hasher_input.extend_from_slice(&edge.weight.to_le_bytes());
869 }
870
871 let mut sorted_spaces: Vec<_> = embeddings.iter().collect();
873 sorted_spaces.sort_by_key(|(name, _)| *name);
874 hasher_input.extend_from_slice(&(sorted_spaces.len() as u32).to_le_bytes());
875 for (name, embedding) in &sorted_spaces {
876 let name_bytes = name.as_bytes();
877 hasher_input.extend_from_slice(&(name_bytes.len() as u32).to_le_bytes());
878 hasher_input.extend_from_slice(name_bytes);
879 hasher_input.extend_from_slice(&embedding.dimensions.to_le_bytes());
880 for &v in &embedding.vector {
881 hasher_input.extend_from_slice(&v.to_le_bytes());
882 }
883 }
884
885 hasher_input
886 }
887}
888
889fn canonical_soch_value_bytes(value: &SochValue) -> Vec<u8> {
892 let mut buf = Vec::with_capacity(256);
893 write_canonical_soch_value(&mut buf, value);
894 buf
895}
896
897fn write_canonical_soch_value(buf: &mut Vec<u8>, value: &SochValue) {
899 match value {
900 SochValue::Null => buf.push(0),
901 SochValue::Bool(b) => {
902 buf.push(1);
903 buf.push(if *b { 1 } else { 0 });
904 }
905 SochValue::Int(i) => {
906 buf.push(2);
907 buf.extend_from_slice(&i.to_le_bytes());
908 }
909 SochValue::UInt(u) => {
910 buf.push(3);
911 buf.extend_from_slice(&u.to_le_bytes());
912 }
913 SochValue::Float(f) => {
914 buf.push(4);
915 let normalized = if f.is_nan() { 0.0 } else if *f == 0.0 { 0.0 } else { *f };
917 buf.extend_from_slice(&normalized.to_le_bytes());
918 }
919 SochValue::Text(s) => {
920 buf.push(5);
921 buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
922 buf.extend_from_slice(s.as_bytes());
923 }
924 SochValue::Binary(b) => {
925 buf.push(6);
926 buf.extend_from_slice(&(b.len() as u32).to_le_bytes());
927 buf.extend_from_slice(b);
928 }
929 SochValue::Array(arr) => {
930 buf.push(7);
931 buf.extend_from_slice(&(arr.len() as u32).to_le_bytes());
932 for item in arr {
933 write_canonical_soch_value(buf, item);
934 }
935 }
936 SochValue::Object(map) => {
937 buf.push(8);
938 let mut sorted_keys: Vec<&String> = map.keys().collect();
940 sorted_keys.sort();
941 buf.extend_from_slice(&(sorted_keys.len() as u32).to_le_bytes());
942 for key in sorted_keys {
943 buf.extend_from_slice(&(key.len() as u32).to_le_bytes());
944 buf.extend_from_slice(key.as_bytes());
945 write_canonical_soch_value(buf, &map[key]);
946 }
947 }
948 SochValue::Ref { table, id } => {
949 buf.push(9);
950 buf.extend_from_slice(&(table.len() as u32).to_le_bytes());
951 buf.extend_from_slice(table.as_bytes());
952 buf.extend_from_slice(&id.to_le_bytes());
953 }
954 }
955}
956
957impl KnowledgeObject {
958 pub fn to_bytes(&self) -> Result<Vec<u8>, KnowledgeObjectError> {
965 serde_json::to_vec(self).map_err(|e| KnowledgeObjectError::SerializationError(e.to_string()))
966 }
967
968 pub fn from_bytes(bytes: &[u8]) -> Result<Self, KnowledgeObjectError> {
970 serde_json::from_slice(bytes)
971 .map_err(|e| KnowledgeObjectError::DeserializationError(e.to_string()))
972 }
973
974 pub fn estimated_size(&self) -> usize {
976 std::mem::size_of::<Self>()
977 + self.edges.len() * std::mem::size_of::<Edge>()
978 + self
979 .embeddings
980 .values()
981 .map(|e| e.vector.len() * 4)
982 .sum::<usize>()
983 + self.tags.iter().map(|t| t.len()).sum::<usize>()
984 }
985
986 pub fn to_compressed_bytes(
1000 &self,
1001 mode: CompressionMode,
1002 ) -> Result<Vec<u8>, KnowledgeObjectError> {
1003 let raw = self.to_bytes()?;
1004 let original_len = raw.len() as u32;
1005
1006 match mode {
1007 CompressionMode::None => {
1008 let mut out = Vec::with_capacity(5 + raw.len());
1009 out.push(CompressionMode::None.tag());
1010 out.extend_from_slice(&original_len.to_le_bytes());
1011 out.extend_from_slice(&raw);
1012 Ok(out)
1013 }
1014 CompressionMode::Lz4 => {
1015 let compressed = lz4::block::compress(&raw, None, false)
1016 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1017 if compressed.len() >= raw.len() {
1019 let mut out = Vec::with_capacity(5 + raw.len());
1020 out.push(CompressionMode::None.tag());
1021 out.extend_from_slice(&original_len.to_le_bytes());
1022 out.extend_from_slice(&raw);
1023 return Ok(out);
1024 }
1025 let mut out = Vec::with_capacity(5 + compressed.len());
1026 out.push(CompressionMode::Lz4.tag());
1027 out.extend_from_slice(&original_len.to_le_bytes());
1028 out.extend_from_slice(&compressed);
1029 Ok(out)
1030 }
1031 CompressionMode::Zstd { level } => {
1032 let compressed = zstd::encode_all(raw.as_slice(), level)
1033 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1034 if compressed.len() >= raw.len() {
1035 let mut out = Vec::with_capacity(5 + raw.len());
1036 out.push(CompressionMode::None.tag());
1037 out.extend_from_slice(&original_len.to_le_bytes());
1038 out.extend_from_slice(&raw);
1039 return Ok(out);
1040 }
1041 let mut out = Vec::with_capacity(5 + compressed.len());
1042 out.push(CompressionMode::Zstd { level }.tag());
1043 out.extend_from_slice(&original_len.to_le_bytes());
1044 out.extend_from_slice(&compressed);
1045 Ok(out)
1046 }
1047 }
1048 }
1049
1050 pub fn from_compressed_bytes(bytes: &[u8]) -> Result<Self, KnowledgeObjectError> {
1054 if bytes.len() < 5 {
1055 return Err(KnowledgeObjectError::DeserializationError(
1056 "compressed payload too short (need >= 5 bytes)".into(),
1057 ));
1058 }
1059
1060 let tag = bytes[0];
1061 let original_len =
1062 u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
1063 let payload = &bytes[5..];
1064
1065 let raw = match tag {
1066 0 => {
1067 payload.to_vec()
1069 }
1070 1 => {
1071 lz4::block::decompress(payload, Some(original_len as i32))
1073 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?
1074 }
1075 2 => {
1076 let mut decoder = zstd::Decoder::new(payload)
1078 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1079 let mut raw = Vec::with_capacity(original_len);
1080 decoder
1081 .read_to_end(&mut raw)
1082 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1083 raw
1084 }
1085 _ => {
1086 return Err(KnowledgeObjectError::UnknownCompressionTag(tag));
1087 }
1088 };
1089
1090 Self::from_bytes(&raw)
1091 }
1092
1093 pub fn compression_ratio(
1096 &self,
1097 mode: CompressionMode,
1098 ) -> Result<f64, KnowledgeObjectError> {
1099 let raw_len = self.to_bytes()?.len() as f64;
1100 let compressed_len = self.to_compressed_bytes(mode)?.len() as f64;
1101 Ok(compressed_len / raw_len)
1102 }
1103}
1104
/// Selects how a serialized object is compressed.
///
/// Each mode maps to a one-byte tag (see [`CompressionMode::tag`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMode {
    /// No compression; tag `0`.
    None,
    /// LZ4 compression; tag `1`.
    Lz4,
    /// Zstandard compression at the given level; tag `2`.
    Zstd { level: i32 },
}

impl CompressionMode {
    /// Returns the one-byte tag identifying this mode. The Zstd level is not
    /// part of the tag.
    pub fn tag(&self) -> u8 {
        match *self {
            CompressionMode::None => 0,
            CompressionMode::Lz4 => 1,
            CompressionMode::Zstd { level: _ } => 2,
        }
    }

    /// Maps a tag back to a mode, or `None` (the `Option`) for unknown tags.
    ///
    /// Tag `2` yields `Zstd { level: 0 }` because the tag does not carry the
    /// level used at compression time.
    pub fn from_tag(tag: u8) -> Option<Self> {
        let mode = match tag {
            0 => CompressionMode::None,
            1 => CompressionMode::Lz4,
            2 => CompressionMode::Zstd { level: 0 },
            _ => return Option::None,
        };
        Some(mode)
    }

    /// Zstd at level 3.
    pub fn zstd() -> Self {
        CompressionMode::Zstd { level: 3 }
    }

    /// Zstd at level 9.
    pub fn zstd_high() -> Self {
        CompressionMode::Zstd { level: 9 }
    }
}

/// The default mode is [`CompressionMode::None`].
impl Default for CompressionMode {
    fn default() -> Self {
        CompressionMode::None
    }
}
1163
1164impl PartialEq for KnowledgeObject {
1165 fn eq(&self, other: &Self) -> bool {
1166 self.oid == other.oid
1168 }
1169}
1170
1171impl Eq for KnowledgeObject {}
1172
1173impl std::hash::Hash for KnowledgeObject {
1174 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1175 self.oid.hash(state);
1176 }
1177}
1178
1179impl fmt::Display for KnowledgeObject {
1180 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1181 write!(
1182 f,
1183 "KO({}, kind={}, edges={}, embeddings={}, tags={})",
1184 &self.oid.to_hex()[..12],
1185 self.kind,
1186 self.edges.len(),
1187 self.embeddings.len(),
1188 self.tags.len()
1189 )
1190 }
1191}
1192
1193pub struct KnowledgeObjectBuilder {
1212 kind: ObjectKind,
1213 payload: SochValue,
1214 edges: Vec<Edge>,
1215 embeddings: HashMap<String, EmbeddingSpace>,
1216 temporal: BitemporalCoord,
1217 provenance: Provenance,
1218 namespace: Option<String>,
1219 tags: Vec<String>,
1220}
1221
1222impl KnowledgeObjectBuilder {
1223 pub fn new(kind: ObjectKind) -> Self {
1225 Self {
1226 kind,
1227 payload: SochValue::Object(HashMap::new()),
1228 edges: Vec::new(),
1229 embeddings: HashMap::new(),
1230 temporal: BitemporalCoord::default(),
1231 provenance: Provenance::root("system", 0),
1232 namespace: None,
1233 tags: Vec::new(),
1234 }
1235 }
1236
1237 pub fn payload(mut self, payload: SochValue) -> Self {
1239 self.payload = payload;
1240 self
1241 }
1242
1243 pub fn attribute(mut self, key: impl Into<String>, value: SochValue) -> Self {
1245 match &mut self.payload {
1246 SochValue::Object(map) => {
1247 map.insert(key.into(), value);
1248 }
1249 _ => {
1250 let mut map = HashMap::new();
1251 map.insert(key.into(), value);
1252 self.payload = SochValue::Object(map);
1253 }
1254 }
1255 self
1256 }
1257
1258 pub fn edge(mut self, edge: Edge) -> Self {
1260 self.edges.push(edge);
1261 self
1262 }
1263
1264 pub fn edges(mut self, edges: impl IntoIterator<Item = Edge>) -> Self {
1266 self.edges.extend(edges);
1267 self
1268 }
1269
1270 pub fn embedding(
1272 mut self,
1273 space: impl Into<String>,
1274 vector: Vec<f32>,
1275 ) -> Self {
1276 let space_name = space.into();
1277 self.embeddings.insert(
1278 space_name,
1279 EmbeddingSpace::new(vector, "unknown", 0),
1280 );
1281 self
1282 }
1283
1284 pub fn embedding_with_metadata(
1286 mut self,
1287 space: impl Into<String>,
1288 vector: Vec<f32>,
1289 model: impl Into<String>,
1290 generated_at: u64,
1291 ) -> Self {
1292 let space_name = space.into();
1293 self.embeddings.insert(
1294 space_name,
1295 EmbeddingSpace::new(vector, model, generated_at),
1296 );
1297 self
1298 }
1299
1300 pub fn valid_from(mut self, valid_from: u64) -> Self {
1302 self.temporal.valid_from = valid_from;
1303 self
1304 }
1305
1306 pub fn valid_to(mut self, valid_to: u64) -> Self {
1308 self.temporal.valid_to = valid_to;
1309 self
1310 }
1311
1312 pub fn system_time(mut self, system_time: u64) -> Self {
1314 self.temporal.system_time = system_time;
1315 self
1316 }
1317
1318 pub fn temporal(mut self, temporal: BitemporalCoord) -> Self {
1320 self.temporal = temporal;
1321 self
1322 }
1323
1324 pub fn provenance(mut self, provenance: Provenance) -> Self {
1326 self.provenance = provenance;
1327 self
1328 }
1329
1330 pub fn namespace(mut self, namespace: impl Into<String>) -> Self {
1332 self.namespace = Some(namespace.into());
1333 self
1334 }
1335
1336 pub fn tag(mut self, tag: impl Into<String>) -> Self {
1338 self.tags.push(tag.into());
1339 self
1340 }
1341
1342 pub fn tags(mut self, tags: impl IntoIterator<Item = impl Into<String>>) -> Self {
1344 self.tags.extend(tags.into_iter().map(|t| t.into()));
1345 self
1346 }
1347
1348 pub fn build(self) -> KnowledgeObject {
1350 let oid = KnowledgeObject::compute_oid(
1351 &self.kind,
1352 &self.payload,
1353 &self.edges,
1354 &self.embeddings,
1355 );
1356
1357 KnowledgeObject {
1358 oid,
1359 kind: self.kind,
1360 payload: self.payload,
1361 edges: self.edges,
1362 embeddings: self.embeddings,
1363 temporal: self.temporal,
1364 provenance: self.provenance,
1365 namespace: self.namespace,
1366 tags: self.tags,
1367 }
1368 }
1369
1370 pub fn build_with_oid(self, oid: ObjectId) -> KnowledgeObject {
1372 KnowledgeObject {
1373 oid,
1374 kind: self.kind,
1375 payload: self.payload,
1376 edges: self.edges,
1377 embeddings: self.embeddings,
1378 temporal: self.temporal,
1379 provenance: self.provenance,
1380 namespace: self.namespace,
1381 tags: self.tags,
1382 }
1383 }
1384}
1385
/// Errors reported by `KnowledgeObject` operations.
///
/// Variants that wrap a `String` carry the message of the underlying failure.
#[derive(Debug, Clone, thiserror::Error)]
pub enum KnowledgeObjectError {
    /// Serializing the object failed.
    #[error("serialization error: {0}")]
    SerializationError(String),

    /// The input bytes could not be turned back into an object.
    #[error("deserialization error: {0}")]
    DeserializationError(String),

    /// The stored OID differs from the one computed from the content;
    /// both are carried as strings.
    #[error("OID verification failed: stored={stored}, computed={computed}")]
    OidMismatch { stored: String, computed: String },

    /// A required embedding space (named by the payload) is absent.
    #[error("missing required embedding space: {0}")]
    MissingEmbedding(String),

    /// An embedding in `space` has `got` dimensions where `expected` were required.
    #[error("dimension mismatch in space '{space}': expected {expected}, got {got}")]
    DimensionMismatch {
        space: String,
        expected: u32,
        got: u32,
    },

    /// The validity interval is inverted (`valid_from > valid_to`).
    #[error("invalid temporal coordinates: valid_from ({valid_from}) > valid_to ({valid_to})")]
    InvalidTemporalRange { valid_from: u64, valid_to: u64 },

    /// Compressing or decompressing failed.
    #[error("compression error: {0}")]
    CompressionError(String),

    /// The leading tag byte of a compressed frame is not a known mode.
    #[error("unknown compression tag: {0}")]
    UnknownCompressionTag(u8),
}
1421
1422impl From<SochValue> for KnowledgeObjectBuilder {
1427 fn from(value: SochValue) -> Self {
1430 KnowledgeObjectBuilder::new(ObjectKind::Document).payload(value)
1431 }
1432}
1433
// Unit tests covering OID computation, temporal queries, edges, embeddings,
// provenance, serialization and the compression framing.
#[cfg(test)]
mod tests {
    use super::*;

    // Attribute insertion order must not affect the OID.
    #[test]
    fn test_content_addressing_determinism() {
        let ko1 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .attribute("age", SochValue::Int(30))
            .build();

        let ko2 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("age", SochValue::Int(30))
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        assert_eq!(ko1.oid(), ko2.oid());
    }

    // Different payloads must yield different OIDs.
    #[test]
    fn test_different_content_different_oid() {
        let ko1 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        let ko2 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Bob".into()))
            .build();

        assert_ne!(ko1.oid(), ko2.oid());
    }

    // A freshly built object verifies against its own OID.
    #[test]
    fn test_oid_verification() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("Hello, world!".into()))
            .build();

        assert!(ko.verify_oid());
    }

    // Validity is half-open [100, 200); knowledge starts at system time 50 inclusive.
    #[test]
    fn test_bitemporal_queries() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Event)
            .valid_from(100)
            .valid_to(200)
            .system_time(50)
            .build();

        assert!(ko.valid_at(150));
        assert!(!ko.valid_at(250));
        assert!(ko.known_at(50));
        assert!(ko.known_at(100));
        assert!(!ko.known_at(40));

        // Visible only when both axes match.
        assert!(ko.visible_at(60, 150));
        assert!(!ko.visible_at(40, 150));
    }

    // Two edges to the same target with different kinds are both kept.
    #[test]
    fn test_embedded_edges() {
        let target_oid = ObjectId::from_content(b"target_object");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .edge(Edge::new(target_oid, EdgeKind::typed("works_at"), 1.0))
            .edge(Edge::new(target_oid, EdgeKind::Contains, 0.5))
            .build();

        assert_eq!(ko.edges().len(), 2);
        assert_eq!(ko.edges_of_kind(&EdgeKind::typed("works_at")).len(), 1);
        assert_eq!(ko.edges_of_kind(&EdgeKind::Contains).len(), 1);
    }

    // Embeddings are looked up by space name and record their own dimensionality.
    #[test]
    fn test_multi_space_embeddings() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .embedding("semantic", vec![0.1, 0.2, 0.3])
            .embedding("code", vec![0.4, 0.5, 0.6, 0.7])
            .build();

        assert!(ko.embedding("semantic").is_some());
        assert!(ko.embedding("code").is_some());
        assert!(ko.embedding("nonexistent").is_none());
        assert_eq!(ko.embedding("semantic").unwrap().dimensions, 3);
        assert_eq!(ko.embedding("code").unwrap().dimensions, 4);
    }

    // Derived provenance keeps its parents and operation name.
    #[test]
    fn test_provenance_chain() {
        let parent_oid = ObjectId::from_content(b"parent_document");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Fact)
            .attribute("claim", SochValue::Text("X is true".into()))
            .provenance(Provenance::derived(
                vec![parent_oid],
                "extract_facts",
                "gpt-4",
                1700000000,
            ))
            .build();

        assert!(!ko.provenance().is_root());
        assert_eq!(ko.provenance().parents.len(), 1);
        assert_eq!(ko.provenance().parents[0], parent_oid);
        assert_eq!(ko.provenance().operation, "extract_facts");
    }

    // JSON round trip preserves OID, kind, tags and namespace.
    #[test]
    fn test_serialization_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .embedding("semantic", vec![0.1, 0.2, 0.3])
            .tag("person")
            .namespace("test")
            .build();

        let bytes = ko.to_bytes().unwrap();
        let restored = KnowledgeObject::from_bytes(&bytes).unwrap();

        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.kind(), restored.kind());
        assert_eq!(ko.tags(), restored.tags());
        assert_eq!(ko.namespace(), restored.namespace());
    }

    // Hex encoding and parsing are inverses.
    #[test]
    fn test_object_id_hex_roundtrip() {
        let oid = ObjectId::from_content(b"test content");
        let hex = oid.to_hex();
        let parsed = ObjectId::from_hex(&hex).unwrap();
        assert_eq!(oid, parsed);
    }

    // NIL is nil; a content hash is not.
    #[test]
    fn test_nil_oid() {
        assert!(ObjectId::NIL.is_nil());
        let non_nil = ObjectId::from_content(b"something");
        assert!(!non_nil.is_nil());
    }

    // Edge validity windows: [100, 200) and [150, MAX).
    #[test]
    fn test_edge_temporal_filtering() {
        let target = ObjectId::from_content(b"target");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .edge(Edge::with_validity(target, EdgeKind::typed("works_at"), 1.0, 100, 200))
            .edge(Edge::with_validity(target, EdgeKind::typed("manages"), 0.8, 150, u64::MAX))
            .build();

        // At 120 only the first edge is active.
        let active = ko.edges_valid_at(120);
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].kind, EdgeKind::typed("works_at"));

        // At 160 both windows overlap.
        assert_eq!(ko.edges_valid_at(160).len(), 2);

        // At 250 only the open-ended edge remains.
        let active = ko.edges_valid_at(250);
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].kind, EdgeKind::typed("manages"));
    }

    // The estimate must at least cover the embedding components (4 bytes each).
    #[test]
    fn test_estimated_size() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .embedding("semantic", vec![0.0; 384])
            .tag("test")
            .build();

        let size = ko.estimated_size();
        assert!(size > 384 * 4);
    }

    // Display output starts with "KO(" and includes the kind label.
    #[test]
    fn test_display() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        let display = format!("{}", ko);
        assert!(display.starts_with("KO("));
        assert!(display.contains("kind=entity"));
    }

    // Mode None writes tag 0 and round-trips.
    #[test]
    fn test_compression_none_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .embedding("semantic", vec![0.1; 128])
            .tag("person")
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::None).unwrap();
        assert_eq!(compressed[0], 0);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
    }

    // Repetitive data shrinks under LZ4, is tagged 1, and round-trips.
    #[test]
    fn test_compression_lz4_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("hello world ".repeat(100)))
            .embedding("semantic", vec![0.5; 384])
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::Lz4).unwrap();
        let raw = ko.to_bytes().unwrap();

        assert!(compressed.len() < raw.len(), "LZ4 should reduce size for repetitive data");
        assert_eq!(compressed[0], 1);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.tags(), restored.tags());
    }

    // Repetitive data shrinks under Zstd, is tagged 2, and round-trips.
    #[test]
    fn test_compression_zstd_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("hello world ".repeat(100)))
            .embedding("semantic", vec![0.5; 384])
            .tag("document")
            .namespace("test-ns")
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::zstd()).unwrap();
        let raw = ko.to_bytes().unwrap();

        assert!(compressed.len() < raw.len(), "ZSTD should reduce size");
        assert_eq!(compressed[0], 2);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.namespace(), restored.namespace());
    }

    // Whatever frame is chosen for a tiny object, it must still round-trip.
    #[test]
    fn test_compression_fallback_on_tiny_object() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Fact)
            .attribute("x", SochValue::Int(1))
            .build();

        let compressed_lz4 = ko.to_compressed_bytes(CompressionMode::Lz4).unwrap();
        let compressed_zstd = ko.to_compressed_bytes(CompressionMode::zstd()).unwrap();

        let r1 = KnowledgeObject::from_compressed_bytes(&compressed_lz4).unwrap();
        let r2 = KnowledgeObject::from_compressed_bytes(&compressed_zstd).unwrap();
        assert_eq!(ko.oid(), r1.oid());
        assert_eq!(ko.oid(), r2.oid());
    }

    // Ratios on highly repetitive text: LZ4 below 1.0, Zstd below LZ4.
    #[test]
    fn test_compression_ratio() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("data", SochValue::Text("abcdefgh".repeat(500)))
            .build();

        let ratio = ko.compression_ratio(CompressionMode::Lz4).unwrap();
        assert!(ratio < 1.0, "LZ4 should achieve < 1.0 ratio on repetitive data");

        let ratio_zstd = ko.compression_ratio(CompressionMode::zstd()).unwrap();
        assert!(ratio_zstd < ratio, "ZSTD should beat LZ4 ratio at default level");
    }

    // tag -> from_tag -> tag is the identity; unknown tags map to None.
    #[test]
    fn test_compression_mode_tag_roundtrip() {
        for mode in [CompressionMode::None, CompressionMode::Lz4, CompressionMode::zstd()] {
            let tag = mode.tag();
            let recovered = CompressionMode::from_tag(tag).unwrap();
            assert_eq!(mode.tag(), recovered.tag());
        }
        assert!(CompressionMode::from_tag(255).is_none());
    }

    // Fewer than 5 bytes cannot hold the frame header.
    #[test]
    fn test_compressed_bytes_too_short() {
        let result = KnowledgeObject::from_compressed_bytes(&[0, 1, 2]);
        assert!(result.is_err());
    }

    // Tag 99 is not a known compression mode.
    #[test]
    fn test_unknown_compression_tag() {
        let bad_bytes = vec![99, 0, 0, 0, 0];
        let result = KnowledgeObject::from_compressed_bytes(&bad_bytes);
        assert!(result.is_err());
    }
}