1use serde::{Deserialize, Serialize};
78use serde_json;
79use std::collections::HashMap;
80use std::fmt;
81use std::io::Read;
82
83use crate::soch::SochValue;
84
/// A 32-byte object identifier.
///
/// The inner array is private. Values are created with [`ObjectId::from_bytes`],
/// [`ObjectId::from_content`] (BLAKE3 of arbitrary bytes) or [`ObjectId::from_hex`],
/// and read back with [`ObjectId::as_bytes`] or [`ObjectId::to_hex`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ObjectId([u8; 32]);
104
105impl ObjectId {
106 pub fn from_bytes(bytes: [u8; 32]) -> Self {
108 Self(bytes)
109 }
110
111 pub fn from_content(content: &[u8]) -> Self {
113 let hash = blake3::hash(content);
114 Self(*hash.as_bytes())
115 }
116
117 pub fn as_bytes(&self) -> &[u8; 32] {
119 &self.0
120 }
121
122 pub fn to_hex(&self) -> String {
124 hex::encode(self.0)
125 }
126
127 pub fn from_hex(s: &str) -> Result<Self, ObjectIdError> {
129 let bytes = hex::decode(s).map_err(|_| ObjectIdError::InvalidHex)?;
130 if bytes.len() != 32 {
131 return Err(ObjectIdError::InvalidLength(bytes.len()));
132 }
133 let mut arr = [0u8; 32];
134 arr.copy_from_slice(&bytes);
135 Ok(Self(arr))
136 }
137
138 pub const NIL: Self = Self([0u8; 32]);
140
141 pub fn is_nil(&self) -> bool {
143 self.0 == [0u8; 32]
144 }
145}
146
147impl fmt::Debug for ObjectId {
148 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149 write!(f, "ObjectId({})", &self.to_hex()[..16]) }
151}
152
153impl fmt::Display for ObjectId {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 write!(f, "{}", self.to_hex())
156 }
157}
158
/// Errors returned by [`ObjectId::from_hex`].
#[derive(Debug, Clone, thiserror::Error)]
pub enum ObjectIdError {
    /// The input string could not be hex-decoded.
    #[error("invalid hex encoding")]
    InvalidHex,
    /// The input decoded successfully but not to 32 bytes; carries the decoded length.
    #[error("expected 32 bytes, got {0}")]
    InvalidLength(usize),
}
167
/// A pair of time axes: a valid-time interval plus a system-time stamp.
///
/// All three values are plain `u64`s; the unit is not fixed by this type
/// (NOTE(review): presumably a timestamp such as epoch seconds or micros — confirm with callers).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BitemporalCoord {
    /// Start of the valid-time interval; inclusive, see [`BitemporalCoord::valid_at`].
    pub valid_from: u64,

    /// End of the valid-time interval; exclusive. `u64::MAX` marks an open-ended
    /// ("current") interval, see [`BitemporalCoord::is_current`].
    pub valid_to: u64,

    /// System time from which the record counts as known, see [`BitemporalCoord::known_at`].
    pub system_time: u64,
}
201
202impl BitemporalCoord {
203 pub fn new(valid_from: u64, system_time: u64) -> Self {
205 Self {
206 valid_from,
207 valid_to: u64::MAX,
208 system_time,
209 }
210 }
211
212 pub fn with_valid_range(valid_from: u64, valid_to: u64, system_time: u64) -> Self {
214 Self {
215 valid_from,
216 valid_to,
217 system_time,
218 }
219 }
220
221 pub fn valid_at(&self, valid_time: u64) -> bool {
223 self.valid_from <= valid_time && valid_time < self.valid_to
224 }
225
226 pub fn known_at(&self, system_time: u64) -> bool {
228 self.system_time <= system_time
229 }
230
231 pub fn visible_at(&self, system_time: u64, valid_time: u64) -> bool {
233 self.known_at(system_time) && self.valid_at(valid_time)
234 }
235
236 pub fn close_valid_time(&mut self, valid_to: u64) {
238 self.valid_to = valid_to;
239 }
240
241 pub fn is_current(&self) -> bool {
243 self.valid_to == u64::MAX
244 }
245
246 pub const ETERNAL: Self = Self {
248 valid_from: 0,
249 valid_to: u64::MAX,
250 system_time: 0,
251 };
252}
253
254impl Default for BitemporalCoord {
255 fn default() -> Self {
256 Self::ETERNAL
257 }
258}
259
/// The relationship type carried by an [`Edge`].
///
/// Each variant maps to a string label via [`EdgeKind::label`]; the built-in
/// variants use fixed snake_case labels and `Typed` uses its own string.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EdgeKind {
    /// A caller-defined relationship identified by an arbitrary label.
    Typed(String),
    /// Built-in kind, labelled `"contains"`.
    Contains,
    /// Built-in kind, labelled `"derived_from"`.
    DerivedFrom,
    /// Built-in kind, labelled `"references"`.
    References,
    /// Built-in kind, labelled `"succeeds"`.
    Succeeds,
    /// Built-in kind, labelled `"similar_to"`.
    SimilarTo,
}
283
284impl EdgeKind {
285 pub fn typed(label: impl Into<String>) -> Self {
287 Self::Typed(label.into())
288 }
289
290 pub fn label(&self) -> &str {
292 match self {
293 EdgeKind::Typed(s) => s,
294 EdgeKind::Contains => "contains",
295 EdgeKind::DerivedFrom => "derived_from",
296 EdgeKind::References => "references",
297 EdgeKind::Succeeds => "succeeds",
298 EdgeKind::SimilarTo => "similar_to",
299 }
300 }
301}
302
303impl fmt::Display for EdgeKind {
304 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
305 write!(f, "{}", self.label())
306 }
307}
308
/// A directed link to another object, stored on the source object.
///
/// Equality (see the `PartialEq` impl) considers only `target` and `kind`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of the object this edge points to.
    pub target: ObjectId,

    /// Relationship type.
    pub kind: EdgeKind,

    /// Numeric weight attached to the edge; no range is enforced by this type.
    pub weight: f32,

    /// Start of the edge's validity interval (inclusive, see [`Edge::valid_at`]).
    pub valid_from: u64,

    /// End of the edge's validity interval (exclusive); `u64::MAX` means open-ended.
    pub valid_to: u64,

    /// Free-form key/value properties. Omitted from serialized output when empty and
    /// defaulted to empty when absent on input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub properties: HashMap<String, SochValue>,
}
354
355impl Edge {
356 pub fn new(target: ObjectId, kind: EdgeKind, weight: f32) -> Self {
358 Self {
359 target,
360 kind,
361 weight,
362 valid_from: 0,
363 valid_to: u64::MAX,
364 properties: HashMap::new(),
365 }
366 }
367
368 pub fn with_validity(
370 target: ObjectId,
371 kind: EdgeKind,
372 weight: f32,
373 valid_from: u64,
374 valid_to: u64,
375 ) -> Self {
376 Self {
377 target,
378 kind,
379 weight,
380 valid_from,
381 valid_to,
382 properties: HashMap::new(),
383 }
384 }
385
386 pub fn with_property(mut self, key: impl Into<String>, value: SochValue) -> Self {
388 self.properties.insert(key.into(), value);
389 self
390 }
391
392 pub fn valid_at(&self, time: u64) -> bool {
394 self.valid_from <= time && time < self.valid_to
395 }
396
397 pub fn is_current(&self) -> bool {
399 self.valid_to == u64::MAX
400 }
401}
402
403impl PartialEq for Edge {
404 fn eq(&self, other: &Self) -> bool {
405 self.target == other.target && self.kind == other.kind
406 }
407}
408
409impl Eq for Edge {}
410
/// The category of a [`KnowledgeObject`].
///
/// Each variant maps to a lowercase string via [`ObjectKind::label`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ObjectKind {
    /// Built-in kind, labelled `"entity"`.
    Entity,

    /// Built-in kind, labelled `"event"`.
    Event,

    /// Built-in kind, labelled `"episode"`.
    Episode,

    /// Built-in kind, labelled `"document"`.
    Document,

    /// Built-in kind, labelled `"fact"`.
    Fact,

    /// Built-in kind, labelled `"artifact"`.
    Artifact,

    /// A caller-defined kind; its label is the contained string.
    Custom(String),
}
446
447impl ObjectKind {
448 pub fn label(&self) -> &str {
450 match self {
451 ObjectKind::Entity => "entity",
452 ObjectKind::Event => "event",
453 ObjectKind::Episode => "episode",
454 ObjectKind::Document => "document",
455 ObjectKind::Fact => "fact",
456 ObjectKind::Artifact => "artifact",
457 ObjectKind::Custom(s) => s,
458 }
459 }
460}
461
462impl fmt::Display for ObjectKind {
463 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
464 write!(f, "{}", self.label())
465 }
466}
467
/// Records how an object came to exist: its inputs, the operation, and who ran it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
    /// Ids of the objects this one was derived from; empty for a root object
    /// (see [`Provenance::is_root`]).
    pub parents: Vec<ObjectId>,

    /// Name of the operation that produced the object ([`Provenance::root`] uses `"create"`).
    pub operation: String,

    /// Identifier of whoever or whatever performed the operation.
    pub agent: String,

    /// When the operation happened; the unit is not fixed by this type.
    pub timestamp: u64,

    /// Free-form extra details. Omitted from serialized output when empty and
    /// defaulted to empty when absent on input.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, SochValue>,
}
501
502impl Provenance {
503 pub fn root(agent: impl Into<String>, timestamp: u64) -> Self {
505 Self {
506 parents: Vec::new(),
507 operation: "create".to_string(),
508 agent: agent.into(),
509 timestamp,
510 metadata: HashMap::new(),
511 }
512 }
513
514 pub fn derived(
516 parents: Vec<ObjectId>,
517 operation: impl Into<String>,
518 agent: impl Into<String>,
519 timestamp: u64,
520 ) -> Self {
521 Self {
522 parents,
523 operation: operation.into(),
524 agent: agent.into(),
525 timestamp,
526 metadata: HashMap::new(),
527 }
528 }
529
530 pub fn with_metadata(mut self, key: impl Into<String>, value: SochValue) -> Self {
532 self.metadata.insert(key.into(), value);
533 self
534 }
535
536 pub fn is_root(&self) -> bool {
538 self.parents.is_empty()
539 }
540}
541
/// One embedding vector together with descriptive metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingSpace {
    /// The embedding components.
    pub vector: Vec<f32>,

    /// Number of components. [`EmbeddingSpace::new`] sets this to `vector.len()`, but
    /// because both fields are public nothing keeps them in sync afterwards.
    pub dimensions: u32,

    /// Name of the model that produced the vector.
    pub model: String,

    /// When the vector was generated; the unit is not fixed by this type.
    pub generated_at: u64,
}
571
572impl EmbeddingSpace {
573 pub fn new(vector: Vec<f32>, model: impl Into<String>, generated_at: u64) -> Self {
575 let dimensions = vector.len() as u32;
576 Self {
577 vector,
578 dimensions,
579 model: model.into(),
580 generated_at,
581 }
582 }
583
584 pub fn norm(&self) -> f32 {
586 self.vector.iter().map(|x| x * x).sum::<f32>().sqrt()
587 }
588
589 pub fn normalize(&mut self) {
591 let norm = self.norm();
592 if norm > f32::EPSILON {
593 for x in &mut self.vector {
594 *x /= norm;
595 }
596 }
597 }
598}
599
/// A content-addressed object: typed payload plus embedded edges, embeddings,
/// temporal coordinates and provenance.
///
/// All fields are private; read them through the accessor methods and construct
/// values with `KnowledgeObjectBuilder`. The `oid` is computed from `kind`,
/// `payload`, `edges` and `embeddings` only — `temporal`, `provenance`,
/// `namespace` and `tags` do not influence it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeObject {
    /// Identifier; equality and hashing of the whole object use only this field.
    oid: ObjectId,

    /// Category of the object.
    kind: ObjectKind,

    /// The object's data. Attribute lookups only work when this is `SochValue::Object`.
    payload: SochValue,

    /// Outgoing edges stored directly on the object.
    edges: Vec<Edge>,

    /// Embeddings keyed by space name; `"semantic"` is treated as the primary space.
    embeddings: HashMap<String, EmbeddingSpace>,

    /// Valid-time interval and system time of the object.
    temporal: BitemporalCoord,

    /// How the object was produced.
    provenance: Provenance,

    /// Optional namespace. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    namespace: Option<String>,

    /// Free-form tags. Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tags: Vec<String>,
}
662
663impl KnowledgeObject {
664 pub fn oid(&self) -> ObjectId {
670 self.oid
671 }
672
673 pub fn kind(&self) -> &ObjectKind {
675 &self.kind
676 }
677
678 pub fn payload(&self) -> &SochValue {
680 &self.payload
681 }
682
683 pub fn payload_mut(&mut self) -> &mut SochValue {
685 &mut self.payload
686 }
687
688 pub fn edges(&self) -> &[Edge] {
690 &self.edges
691 }
692
693 pub fn edges_of_kind(&self, kind: &EdgeKind) -> Vec<&Edge> {
695 self.edges.iter().filter(|e| &e.kind == kind).collect()
696 }
697
698 pub fn edges_valid_at(&self, time: u64) -> Vec<&Edge> {
700 self.edges.iter().filter(|e| e.valid_at(time)).collect()
701 }
702
703 pub fn embedding(&self, space: &str) -> Option<&EmbeddingSpace> {
705 self.embeddings.get(space)
706 }
707
708 pub fn embeddings(&self) -> &HashMap<String, EmbeddingSpace> {
710 &self.embeddings
711 }
712
713 pub fn primary_embedding(&self) -> Option<&[f32]> {
715 self.embeddings.get("semantic").map(|e| e.vector.as_slice())
716 }
717
718 pub fn temporal(&self) -> &BitemporalCoord {
720 &self.temporal
721 }
722
723 pub fn set_temporal(&mut self, coord: BitemporalCoord) {
728 self.temporal = coord;
729 }
730
731 pub fn provenance(&self) -> &Provenance {
733 &self.provenance
734 }
735
736 pub fn namespace(&self) -> Option<&str> {
738 self.namespace.as_deref()
739 }
740
741 pub fn tags(&self) -> &[String] {
743 &self.tags
744 }
745
746 pub fn has_tag(&self, tag: &str) -> bool {
748 self.tags.iter().any(|t| t == tag)
749 }
750
751 pub fn valid_at(&self, valid_time: u64) -> bool {
757 self.temporal.valid_at(valid_time)
758 }
759
760 pub fn known_at(&self, system_time: u64) -> bool {
762 self.temporal.known_at(system_time)
763 }
764
765 pub fn visible_at(&self, system_time: u64, valid_time: u64) -> bool {
767 self.temporal.visible_at(system_time, valid_time)
768 }
769
770 pub fn is_current(&self) -> bool {
772 self.temporal.is_current()
773 }
774
775 pub fn attribute(&self, key: &str) -> Option<&SochValue> {
781 match &self.payload {
782 SochValue::Object(map) => map.get(key),
783 _ => None,
784 }
785 }
786
787 pub fn text_attribute(&self, key: &str) -> Option<&str> {
789 self.attribute(key).and_then(|v| v.as_text())
790 }
791
792 pub fn int_attribute(&self, key: &str) -> Option<i64> {
794 self.attribute(key).and_then(|v| v.as_int())
795 }
796
797 pub fn recompute_oid(&mut self) {
804 self.oid = Self::compute_oid(&self.kind, &self.payload, &self.edges, &self.embeddings);
805 }
806
807 pub fn verify_oid(&self) -> bool {
809 let computed = Self::compute_oid(&self.kind, &self.payload, &self.edges, &self.embeddings);
810 self.oid == computed
811 }
812
813 fn compute_oid(
815 kind: &ObjectKind,
816 payload: &SochValue,
817 edges: &[Edge],
818 embeddings: &HashMap<String, EmbeddingSpace>,
819 ) -> ObjectId {
820 let canonical = Self::canonical_bytes(kind, payload, edges, embeddings);
821 ObjectId::from_content(&canonical)
822 }
823
824 fn canonical_bytes(
831 kind: &ObjectKind,
832 payload: &SochValue,
833 edges: &[Edge],
834 embeddings: &HashMap<String, EmbeddingSpace>,
835 ) -> Vec<u8> {
836 let mut hasher_input = Vec::with_capacity(1024);
842
843 let kind_bytes = kind.label().as_bytes();
845 hasher_input.extend_from_slice(&(kind_bytes.len() as u32).to_le_bytes());
846 hasher_input.extend_from_slice(kind_bytes);
847
848 let payload_bytes = canonical_soch_value_bytes(payload);
851 hasher_input.extend_from_slice(&(payload_bytes.len() as u32).to_le_bytes());
852 hasher_input.extend_from_slice(&payload_bytes);
853
854 let mut sorted_edges: Vec<_> = edges.iter().collect();
856 sorted_edges.sort_by(|a, b| {
857 a.target
858 .as_bytes()
859 .cmp(b.target.as_bytes())
860 .then_with(|| a.kind.label().cmp(b.kind.label()))
861 });
862 hasher_input.extend_from_slice(&(sorted_edges.len() as u32).to_le_bytes());
863 for edge in &sorted_edges {
864 hasher_input.extend_from_slice(edge.target.as_bytes());
865 let kind_label = edge.kind.label().as_bytes();
866 hasher_input.extend_from_slice(&(kind_label.len() as u32).to_le_bytes());
867 hasher_input.extend_from_slice(kind_label);
868 hasher_input.extend_from_slice(&edge.weight.to_le_bytes());
869 }
870
871 let mut sorted_spaces: Vec<_> = embeddings.iter().collect();
873 sorted_spaces.sort_by_key(|(name, _)| *name);
874 hasher_input.extend_from_slice(&(sorted_spaces.len() as u32).to_le_bytes());
875 for (name, embedding) in &sorted_spaces {
876 let name_bytes = name.as_bytes();
877 hasher_input.extend_from_slice(&(name_bytes.len() as u32).to_le_bytes());
878 hasher_input.extend_from_slice(name_bytes);
879 hasher_input.extend_from_slice(&embedding.dimensions.to_le_bytes());
880 for &v in &embedding.vector {
881 hasher_input.extend_from_slice(&v.to_le_bytes());
882 }
883 }
884
885 hasher_input
886 }
887}
888
889fn canonical_soch_value_bytes(value: &SochValue) -> Vec<u8> {
892 let mut buf = Vec::with_capacity(256);
893 write_canonical_soch_value(&mut buf, value);
894 buf
895}
896
897fn write_canonical_soch_value(buf: &mut Vec<u8>, value: &SochValue) {
899 match value {
900 SochValue::Null => buf.push(0),
901 SochValue::Bool(b) => {
902 buf.push(1);
903 buf.push(if *b { 1 } else { 0 });
904 }
905 SochValue::Int(i) => {
906 buf.push(2);
907 buf.extend_from_slice(&i.to_le_bytes());
908 }
909 SochValue::UInt(u) => {
910 buf.push(3);
911 buf.extend_from_slice(&u.to_le_bytes());
912 }
913 SochValue::Float(f) => {
914 buf.push(4);
915 let normalized = if f.is_nan() {
917 0.0
918 } else if *f == 0.0 {
919 0.0
920 } else {
921 *f
922 };
923 buf.extend_from_slice(&normalized.to_le_bytes());
924 }
925 SochValue::Text(s) => {
926 buf.push(5);
927 buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
928 buf.extend_from_slice(s.as_bytes());
929 }
930 SochValue::Binary(b) => {
931 buf.push(6);
932 buf.extend_from_slice(&(b.len() as u32).to_le_bytes());
933 buf.extend_from_slice(b);
934 }
935 SochValue::Array(arr) => {
936 buf.push(7);
937 buf.extend_from_slice(&(arr.len() as u32).to_le_bytes());
938 for item in arr {
939 write_canonical_soch_value(buf, item);
940 }
941 }
942 SochValue::Object(map) => {
943 buf.push(8);
944 let mut sorted_keys: Vec<&String> = map.keys().collect();
946 sorted_keys.sort();
947 buf.extend_from_slice(&(sorted_keys.len() as u32).to_le_bytes());
948 for key in sorted_keys {
949 buf.extend_from_slice(&(key.len() as u32).to_le_bytes());
950 buf.extend_from_slice(key.as_bytes());
951 write_canonical_soch_value(buf, &map[key]);
952 }
953 }
954 SochValue::Ref { table, id } => {
955 buf.push(9);
956 buf.extend_from_slice(&(table.len() as u32).to_le_bytes());
957 buf.extend_from_slice(table.as_bytes());
958 buf.extend_from_slice(&id.to_le_bytes());
959 }
960 }
961}
962
963impl KnowledgeObject {
964 pub fn to_bytes(&self) -> Result<Vec<u8>, KnowledgeObjectError> {
971 serde_json::to_vec(self)
972 .map_err(|e| KnowledgeObjectError::SerializationError(e.to_string()))
973 }
974
975 pub fn from_bytes(bytes: &[u8]) -> Result<Self, KnowledgeObjectError> {
977 serde_json::from_slice(bytes)
978 .map_err(|e| KnowledgeObjectError::DeserializationError(e.to_string()))
979 }
980
981 pub fn estimated_size(&self) -> usize {
983 std::mem::size_of::<Self>()
984 + self.edges.len() * std::mem::size_of::<Edge>()
985 + self
986 .embeddings
987 .values()
988 .map(|e| e.vector.len() * 4)
989 .sum::<usize>()
990 + self.tags.iter().map(|t| t.len()).sum::<usize>()
991 }
992
993 pub fn to_compressed_bytes(
1007 &self,
1008 mode: CompressionMode,
1009 ) -> Result<Vec<u8>, KnowledgeObjectError> {
1010 let raw = self.to_bytes()?;
1011 let original_len = raw.len() as u32;
1012
1013 match mode {
1014 CompressionMode::None => {
1015 let mut out = Vec::with_capacity(5 + raw.len());
1016 out.push(CompressionMode::None.tag());
1017 out.extend_from_slice(&original_len.to_le_bytes());
1018 out.extend_from_slice(&raw);
1019 Ok(out)
1020 }
1021 CompressionMode::Lz4 => {
1022 let compressed = lz4::block::compress(&raw, None, false)
1023 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1024 if compressed.len() >= raw.len() {
1026 let mut out = Vec::with_capacity(5 + raw.len());
1027 out.push(CompressionMode::None.tag());
1028 out.extend_from_slice(&original_len.to_le_bytes());
1029 out.extend_from_slice(&raw);
1030 return Ok(out);
1031 }
1032 let mut out = Vec::with_capacity(5 + compressed.len());
1033 out.push(CompressionMode::Lz4.tag());
1034 out.extend_from_slice(&original_len.to_le_bytes());
1035 out.extend_from_slice(&compressed);
1036 Ok(out)
1037 }
1038 CompressionMode::Zstd { level } => {
1039 let compressed = zstd::encode_all(raw.as_slice(), level)
1040 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1041 if compressed.len() >= raw.len() {
1042 let mut out = Vec::with_capacity(5 + raw.len());
1043 out.push(CompressionMode::None.tag());
1044 out.extend_from_slice(&original_len.to_le_bytes());
1045 out.extend_from_slice(&raw);
1046 return Ok(out);
1047 }
1048 let mut out = Vec::with_capacity(5 + compressed.len());
1049 out.push(CompressionMode::Zstd { level }.tag());
1050 out.extend_from_slice(&original_len.to_le_bytes());
1051 out.extend_from_slice(&compressed);
1052 Ok(out)
1053 }
1054 }
1055 }
1056
1057 pub fn from_compressed_bytes(bytes: &[u8]) -> Result<Self, KnowledgeObjectError> {
1061 if bytes.len() < 5 {
1062 return Err(KnowledgeObjectError::DeserializationError(
1063 "compressed payload too short (need >= 5 bytes)".into(),
1064 ));
1065 }
1066
1067 let tag = bytes[0];
1068 let original_len = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
1069 let payload = &bytes[5..];
1070
1071 let raw = match tag {
1072 0 => {
1073 payload.to_vec()
1075 }
1076 1 => {
1077 lz4::block::decompress(payload, Some(original_len as i32))
1079 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?
1080 }
1081 2 => {
1082 let mut decoder = zstd::Decoder::new(payload)
1084 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1085 let mut raw = Vec::with_capacity(original_len);
1086 decoder
1087 .read_to_end(&mut raw)
1088 .map_err(|e| KnowledgeObjectError::CompressionError(e.to_string()))?;
1089 raw
1090 }
1091 _ => {
1092 return Err(KnowledgeObjectError::UnknownCompressionTag(tag));
1093 }
1094 };
1095
1096 Self::from_bytes(&raw)
1097 }
1098
1099 pub fn compression_ratio(&self, mode: CompressionMode) -> Result<f64, KnowledgeObjectError> {
1102 let raw_len = self.to_bytes()?.len() as f64;
1103 let compressed_len = self.to_compressed_bytes(mode)?.len() as f64;
1104 Ok(compressed_len / raw_len)
1105 }
1106}
1107
/// Compression applied to a serialized object.
///
/// Each mode has a one-byte wire tag (see [`CompressionMode::tag`]); the zstd level is
/// an encoder setting only and is not part of the tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMode {
    /// No compression (tag 0).
    None,
    /// LZ4 compression (tag 1).
    Lz4,
    /// Zstandard compression at the given level (tag 2).
    Zstd { level: i32 },
}

impl CompressionMode {
    /// Returns the wire tag: 0 for `None`, 1 for `Lz4`, 2 for any `Zstd` level.
    pub fn tag(&self) -> u8 {
        match *self {
            CompressionMode::None => 0,
            CompressionMode::Lz4 => 1,
            CompressionMode::Zstd { level: _ } => 2,
        }
    }

    /// Maps a wire tag back to a mode, or `None` (the `Option`) for unknown tags.
    ///
    /// The level is not recoverable from the tag, so tag 2 yields `Zstd { level: 0 }`.
    pub fn from_tag(tag: u8) -> Option<Self> {
        // Indexed by tag value; anything past the table is unknown.
        const BY_TAG: [CompressionMode; 3] = [
            CompressionMode::None,
            CompressionMode::Lz4,
            CompressionMode::Zstd { level: 0 },
        ];
        BY_TAG.get(usize::from(tag)).copied()
    }

    /// Zstandard at level 3.
    pub fn zstd() -> Self {
        CompressionMode::Zstd { level: 3 }
    }

    /// Zstandard at level 9.
    pub fn zstd_high() -> Self {
        CompressionMode::Zstd { level: 9 }
    }
}

/// The default mode is [`CompressionMode::None`].
impl Default for CompressionMode {
    fn default() -> Self {
        CompressionMode::None
    }
}
1166
1167impl PartialEq for KnowledgeObject {
1168 fn eq(&self, other: &Self) -> bool {
1169 self.oid == other.oid
1171 }
1172}
1173
1174impl Eq for KnowledgeObject {}
1175
1176impl std::hash::Hash for KnowledgeObject {
1177 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1178 self.oid.hash(state);
1179 }
1180}
1181
1182impl fmt::Display for KnowledgeObject {
1183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1184 write!(
1185 f,
1186 "KO({}, kind={}, edges={}, embeddings={}, tags={})",
1187 &self.oid.to_hex()[..12],
1188 self.kind,
1189 self.edges.len(),
1190 self.embeddings.len(),
1191 self.tags.len()
1192 )
1193 }
1194}
1195
1196pub struct KnowledgeObjectBuilder {
1215 kind: ObjectKind,
1216 payload: SochValue,
1217 edges: Vec<Edge>,
1218 embeddings: HashMap<String, EmbeddingSpace>,
1219 temporal: BitemporalCoord,
1220 provenance: Provenance,
1221 namespace: Option<String>,
1222 tags: Vec<String>,
1223}
1224
1225impl KnowledgeObjectBuilder {
1226 pub fn new(kind: ObjectKind) -> Self {
1228 Self {
1229 kind,
1230 payload: SochValue::Object(HashMap::new()),
1231 edges: Vec::new(),
1232 embeddings: HashMap::new(),
1233 temporal: BitemporalCoord::default(),
1234 provenance: Provenance::root("system", 0),
1235 namespace: None,
1236 tags: Vec::new(),
1237 }
1238 }
1239
1240 pub fn payload(mut self, payload: SochValue) -> Self {
1242 self.payload = payload;
1243 self
1244 }
1245
1246 pub fn attribute(mut self, key: impl Into<String>, value: SochValue) -> Self {
1248 match &mut self.payload {
1249 SochValue::Object(map) => {
1250 map.insert(key.into(), value);
1251 }
1252 _ => {
1253 let mut map = HashMap::new();
1254 map.insert(key.into(), value);
1255 self.payload = SochValue::Object(map);
1256 }
1257 }
1258 self
1259 }
1260
1261 pub fn edge(mut self, edge: Edge) -> Self {
1263 self.edges.push(edge);
1264 self
1265 }
1266
1267 pub fn edges(mut self, edges: impl IntoIterator<Item = Edge>) -> Self {
1269 self.edges.extend(edges);
1270 self
1271 }
1272
1273 pub fn embedding(mut self, space: impl Into<String>, vector: Vec<f32>) -> Self {
1275 let space_name = space.into();
1276 self.embeddings
1277 .insert(space_name, EmbeddingSpace::new(vector, "unknown", 0));
1278 self
1279 }
1280
1281 pub fn embedding_with_metadata(
1283 mut self,
1284 space: impl Into<String>,
1285 vector: Vec<f32>,
1286 model: impl Into<String>,
1287 generated_at: u64,
1288 ) -> Self {
1289 let space_name = space.into();
1290 self.embeddings
1291 .insert(space_name, EmbeddingSpace::new(vector, model, generated_at));
1292 self
1293 }
1294
1295 pub fn valid_from(mut self, valid_from: u64) -> Self {
1297 self.temporal.valid_from = valid_from;
1298 self
1299 }
1300
1301 pub fn valid_to(mut self, valid_to: u64) -> Self {
1303 self.temporal.valid_to = valid_to;
1304 self
1305 }
1306
1307 pub fn system_time(mut self, system_time: u64) -> Self {
1309 self.temporal.system_time = system_time;
1310 self
1311 }
1312
1313 pub fn temporal(mut self, temporal: BitemporalCoord) -> Self {
1315 self.temporal = temporal;
1316 self
1317 }
1318
1319 pub fn provenance(mut self, provenance: Provenance) -> Self {
1321 self.provenance = provenance;
1322 self
1323 }
1324
1325 pub fn namespace(mut self, namespace: impl Into<String>) -> Self {
1327 self.namespace = Some(namespace.into());
1328 self
1329 }
1330
1331 pub fn tag(mut self, tag: impl Into<String>) -> Self {
1333 self.tags.push(tag.into());
1334 self
1335 }
1336
1337 pub fn tags(mut self, tags: impl IntoIterator<Item = impl Into<String>>) -> Self {
1339 self.tags.extend(tags.into_iter().map(|t| t.into()));
1340 self
1341 }
1342
1343 pub fn build(self) -> KnowledgeObject {
1345 let oid =
1346 KnowledgeObject::compute_oid(&self.kind, &self.payload, &self.edges, &self.embeddings);
1347
1348 KnowledgeObject {
1349 oid,
1350 kind: self.kind,
1351 payload: self.payload,
1352 edges: self.edges,
1353 embeddings: self.embeddings,
1354 temporal: self.temporal,
1355 provenance: self.provenance,
1356 namespace: self.namespace,
1357 tags: self.tags,
1358 }
1359 }
1360
1361 pub fn build_with_oid(self, oid: ObjectId) -> KnowledgeObject {
1363 KnowledgeObject {
1364 oid,
1365 kind: self.kind,
1366 payload: self.payload,
1367 edges: self.edges,
1368 embeddings: self.embeddings,
1369 temporal: self.temporal,
1370 provenance: self.provenance,
1371 namespace: self.namespace,
1372 tags: self.tags,
1373 }
1374 }
1375}
1376
/// Errors produced while serializing, deserializing, compressing or validating a
/// [`KnowledgeObject`].
///
/// Underlying library errors are carried as their string messages.
#[derive(Debug, Clone, thiserror::Error)]
pub enum KnowledgeObjectError {
    /// Converting the object to bytes failed.
    #[error("serialization error: {0}")]
    SerializationError(String),

    /// Parsing bytes into an object failed, or the input was too short to hold a frame header.
    #[error("deserialization error: {0}")]
    DeserializationError(String),

    /// The stored id differs from the id computed from the content (both as strings).
    #[error("OID verification failed: stored={stored}, computed={computed}")]
    OidMismatch { stored: String, computed: String },

    /// A required embedding space (named by the payload) is absent.
    #[error("missing required embedding space: {0}")]
    MissingEmbedding(String),

    /// An embedding in `space` has `got` dimensions where `expected` were required.
    #[error("dimension mismatch in space '{space}': expected {expected}, got {got}")]
    DimensionMismatch {
        space: String,
        expected: u32,
        got: u32,
    },

    /// The valid-time interval is inverted.
    #[error("invalid temporal coordinates: valid_from ({valid_from}) > valid_to ({valid_to})")]
    InvalidTemporalRange { valid_from: u64, valid_to: u64 },

    /// The compressor or decompressor reported a failure.
    #[error("compression error: {0}")]
    CompressionError(String),

    /// The frame's tag byte does not correspond to a known compression mode.
    #[error("unknown compression tag: {0}")]
    UnknownCompressionTag(u8),
}
1412
1413impl From<SochValue> for KnowledgeObjectBuilder {
1418 fn from(value: SochValue) -> Self {
1421 KnowledgeObjectBuilder::new(ObjectKind::Document).payload(value)
1422 }
1423}
1424
// Unit tests for object ids, temporal queries, edges, embeddings, provenance,
// serialization and the compressed frame format.
#[cfg(test)]
mod tests {
    use super::*;

    // The id must not depend on the order in which attributes were inserted.
    #[test]
    fn test_content_addressing_determinism() {
        let ko1 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .attribute("age", SochValue::Int(30))
            .build();

        let ko2 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("age", SochValue::Int(30))
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        assert_eq!(ko1.oid(), ko2.oid());
    }

    // Different payloads must produce different ids.
    #[test]
    fn test_different_content_different_oid() {
        let ko1 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        let ko2 = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Bob".into()))
            .build();

        assert_ne!(ko1.oid(), ko2.oid());
    }

    // A freshly built object carries an id that matches its content.
    #[test]
    fn test_oid_verification() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("Hello, world!".into()))
            .build();

        assert!(ko.verify_oid());
    }

    // Valid-time and system-time checks, separately and combined.
    #[test]
    fn test_bitemporal_queries() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Event)
            .valid_from(100)
            .valid_to(200)
            .system_time(50)
            .build();

        assert!(ko.valid_at(150));
        assert!(!ko.valid_at(250));
        assert!(ko.known_at(50));
        assert!(ko.known_at(100));
        assert!(!ko.known_at(40));

        // Known and valid.
        assert!(ko.visible_at(60, 150));
        // Valid, but not yet known at system time 40.
        assert!(!ko.visible_at(40, 150));
    }

    // Edges are stored on the object and can be filtered by kind.
    #[test]
    fn test_embedded_edges() {
        let target_oid = ObjectId::from_content(b"target_object");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .edge(Edge::new(target_oid, EdgeKind::typed("works_at"), 1.0))
            .edge(Edge::new(target_oid, EdgeKind::Contains, 0.5))
            .build();

        assert_eq!(ko.edges().len(), 2);
        assert_eq!(ko.edges_of_kind(&EdgeKind::typed("works_at")).len(), 1);
        assert_eq!(ko.edges_of_kind(&EdgeKind::Contains).len(), 1);
    }

    // Several embedding spaces with different dimensions can coexist.
    #[test]
    fn test_multi_space_embeddings() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .embedding("semantic", vec![0.1, 0.2, 0.3])
            .embedding("code", vec![0.4, 0.5, 0.6, 0.7])
            .build();

        assert!(ko.embedding("semantic").is_some());
        assert!(ko.embedding("code").is_some());
        assert!(ko.embedding("nonexistent").is_none());
        assert_eq!(ko.embedding("semantic").unwrap().dimensions, 3);
        assert_eq!(ko.embedding("code").unwrap().dimensions, 4);
    }

    // Derived provenance keeps its parents and operation name.
    #[test]
    fn test_provenance_chain() {
        let parent_oid = ObjectId::from_content(b"parent_document");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Fact)
            .attribute("claim", SochValue::Text("X is true".into()))
            .provenance(Provenance::derived(
                vec![parent_oid],
                "extract_facts",
                "gpt-4",
                1700000000,
            ))
            .build();

        assert!(!ko.provenance().is_root());
        assert_eq!(ko.provenance().parents.len(), 1);
        assert_eq!(ko.provenance().parents[0], parent_oid);
        assert_eq!(ko.provenance().operation, "extract_facts");
    }

    // JSON round trip preserves id, kind, tags and namespace.
    #[test]
    fn test_serialization_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .embedding("semantic", vec![0.1, 0.2, 0.3])
            .tag("person")
            .namespace("test")
            .build();

        let bytes = ko.to_bytes().unwrap();
        let restored = KnowledgeObject::from_bytes(&bytes).unwrap();

        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.kind(), restored.kind());
        assert_eq!(ko.tags(), restored.tags());
        assert_eq!(ko.namespace(), restored.namespace());
    }

    // Hex encoding and decoding are inverses.
    #[test]
    fn test_object_id_hex_roundtrip() {
        let oid = ObjectId::from_content(b"test content");
        let hex = oid.to_hex();
        let parsed = ObjectId::from_hex(&hex).unwrap();
        assert_eq!(oid, parsed);
    }

    // NIL is nil; a hashed id is not.
    #[test]
    fn test_nil_oid() {
        assert!(ObjectId::NIL.is_nil());
        let non_nil = ObjectId::from_content(b"something");
        assert!(!non_nil.is_nil());
    }

    // Edge validity intervals are half-open: [valid_from, valid_to).
    #[test]
    fn test_edge_temporal_filtering() {
        let target = ObjectId::from_content(b"target");

        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .edge(Edge::with_validity(
                target,
                EdgeKind::typed("works_at"),
                1.0,
                100,
                200,
            ))
            .edge(Edge::with_validity(
                target,
                EdgeKind::typed("manages"),
                0.8,
                150,
                u64::MAX,
            ))
            .build();

        // At 120 only "works_at" has started.
        let active = ko.edges_valid_at(120);
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].kind, EdgeKind::typed("works_at"));

        // At 160 both intervals overlap.
        assert_eq!(ko.edges_valid_at(160).len(), 2);

        // At 250 "works_at" has ended; the open-ended "manages" remains.
        let active = ko.edges_valid_at(250);
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].kind, EdgeKind::typed("manages"));
    }

    // The estimate must at least cover the embedding data (4 bytes per component).
    #[test]
    fn test_estimated_size() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .embedding("semantic", vec![0.0; 384])
            .tag("test")
            .build();

        let size = ko.estimated_size();
        assert!(size > 384 * 4);
    }

    // Display output starts with "KO(" and names the kind.
    #[test]
    fn test_display() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .build();

        let display = format!("{}", ko);
        assert!(display.starts_with("KO("));
        assert!(display.contains("kind=entity"));
    }

    // Mode `None` writes tag 0 and round-trips.
    #[test]
    fn test_compression_none_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Entity)
            .attribute("name", SochValue::Text("Alice".into()))
            .embedding("semantic", vec![0.1; 128])
            .tag("person")
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::None).unwrap();
        // First byte of the frame is the compression tag.
        assert_eq!(compressed[0], 0);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
    }

    // LZ4 shrinks repetitive data, writes tag 1 and round-trips.
    #[test]
    fn test_compression_lz4_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("hello world ".repeat(100)))
            .embedding("semantic", vec![0.5; 384])
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::Lz4).unwrap();
        let raw = ko.to_bytes().unwrap();

        assert!(
            compressed.len() < raw.len(),
            "LZ4 should reduce size for repetitive data"
        );
        // First byte of the frame is the compression tag.
        assert_eq!(compressed[0], 1);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.tags(), restored.tags());
    }

    // Zstd shrinks repetitive data, writes tag 2 and round-trips.
    #[test]
    fn test_compression_zstd_roundtrip() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("content", SochValue::Text("hello world ".repeat(100)))
            .embedding("semantic", vec![0.5; 384])
            .tag("document")
            .namespace("test-ns")
            .build();

        let compressed = ko.to_compressed_bytes(CompressionMode::zstd()).unwrap();
        let raw = ko.to_bytes().unwrap();

        assert!(compressed.len() < raw.len(), "ZSTD should reduce size");
        // First byte of the frame is the compression tag.
        assert_eq!(compressed[0], 2);
        let restored = KnowledgeObject::from_compressed_bytes(&compressed).unwrap();
        assert_eq!(ko.oid(), restored.oid());
        assert_eq!(ko.namespace(), restored.namespace());
    }

    // Whatever form the frame takes for a tiny object, it must still round-trip.
    #[test]
    fn test_compression_fallback_on_tiny_object() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Fact)
            .attribute("x", SochValue::Int(1))
            .build();

        let compressed_lz4 = ko.to_compressed_bytes(CompressionMode::Lz4).unwrap();
        let compressed_zstd = ko.to_compressed_bytes(CompressionMode::zstd()).unwrap();

        let r1 = KnowledgeObject::from_compressed_bytes(&compressed_lz4).unwrap();
        let r2 = KnowledgeObject::from_compressed_bytes(&compressed_zstd).unwrap();
        assert_eq!(ko.oid(), r1.oid());
        assert_eq!(ko.oid(), r2.oid());
    }

    // Ratios on highly repetitive data: both below 1.0, zstd below LZ4.
    #[test]
    fn test_compression_ratio() {
        let ko = KnowledgeObjectBuilder::new(ObjectKind::Document)
            .attribute("data", SochValue::Text("abcdefgh".repeat(500)))
            .build();

        let ratio = ko.compression_ratio(CompressionMode::Lz4).unwrap();
        assert!(
            ratio < 1.0,
            "LZ4 should achieve < 1.0 ratio on repetitive data"
        );

        let ratio_zstd = ko.compression_ratio(CompressionMode::zstd()).unwrap();
        assert!(
            ratio_zstd < ratio,
            "ZSTD should beat LZ4 ratio at default level"
        );
    }

    // tag -> mode -> tag is the identity; unknown tags map to None.
    #[test]
    fn test_compression_mode_tag_roundtrip() {
        for mode in [
            CompressionMode::None,
            CompressionMode::Lz4,
            CompressionMode::zstd(),
        ] {
            let tag = mode.tag();
            let recovered = CompressionMode::from_tag(tag).unwrap();
            // Compare tags rather than modes: the zstd level is not encoded in the tag.
            assert_eq!(mode.tag(), recovered.tag());
        }
        assert!(CompressionMode::from_tag(255).is_none());
    }

    // Inputs shorter than the 5-byte header are rejected.
    #[test]
    fn test_compressed_bytes_too_short() {
        let result = KnowledgeObject::from_compressed_bytes(&[0, 1, 2]);
        assert!(result.is_err());
    }

    // A frame whose tag byte is not 0, 1 or 2 is rejected.
    #[test]
    fn test_unknown_compression_tag() {
        // Tag 99 followed by a zero length and no body.
        let bad_bytes = vec![99, 0, 0, 0, 0];
        let result = KnowledgeObject::from_compressed_bytes(&bad_bytes);
        assert!(result.is_err());
    }
}