1use std::{
23 collections::BTreeSet,
24 error::Error,
25 fmt,
26 io::Cursor,
27 string::{String, ToString},
28 sync::Arc,
29 vec::Vec,
30};
31
32use arrow_array::{Array, ArrayRef, PrimitiveArray, RecordBatch, types::ArrowPrimitiveType};
33use arrow_ipc::{reader::StreamReader, writer::StreamWriter};
34use arrow_schema::{DataType, Field, Schema};
35use arrow_select::take::take;
36use oxgraph_snapshot::{SectionViewError, Snapshot};
37use oxgraph_topology::{
38 ElementIndex, ElementWeight, IncidenceBase, IncidenceIndex, IncidenceWeight, RelationIndex,
39 RelationWeight, TopologyBase,
40};
41use zerocopy::{
42 FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned,
43 byteorder::{LE, U16, U32, U64},
44};
45
/// Snapshot section kind tag: property descriptors, `u16` index width.
pub const SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U16: u32 = 0x0100;
/// Snapshot section kind tag: property data, `u16` index width.
pub const SNAPSHOT_KIND_PROPERTY_DATA_U16: u32 = 0x0101;
/// Snapshot section kind tag: property descriptors, `u32` index width.
pub const SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U32: u32 = 0x0102;
/// Snapshot section kind tag: property data, `u32` index width.
pub const SNAPSHOT_KIND_PROPERTY_DATA_U32: u32 = 0x0103;
/// Snapshot section kind tag: property descriptors, `u64` index width.
pub const SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U64: u32 = 0x0104;
/// Snapshot section kind tag: property data, `u64` index width.
pub const SNAPSHOT_KIND_PROPERTY_DATA_U64: u32 = 0x0105;

/// Snapshot section kind tag: identity-mode records, `u16` word width.
pub const SNAPSHOT_KIND_IDENTITY_MODES_U16: u32 = 0x0110;

/// Snapshot section kind tag: identity-mode records, `u32` word width.
pub const SNAPSHOT_KIND_IDENTITY_MODES_U32: u32 = 0x0111;

/// Snapshot section kind tag: identity-mode records, `u64` word width.
pub const SNAPSHOT_KIND_IDENTITY_MODES_U64: u32 = 0x0112;

/// Snapshot section kind tag: element identity map, `u16` word width.
pub const SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U16: u32 = 0x0113;

/// Snapshot section kind tag: element identity map, `u32` word width.
pub const SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U32: u32 = 0x0114;

/// Snapshot section kind tag: element identity map, `u64` word width.
pub const SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U64: u32 = 0x0115;

/// Snapshot section kind tag: relation identity map, `u16` word width.
pub const SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U16: u32 = 0x0116;

/// Snapshot section kind tag: relation identity map, `u32` word width.
pub const SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U32: u32 = 0x0117;

/// Snapshot section kind tag: relation identity map, `u64` word width.
pub const SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U64: u32 = 0x0118;

/// Snapshot section kind tag: incidence identity map, `u16` word width.
pub const SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U16: u32 = 0x0119;

/// Snapshot section kind tag: incidence identity map, `u32` word width.
pub const SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U32: u32 = 0x011A;

/// Snapshot section kind tag: incidence identity map, `u64` word width.
pub const SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U64: u32 = 0x011B;

/// Section version this module accepts; snapshot sections are compared
/// against it when they are validated.
pub const SNAPSHOT_PROPERTY_VERSION: u32 = 1;
149
/// Newtype wrapper around a caller-chosen property layer identifier.
///
/// The wrapped value is public; ordering, equality and hashing are those of
/// the inner `Id`.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LayerId<Id>(pub Id);
157
// Private module holding marker supertraits. The public `PropertyIndex`,
// `PropertySnapshotMetaWord` and `PropertyAxis` traits each require the
// same-named trait from here, and this module is not `pub`, so code outside
// this module tree cannot name these markers to implement the public traits.
mod sealed {
    // Marker required by the public `PropertyIndex` trait.
    pub trait PropertyIndex {}

    // Marker required by the public `PropertySnapshotMetaWord` trait.
    pub trait PropertySnapshotMetaWord {}

    // Marker required by the public `PropertyAxis` trait.
    pub trait PropertyAxis {}
}
169
/// Integer type usable as a property index.
///
/// The trait is sealed through `sealed::PropertyIndex`; this file implements
/// it for `u16`, `u32` and `u64` via `impl_property_width!`.
pub trait PropertyIndex: sealed::PropertyIndex + Copy + Ord {
    /// Arrow primitive type whose native value type is `Self`.
    type ArrowType: ArrowPrimitiveType<Native = Self> + 'static;

    /// Unaligned little-endian byte-level representation of `Self`.
    type LittleEndianWord: FromBytes + Immutable + IntoBytes + KnownLayout + Unaligned + Copy;

    /// Converts to `usize`, or `None` when the value does not fit.
    fn to_usize(self) -> Option<usize>;

    /// Converts from `usize`, or `None` when the value does not fit.
    fn from_usize(value: usize) -> Option<Self>;

    /// Converts from `u64`, or `None` when the value does not fit.
    fn from_u64(value: u64) -> Option<Self>;

    /// Widens to `u64`.
    fn to_u64(self) -> u64;

    /// Encodes the value as its little-endian word.
    fn to_le_word(self) -> Self::LittleEndianWord;

    /// Decodes a value from its little-endian word.
    fn from_le_word(word: Self::LittleEndianWord) -> Self;

    /// Builds an Arrow primitive array holding `values`.
    fn primitive_array(values: Vec<Self>) -> PrimitiveArray<Self::ArrowType>;
}
231
/// Associates a [`PropertyIndex`] width with the snapshot section kind tags
/// used for that width.
///
/// Sealed through `sealed::PropertySnapshotMetaWord`; the values are supplied
/// per width by `impl_property_width!`.
pub trait PropertySnapshotMetaWord: sealed::PropertySnapshotMetaWord + PropertyIndex {
    /// Section kind tag for property descriptors at this width.
    const PROPERTY_DESCRIPTORS_KIND: u32;

    /// Section kind tag for property data at this width.
    const PROPERTY_DATA_KIND: u32;

    /// Section kind tag for identity-mode records at this width.
    const IDENTITY_MODES_KIND: u32;

    /// Section kind tag for the element identity map at this width.
    const ELEMENT_IDENTITY_MAP_KIND: u32;

    /// Section kind tag for the relation identity map at this width.
    const RELATION_IDENTITY_MAP_KIND: u32;

    /// Section kind tag for the incidence identity map at this width.
    const INCIDENCE_IDENTITY_MAP_KIND: u32;
}
256
// Implements the sealed markers plus `PropertyIndex` and
// `PropertySnapshotMetaWord` for one unsigned integer width.
//
// Arguments, in order: the index integer type, the matching Arrow primitive
// type, the zerocopy little-endian word type, then the six snapshot section
// kind constants (descriptors, data, identity modes, element map, relation
// map, incidence map).
macro_rules! impl_property_width {
    (
        $index:ty,
        $arrow:ty,
        $word:ty,
        $descriptor_kind:expr,
        $data_kind:expr,
        $identity_kind:expr,
        $element_kind:expr,
        $relation_kind:expr,
        $incidence_kind:expr
    ) => {
        impl sealed::PropertyIndex for $index {}

        impl PropertyIndex for $index {
            type ArrowType = $arrow;
            type LittleEndianWord = $word;

            // Checked conversion; `None` when the value does not fit `usize`.
            fn to_usize(self) -> Option<usize> {
                usize::try_from(self).ok()
            }

            // Checked conversion; `None` when `value` exceeds the index width.
            fn from_usize(value: usize) -> Option<Self> {
                <$index>::try_from(value).ok()
            }

            // Checked conversion; `None` when `value` exceeds the index width.
            fn from_u64(value: u64) -> Option<Self> {
                <$index>::try_from(value).ok()
            }

            // Lossless widening conversion.
            fn to_u64(self) -> u64 {
                u64::from(self)
            }

            fn to_le_word(self) -> Self::LittleEndianWord {
                <$word>::new(self)
            }

            fn from_le_word(word: Self::LittleEndianWord) -> Self {
                word.get()
            }

            fn primitive_array(values: Vec<Self>) -> PrimitiveArray<Self::ArrowType> {
                PrimitiveArray::<$arrow>::from(values)
            }
        }

        impl sealed::PropertySnapshotMetaWord for $index {}

        impl PropertySnapshotMetaWord for $index {
            const PROPERTY_DESCRIPTORS_KIND: u32 = $descriptor_kind;
            const PROPERTY_DATA_KIND: u32 = $data_kind;
            const IDENTITY_MODES_KIND: u32 = $identity_kind;
            const ELEMENT_IDENTITY_MAP_KIND: u32 = $element_kind;
            const RELATION_IDENTITY_MAP_KIND: u32 = $relation_kind;
            const INCIDENCE_IDENTITY_MAP_KIND: u32 = $incidence_kind;
        }
    };
}
317
// 16-bit index width: `u16` values, Arrow `UInt16Type`, `U16<LE>` words.
impl_property_width!(
    u16,
    arrow_array::types::UInt16Type,
    U16<LE>,
    SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U16,
    SNAPSHOT_KIND_PROPERTY_DATA_U16,
    SNAPSHOT_KIND_IDENTITY_MODES_U16,
    SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U16,
    SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U16,
    SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U16
);

// 32-bit index width: `u32` values, Arrow `UInt32Type`, `U32<LE>` words.
impl_property_width!(
    u32,
    arrow_array::types::UInt32Type,
    U32<LE>,
    SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U32,
    SNAPSHOT_KIND_PROPERTY_DATA_U32,
    SNAPSHOT_KIND_IDENTITY_MODES_U32,
    SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U32,
    SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U32,
    SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U32
);

// 64-bit index width: `u64` values, Arrow `UInt64Type`, `U64<LE>` words.
impl_property_width!(
    u64,
    arrow_array::types::UInt64Type,
    U64<LE>,
    SNAPSHOT_KIND_PROPERTY_DESCRIPTORS_U64,
    SNAPSHOT_KIND_PROPERTY_DATA_U64,
    SNAPSHOT_KIND_IDENTITY_MODES_U64,
    SNAPSHOT_KIND_ELEMENT_IDENTITY_MAP_U64,
    SNAPSHOT_KIND_RELATION_IDENTITY_MAP_U64,
    SNAPSHOT_KIND_INCIDENCE_IDENTITY_MAP_U64
);
353
/// Name of a property layer.
///
/// The field is private; instances are built through `LayerName::try_new`,
/// which rejects the empty string.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LayerName {
    // Owned name text.
    value: String,
}
364
365impl LayerName {
366 pub fn try_new(value: &str) -> Result<Self, PropertyError> {
376 if value.is_empty() {
377 return Err(PropertyError::EmptyLayerName);
378 }
379 Ok(Self {
380 value: String::from(value),
381 })
382 }
383
384 #[must_use]
390 pub const fn as_str(&self) -> &str {
391 self.value.as_str()
392 }
393}
394
395impl fmt::Display for LayerName {
396 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
397 formatter.write_str(self.as_str())
398 }
399}
400
/// Which family of identifiers a property layer is indexed by.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum IdFamily {
    /// Indexed by element.
    Element,
    /// Indexed by relation.
    Relation,
    /// Indexed by incidence.
    Incidence,
}
416
/// Role tag recorded on a property layer descriptor.
// NOTE(review): the behavioural difference between the two roles is not
// visible in this part of the file — confirm against the consumers.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum LayerRole {
    /// Layer tagged as a weight.
    Weight,
    /// Layer tagged as a general property.
    Property,
}
430
/// Policy attached to sparse storage for indices that have no stored entry.
// NOTE(review): presumably `Null` reads absent entries as null and `Default`
// reads them as the layer's default value — confirm against the validation
// helpers, which are outside this view.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum MissingPolicy {
    /// Null missing policy.
    Null,
    /// Default-value missing policy.
    Default,
}
447
/// Physical storage layout declared by a property layer descriptor.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum StorageMode {
    /// A single values array (see `PropertyLayerData::Dense`).
    Dense,
    /// An index array plus a values array (see `PropertyLayerData::Sparse`).
    Sparse {
        /// Policy for indices without a stored entry.
        missing: MissingPolicy,
    },
}
464
/// Metadata describing one property layer.
///
/// `I` is the index width the layer is keyed by; it is carried only as a
/// type-level marker.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct PropertyLayerDescriptor<Id, I>
where
    I: PropertyIndex,
{
    /// Identifier of the layer.
    pub layer_id: LayerId<Id>,
    /// Non-empty layer name.
    pub name: LayerName,
    /// Identifier family the layer is indexed by.
    pub id_family: IdFamily,
    /// Role tag of the layer.
    pub role: LayerRole,
    /// Declared storage layout.
    pub storage: StorageMode,
    /// Arrow field for the layer's values; its nullability decides whether
    /// layer construction rejects null values.
    pub arrow_field: Field,
    // Zero-sized marker tying the descriptor to index width `I`.
    index_width: core::marker::PhantomData<I>,
}
491
492impl<Id, I> PropertyLayerDescriptor<Id, I>
493where
494 I: PropertyIndex,
495{
496 #[expect(
506 clippy::too_many_arguments,
507 reason = "descriptor constructor mirrors the six-field descriptor contract"
508 )]
509 pub fn try_new(
510 layer_id: LayerId<Id>,
511 name: &str,
512 id_family: IdFamily,
513 role: LayerRole,
514 storage: StorageMode,
515 arrow_field: Field,
516 ) -> Result<Self, PropertyError> {
517 Ok(Self {
518 layer_id,
519 name: LayerName::try_new(name)?,
520 id_family,
521 role,
522 storage,
523 arrow_field,
524 index_width: core::marker::PhantomData,
525 })
526 }
527}
528
/// Errors reported by property layer construction, validation and snapshot
/// decoding.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum PropertyError {
    /// A layer name was the empty string.
    EmptyLayerName,
    /// A dense layer was requested but the descriptor is not dense.
    ExpectedDenseStorage {
        /// Name of the offending layer.
        name: LayerName,
    },
    /// A sparse layer was requested but the descriptor is not sparse.
    ExpectedSparseStorage {
        /// Name of the offending layer.
        name: LayerName,
    },
    /// The supplied default does not match the layer's missing policy.
    DefaultPolicyMismatch {
        /// Name of the offending layer.
        name: LayerName,
    },
    /// An Arrow array's type does not match the layer's Arrow field.
    ArrowTypeMismatch {
        /// Name of the offending layer.
        name: LayerName,
    },
    /// A layer was used with a different ID family than expected.
    IdFamilyMismatch {
        /// Family that was required.
        expected: IdFamily,
        /// Family the layer actually has.
        actual: IdFamily,
    },
    /// A layer is shorter than required.
    LayerTooShort {
        /// Minimum length required.
        required: usize,
        /// Length found.
        actual: usize,
    },
    /// A null was found where none is allowed.
    UnexpectedNull {
        /// Position of the null.
        index: usize,
    },
    /// Sparse index and value arrays have different lengths.
    SparseLengthMismatch {
        /// Number of indices.
        indices: usize,
        /// Number of values.
        values: usize,
    },
    /// Sparse indices are not strictly increasing.
    SparseIndexOrder {
        /// Position at which the ordering is violated.
        position: usize,
    },
    /// An index lies outside the logical length it is checked against.
    SparseIndexOutOfBounds {
        /// Offending index, widened to `u64`.
        index: u64,
        /// Length the index was checked against.
        len: usize,
    },
    /// Two layers in the same ID family share a name.
    DuplicateName {
        /// Family (namespace) containing the duplicate.
        id_family: IdFamily,
        /// The duplicated name.
        name: LayerName,
    },
    /// A sparse layer with a null missing policy is not total.
    SparseNullMissingNotTotal {
        /// Name of the offending layer.
        name: LayerName,
    },
    /// Two layers share a layer ID.
    DuplicateLayerId {
        /// The duplicated ID, converted to `u64`.
        layer_id: u64,
    },
    /// A required snapshot section is absent.
    MissingSnapshotSection {
        /// Section kind tag that was looked up.
        kind: u32,
    },
    /// A snapshot section has an unsupported version.
    SnapshotSectionVersion {
        /// Section kind tag.
        kind: u32,
        /// Version found in the section.
        version: u32,
    },
    /// A snapshot section could not be borrowed as the expected records.
    SnapshotSectionView {
        /// Section kind tag.
        kind: u32,
        /// Underlying view error.
        error: SectionViewError,
    },
    /// A byte range exceeds the bytes available in the snapshot.
    SnapshotRangeOutOfBounds {
        /// Start offset of the range.
        offset: usize,
        /// Length of the range.
        len: usize,
        /// Bytes available.
        available: usize,
    },
    /// A snapshot string is not valid UTF-8.
    SnapshotInvalidUtf8 {
        /// Byte offset of the string.
        offset: usize,
    },
    /// Unrecognised ID-family tag.
    UnknownIdFamilyTag {
        /// The tag value read.
        tag: u32,
    },
    /// Unrecognised layer-role tag.
    UnknownLayerRoleTag {
        /// The tag value read.
        tag: u32,
    },
    /// Unrecognised storage tag.
    UnknownStorageTag {
        /// The tag value read.
        tag: u32,
    },
    /// Unrecognised missing-policy tag.
    UnknownMissingPolicyTag {
        /// The tag value read.
        tag: u32,
    },
    /// Unrecognised Arrow value-family tag.
    UnknownArrowFamilyTag {
        /// The tag value read.
        tag: u32,
    },
    /// Unrecognised identity-map mode tag.
    UnknownIdentityModeTag {
        /// The tag value read.
        tag: u32,
    },
    /// Snapshot descriptor data is inconsistent.
    SnapshotDescriptorMismatch {
        /// Static description of the mismatch.
        reason: &'static str,
    },
    /// Snapshot property data has an unexpected length.
    SnapshotDataLength {
        /// Static description of the mismatch.
        reason: &'static str,
    },
    /// An error surfaced from Arrow, carried as text.
    Arrow {
        /// Message describing the Arrow error.
        message: String,
    },
    /// An explicit identity map is required but missing.
    MissingIdentityMap {
        /// Family whose map is missing.
        id_family: IdFamily,
    },
    /// An identity map has the wrong length.
    IdentityMapLength {
        /// Family whose map is mis-sized.
        id_family: IdFamily,
        /// Length required.
        required: usize,
        /// Length found.
        actual: usize,
    },
    /// A length cannot be represented as `u64`.
    LengthDoesNotFitU64 {
        /// The length that does not fit.
        value: usize,
    },
}
712
713impl fmt::Display for PropertyError {
714 #[expect(
715 clippy::too_many_lines,
716 reason = "property validation has one display branch per concrete error variant"
717 )]
718 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
719 match self {
720 Self::EmptyLayerName => formatter.write_str("property layer name is empty"),
721 Self::ExpectedDenseStorage { name } => {
722 write!(formatter, "property layer '{name}' is not dense")
723 }
724 Self::ExpectedSparseStorage { name } => {
725 write!(formatter, "property layer '{name}' is not sparse")
726 }
727 Self::DefaultPolicyMismatch { name } => {
728 write!(formatter, "property layer '{name}' default policy mismatch")
729 }
730 Self::ArrowTypeMismatch { name } => {
731 write!(formatter, "property layer '{name}' Arrow type mismatch")
732 }
733 Self::IdFamilyMismatch { expected, actual } => write!(
734 formatter,
735 "property ID family mismatch: expected {expected:?}, got {actual:?}"
736 ),
737 Self::LayerTooShort { required, actual } => write!(
738 formatter,
739 "property layer too short: required {required}, got {actual}"
740 ),
741 Self::UnexpectedNull { index } => write!(
742 formatter,
743 "property layer has unexpected null at index {index}"
744 ),
745 Self::SparseLengthMismatch { indices, values } => write!(
746 formatter,
747 "sparse property length mismatch: {indices} indexes for {values} values"
748 ),
749 Self::SparseIndexOrder { position } => write!(
750 formatter,
751 "sparse property indexes are not strictly increasing at position {position}"
752 ),
753 Self::SparseIndexOutOfBounds { index, len } => write!(
754 formatter,
755 "sparse property index {index} is outside logical length {len}"
756 ),
757 Self::DuplicateName { id_family, name } => write!(
758 formatter,
759 "duplicate property name '{name}' in {id_family:?} namespace"
760 ),
761 Self::SparseNullMissingNotTotal { name } => write!(
762 formatter,
763 "sparse property layer '{name}' has null missing policy and is not total"
764 ),
765 Self::DuplicateLayerId { layer_id } => {
766 write!(formatter, "duplicate property layer ID {layer_id:?}")
767 }
768 Self::MissingSnapshotSection { kind } => {
769 write!(formatter, "snapshot is missing section kind {kind:#x}")
770 }
771 Self::SnapshotSectionVersion { kind, version } => write!(
772 formatter,
773 "snapshot section {kind:#x} has unsupported version {version}"
774 ),
775 Self::SnapshotSectionView { kind, error } => write!(
776 formatter,
777 "snapshot section {kind:#x} cannot be borrowed as expected records: {error}"
778 ),
779 Self::SnapshotRangeOutOfBounds {
780 offset,
781 len,
782 available,
783 } => write!(
784 formatter,
785 "snapshot range {offset}..{} exceeds available {available} bytes",
786 offset.saturating_add(*len)
787 ),
788 Self::SnapshotInvalidUtf8 { offset } => {
789 write!(
790 formatter,
791 "snapshot string at byte offset {offset} is not UTF-8"
792 )
793 }
794 Self::UnknownIdFamilyTag { tag } => {
795 write!(formatter, "unknown property ID-family tag {tag}")
796 }
797 Self::UnknownLayerRoleTag { tag } => {
798 write!(formatter, "unknown property layer-role tag {tag}")
799 }
800 Self::UnknownStorageTag { tag } => {
801 write!(formatter, "unknown property storage tag {tag}")
802 }
803 Self::UnknownMissingPolicyTag { tag } => {
804 write!(formatter, "unknown property missing-policy tag {tag}")
805 }
806 Self::UnknownArrowFamilyTag { tag } => {
807 write!(formatter, "unknown Arrow value-family tag {tag}")
808 }
809 Self::UnknownIdentityModeTag { tag } => {
810 write!(formatter, "unknown identity-map mode tag {tag}")
811 }
812 Self::SnapshotDescriptorMismatch { reason } => {
813 write!(formatter, "property snapshot descriptor mismatch: {reason}")
814 }
815 Self::SnapshotDataLength { reason } => {
816 write!(
817 formatter,
818 "property snapshot data length mismatch: {reason}"
819 )
820 }
821 Self::Arrow { message } => write!(formatter, "Arrow property error: {message}"),
822 Self::MissingIdentityMap { id_family } => {
823 write!(formatter, "missing explicit identity map for {id_family:?}")
824 }
825 Self::IdentityMapLength {
826 id_family,
827 required,
828 actual,
829 } => write!(
830 formatter,
831 "identity map for {id_family:?} has length {actual}, required {required}"
832 ),
833 Self::LengthDoesNotFitU64 { value } => {
834 write!(formatter, "length {value} does not fit u64")
835 }
836 }
837 }
838}
839
// Marks `PropertyError` as a standard error type; all trait methods keep
// their defaults, so no source error is reported.
impl Error for PropertyError {}
841
/// Arrow-backed payload of a property layer.
#[non_exhaustive]
pub enum PropertyLayerData<I>
where
    I: PropertyIndex,
{
    /// A single values array.
    Dense {
        /// Stored values.
        values: ArrayRef,
    },
    /// Index array plus the values stored at those indices.
    Sparse {
        /// Indices that have a stored value.
        indices: Arc<PrimitiveArray<I::ArrowType>>,
        /// Stored values; layer construction requires the same length as
        /// `indices`.
        values: ArrayRef,
        /// Optional default array.
        default: Option<ArrayRef>,
    },
}
867
868impl<I> Clone for PropertyLayerData<I>
869where
870 I: PropertyIndex,
871{
872 fn clone(&self) -> Self {
873 match self {
874 Self::Dense { values } => Self::Dense {
875 values: Arc::clone(values),
876 },
877 Self::Sparse {
878 indices,
879 values,
880 default,
881 } => Self::Sparse {
882 indices: Arc::clone(indices),
883 values: Arc::clone(values),
884 default: default.clone(),
885 },
886 }
887 }
888}
889
890impl<I> fmt::Debug for PropertyLayerData<I>
891where
892 I: PropertyIndex,
893{
894 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
895 match self {
896 Self::Dense { values } => formatter
897 .debug_struct("Dense")
898 .field("len", &values.len())
899 .finish(),
900 Self::Sparse {
901 indices,
902 values,
903 default,
904 } => formatter
905 .debug_struct("Sparse")
906 .field("indices", &indices.len())
907 .field("values", &values.len())
908 .field("has_default", &default.is_some())
909 .finish(),
910 }
911 }
912}
913
/// A validated property layer: descriptor, logical length and Arrow data.
///
/// Fields are private; instances are built through
/// `PropertyLayer::try_new_dense` or `PropertyLayer::try_new_sparse`.
#[derive(Clone, Debug)]
#[must_use]
pub struct PropertyLayer<Id, I>
where
    I: PropertyIndex,
{
    // Metadata describing the layer.
    descriptor: PropertyLayerDescriptor<Id, I>,
    // Logical length: the values length for dense layers, the caller-supplied
    // length for sparse layers.
    len: usize,
    // Arrow payload.
    data: PropertyLayerData<I>,
}
932
933impl<Id, I> PropertyLayer<Id, I>
934where
935 I: PropertyIndex,
936{
937 pub fn try_new_dense(
947 descriptor: PropertyLayerDescriptor<Id, I>,
948 values: ArrayRef,
949 ) -> Result<Self, PropertyError> {
950 if descriptor.storage != StorageMode::Dense {
951 return Err(PropertyError::ExpectedDenseStorage {
952 name: descriptor.name,
953 });
954 }
955 ensure_arrow_type(&descriptor, values.as_ref())?;
956 if !descriptor.arrow_field.is_nullable() {
957 ensure_no_nulls(values.as_ref())?;
958 }
959 let len = values.len();
960 Ok(Self {
961 descriptor,
962 len,
963 data: PropertyLayerData::Dense { values },
964 })
965 }
966
967 pub fn try_new_sparse(
978 descriptor: PropertyLayerDescriptor<Id, I>,
979 len: usize,
980 indices: Arc<PrimitiveArray<I::ArrowType>>,
981 values: ArrayRef,
982 default: Option<ArrayRef>,
983 ) -> Result<Self, PropertyError> {
984 let StorageMode::Sparse { missing } = descriptor.storage else {
985 return Err(PropertyError::ExpectedSparseStorage {
986 name: descriptor.name,
987 });
988 };
989 validate_default_policy(&descriptor, missing, default.as_ref())?;
990 ensure_arrow_type(&descriptor, values.as_ref())?;
991 if indices.len() != values.len() {
992 return Err(PropertyError::SparseLengthMismatch {
993 indices: indices.len(),
994 values: values.len(),
995 });
996 }
997 ensure_no_nulls(indices.as_ref())?;
998 if !descriptor.arrow_field.is_nullable() {
999 ensure_no_nulls(values.as_ref())?;
1000 }
1001 validate_sparse_indices::<I>(indices.as_ref(), len)?;
1002 Ok(Self {
1003 descriptor,
1004 len,
1005 data: PropertyLayerData::Sparse {
1006 indices,
1007 values,
1008 default,
1009 },
1010 })
1011 }
1012
1013 #[must_use]
1019 pub const fn descriptor(&self) -> &PropertyLayerDescriptor<Id, I> {
1020 &self.descriptor
1021 }
1022
1023 #[must_use]
1029 pub const fn data(&self) -> &PropertyLayerData<I> {
1030 &self.data
1031 }
1032
1033 #[must_use]
1039 pub const fn len(&self) -> usize {
1040 self.len
1041 }
1042
1043 #[must_use]
1049 pub const fn is_empty(&self) -> bool {
1050 self.len == 0
1051 }
1052}
1053
/// Borrowed view over the element and relation property layers of a graph,
/// each family with its own index width.
#[derive(Clone, Copy, Debug)]
pub struct GraphPropertyLayers<'view, Id, NodeIndex, EdgeIndex>
where
    NodeIndex: PropertyIndex,
    EdgeIndex: PropertyIndex,
{
    /// Layers keyed by `NodeIndex`.
    pub element: &'view [PropertyLayer<Id, NodeIndex>],
    /// Layers keyed by `EdgeIndex`.
    pub relation: &'view [PropertyLayer<Id, EdgeIndex>],
}
1070
/// Borrowed view over element, relation and incidence property layers, each
/// family with its own index width.
#[derive(Clone, Copy, Debug)]
pub struct HyperPropertyLayers<'view, Id, VertexIndex, RelationIndex, IncidenceIndex>
where
    VertexIndex: PropertyIndex,
    RelationIndex: PropertyIndex,
    IncidenceIndex: PropertyIndex,
{
    /// Layers keyed by `VertexIndex`.
    pub element: &'view [PropertyLayer<Id, VertexIndex>],
    /// Layers keyed by `RelationIndex`.
    pub relation: &'view [PropertyLayer<Id, RelationIndex>],
    /// Layers keyed by `IncidenceIndex`.
    pub incidence: &'view [PropertyLayer<Id, IncidenceIndex>],
}
1090
/// Type-level marker selecting one ID family.
///
/// Sealed through `sealed::PropertyAxis`; this file implements it for
/// `ElementAxis`, `RelationAxis` and `IncidenceAxis`.
pub trait PropertyAxis: sealed::PropertyAxis {
    /// The ID family this axis stands for.
    fn id_family() -> IdFamily;
}
1111
/// Axis marker for the element ID family.
#[derive(Clone, Copy, Debug, Default)]
pub struct ElementAxis;

impl sealed::PropertyAxis for ElementAxis {}
impl PropertyAxis for ElementAxis {
    /// Always `IdFamily::Element`.
    fn id_family() -> IdFamily {
        IdFamily::Element
    }
}
1126
/// Axis marker for the relation ID family.
#[derive(Clone, Copy, Debug, Default)]
pub struct RelationAxis;

impl sealed::PropertyAxis for RelationAxis {}
impl PropertyAxis for RelationAxis {
    /// Always `IdFamily::Relation`.
    fn id_family() -> IdFamily {
        IdFamily::Relation
    }
}
1141
/// Axis marker for the incidence ID family.
#[derive(Clone, Copy, Debug, Default)]
pub struct IncidenceAxis;

impl sealed::PropertyAxis for IncidenceAxis {}
impl PropertyAxis for IncidenceAxis {
    /// Always `IdFamily::Incidence`.
    fn id_family() -> IdFamily {
        IdFamily::Incidence
    }
}
1156
/// Topology that can report an index bound along axis `A`.
///
/// Blanket implementations in this file forward to the topology's
/// `element_bound`, `relation_bound` or `incidence_bound`.
pub trait AxisIndex<A: PropertyAxis>: TopologyBase {
    /// Index bound along axis `A`; the weight views pass it to their
    /// validation helpers as the required layer extent.
    fn axis_bound(&self) -> usize;
}
1180
// Every `ElementIndex` topology exposes its element bound as the bound of
// the element axis.
impl<T> AxisIndex<ElementAxis> for T
where
    T: ElementIndex,
{
    fn axis_bound(&self) -> usize {
        self.element_bound()
    }
}
1189
// Every `RelationIndex` topology exposes its relation bound as the bound of
// the relation axis.
impl<T> AxisIndex<RelationAxis> for T
where
    T: RelationIndex,
{
    fn axis_bound(&self) -> usize {
        self.relation_bound()
    }
}
1198
// Every `IncidenceIndex` topology exposes its incidence bound as the bound
// of the incidence axis.
impl<T> AxisIndex<IncidenceAxis> for T
where
    T: IncidenceIndex,
{
    fn axis_bound(&self) -> usize {
        self.incidence_bound()
    }
}
1207
/// Borrowed weight view pairing a topology with the primitive values of a
/// dense property layer along axis `A`.
pub struct DenseWeights<'view, A, T, Id, I, P>
where
    A: PropertyAxis,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    // Topology used to turn IDs into positions.
    topology: &'view T,
    // Primitive values, read by position.
    values: &'view PrimitiveArray<P>,
    // Zero-sized marker for the axis, layer ID type and index width.
    property: core::marker::PhantomData<(A, Id, I)>,
}
1229
1230impl<'view, A, T, Id, I, P> DenseWeights<'view, A, T, Id, I, P>
1231where
1232 A: PropertyAxis,
1233 T: AxisIndex<A>,
1234 I: PropertyIndex,
1235 P: ArrowPrimitiveType,
1236{
1237 pub fn new(
1249 topology: &'view T,
1250 layer: &'view PropertyLayer<Id, I>,
1251 ) -> Result<Self, PropertyError> {
1252 let values = validate_dense_primitive_selection::<Id, I, P>(
1253 layer,
1254 A::id_family(),
1255 topology.axis_bound(),
1256 )?;
1257 Ok(Self {
1258 topology,
1259 values,
1260 property: core::marker::PhantomData,
1261 })
1262 }
1263}
1264
// The element-axis dense view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for DenseWeights<'_, ElementAxis, T, Id, I, P>
where
    T: ElementIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1274
1275impl<T, Id, I, P> ElementWeight for DenseWeights<'_, ElementAxis, T, Id, I, P>
1276where
1277 T: ElementIndex,
1278 I: PropertyIndex,
1279 P: ArrowPrimitiveType,
1280 P::Native: Copy,
1281{
1282 type Weight = P::Native;
1283
1284 fn element_weight(&self, element: Self::ElementId) -> Self::Weight {
1285 self.values.value(self.topology.element_index(element))
1286 }
1287}
1288
// The relation-axis dense view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for DenseWeights<'_, RelationAxis, T, Id, I, P>
where
    T: RelationIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1298
1299impl<T, Id, I, P> RelationWeight for DenseWeights<'_, RelationAxis, T, Id, I, P>
1300where
1301 T: RelationIndex,
1302 I: PropertyIndex,
1303 P: ArrowPrimitiveType,
1304 P::Native: Copy,
1305{
1306 type Weight = P::Native;
1307
1308 fn relation_weight(&self, relation: Self::RelationId) -> Self::Weight {
1309 self.values.value(self.topology.relation_index(relation))
1310 }
1311}
1312
// The incidence-axis dense view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for DenseWeights<'_, IncidenceAxis, T, Id, I, P>
where
    T: IncidenceIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1322
// The incidence-axis dense view reuses the incidence ID and role types of
// the wrapped topology.
impl<T, Id, I, P> IncidenceBase for DenseWeights<'_, IncidenceAxis, T, Id, I, P>
where
    T: IncidenceIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type IncidenceId = T::IncidenceId;
    type Role = T::Role;
}
1332
1333impl<T, Id, I, P> IncidenceWeight for DenseWeights<'_, IncidenceAxis, T, Id, I, P>
1334where
1335 T: IncidenceIndex,
1336 I: PropertyIndex,
1337 P: ArrowPrimitiveType,
1338 P::Native: Copy,
1339{
1340 type Weight = P::Native;
1341
1342 fn incidence_weight(&self, incidence: Self::IncidenceId) -> Self::Weight {
1343 self.values.value(self.topology.incidence_index(incidence))
1344 }
1345}
1346
/// Borrowed weight view pairing a topology with the indices, values and
/// default of a sparse property layer along axis `A`.
pub struct SparseWeights<'view, A, T, Id, I, P>
where
    A: PropertyAxis,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    // Topology used to turn IDs into positions.
    topology: &'view T,
    // Indices that have a stored value.
    indices: &'view PrimitiveArray<I::ArrowType>,
    // Stored values.
    values: &'view PrimitiveArray<P>,
    // Default value handed to `sparse_value` on every lookup.
    default: P::Native,
    // Zero-sized marker for the axis and layer ID type.
    property: core::marker::PhantomData<(A, Id)>,
}
1372
1373impl<'view, A, T, Id, I, P> SparseWeights<'view, A, T, Id, I, P>
1374where
1375 A: PropertyAxis,
1376 T: AxisIndex<A>,
1377 I: PropertyIndex,
1378 P: ArrowPrimitiveType,
1379 P::Native: Copy,
1380{
1381 pub fn new(
1394 topology: &'view T,
1395 layer: &'view PropertyLayer<Id, I>,
1396 ) -> Result<Self, PropertyError> {
1397 let (indices, values, default) = validate_sparse_primitive_selection::<I, P, Id>(
1398 layer,
1399 A::id_family(),
1400 topology.axis_bound(),
1401 )?;
1402 Ok(Self {
1403 topology,
1404 indices,
1405 values,
1406 default,
1407 property: core::marker::PhantomData,
1408 })
1409 }
1410}
1411
// The element-axis sparse view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for SparseWeights<'_, ElementAxis, T, Id, I, P>
where
    T: ElementIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1421
1422impl<T, Id, I, P> ElementWeight for SparseWeights<'_, ElementAxis, T, Id, I, P>
1423where
1424 T: ElementIndex,
1425 I: PropertyIndex,
1426 P: ArrowPrimitiveType,
1427 P::Native: Copy,
1428{
1429 type Weight = P::Native;
1430
1431 fn element_weight(&self, element: Self::ElementId) -> Self::Weight {
1432 sparse_value::<I, P>(
1433 self.indices,
1434 self.values,
1435 self.default,
1436 self.topology.element_index(element),
1437 )
1438 }
1439}
1440
// The relation-axis sparse view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for SparseWeights<'_, RelationAxis, T, Id, I, P>
where
    T: RelationIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1450
1451impl<T, Id, I, P> RelationWeight for SparseWeights<'_, RelationAxis, T, Id, I, P>
1452where
1453 T: RelationIndex,
1454 I: PropertyIndex,
1455 P: ArrowPrimitiveType,
1456 P::Native: Copy,
1457{
1458 type Weight = P::Native;
1459
1460 fn relation_weight(&self, relation: Self::RelationId) -> Self::Weight {
1461 sparse_value::<I, P>(
1462 self.indices,
1463 self.values,
1464 self.default,
1465 self.topology.relation_index(relation),
1466 )
1467 }
1468}
1469
// The incidence-axis sparse view reuses the ID types of the wrapped topology.
impl<T, Id, I, P> TopologyBase for SparseWeights<'_, IncidenceAxis, T, Id, I, P>
where
    T: IncidenceIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type ElementId = T::ElementId;
    type RelationId = T::RelationId;
}
1479
// The incidence-axis sparse view reuses the incidence ID and role types of
// the wrapped topology.
impl<T, Id, I, P> IncidenceBase for SparseWeights<'_, IncidenceAxis, T, Id, I, P>
where
    T: IncidenceIndex,
    I: PropertyIndex,
    P: ArrowPrimitiveType,
{
    type IncidenceId = T::IncidenceId;
    type Role = T::Role;
}
1489
1490impl<T, Id, I, P> IncidenceWeight for SparseWeights<'_, IncidenceAxis, T, Id, I, P>
1491where
1492 T: IncidenceIndex,
1493 I: PropertyIndex,
1494 P: ArrowPrimitiveType,
1495 P::Native: Copy,
1496{
1497 type Weight = P::Native;
1498
1499 fn incidence_weight(&self, incidence: Self::IncidenceId) -> Self::Weight {
1500 sparse_value::<I, P>(
1501 self.indices,
1502 self.values,
1503 self.default,
1504 self.topology.incidence_index(incidence),
1505 )
1506 }
1507}
1508
1509pub fn validate_unique_names<'descriptor, Id, Index, Descriptors>(
1519 descriptors: Descriptors,
1520) -> Result<(), PropertyError>
1521where
1522 Id: 'descriptor,
1523 Index: PropertyIndex + 'descriptor,
1524 Descriptors: IntoIterator<Item = &'descriptor PropertyLayerDescriptor<Id, Index>>,
1525{
1526 let mut seen: BTreeSet<(IdFamily, &str)> = BTreeSet::new();
1527 for descriptor in descriptors {
1528 let key = (descriptor.id_family, descriptor.name.as_str());
1529 if !seen.insert(key) {
1530 return Err(PropertyError::DuplicateName {
1531 id_family: descriptor.id_family,
1532 name: descriptor.name.clone(),
1533 });
1534 }
1535 }
1536 Ok(())
1537}
1538
1539pub fn validate_unique_layer_ids<'descriptor, Id, Index, Descriptors>(
1549 descriptors: Descriptors,
1550) -> Result<(), PropertyError>
1551where
1552 Id: Copy + Into<u64> + Ord + 'descriptor,
1553 Index: PropertyIndex + 'descriptor,
1554 Descriptors: IntoIterator<Item = &'descriptor PropertyLayerDescriptor<Id, Index>>,
1555{
1556 let mut seen: BTreeSet<LayerId<Id>> = BTreeSet::new();
1557 for descriptor in descriptors {
1558 if !seen.insert(descriptor.layer_id) {
1559 return Err(PropertyError::DuplicateLayerId {
1560 layer_id: descriptor.layer_id.0.into(),
1561 });
1562 }
1563 }
1564 Ok(())
1565}
1566
1567#[expect(
1582 clippy::too_many_lines,
1583 reason = "rekeying keeps dense and sparse Arrow remapping in one contract path"
1584)]
1585pub fn rekey_layer_to_local<Id, I>(
1586 layer: &PropertyLayer<Id, I>,
1587 local_to_canonical: &[I],
1588) -> Result<PropertyLayer<Id, I>, PropertyError>
1589where
1590 Id: Clone,
1591 I: PropertyIndex,
1592{
1593 let descriptor = layer.descriptor().clone();
1594 match layer.data() {
1595 PropertyLayerData::Dense { values } => {
1596 let take_indices = I::primitive_array(local_to_canonical.to_vec());
1597 let values = take(values.as_ref(), &take_indices, None).map_err(map_arrow_error)?;
1598 PropertyLayer::try_new_dense(descriptor, values)
1599 }
1600 PropertyLayerData::Sparse {
1601 indices,
1602 values,
1603 default,
1604 } => {
1605 let mut canonical_to_local = vec![None; layer.len()];
1606 for (local, canonical) in local_to_canonical.iter().copied().enumerate() {
1607 let Some(canonical) = canonical.to_usize() else {
1608 return Err(PropertyError::SparseIndexOutOfBounds {
1609 index: canonical.to_u64(),
1610 len: layer.len(),
1611 });
1612 };
1613 if canonical >= layer.len() {
1614 return Err(PropertyError::SparseIndexOutOfBounds {
1615 index: canonical as u64,
1616 len: layer.len(),
1617 });
1618 }
1619 canonical_to_local[canonical] = Some(I::from_usize(local).ok_or(
1620 PropertyError::SparseIndexOutOfBounds {
1621 index: local as u64,
1622 len: local_to_canonical.len(),
1623 },
1624 )?);
1625 }
1626 let mut remapped = Vec::with_capacity(indices.len());
1627 for position in 0..indices.len() {
1628 let canonical = indices.value(position);
1629 let Some(canonical_usize) = canonical.to_usize() else {
1630 return Err(PropertyError::SparseIndexOutOfBounds {
1631 index: canonical.to_u64(),
1632 len: layer.len(),
1633 });
1634 };
1635 if canonical_usize >= canonical_to_local.len() {
1636 return Err(PropertyError::SparseIndexOutOfBounds {
1637 index: canonical.to_u64(),
1638 len: layer.len(),
1639 });
1640 }
1641 let Some(local) = canonical_to_local[canonical_usize] else {
1642 return Err(PropertyError::SparseIndexOutOfBounds {
1643 index: canonical.to_u64(),
1644 len: layer.len(),
1645 });
1646 };
1647 let take_position =
1648 I::from_usize(position).ok_or(PropertyError::SparseIndexOutOfBounds {
1649 index: position as u64,
1650 len: indices.len(),
1651 })?;
1652 remapped.push((local, take_position));
1653 }
1654 remapped.sort_by_key(|(local, _position)| *local);
1655 let new_indices = I::primitive_array(
1656 remapped
1657 .iter()
1658 .map(|(local, _position)| *local)
1659 .collect::<Vec<_>>(),
1660 );
1661 let take_indices = I::primitive_array(
1662 remapped
1663 .iter()
1664 .map(|(_local, position)| *position)
1665 .collect::<Vec<_>>(),
1666 );
1667 let values = take(values.as_ref(), &take_indices, None).map_err(map_arrow_error)?;
1668 if let Some(default) = default {
1669 ensure_arrow_type(&descriptor, default.as_ref())?;
1670 }
1671 PropertyLayer::try_new_sparse(
1672 descriptor,
1673 local_to_canonical.len(),
1674 Arc::new(new_indices),
1675 values,
1676 default.clone(),
1677 )
1678 }
1679 }
1680}
1681
/// How local indices of an ID family relate to canonical indices.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum IdentityMapMode {
    /// Mode with wire tag `0`.
    LocalEqualsCanonical,
    /// Mode with wire tag `1`.
    ExplicitMap,
}

impl IdentityMapMode {
    /// Numeric tag used to encode this mode.
    const fn tag(self) -> u32 {
        match self {
            Self::ExplicitMap => 1,
            Self::LocalEqualsCanonical => 0,
        }
    }

    /// Decodes a mode from its numeric tag, or `None` for an unknown tag.
    const fn from_tag(tag: u32) -> Option<Self> {
        if tag == Self::LocalEqualsCanonical.tag() {
            Some(Self::LocalEqualsCanonical)
        } else if tag == Self::ExplicitMap.tag() {
            Some(Self::ExplicitMap)
        } else {
            None
        }
    }
}
1722
/// Fixed-layout snapshot record describing the identity mode of one ID
/// family, stored as three little-endian words of width `W`.
#[derive(Clone, Copy, Debug, Eq, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)]
#[repr(C)]
pub struct IdentityModeRecord<W>
where
    W: PropertySnapshotMetaWord,
{
    // Encoded ID-family tag.
    id_family: W::LittleEndianWord,
    // Encoded identity-map mode tag.
    mode: W::LittleEndianWord,
    // Encoded local length.
    local_len: W::LittleEndianWord,
}
1741
1742impl<W> IdentityModeRecord<W>
1743where
1744 W: PropertySnapshotMetaWord,
1745{
1746 pub fn local_equals_canonical(
1757 id_family: IdFamily,
1758 local_len: usize,
1759 ) -> Result<Self, PropertyError> {
1760 Self::new(id_family, IdentityMapMode::LocalEqualsCanonical, local_len)
1761 }
1762
1763 pub fn explicit_map(id_family: IdFamily, local_len: usize) -> Result<Self, PropertyError> {
1774 Self::new(id_family, IdentityMapMode::ExplicitMap, local_len)
1775 }
1776
1777 pub fn new(
1788 id_family: IdFamily,
1789 mode: IdentityMapMode,
1790 local_len: usize,
1791 ) -> Result<Self, PropertyError> {
1792 Ok(Self {
1793 id_family: le_word::<W>(id_family_tag(id_family) as usize)?,
1794 mode: le_word::<W>(mode.tag() as usize)?,
1795 local_len: le_word::<W>(local_len)?,
1796 })
1797 }
1798
1799 pub fn id_family(&self) -> Result<IdFamily, PropertyError> {
1809 id_family_from_tag(le_word_to_u32::<W>(self.id_family)?)
1810 }
1811
1812 pub fn mode(&self) -> Result<IdentityMapMode, PropertyError> {
1822 let tag = le_word_to_u32::<W>(self.mode)?;
1823 IdentityMapMode::from_tag(tag).ok_or(PropertyError::UnknownIdentityModeTag { tag })
1824 }
1825
1826 #[must_use]
1833 pub fn local_len(&self) -> usize {
1834 le_word_to_usize::<W>(self.local_len).unwrap_or(usize::MAX)
1835 }
1836}
1837
/// Outcome of a successful identity-snapshot validation.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub struct IdentitySnapshotSummary {
    /// One summary per identity-mode record, in the order the records appear
    /// in the section.
    pub records: Vec<IdentityModeSummary>,
}
1849
/// Decoded view of one validated identity-mode record.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct IdentityModeSummary {
    /// ID family the record applies to.
    pub id_family: IdFamily,
    /// Identity-map mode declared for that family.
    pub mode: IdentityMapMode,
    /// Local length declared by the record.
    pub local_len: usize,
}
1864
1865pub fn validate_identity_snapshot<W>(
1877 snapshot: &Snapshot<'_>,
1878) -> Result<IdentitySnapshotSummary, PropertyError>
1879where
1880 W: PropertySnapshotMetaWord,
1881{
1882 let section =
1883 snapshot
1884 .section(W::IDENTITY_MODES_KIND)
1885 .ok_or(PropertyError::MissingSnapshotSection {
1886 kind: W::IDENTITY_MODES_KIND,
1887 })?;
1888 if section.version() != SNAPSHOT_PROPERTY_VERSION {
1889 return Err(PropertyError::SnapshotSectionVersion {
1890 kind: W::IDENTITY_MODES_KIND,
1891 version: section.version(),
1892 });
1893 }
1894 let records: &[IdentityModeRecord<W>] =
1895 section
1896 .try_as_slice()
1897 .map_err(|error| PropertyError::SnapshotSectionView {
1898 kind: W::IDENTITY_MODES_KIND,
1899 error,
1900 })?;
1901 let records = validate_identity_records::<W>(snapshot, records)?;
1902 Ok(IdentitySnapshotSummary { records })
1903}
1904
/// Byte payloads produced by the property snapshot encoders.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub struct EncodedPropertySnapshot {
    /// Descriptor section bytes: header, then fixed-size records, then the
    /// layer-name string bytes.
    pub descriptors: Vec<u8>,
    /// Data section bytes: the concatenated Arrow IPC streams referenced by
    /// the descriptor records.
    pub data: Vec<u8>,
}
1918
/// Outcome of a successful property-snapshot validation.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub struct PropertySnapshotSummary {
    /// Number of descriptor records (layers) in the snapshot.
    pub layer_count: usize,
    /// Sum of the `logical_len` of every layer.
    pub total_logical_values: usize,
}
1932
/// Arrow arrays decoded for one property layer.
#[derive(Clone, Debug)]
#[must_use]
#[non_exhaustive]
pub enum DecodedPropertyData {
    /// Dense layer: a single value column.
    Dense {
        /// Column 0 of the layer's value batch.
        values: ArrayRef,
    },
    /// Sparse layer: parallel index and value columns plus an optional default.
    Sparse {
        /// Column 0 of the layer's value batch.
        indices: ArrayRef,
        /// Column 1 of the layer's value batch.
        values: ArrayRef,
        /// Column 0 of the default batch, when the record declares one.
        default: Option<ArrayRef>,
    },
}
1962
/// One property layer decoded from a snapshot's descriptor and data sections.
#[derive(Clone, Debug)]
#[must_use]
pub struct DecodedPropertyLayer {
    /// Layer ID widened from the stored metadata word.
    pub layer_id: u64,
    /// Layer name read from the descriptor string bytes.
    pub name: String,
    /// ID family decoded from the record's family tag.
    pub id_family: IdFamily,
    /// Layer role decoded from the record's role tag.
    pub role: LayerRole,
    /// Storage mode decoded from the storage and missing-policy tags.
    pub storage: StorageMode,
    /// Logical length declared by the record.
    pub logical_len: usize,
    /// Decoded Arrow arrays for the layer.
    pub data: DecodedPropertyData,
}
1992
/// Header at the start of the property descriptor section.
///
/// Consists of two little-endian `u64` fields; the validator reads them from
/// bytes `0..8` and `8..16` of the section.
#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct PropertySnapshotHeader {
    /// Number of descriptor records that follow the header.
    record_count: U64<LE>,
    /// Total byte length of those records.
    record_bytes: U64<LE>,
}
2002
/// Fixed-layout descriptor record for one property layer.
///
/// Every field is a little-endian word of the metadata width selected by
/// `W`. Field order is part of the stored layout (`#[repr(C)]`) and must
/// not change.
#[derive(Clone, Copy, Debug, Eq, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)]
#[repr(C)]
pub struct PropertySnapshotRecord<W>
where
    W: PropertySnapshotMetaWord,
{
    /// Layer ID.
    layer_id: W::LittleEndianWord,
    /// Byte offset of the layer name within the descriptor string bytes.
    name_offset: W::LittleEndianWord,
    /// Byte length of the layer name.
    name_len: W::LittleEndianWord,
    /// ID-family tag.
    id_family: W::LittleEndianWord,
    /// Layer-role tag.
    role: W::LittleEndianWord,
    /// Storage-mode tag.
    storage: W::LittleEndianWord,
    /// Missing-policy tag; validated together with `storage`.
    missing_policy: W::LittleEndianWord,
    /// Logical length of the layer.
    logical_len: W::LittleEndianWord,
    /// Number of stored values (dense values or sparse entries).
    value_count: W::LittleEndianWord,
    /// Byte offset of the value IPC stream within the data section.
    value_data_offset: W::LittleEndianWord,
    /// Byte length of the value IPC stream.
    value_data_len: W::LittleEndianWord,
    /// Byte offset of the default IPC stream; written as zero when absent.
    default_data_offset: W::LittleEndianWord,
    /// Byte length of the default IPC stream; zero means no default stream.
    default_data_len: W::LittleEndianWord,
    /// Reserved word; validation rejects any non-zero value.
    reserved: W::LittleEndianWord,
}
2039
2040pub fn encode_property_snapshot<W, Id, I>(
2051 layers: &[PropertyLayer<Id, I>],
2052) -> Result<EncodedPropertySnapshot, PropertyError>
2053where
2054 W: PropertySnapshotMetaWord,
2055 Id: Copy + Into<u64> + Ord + TryInto<W>,
2056 I: PropertyIndex,
2057{
2058 let mut encoder = PropertySnapshotEncoder::<W>::with_capacity(layers.len());
2059 for layer in layers {
2060 encoder.append::<Id, I>(layer)?;
2061 }
2062 encoder.finish()
2063}
2064
2065pub fn encode_graph_property_snapshot<W, Id, NodeIndex, EdgeIndex>(
2075 layers: GraphPropertyLayers<'_, Id, NodeIndex, EdgeIndex>,
2076) -> Result<EncodedPropertySnapshot, PropertyError>
2077where
2078 W: PropertySnapshotMetaWord,
2079 Id: Copy + Into<u64> + Ord + TryInto<W>,
2080 NodeIndex: PropertyIndex,
2081 EdgeIndex: PropertyIndex,
2082{
2083 let mut encoder = PropertySnapshotEncoder::<W>::with_capacity(
2084 layers.element.len().saturating_add(layers.relation.len()),
2085 );
2086 for layer in layers.element {
2087 encoder.append::<Id, NodeIndex>(layer)?;
2088 }
2089 for layer in layers.relation {
2090 encoder.append::<Id, EdgeIndex>(layer)?;
2091 }
2092 encoder.finish()
2093}
2094
2095pub fn encode_hyper_property_snapshot<W, Id, VertexIndex, RelationIndex, IncidenceIndex>(
2105 layers: HyperPropertyLayers<'_, Id, VertexIndex, RelationIndex, IncidenceIndex>,
2106) -> Result<EncodedPropertySnapshot, PropertyError>
2107where
2108 W: PropertySnapshotMetaWord,
2109 Id: Copy + Into<u64> + Ord + TryInto<W>,
2110 VertexIndex: PropertyIndex,
2111 RelationIndex: PropertyIndex,
2112 IncidenceIndex: PropertyIndex,
2113{
2114 let mut encoder = PropertySnapshotEncoder::<W>::with_capacity(
2115 layers
2116 .element
2117 .len()
2118 .saturating_add(layers.relation.len())
2119 .saturating_add(layers.incidence.len()),
2120 );
2121 for layer in layers.element {
2122 encoder.append::<Id, VertexIndex>(layer)?;
2123 }
2124 for layer in layers.relation {
2125 encoder.append::<Id, RelationIndex>(layer)?;
2126 }
2127 for layer in layers.incidence {
2128 encoder.append::<Id, IncidenceIndex>(layer)?;
2129 }
2130 encoder.finish()
2131}
2132
/// Accumulates descriptor records and IPC payloads while layers are encoded.
struct PropertySnapshotEncoder<W>
where
    W: PropertySnapshotMetaWord,
{
    /// Data-section bytes: each layer's value IPC stream followed by its
    /// default IPC stream, when it has one.
    data: Vec<u8>,
    /// Concatenated UTF-8 layer names, addressed by offset and length.
    strings: Vec<u8>,
    /// One descriptor record per appended layer, in append order.
    records: Vec<PropertySnapshotRecord<W>>,
    /// `(family, name)` pairs already appended, used to reject duplicates.
    names: BTreeSet<(IdFamily, LayerName)>,
    /// Layer IDs (widened to `u64`) already appended, used to reject duplicates.
    ids: BTreeSet<u64>,
}
2158
impl<W> PropertySnapshotEncoder<W>
where
    W: PropertySnapshotMetaWord,
{
    /// Creates an empty encoder with room reserved for `capacity` records.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::new(),
            strings: Vec::new(),
            records: Vec::with_capacity(capacity),
            names: BTreeSet::new(),
            ids: BTreeSet::new(),
        }
    }

    /// Encodes one layer and appends its descriptor record and IPC payloads.
    ///
    /// # Errors
    ///
    /// Returns [`PropertyError::DuplicateName`] or
    /// [`PropertyError::DuplicateLayerId`] when the layer collides with one
    /// already appended, propagates Arrow IPC encoding errors, and returns
    /// [`PropertyError::SnapshotDescriptorMismatch`] when the layer ID or any
    /// descriptor value does not fit the metadata width `W`.
    ///
    /// On error the encoder may already hold partial state for the layer
    /// (name, ID, payload bytes), so it should not be reused afterwards.
    fn append<Id, I>(&mut self, layer: &PropertyLayer<Id, I>) -> Result<(), PropertyError>
    where
        Id: Copy + Into<u64> + TryInto<W>,
        I: PropertyIndex,
    {
        let descriptor = layer.descriptor();
        // Names must be unique per ID family.
        if !self
            .names
            .insert((descriptor.id_family, descriptor.name.clone()))
        {
            return Err(PropertyError::DuplicateName {
                id_family: descriptor.id_family,
                name: descriptor.name.clone(),
            });
        }
        // Layer IDs are compared after widening to u64, independent of `W`.
        let diagnostic_layer_id = descriptor.layer_id.0.into();
        if !self.ids.insert(diagnostic_layer_id) {
            return Err(PropertyError::DuplicateLayerId {
                layer_id: diagnostic_layer_id,
            });
        }
        let name_offset = append_string(&mut self.strings, descriptor.name.as_str());
        // The value stream starts at the current end of the data buffer.
        let value_data_offset = self.data.len();
        let layer_data = encode_layer_value_ipc(layer)?;
        let value_data_len = layer_data.len();
        self.data.extend_from_slice(&layer_data);
        // A layer without a default stream records offset 0 and length 0.
        let (default_data_offset, default_data_len) =
            encode_layer_default_ipc(layer)?.map_or((0, 0), |default_data| {
                let offset = self.data.len();
                let len = default_data.len();
                self.data.extend_from_slice(&default_data);
                (offset, len)
            });
        let layer_id = descriptor.layer_id.0.try_into().map_err(|_error| {
            PropertyError::SnapshotDescriptorMismatch {
                reason: "layer ID does not fit selected metadata width",
            }
        })?;
        self.records.push(PropertySnapshotRecord::<W> {
            layer_id: layer_id.to_le_word(),
            name_offset: le_word::<W>(name_offset)?,
            name_len: le_word::<W>(descriptor.name.as_str().len())?,
            id_family: le_word::<W>(id_family_tag(descriptor.id_family) as usize)?,
            role: le_word::<W>(layer_role_tag(descriptor.role) as usize)?,
            storage: le_word::<W>(storage_tag(descriptor.storage) as usize)?,
            missing_policy: le_word::<W>(missing_policy_tag(descriptor.storage) as usize)?,
            logical_len: le_word::<W>(layer.len())?,
            value_count: le_word::<W>(layer_value_count(layer))?,
            value_data_offset: le_word::<W>(value_data_offset)?,
            value_data_len: le_word::<W>(value_data_len)?,
            default_data_offset: le_word::<W>(default_data_offset)?,
            default_data_len: le_word::<W>(default_data_len)?,
            reserved: le_word::<W>(0)?,
        });
        Ok(())
    }

    /// Consumes the encoder and assembles the final section payloads.
    ///
    /// The descriptor section is laid out as header, records, then the
    /// string bytes; the data section is returned as accumulated.
    ///
    /// # Errors
    ///
    /// Returns [`PropertyError::SnapshotDescriptorMismatch`] when the record
    /// byte length overflows `usize`, or a length-conversion error when a
    /// count does not fit `u64`.
    fn finish(self) -> Result<EncodedPropertySnapshot, PropertyError> {
        let record_bytes = self
            .records
            .len()
            .checked_mul(core::mem::size_of::<PropertySnapshotRecord<W>>())
            .ok_or(PropertyError::SnapshotDescriptorMismatch {
                reason: "record byte length overflow",
            })?;
        let header = PropertySnapshotHeader {
            record_count: U64::new(usize_to_u64(self.records.len())?),
            record_bytes: U64::new(usize_to_u64(record_bytes)?),
        };
        let mut descriptor_bytes = Vec::with_capacity(
            core::mem::size_of::<PropertySnapshotHeader>() + record_bytes + self.strings.len(),
        );
        descriptor_bytes.extend_from_slice(header.as_bytes());
        descriptor_bytes.extend_from_slice(self.records.as_bytes());
        descriptor_bytes.extend_from_slice(&self.strings);
        Ok(EncodedPropertySnapshot {
            descriptors: descriptor_bytes,
            data: self.data,
        })
    }
}
2257
2258pub fn validate_property_snapshot<W>(
2270 snapshot: &Snapshot<'_>,
2271) -> Result<PropertySnapshotSummary, PropertyError>
2272where
2273 W: PropertySnapshotMetaWord,
2274{
2275 let descriptor_section = snapshot.section(W::PROPERTY_DESCRIPTORS_KIND).ok_or(
2276 PropertyError::MissingSnapshotSection {
2277 kind: W::PROPERTY_DESCRIPTORS_KIND,
2278 },
2279 )?;
2280 let data_section =
2281 snapshot
2282 .section(W::PROPERTY_DATA_KIND)
2283 .ok_or(PropertyError::MissingSnapshotSection {
2284 kind: W::PROPERTY_DATA_KIND,
2285 })?;
2286 if descriptor_section.version() != SNAPSHOT_PROPERTY_VERSION {
2287 return Err(PropertyError::SnapshotSectionVersion {
2288 kind: W::PROPERTY_DESCRIPTORS_KIND,
2289 version: descriptor_section.version(),
2290 });
2291 }
2292 if data_section.version() != SNAPSHOT_PROPERTY_VERSION {
2293 return Err(PropertyError::SnapshotSectionVersion {
2294 kind: W::PROPERTY_DATA_KIND,
2295 version: data_section.version(),
2296 });
2297 }
2298 validate_property_sections::<W>(descriptor_section.bytes(), data_section.bytes())
2299}
2300
/// Validates raw descriptor and data section bytes against each other.
///
/// Checks the descriptor header, every record's tags and name, uniqueness of
/// layer IDs and of `(family, name)` pairs, each record's IPC payloads, and
/// that the payload ranges tile `data_bytes` exactly.
///
/// # Errors
///
/// Returns a [`PropertyError`] describing the first problem found: a
/// truncated header or record area, inconsistent record byte length, unknown
/// tag, invalid name range or UTF-8, duplicate ID or name, invalid payload,
/// or data ranges that leave gaps, overlap, or leave trailing bytes.
pub fn validate_property_sections<W>(
    descriptor_bytes: &[u8],
    data_bytes: &[u8],
) -> Result<PropertySnapshotSummary, PropertyError>
where
    W: PropertySnapshotMetaWord,
{
    let header_len = core::mem::size_of::<PropertySnapshotHeader>();
    if descriptor_bytes.len() < header_len {
        return Err(PropertyError::SnapshotDataLength {
            reason: "descriptor header is truncated",
        });
    }
    // Header fields: record count at bytes 0..8, record byte length at 8..16.
    let record_count = read_u64_le(&descriptor_bytes[0..8])?;
    let record_bytes = read_u64_le(&descriptor_bytes[8..16])?;
    let record_count_usize = u64_to_usize(record_count)?;
    let record_bytes_usize = u64_to_usize(record_bytes)?;
    let expected_record_bytes = record_count_usize
        .checked_mul(core::mem::size_of::<PropertySnapshotRecord<W>>())
        .ok_or(PropertyError::SnapshotDescriptorMismatch {
            reason: "record byte length overflow",
        })?;
    if record_bytes_usize != expected_record_bytes {
        return Err(PropertyError::SnapshotDescriptorMismatch {
            reason: "record byte length does not match record count",
        });
    }
    let record_start = header_len;
    let string_start = record_start.checked_add(record_bytes_usize).ok_or(
        PropertyError::SnapshotDescriptorMismatch {
            reason: "descriptor section length overflow",
        },
    )?;
    if descriptor_bytes.len() < string_start {
        return Err(PropertyError::SnapshotDataLength {
            reason: "descriptor records are truncated",
        });
    }
    // Everything after the record area is treated as layer-name string bytes.
    let record_bytes_slice = &descriptor_bytes[record_start..string_start];
    let string_bytes = &descriptor_bytes[string_start..];
    let mut names: BTreeSet<(IdFamily, &str)> = BTreeSet::new();
    let mut ids: BTreeSet<u64> = BTreeSet::new();
    let mut ranges = Vec::with_capacity(record_count_usize);
    let mut total_logical_values = 0_usize;
    for position in 0..record_count_usize {
        // Cannot overflow: `record_count * record size` was checked above.
        let start = position * core::mem::size_of::<PropertySnapshotRecord<W>>();
        let record = parse_property_record::<W>(&record_bytes_slice[start..])?;
        let id_family = id_family_from_tag(le_word_to_u32::<W>(record.id_family)?)?;
        // The role is decoded only to reject unknown tags.
        let _role = layer_role_from_tag(le_word_to_u32::<W>(record.role)?)?;
        let storage = storage_from_tags(
            le_word_to_u32::<W>(record.storage)?,
            le_word_to_u32::<W>(record.missing_policy)?,
        )?;
        let name = read_snapshot_str(
            string_bytes,
            le_word_to_usize::<W>(record.name_offset)?,
            le_word_to_usize::<W>(record.name_len)?,
        )?;
        let layer_id = le_word_to_u64::<W>(record.layer_id);
        if !ids.insert(layer_id) {
            return Err(PropertyError::DuplicateLayerId { layer_id });
        }
        if !names.insert((id_family, name)) {
            return Err(PropertyError::DuplicateName {
                id_family,
                name: LayerName::try_new(name)?,
            });
        }
        let layer_ranges = validate_property_record_data::<W>(&record, storage, data_bytes)?;
        ranges.extend(layer_ranges);
        total_logical_values = total_logical_values
            .checked_add(le_word_to_usize::<W>(record.logical_len)?)
            .ok_or(PropertyError::SnapshotDescriptorMismatch {
                reason: "logical value total overflow",
            })?;
    }
    // The collected payload ranges must cover the data section exactly.
    validate_data_coverage(&mut ranges, data_bytes.len())?;
    Ok(PropertySnapshotSummary {
        layer_count: record_count_usize,
        total_logical_values,
    })
}
2392
impl DecodedPropertyLayer {
    /// Decodes every property layer stored in `snapshot`.
    ///
    /// Both the descriptor and data sections must exist and carry
    /// [`SNAPSHOT_PROPERTY_VERSION`]; decoding is then delegated to
    /// [`DecodedPropertyLayer::decode_sections`].
    ///
    /// # Errors
    ///
    /// Returns [`PropertyError::MissingSnapshotSection`] or
    /// [`PropertyError::SnapshotSectionVersion`] for section-level problems,
    /// and any error from validating or decoding the section contents.
    pub fn decode_all<W>(snapshot: &Snapshot<'_>) -> Result<Vec<Self>, PropertyError>
    where
        W: PropertySnapshotMetaWord,
    {
        let descriptor_section = snapshot.section(W::PROPERTY_DESCRIPTORS_KIND).ok_or(
            PropertyError::MissingSnapshotSection {
                kind: W::PROPERTY_DESCRIPTORS_KIND,
            },
        )?;
        let data_section = snapshot.section(W::PROPERTY_DATA_KIND).ok_or(
            PropertyError::MissingSnapshotSection {
                kind: W::PROPERTY_DATA_KIND,
            },
        )?;
        if descriptor_section.version() != SNAPSHOT_PROPERTY_VERSION {
            return Err(PropertyError::SnapshotSectionVersion {
                kind: W::PROPERTY_DESCRIPTORS_KIND,
                version: descriptor_section.version(),
            });
        }
        if data_section.version() != SNAPSHOT_PROPERTY_VERSION {
            return Err(PropertyError::SnapshotSectionVersion {
                kind: W::PROPERTY_DATA_KIND,
                version: data_section.version(),
            });
        }
        Self::decode_sections::<W>(descriptor_section.bytes(), data_section.bytes())
    }

    /// Decodes every property layer from raw descriptor and data bytes.
    ///
    /// The bytes are first checked with [`validate_property_sections`]; the
    /// records are then parsed again to build the decoded layers, in record
    /// order.
    ///
    /// # Errors
    ///
    /// Returns any validation error, plus any error raised while re-reading
    /// the records or their Arrow IPC payloads.
    pub fn decode_sections<W>(
        descriptor_bytes: &[u8],
        data_bytes: &[u8],
    ) -> Result<Vec<Self>, PropertyError>
    where
        W: PropertySnapshotMetaWord,
    {
        // Validation guarantees the header, record area, and column counts
        // that the direct indexing below relies on.
        let _summary = validate_property_sections::<W>(descriptor_bytes, data_bytes)?;
        let header_len = core::mem::size_of::<PropertySnapshotHeader>();
        let record_count_usize = u64_to_usize(read_u64_le(&descriptor_bytes[0..8])?)?;
        let record_bytes_usize = u64_to_usize(read_u64_le(&descriptor_bytes[8..16])?)?;
        let record_start = header_len;
        let string_start = record_start.checked_add(record_bytes_usize).ok_or(
            PropertyError::SnapshotDescriptorMismatch {
                reason: "descriptor section length overflow",
            },
        )?;
        let record_bytes_slice = &descriptor_bytes[record_start..string_start];
        let string_bytes = &descriptor_bytes[string_start..];
        let record_size = core::mem::size_of::<PropertySnapshotRecord<W>>();
        let mut out = Vec::with_capacity(record_count_usize);
        for position in 0..record_count_usize {
            let start = position.checked_mul(record_size).ok_or(
                PropertyError::SnapshotDescriptorMismatch {
                    reason: "record offset overflow",
                },
            )?;
            let record = parse_property_record::<W>(&record_bytes_slice[start..])?;
            let layer_id = le_word_to_u64::<W>(record.layer_id);
            let id_family = id_family_from_tag(le_word_to_u32::<W>(record.id_family)?)?;
            let role = layer_role_from_tag(le_word_to_u32::<W>(record.role)?)?;
            let storage = storage_from_tags(
                le_word_to_u32::<W>(record.storage)?,
                le_word_to_u32::<W>(record.missing_policy)?,
            )?;
            let name = read_snapshot_str(
                string_bytes,
                le_word_to_usize::<W>(record.name_offset)?,
                le_word_to_usize::<W>(record.name_len)?,
            )?
            .to_string();
            let logical_len = le_word_to_usize::<W>(record.logical_len)?;
            let value_offset = le_word_to_usize::<W>(record.value_data_offset)?;
            let value_len = le_word_to_usize::<W>(record.value_data_len)?;
            let value_end = checked_end(value_offset, value_len, data_bytes.len())?;
            let value_batch = read_one_ipc_batch(&data_bytes[value_offset..value_end])?;
            let default_offset = le_word_to_usize::<W>(record.default_data_offset)?;
            let default_len = le_word_to_usize::<W>(record.default_data_len)?;
            // A zero default length means the record has no default stream.
            let default_batch = if default_len == 0 {
                None
            } else {
                let default_end = checked_end(default_offset, default_len, data_bytes.len())?;
                Some(read_one_ipc_batch(
                    &data_bytes[default_offset..default_end],
                )?)
            };
            let data = match storage {
                StorageMode::Dense => DecodedPropertyData::Dense {
                    values: Arc::clone(value_batch.column(0)),
                },
                // Sparse value batches carry indices in column 0 and values
                // in column 1.
                StorageMode::Sparse { .. } => DecodedPropertyData::Sparse {
                    indices: Arc::clone(value_batch.column(0)),
                    values: Arc::clone(value_batch.column(1)),
                    default: default_batch
                        .as_ref()
                        .map(|batch| Arc::clone(batch.column(0))),
                },
            };
            out.push(Self {
                layer_id,
                name,
                id_family,
                role,
                storage,
                logical_len,
                data,
            });
        }
        Ok(out)
    }
}
2542
2543fn validate_identity_records<W>(
2549 snapshot: &Snapshot<'_>,
2550 records: &[IdentityModeRecord<W>],
2551) -> Result<Vec<IdentityModeSummary>, PropertyError>
2552where
2553 W: PropertySnapshotMetaWord,
2554{
2555 let mut seen = BTreeSet::new();
2556 let mut summaries = Vec::with_capacity(records.len());
2557 for record in records {
2558 let family = record.id_family()?;
2559 if !seen.insert(family) {
2560 return Err(PropertyError::SnapshotDescriptorMismatch {
2561 reason: "duplicate identity family mode record",
2562 });
2563 }
2564 let mode = record.mode()?;
2565 let local_len = record.local_len();
2566 match mode {
2567 IdentityMapMode::LocalEqualsCanonical => {}
2568 IdentityMapMode::ExplicitMap => {
2569 validate_identity_map_section::<W>(snapshot, family, local_len)?;
2570 }
2571 }
2572 summaries.push(IdentityModeSummary {
2573 id_family: family,
2574 mode,
2575 local_len,
2576 });
2577 }
2578 Ok(summaries)
2579}
2580
2581fn validate_identity_map_section<W>(
2587 snapshot: &Snapshot<'_>,
2588 id_family: IdFamily,
2589 required: usize,
2590) -> Result<(), PropertyError>
2591where
2592 W: PropertySnapshotMetaWord,
2593{
2594 let kind = identity_map_kind::<W>(id_family);
2595 let section = snapshot
2596 .section(kind)
2597 .ok_or(PropertyError::MissingIdentityMap { id_family })?;
2598 if section.version() != SNAPSHOT_PROPERTY_VERSION {
2599 return Err(PropertyError::SnapshotSectionVersion {
2600 kind,
2601 version: section.version(),
2602 });
2603 }
2604 let map: &[W::LittleEndianWord] = section
2605 .try_as_slice()
2606 .map_err(|error| PropertyError::SnapshotSectionView { kind, error })?;
2607 if map.len() != required {
2608 return Err(PropertyError::IdentityMapLength {
2609 id_family,
2610 required,
2611 actual: map.len(),
2612 });
2613 }
2614 Ok(())
2615}
2616
2617const fn identity_map_kind<W>(id_family: IdFamily) -> u32
2623where
2624 W: PropertySnapshotMetaWord,
2625{
2626 match id_family {
2627 IdFamily::Element => W::ELEMENT_IDENTITY_MAP_KIND,
2628 IdFamily::Relation => W::RELATION_IDENTITY_MAP_KIND,
2629 IdFamily::Incidence => W::INCIDENCE_IDENTITY_MAP_KIND,
2630 }
2631}
2632
/// Appends the UTF-8 bytes of `value` to `strings` and returns the byte
/// offset at which they start.
///
/// No delimiter is written; callers must record the length separately.
fn append_string(strings: &mut Vec<u8>, value: &str) -> usize {
    let start = strings.len();
    strings.extend(value.as_bytes());
    start
}
2643
2644fn layer_value_count<Id, I>(layer: &PropertyLayer<Id, I>) -> usize
2650where
2651 I: PropertyIndex,
2652{
2653 match layer.data() {
2654 PropertyLayerData::Dense { values } => values.len(),
2655 PropertyLayerData::Sparse { indices, .. } => indices.len(),
2656 }
2657}
2658
2659fn encode_layer_value_ipc<Id, I>(layer: &PropertyLayer<Id, I>) -> Result<Vec<u8>, PropertyError>
2665where
2666 I: PropertyIndex,
2667{
2668 let (schema, columns) = match layer.data() {
2669 PropertyLayerData::Dense { values } => {
2670 let schema = Arc::new(Schema::new(vec![layer.descriptor().arrow_field.clone()]));
2671 (schema, vec![Arc::clone(values)])
2672 }
2673 PropertyLayerData::Sparse {
2674 indices,
2675 values,
2676 default: _,
2677 } => {
2678 let fields = vec![
2679 Field::new("index", index_data_type::<I>(), false),
2680 layer.descriptor().arrow_field.clone(),
2681 ];
2682 let columns: Vec<ArrayRef> = vec![Arc::clone(indices) as ArrayRef, Arc::clone(values)];
2683 (Arc::new(Schema::new(fields)), columns)
2684 }
2685 };
2686 write_one_ipc_batch(&schema, columns)
2687}
2688
2689fn encode_layer_default_ipc<Id, I>(
2695 layer: &PropertyLayer<Id, I>,
2696) -> Result<Option<Vec<u8>>, PropertyError>
2697where
2698 I: PropertyIndex,
2699{
2700 let PropertyLayerData::Sparse {
2701 default: Some(default),
2702 ..
2703 } = layer.data()
2704 else {
2705 return Ok(None);
2706 };
2707 let schema = Arc::new(Schema::new(vec![layer.descriptor().arrow_field.clone()]));
2708 write_one_ipc_batch(&schema, vec![Arc::clone(default)]).map(Some)
2709}
2710
2711fn write_one_ipc_batch(
2717 schema: &Arc<Schema>,
2718 columns: Vec<ArrayRef>,
2719) -> Result<Vec<u8>, PropertyError> {
2720 let batch = RecordBatch::try_new(Arc::clone(schema), columns).map_err(map_arrow_error)?;
2721 let mut out = Vec::new();
2722 {
2723 let mut writer =
2724 StreamWriter::try_new(&mut out, schema.as_ref()).map_err(map_arrow_error)?;
2725 writer.write(&batch).map_err(map_arrow_error)?;
2726 writer.finish().map_err(map_arrow_error)?;
2727 }
2728 Ok(out)
2729}
2730
2731fn parse_property_record<W>(bytes: &[u8]) -> Result<PropertySnapshotRecord<W>, PropertyError>
2737where
2738 W: PropertySnapshotMetaWord,
2739{
2740 let need = core::mem::size_of::<PropertySnapshotRecord<W>>();
2741 if bytes.len() < need {
2742 return Err(PropertyError::SnapshotDataLength {
2743 reason: "property record is truncated",
2744 });
2745 }
2746 PropertySnapshotRecord::<W>::read_from_bytes(&bytes[..need]).map_err(|_error| {
2747 PropertyError::SnapshotDataLength {
2748 reason: "property record is truncated",
2749 }
2750 })
2751}
2752
/// Validates the IPC payloads referenced by one descriptor record.
///
/// Returns the byte ranges of `data` that the record occupies: the value
/// stream, followed by the default stream when its length is non-zero.
///
/// # Errors
///
/// Returns [`PropertyError::SnapshotDescriptorMismatch`] for a non-zero
/// reserved word or a dense record that declares a default stream, a range
/// error when a payload lies outside `data`, and any error from reading or
/// validating the Arrow batches.
fn validate_property_record_data<W>(
    record: &PropertySnapshotRecord<W>,
    storage: StorageMode,
    data: &[u8],
) -> Result<Vec<core::ops::Range<usize>>, PropertyError>
where
    W: PropertySnapshotMetaWord,
{
    if le_word_to_u64::<W>(record.reserved) != 0 {
        return Err(PropertyError::SnapshotDescriptorMismatch {
            reason: "property descriptor reserved word must be zero",
        });
    }
    let offset = le_word_to_usize::<W>(record.value_data_offset)?;
    let len = le_word_to_usize::<W>(record.value_data_len)?;
    let end = checked_end(offset, len, data.len())?;
    let value_batch = read_one_ipc_batch(&data[offset..end])?;
    let default_offset = le_word_to_usize::<W>(record.default_data_offset)?;
    let default_len = le_word_to_usize::<W>(record.default_data_len)?;
    // A zero default length means the record has no default stream; the
    // default offset is not inspected in that case.
    let default_batch = if default_len == 0 {
        None
    } else {
        let default_end = checked_end(default_offset, default_len, data.len())?;
        Some(read_one_ipc_batch(&data[default_offset..default_end])?)
    };
    match storage {
        StorageMode::Dense => {
            if default_len != 0 {
                return Err(PropertyError::SnapshotDescriptorMismatch {
                    reason: "dense property must not declare a default stream",
                });
            }
            validate_dense_batch::<W>(record, &value_batch)?;
        }
        StorageMode::Sparse { missing } => {
            validate_sparse_batch::<W>(record, missing, &value_batch, default_batch.as_ref())?;
        }
    }
    let mut ranges = Vec::with_capacity(2);
    ranges.push(offset..end);
    if default_len != 0 {
        // Cannot overflow: `checked_end` accepted this range above.
        ranges.push(default_offset..default_offset + default_len);
    }
    Ok(ranges)
}
2803
2804fn read_one_ipc_batch(bytes: &[u8]) -> Result<RecordBatch, PropertyError> {
2810 let reader = StreamReader::try_new(Cursor::new(bytes), None).map_err(map_arrow_error)?;
2811 let mut batches = Vec::new();
2812 for batch in reader {
2813 batches.push(batch.map_err(map_arrow_error)?);
2814 if batches.len() > 1 {
2815 return Err(PropertyError::SnapshotDescriptorMismatch {
2816 reason: "property IPC stream contains more than one batch",
2817 });
2818 }
2819 }
2820 let mut iter = batches.into_iter();
2821 iter.next()
2822 .ok_or(PropertyError::SnapshotDescriptorMismatch {
2823 reason: "property IPC stream contains no batches",
2824 })
2825}
2826
2827fn validate_dense_batch<W>(
2833 record: &PropertySnapshotRecord<W>,
2834 batch: &RecordBatch,
2835) -> Result<(), PropertyError>
2836where
2837 W: PropertySnapshotMetaWord,
2838{
2839 if batch.num_columns() != 1 {
2840 return Err(PropertyError::SnapshotDescriptorMismatch {
2841 reason: "dense property batch must contain one column",
2842 });
2843 }
2844 let values = batch.column(0);
2845 if values.len() != le_word_to_usize::<W>(record.logical_len)?
2846 || values.len() != le_word_to_usize::<W>(record.value_count)?
2847 {
2848 return Err(PropertyError::SnapshotDataLength {
2849 reason: "dense property Arrow length does not match descriptor",
2850 });
2851 }
2852 validate_value_column(values.as_ref())
2853}
2854
/// Validates the value batch and optional default batch of a sparse layer.
///
/// The value batch must have an index column and a value column, both of the
/// record's value count. The default batch must be absent for
/// [`MissingPolicy::Null`] and, for [`MissingPolicy::Default`], present as a
/// single non-null value of the same Arrow type as the value column.
///
/// # Errors
///
/// Returns [`PropertyError::SnapshotDescriptorMismatch`] or
/// [`PropertyError::SnapshotDataLength`] for the violations above, and any
/// error from converting descriptor words or validating the columns.
fn validate_sparse_batch<W>(
    record: &PropertySnapshotRecord<W>,
    missing: MissingPolicy,
    value_batch: &RecordBatch,
    default_batch: Option<&RecordBatch>,
) -> Result<(), PropertyError>
where
    W: PropertySnapshotMetaWord,
{
    if value_batch.num_columns() != 2 {
        return Err(PropertyError::SnapshotDescriptorMismatch {
            reason: "sparse property value stream must contain index and value columns",
        });
    }
    // Column 0 holds the indices, column 1 the values.
    let indexes = value_batch.column(0);
    let values = value_batch.column(1);
    let value_count = le_word_to_usize::<W>(record.value_count)?;
    if indexes.len() != value_count || values.len() != value_count {
        return Err(PropertyError::SnapshotDataLength {
            reason: "sparse property Arrow value count does not match descriptor",
        });
    }
    validate_value_column(values.as_ref())?;
    validate_sparse_indices_dyn(indexes.as_ref(), le_word_to_usize::<W>(record.logical_len)?)?;
    // The missing policy decides whether a default stream must be present.
    match (missing, default_batch) {
        (MissingPolicy::Null, None) => {}
        (MissingPolicy::Null, Some(_)) => {
            return Err(PropertyError::SnapshotDescriptorMismatch {
                reason: "sparse-null property must not declare a default stream",
            });
        }
        (MissingPolicy::Default, Some(default_batch)) => {
            if default_batch.num_columns() != 1 {
                return Err(PropertyError::SnapshotDescriptorMismatch {
                    reason: "sparse default stream must contain one column",
                });
            }
            let default = default_batch.column(0);
            // The length check runs first, so `is_null(0)` is only reached
            // when exactly one element exists.
            if default.len() != 1 || default.data_type() != values.data_type() || default.is_null(0)
            {
                return Err(PropertyError::SnapshotDescriptorMismatch {
                    reason: "sparse property default column is not a non-null matching scalar",
                });
            }
        }
        (MissingPolicy::Default, None) => {
            return Err(PropertyError::SnapshotDescriptorMismatch {
                reason: "sparse-default property is missing its default stream",
            });
        }
    }
    Ok(())
}
2913
2914fn validate_value_column(values: &dyn Array) -> Result<(), PropertyError> {
2920 if values.null_count() > values.len() {
2921 return Err(PropertyError::SnapshotDescriptorMismatch {
2922 reason: "Arrow value column has invalid null accounting",
2923 });
2924 }
2925 Ok(())
2926}
2927
2928fn validate_data_coverage(
2934 ranges: &mut [core::ops::Range<usize>],
2935 data_len: usize,
2936) -> Result<(), PropertyError> {
2937 ranges.sort_by_key(|range| range.start);
2938 let mut cursor = 0_usize;
2939 for range in ranges {
2940 if range.start != cursor {
2941 return Err(PropertyError::SnapshotDescriptorMismatch {
2942 reason: "property data ranges leave a gap or overlap",
2943 });
2944 }
2945 cursor = range.end;
2946 }
2947 if cursor != data_len {
2948 return Err(PropertyError::SnapshotDescriptorMismatch {
2949 reason: "property data section has trailing bytes",
2950 });
2951 }
2952 Ok(())
2953}
2954
2955fn read_snapshot_str(bytes: &[u8], offset: usize, len: usize) -> Result<&str, PropertyError> {
2961 let end = checked_end(offset, len, bytes.len())?;
2962 core::str::from_utf8(&bytes[offset..end])
2963 .map_err(|_error| PropertyError::SnapshotInvalidUtf8 { offset })
2964}
2965
2966fn checked_end(offset: usize, len: usize, available: usize) -> Result<usize, PropertyError> {
2972 let end = offset
2973 .checked_add(len)
2974 .ok_or(PropertyError::SnapshotRangeOutOfBounds {
2975 offset,
2976 len,
2977 available,
2978 })?;
2979 if end > available {
2980 Err(PropertyError::SnapshotRangeOutOfBounds {
2981 offset,
2982 len,
2983 available,
2984 })
2985 } else {
2986 Ok(end)
2987 }
2988}
2989
2990fn read_u64_le(bytes: &[u8]) -> Result<u64, PropertyError> {
2996 if bytes.len() < core::mem::size_of::<u64>() {
2997 return Err(PropertyError::SnapshotDataLength {
2998 reason: "u64 field is truncated",
2999 });
3000 }
3001 let mut array = [0_u8; 8];
3002 array.copy_from_slice(&bytes[..8]);
3003 Ok(u64::from_le_bytes(array))
3004}
3005
3006fn le_word<W>(value: usize) -> Result<W::LittleEndianWord, PropertyError>
3012where
3013 W: PropertySnapshotMetaWord,
3014{
3015 let Some(value) = W::from_usize(value) else {
3016 return Err(PropertyError::SnapshotDescriptorMismatch {
3017 reason: "value does not fit selected metadata width",
3018 });
3019 };
3020 Ok(value.to_le_word())
3021}
3022
3023fn le_word_to_usize<W>(word: W::LittleEndianWord) -> Result<usize, PropertyError>
3029where
3030 W: PropertySnapshotMetaWord,
3031{
3032 W::from_le_word(word)
3033 .to_usize()
3034 .ok_or(PropertyError::SnapshotDescriptorMismatch {
3035 reason: "metadata word does not fit usize",
3036 })
3037}
3038
3039fn le_word_to_u64<W>(word: W::LittleEndianWord) -> u64
3045where
3046 W: PropertySnapshotMetaWord,
3047{
3048 W::from_le_word(word).to_u64()
3049}
3050
3051fn le_word_to_u32<W>(word: W::LittleEndianWord) -> Result<u32, PropertyError>
3057where
3058 W: PropertySnapshotMetaWord,
3059{
3060 let value = le_word_to_u64::<W>(word);
3061 u32::try_from(value).map_err(|_error| PropertyError::SnapshotDescriptorMismatch {
3062 reason: "metadata word does not fit u32 tag",
3063 })
3064}
3065
3066fn u64_to_usize(value: u64) -> Result<usize, PropertyError> {
3072 usize::try_from(value).map_err(|_error| PropertyError::SnapshotDescriptorMismatch {
3073 reason: "snapshot length does not fit usize",
3074 })
3075}
3076
3077fn usize_to_u64(value: usize) -> Result<u64, PropertyError> {
3083 u64::try_from(value).map_err(|_error| PropertyError::LengthDoesNotFitU64 { value })
3084}
3085
3086const fn id_family_tag(id_family: IdFamily) -> u32 {
3092 match id_family {
3093 IdFamily::Element => 0,
3094 IdFamily::Relation => 1,
3095 IdFamily::Incidence => 2,
3096 }
3097}
3098
3099const fn id_family_from_tag(tag: u32) -> Result<IdFamily, PropertyError> {
3105 match tag {
3106 0 => Ok(IdFamily::Element),
3107 1 => Ok(IdFamily::Relation),
3108 2 => Ok(IdFamily::Incidence),
3109 _ => Err(PropertyError::UnknownIdFamilyTag { tag }),
3110 }
3111}
3112
3113const fn layer_role_tag(role: LayerRole) -> u32 {
3119 match role {
3120 LayerRole::Weight => 0,
3121 LayerRole::Property => 1,
3122 }
3123}
3124
3125const fn layer_role_from_tag(tag: u32) -> Result<LayerRole, PropertyError> {
3131 match tag {
3132 0 => Ok(LayerRole::Weight),
3133 1 => Ok(LayerRole::Property),
3134 _ => Err(PropertyError::UnknownLayerRoleTag { tag }),
3135 }
3136}
3137
3138const fn storage_tag(storage: StorageMode) -> u32 {
3144 match storage {
3145 StorageMode::Dense => 0,
3146 StorageMode::Sparse { .. } => 1,
3147 }
3148}
3149
3150const fn missing_policy_tag(storage: StorageMode) -> u32 {
3156 match storage {
3157 StorageMode::Dense => 0,
3158 StorageMode::Sparse {
3159 missing: MissingPolicy::Null,
3160 } => 1,
3161 StorageMode::Sparse {
3162 missing: MissingPolicy::Default,
3163 } => 2,
3164 }
3165}
3166
3167const fn storage_from_tags(storage: u32, missing: u32) -> Result<StorageMode, PropertyError> {
3173 match (storage, missing) {
3174 (0, 0) => Ok(StorageMode::Dense),
3175 (1, 1) => Ok(StorageMode::Sparse {
3176 missing: MissingPolicy::Null,
3177 }),
3178 (1, 2) => Ok(StorageMode::Sparse {
3179 missing: MissingPolicy::Default,
3180 }),
3181 (0, _) => Err(PropertyError::UnknownMissingPolicyTag { tag: missing }),
3182 (_, _) => Err(PropertyError::UnknownStorageTag { tag: storage }),
3183 }
3184}
3185
3186fn ensure_arrow_type<Id, I>(
3192 descriptor: &PropertyLayerDescriptor<Id, I>,
3193 values: &dyn Array,
3194) -> Result<(), PropertyError>
3195where
3196 I: PropertyIndex,
3197{
3198 if descriptor.arrow_field.data_type() == values.data_type() {
3199 Ok(())
3200 } else {
3201 Err(PropertyError::ArrowTypeMismatch {
3202 name: descriptor.name.clone(),
3203 })
3204 }
3205}
3206
3207fn validate_default_policy<Id, I>(
3213 descriptor: &PropertyLayerDescriptor<Id, I>,
3214 missing: MissingPolicy,
3215 default: Option<&ArrayRef>,
3216) -> Result<(), PropertyError>
3217where
3218 I: PropertyIndex,
3219{
3220 match (missing, default) {
3221 (MissingPolicy::Null, None) => Ok(()),
3222 (MissingPolicy::Default, Some(array)) => {
3223 ensure_arrow_type(descriptor, array.as_ref())?;
3224 if array.len() == 1 && !array.is_null(0) {
3225 Ok(())
3226 } else {
3227 Err(PropertyError::DefaultPolicyMismatch {
3228 name: descriptor.name.clone(),
3229 })
3230 }
3231 }
3232 (MissingPolicy::Null | MissingPolicy::Default, _) => {
3233 Err(PropertyError::DefaultPolicyMismatch {
3234 name: descriptor.name.clone(),
3235 })
3236 }
3237 }
3238}
3239
3240fn ensure_no_nulls(array: &dyn Array) -> Result<(), PropertyError> {
3246 for index in 0..array.len() {
3247 if array.is_null(index) {
3248 return Err(PropertyError::UnexpectedNull { index });
3249 }
3250 }
3251 Ok(())
3252}
3253
3254fn validate_sparse_indices<I>(
3260 indices: &PrimitiveArray<I::ArrowType>,
3261 len: usize,
3262) -> Result<(), PropertyError>
3263where
3264 I: PropertyIndex,
3265{
3266 let mut previous = None;
3267 for position in 0..indices.len() {
3268 let index = indices.value(position);
3269 let Some(index_usize) = index.to_usize() else {
3270 return Err(PropertyError::SparseIndexOutOfBounds {
3271 index: index.to_u64(),
3272 len,
3273 });
3274 };
3275 if index_usize >= len {
3276 return Err(PropertyError::SparseIndexOutOfBounds {
3277 index: index.to_u64(),
3278 len,
3279 });
3280 }
3281 if let Some(prior) = previous
3282 && index <= prior
3283 {
3284 return Err(PropertyError::SparseIndexOrder { position });
3285 }
3286 previous = Some(index);
3287 }
3288 Ok(())
3289}
3290
3291fn validate_sparse_indices_dyn(indices: &dyn Array, len: usize) -> Result<(), PropertyError> {
3297 if let Some(indices) = indices
3298 .as_any()
3299 .downcast_ref::<PrimitiveArray<arrow_array::types::UInt16Type>>()
3300 {
3301 return validate_sparse_indices::<u16>(indices, len);
3302 }
3303 if let Some(indices) = indices
3304 .as_any()
3305 .downcast_ref::<PrimitiveArray<arrow_array::types::UInt32Type>>()
3306 {
3307 return validate_sparse_indices::<u32>(indices, len);
3308 }
3309 if let Some(indices) = indices
3310 .as_any()
3311 .downcast_ref::<PrimitiveArray<arrow_array::types::UInt64Type>>()
3312 {
3313 return validate_sparse_indices::<u64>(indices, len);
3314 }
3315 Err(PropertyError::SnapshotDescriptorMismatch {
3316 reason: "sparse property index column is not UInt16, UInt32, or UInt64",
3317 })
3318}
3319
3320const fn index_data_type<I>() -> DataType
3326where
3327 I: PropertyIndex,
3328{
3329 if core::mem::size_of::<I>() == core::mem::size_of::<u16>() {
3330 DataType::UInt16
3331 } else if core::mem::size_of::<I>() == core::mem::size_of::<u32>() {
3332 DataType::UInt32
3333 } else {
3334 DataType::UInt64
3335 }
3336}
3337
3338fn validate_dense_primitive_selection<Id, I, P>(
3344 layer: &PropertyLayer<Id, I>,
3345 expected: IdFamily,
3346 required: usize,
3347) -> Result<&PrimitiveArray<P>, PropertyError>
3348where
3349 I: PropertyIndex,
3350 P: ArrowPrimitiveType,
3351{
3352 if layer.descriptor.id_family != expected {
3353 return Err(PropertyError::IdFamilyMismatch {
3354 expected,
3355 actual: layer.descriptor.id_family,
3356 });
3357 }
3358 if layer.len() < required {
3359 return Err(PropertyError::LayerTooShort {
3360 required,
3361 actual: layer.len(),
3362 });
3363 }
3364 let PropertyLayerData::Dense { values } = layer.data() else {
3365 return Err(PropertyError::ExpectedDenseStorage {
3366 name: layer.descriptor.name.clone(),
3367 });
3368 };
3369 let primitive = values
3370 .as_any()
3371 .downcast_ref::<PrimitiveArray<P>>()
3372 .ok_or_else(|| PropertyError::ArrowTypeMismatch {
3373 name: layer.descriptor.name.clone(),
3374 })?;
3375 ensure_no_nulls(primitive)?;
3376 Ok(primitive)
3377}
3378
/// Borrowed view of a validated sparse primitive layer, as returned by
/// [`validate_sparse_primitive_selection`].
///
/// The tuple holds, in order: the sparse index column, the stored value
/// column, and the scalar default value.
type SparsePrimitiveSelection<'layer, I, P> = (
    &'layer PrimitiveArray<<I as PropertyIndex>::ArrowType>,
    &'layer PrimitiveArray<P>,
    <P as ArrowPrimitiveType>::Native,
);
3385
3386fn validate_sparse_primitive_selection<I, P, Id>(
3392 layer: &PropertyLayer<Id, I>,
3393 expected: IdFamily,
3394 required: usize,
3395) -> Result<SparsePrimitiveSelection<'_, I, P>, PropertyError>
3396where
3397 I: PropertyIndex,
3398 P: ArrowPrimitiveType,
3399 P::Native: Copy,
3400{
3401 if layer.descriptor.id_family != expected {
3402 return Err(PropertyError::IdFamilyMismatch {
3403 expected,
3404 actual: layer.descriptor.id_family,
3405 });
3406 }
3407 if layer.len() < required {
3408 return Err(PropertyError::LayerTooShort {
3409 required,
3410 actual: layer.len(),
3411 });
3412 }
3413 let PropertyLayerData::Sparse {
3414 indices,
3415 values,
3416 default,
3417 } = layer.data()
3418 else {
3419 return Err(PropertyError::ExpectedSparseStorage {
3420 name: layer.descriptor.name.clone(),
3421 });
3422 };
3423 let Some(default_array) = default else {
3424 return Err(PropertyError::SparseNullMissingNotTotal {
3425 name: layer.descriptor.name.clone(),
3426 });
3427 };
3428 let primitive = values
3429 .as_any()
3430 .downcast_ref::<PrimitiveArray<P>>()
3431 .ok_or_else(|| PropertyError::ArrowTypeMismatch {
3432 name: layer.descriptor.name.clone(),
3433 })?;
3434 ensure_no_nulls(primitive)?;
3435 let default_primitive = default_array
3436 .as_any()
3437 .downcast_ref::<PrimitiveArray<P>>()
3438 .ok_or_else(|| PropertyError::ArrowTypeMismatch {
3439 name: layer.descriptor.name.clone(),
3440 })?;
3441 if default_primitive.len() != 1 || default_primitive.is_null(0) {
3442 return Err(PropertyError::DefaultPolicyMismatch {
3443 name: layer.descriptor.name.clone(),
3444 });
3445 }
3446 Ok((indices.as_ref(), primitive, default_primitive.value(0)))
3447}
3448
3449fn sparse_value<I, P>(
3455 indices: &PrimitiveArray<I::ArrowType>,
3456 values: &PrimitiveArray<P>,
3457 default: P::Native,
3458 index: usize,
3459) -> P::Native
3460where
3461 I: PropertyIndex,
3462 P: ArrowPrimitiveType,
3463 P::Native: Copy,
3464{
3465 let Some(target) = I::from_usize(index) else {
3466 return default;
3467 };
3468 let mut low = 0_usize;
3469 let mut high = indices.len();
3470 while low < high {
3471 let mid = low + ((high - low) / 2);
3472 let value = indices.value(mid);
3473 if value < target {
3474 low = mid + 1;
3475 } else {
3476 high = mid;
3477 }
3478 }
3479 if low < indices.len() && indices.value(low) == target {
3480 values.value(low)
3481 } else {
3482 default
3483 }
3484}
3485
3486#[expect(
3492 clippy::needless_pass_by_value,
3493 reason = "Arrow result adapters hand over owned errors and this helper consumes them into messages"
3494)]
3495fn map_arrow_error(error: arrow_schema::ArrowError) -> PropertyError {
3496 PropertyError::Arrow {
3497 message: error.to_string(),
3498 }
3499}
3500
// Tests are declared as an out-of-line `tests` module that is compiled only
// when `cfg(test)` is active.
#[cfg(test)]
mod tests;