oxgraph_layout_util/lib.rs
1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Three responsibilities live here, one namespace each:
4//!
5//! - **Index/word vocabulary** (crate root: [`LayoutIndex`], [`LayoutWord`],
6//! [`LayoutSnapshotWord`], [`SnapshotWidth`]): the single sealed set of dense index widths,
7//! native/little-endian storage words, and the width-to-LE-word bijection shared by every layout
8//! and snapshot crate. CSR and BCSR add only thin section-kind-bearing sub-traits on top.
9//! - **Build-time** ([`build`]): validate dense IDs against a known count, convert `usize` slots
10//! back into a typed index width, flatten per-bucket payloads into CSR-style `(offsets, items)`
11//! pairs, and lower native index slices into explicit little-endian words.
12//! - **Read-time** ([`integrity`]): walk borrowed offset arrays at view-open time and convert
13//! already-validated indexes infallibly.
14//!
15//! The build and integrity helpers are deliberately not re-exported at the
16//! crate root: the root namespace carries only the vocabulary every layer is
17//! parameterized over, while builder and validation internals stay behind
18//! their module names.
19//!
20//! [`LocalId`] is the one generic local-handle newtype every layout crate
21//! aliases for its node/edge/vertex/hyperedge/incidence identities, and
22//! [`IdSlice`] is the one slice-to-handle iterator they all reuse.
23//!
24//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
25//! semantics. No dependency on any other `oxgraph` crate.
26// kani-skip: helpers loop over arbitrary slice lengths and allocate
27// variable-sized buffers; proofs exercise the algebraic contract on bounded
28// fixtures.
29#![no_std]
30
31#[cfg(any(feature = "alloc", kani))]
32extern crate alloc;
33
34#[cfg(kani)]
35extern crate kani;
36
37use core::{fmt, hash::Hash, iter::FusedIterator, marker::PhantomData};
38
39use zerocopy::{
40 FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned,
41 byteorder::{LE, U16, U32, U64},
42};
43
44pub mod build;
45pub mod integrity;
46
47// ---------------------------------------------------------------------------
48// CRC-32C (Castagnoli)
49// ---------------------------------------------------------------------------
50
51/// Lookup table for the byte-at-a-time reflected CRC-32C computation.
52///
53/// Generated at compile time from the reflected Castagnoli polynomial
54/// `0x82F6_3B78` (bit-reversal of `0x1EDC_6F41`).
55const CRC32C_TABLE: [u32; 256] = build_crc32c_table();
56
57/// Builds the 256-entry reflected CRC-32C table at compile time.
58const fn build_crc32c_table() -> [u32; 256] {
59 let mut table = [0u32; 256];
60 let mut index: usize = 0;
61 let mut seed: u32 = 0;
62 while index < 256 {
63 let mut crc = seed;
64 let mut bit = 0;
65 while bit < 8 {
66 let mask = (crc & 1).wrapping_neg();
67 crc = (crc >> 1) ^ (0x82F6_3B78 & mask);
68 bit += 1;
69 }
70 table[index] = crc;
71 index += 1;
72 seed += 1;
73 }
74 table
75}
76
77/// Continues a CRC-32C (Castagnoli, polynomial `0x1EDC_6F41`) checksum over
78/// `bytes`, seeded with the result of a prior call (seed `0` starts a fresh
79/// checksum).
80///
81/// This is a portable, table-driven software implementation provided so
82/// `no_std` layout crates can produce checksummed snapshot containers without
83/// a `std`-only CRC dependency. It satisfies the continuation law
84/// `crc32c_append(crc32c_append(0, a), b) == crc32c_append(0, ab)`, matching
85/// the `crc32c` crate's `crc32c_append`, and `crc32c_append(0, b"") == 0`.
86/// `std` consumers on hot write paths may prefer a hardware-accelerated
87/// implementation (e.g. the `crc32c` crate) — any continuation-style CRC-32C
88/// is byte-for-byte interchangeable with this one.
89///
90/// # Performance
91///
92/// This function is `O(bytes.len())` (one table lookup per byte; no
93/// allocation).
94#[must_use]
95pub fn crc32c_append(crc: u32, bytes: &[u8]) -> u32 {
96 let mut state = !crc;
97 for &byte in bytes {
98 let index = usize::from(state.to_le_bytes()[0] ^ byte);
99 state = (state >> 8) ^ CRC32C_TABLE[index];
100 }
101 !state
102}
103
104/// Sealed module preventing external types from satisfying the in-crate
105/// index/word traits.
106mod sealed {
107 /// Seals [`super::LayoutIndex`] to the supported unsigned index widths.
108 pub trait LayoutIndex {}
109
110 /// Seals [`super::ZerocopyWord`] to in-tree native and little-endian words.
111 pub trait ZerocopyWord {}
112
113 /// Seals [`super::LayoutSnapshotWord`] to little-endian storage words.
114 pub trait LayoutSnapshotWord {}
115
116 /// Seals [`super::SnapshotWidth`] to the persisted unsigned widths.
117 pub trait SnapshotWidth {}
118
119 /// Seals [`super::Axis`] to the in-tree local-handle axis markers.
120 pub trait Axis {}
121}
122
123// ---------------------------------------------------------------------------
124// Index / word vocabulary
125// ---------------------------------------------------------------------------
126
127/// Unsigned dense ID width usable by graph and hypergraph layouts and builders.
128///
129/// This is the single index-width contract for the whole substrate: it merges
130/// what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously duplicated as
131/// `CsrIndex`/`BcsrIndex`. `usize` is included for native build-time indices;
132/// persisted widths additionally implement [`SnapshotWidth`].
133///
134/// # Performance
135///
136/// Implementations perform checked conversions in `O(1)`.
137pub trait LayoutIndex:
138 sealed::LayoutIndex + Copy + Eq + Ord + fmt::Debug + fmt::Display + Hash + Sized
139{
140 /// The additive identity for this width (the zero offset / first slot).
141 const ZERO: Self;
142
143 /// Converts this ID to `usize` when representable on the current target.
144 ///
145 /// # Performance
146 ///
147 /// This function is `O(1)`.
148 fn to_usize(self) -> Option<usize>;
149
150 /// Converts a `usize` into this ID width when representable.
151 ///
152 /// # Performance
153 ///
154 /// This function is `O(1)`.
155 fn from_usize(value: usize) -> Option<Self>;
156}
157
158/// Implements [`LayoutIndex`] for one unsigned width.
159macro_rules! impl_layout_index {
160 ($index:ty) => {
161 impl sealed::LayoutIndex for $index {}
162
163 impl LayoutIndex for $index {
164 const ZERO: Self = 0;
165
166 fn to_usize(self) -> Option<usize> {
167 usize::try_from(self).ok()
168 }
169
170 fn from_usize(value: usize) -> Option<Self> {
171 Self::try_from(value).ok()
172 }
173 }
174 };
175}
176
177impl_layout_index!(u16);
178impl_layout_index!(u32);
179impl_layout_index!(u64);
180
181impl sealed::LayoutIndex for usize {}
182
183impl LayoutIndex for usize {
184 const ZERO: Self = 0;
185
186 fn to_usize(self) -> Option<usize> {
187 Some(self)
188 }
189
190 fn from_usize(value: usize) -> Option<Self> {
191 Some(value)
192 }
193}
194
195/// Borrowed offset or value word usable by offset-integrity primitives.
196///
197/// Sealed: native unsigned integers and little-endian storage words opt in via
198/// the in-tree macros below. External crates cannot satisfy this trait.
199///
200/// # Performance
201///
202/// Reading a word is expected to be `O(1)`.
203pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
204 /// Reads this word's value as `usize`, or returns `None` when the value
205 /// does not fit in `usize` on the current target.
206 ///
207 /// # Performance
208 ///
209 /// This method is `O(1)`.
210 fn read_as_usize(self) -> Option<usize>;
211}
212
213/// Implements [`ZerocopyWord`] for one native unsigned integer type.
214macro_rules! impl_native_zerocopy_word {
215 ($word:ty) => {
216 impl sealed::ZerocopyWord for $word {}
217
218 impl ZerocopyWord for $word {
219 fn read_as_usize(self) -> Option<usize> {
220 usize::try_from(self).ok()
221 }
222 }
223 };
224}
225
226impl_native_zerocopy_word!(u16);
227impl_native_zerocopy_word!(u32);
228impl_native_zerocopy_word!(u64);
229impl_native_zerocopy_word!(usize);
230
231/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
232macro_rules! impl_le_zerocopy_word {
233 ($word:ty) => {
234 impl sealed::ZerocopyWord for $word {}
235
236 impl ZerocopyWord for $word {
237 fn read_as_usize(self) -> Option<usize> {
238 usize::try_from(<$word>::get(self)).ok()
239 }
240 }
241 };
242}
243
244impl_le_zerocopy_word!(U16<LE>);
245impl_le_zerocopy_word!(U32<LE>);
246impl_le_zerocopy_word!(U64<LE>);
247
248/// A native-host or little-endian word carrying a typed dense [`LayoutIndex`].
249///
250/// This merges what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously
251/// duplicated as `CsrWord`/`BcsrWord`. The associated [`LayoutWord::Index`]
252/// recovers the logical index value from either a native word (identity) or a
253/// little-endian storage word (byte-order conversion), so a single [`IdSlice`]
254/// drives both build-path and view-path iteration.
255///
256/// # Performance
257///
258/// [`LayoutWord::get`] is expected to be `O(1)`.
259pub trait LayoutWord: Copy + ZerocopyWord {
260 /// Logical dense index recovered from this word.
261 type Index: LayoutIndex;
262
263 /// Reads the logical index value out of this word.
264 ///
265 /// # Performance
266 ///
267 /// This method is `O(1)`.
268 fn get(self) -> Self::Index;
269}
270
271/// Implements [`LayoutWord`] for one native unsigned integer (identity index).
272macro_rules! impl_native_layout_word {
273 ($word:ty) => {
274 impl LayoutWord for $word {
275 type Index = $word;
276
277 fn get(self) -> Self::Index {
278 self
279 }
280 }
281 };
282}
283
284impl_native_layout_word!(u16);
285impl_native_layout_word!(u32);
286impl_native_layout_word!(u64);
287impl_native_layout_word!(usize);
288
289/// Implements [`LayoutWord`] for one little-endian word over a native index.
290macro_rules! impl_le_layout_word {
291 ($word:ty, $index:ty) => {
292 impl LayoutWord for $word {
293 type Index = $index;
294
295 fn get(self) -> Self::Index {
296 <$word>::get(self)
297 }
298 }
299 };
300}
301
302impl_le_layout_word!(U16<LE>, u16);
303impl_le_layout_word!(U32<LE>, u32);
304impl_le_layout_word!(U64<LE>, u64);
305
306/// A little-endian storage word usable in persisted snapshot payloads.
307///
308/// Sealed marker over [`LayoutWord`] plus the zerocopy byte-view bounds,
309/// including [`Unaligned`] — persisted words are byte-aligned by design, so
310/// generic wire records composed of them stay padding-free. Only the explicit
311/// little-endian words implement it — native integers are excluded so
312/// persisted payloads always carry a defined byte order.
313///
314/// # Performance
315///
316/// `perf: unspecified`; this is a marker trait.
317pub trait LayoutSnapshotWord:
318 sealed::LayoutSnapshotWord
319 + LayoutWord
320 + FromBytes
321 + Immutable
322 + IntoBytes
323 + KnownLayout
324 + Unaligned
325{
326}
327
328/// Implements [`LayoutSnapshotWord`] for one little-endian storage word.
329macro_rules! impl_layout_snapshot_word {
330 ($word:ty) => {
331 impl sealed::LayoutSnapshotWord for $word {}
332
333 impl LayoutSnapshotWord for $word {}
334 };
335}
336
337impl_layout_snapshot_word!(U16<LE>);
338impl_layout_snapshot_word!(U32<LE>);
339impl_layout_snapshot_word!(U64<LE>);
340
341/// A persisted unsigned width with its little-endian storage word.
342///
343/// This is the single width-to-LE-word bijection shared by `oxgraph-csr`,
344/// `oxgraph-hyper-bcsr`, and any future layout crate. `usize` deliberately does
345/// not implement it: persisted snapshots are fixed-width.
346///
347/// # Performance
348///
349/// [`SnapshotWidth::to_le_word`] and [`SnapshotWidth::from_le_word`] are `O(1)`.
350pub trait SnapshotWidth: sealed::SnapshotWidth + LayoutIndex {
351 /// Two-bit width discriminant carried in a section kind's low bits:
352 /// `0b00` = `u16`, `0b01` = `u32`, `0b10` = `u64` (`0b11` reserved).
353 ///
354 /// Layout crates declare 4-aligned `SNAPSHOT_KIND_*_BASE` constants and
355 /// derive each persisted section kind as `BASE | WIDTH_CODE`, so one base
356 /// constant covers every persisted width.
357 const WIDTH_CODE: u32;
358
359 /// Little-endian storage word for this width.
360 type LittleEndianWord: LayoutSnapshotWord<Index = Self>;
361
362 /// Lowers this logical index into its little-endian storage word.
363 ///
364 /// # Performance
365 ///
366 /// This function is `O(1)`.
367 fn to_le_word(self) -> Self::LittleEndianWord;
368
369 /// Recovers this logical index from a little-endian storage word.
370 ///
371 /// # Performance
372 ///
373 /// This function is `O(1)`.
374 fn from_le_word(word: Self::LittleEndianWord) -> Self;
375}
376
377/// Implements [`SnapshotWidth`] for one width and its little-endian word.
378macro_rules! impl_snapshot_width {
379 ($index:ty, $word:ty, $width_code:expr) => {
380 impl sealed::SnapshotWidth for $index {}
381
382 impl SnapshotWidth for $index {
383 const WIDTH_CODE: u32 = $width_code;
384
385 type LittleEndianWord = $word;
386
387 fn to_le_word(self) -> Self::LittleEndianWord {
388 <$word>::new(self)
389 }
390
391 fn from_le_word(word: Self::LittleEndianWord) -> Self {
392 <$word>::get(word)
393 }
394 }
395 };
396}
397
398impl_snapshot_width!(u16, U16<LE>, 0b00);
399impl_snapshot_width!(u32, U32<LE>, 0b01);
400impl_snapshot_width!(u64, U64<LE>, 0b10);
401
402// ---------------------------------------------------------------------------
403// Local-handle newtype + slice iterator
404// ---------------------------------------------------------------------------
405
406/// Marker for a local-handle axis (node, edge, vertex, hyperedge, incidence).
407///
408/// Sealed: only the in-tree axis markers below implement it, so [`LocalId`]
409/// cannot be branded with an arbitrary external type.
410///
411/// # Performance
412///
413/// `perf: unspecified`; this is a marker trait.
414pub trait Axis: sealed::Axis {}
415
416/// Defines one zero-sized local-handle axis marker.
417macro_rules! define_axis {
418 ($(#[$doc:meta])* $name:ident) => {
419 $(#[$doc])*
420 #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
421 pub struct $name;
422
423 impl sealed::Axis for $name {}
424
425 impl Axis for $name {}
426 };
427}
428
429define_axis!(
430 /// Element axis for binary graphs (a node).
431 NodeAxis
432);
433define_axis!(
434 /// Relation axis for binary graphs (an edge).
435 EdgeAxis
436);
437define_axis!(
438 /// Element axis for hypergraphs (a vertex).
439 VertexAxis
440);
441define_axis!(
442 /// Relation axis for hypergraphs (a hyperedge).
443 HyperedgeAxis
444);
445define_axis!(
446 /// Incidence axis (a participant / endpoint).
447 IncidenceAxis
448);
449
450/// A dense local handle: an axis-branded index value.
451///
452/// Every layout crate aliases this for its node/edge/vertex/hyperedge/incidence
453/// identities so a built graph and its borrowed snapshot view yield the same
454/// handle type. The `Axis` brand is a zero-sized `PhantomData<fn() -> A>` so the
455/// handle stays `Copy`, `Send`, and `Sync` regardless of the marker, and
456/// satisfies the `TopologyId` blanket bound when `Width` does.
457///
458/// # Performance
459///
460/// Copy, compare, order, hash, and debug-format are `O(1)`.
461pub struct LocalId<A, Width> {
462 /// The raw dense index value.
463 value: Width,
464 /// Zero-sized axis brand; `fn() -> A` keeps the handle `Send`/`Sync`.
465 axis: PhantomData<fn() -> A>,
466}
467
468impl<A, Width> LocalId<A, Width> {
469 /// Wraps a raw index value as an axis-branded handle.
470 ///
471 /// # Performance
472 ///
473 /// This function is `O(1)`.
474 #[inline]
475 #[must_use]
476 pub const fn new(value: Width) -> Self {
477 Self {
478 value,
479 axis: PhantomData,
480 }
481 }
482}
483
484impl<A, Width: Copy> LocalId<A, Width> {
485 /// Returns the raw index value of this handle.
486 ///
487 /// # Performance
488 ///
489 /// This function is `O(1)`.
490 #[inline]
491 #[must_use]
492 pub const fn get(self) -> Width {
493 self.value
494 }
495}
496
497impl<A, Width> From<Width> for LocalId<A, Width> {
498 #[inline]
499 fn from(value: Width) -> Self {
500 Self::new(value)
501 }
502}
503
504impl<A, Width: Clone> Clone for LocalId<A, Width> {
505 #[inline]
506 fn clone(&self) -> Self {
507 Self {
508 value: self.value.clone(),
509 axis: PhantomData,
510 }
511 }
512}
513
514impl<A, Width: Copy> Copy for LocalId<A, Width> {}
515
516impl<A, Width: PartialEq> PartialEq for LocalId<A, Width> {
517 #[inline]
518 fn eq(&self, other: &Self) -> bool {
519 self.value == other.value
520 }
521}
522
523impl<A, Width: Eq> Eq for LocalId<A, Width> {}
524
525impl<A, Width: PartialOrd> PartialOrd for LocalId<A, Width> {
526 #[inline]
527 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
528 self.value.partial_cmp(&other.value)
529 }
530}
531
532impl<A, Width: Ord> Ord for LocalId<A, Width> {
533 #[inline]
534 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
535 self.value.cmp(&other.value)
536 }
537}
538
539impl<A, Width: Hash> Hash for LocalId<A, Width> {
540 #[inline]
541 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
542 self.value.hash(state);
543 }
544}
545
546impl<A, Width: fmt::Debug> fmt::Debug for LocalId<A, Width> {
547 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
548 formatter.debug_tuple("LocalId").field(&self.value).finish()
549 }
550}
551
552impl<A, Width: fmt::Display> fmt::Display for LocalId<A, Width> {
553 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
554 self.value.fmt(formatter)
555 }
556}
557
558/// Iterator that maps a borrowed slice of [`LayoutWord`]s into axis-branded
559/// handles, recovering each logical index and wrapping it with `Id::from`.
560///
561/// This is the single slice-to-handle iterator shared by every layout crate's
562/// build-path (native words) and view-path (little-endian words).
563///
564/// # Performance
565///
566/// Advancing is `O(1)`; the iterator borrows and never allocates.
567#[derive(Clone, Debug)]
568pub struct IdSlice<'view, W: LayoutWord, Id> {
569 /// Borrowed iterator over the backing word slice.
570 inner: core::slice::Iter<'view, W>,
571 /// Zero-sized brand for the produced handle type.
572 id: PhantomData<fn() -> Id>,
573}
574
575impl<'view, W: LayoutWord, Id> IdSlice<'view, W, Id> {
576 /// Creates an [`IdSlice`] over a borrowed word slice.
577 ///
578 /// # Performance
579 ///
580 /// This function is `O(1)`.
581 #[inline]
582 #[must_use]
583 pub fn new(slice: &'view [W]) -> Self {
584 Self {
585 inner: slice.iter(),
586 id: PhantomData,
587 }
588 }
589}
590
591impl<W: LayoutWord, Id> Iterator for IdSlice<'_, W, Id>
592where
593 Id: From<W::Index>,
594{
595 type Item = Id;
596
597 #[inline]
598 fn next(&mut self) -> Option<Self::Item> {
599 self.inner.next().map(|word| Id::from(word.get()))
600 }
601
602 #[inline]
603 fn size_hint(&self) -> (usize, Option<usize>) {
604 self.inner.size_hint()
605 }
606}
607
608impl<W: LayoutWord, Id> ExactSizeIterator for IdSlice<'_, W, Id>
609where
610 Id: From<W::Index>,
611{
612 #[inline]
613 fn len(&self) -> usize {
614 self.inner.len()
615 }
616}
617
618impl<W: LayoutWord, Id> DoubleEndedIterator for IdSlice<'_, W, Id>
619where
620 Id: From<W::Index>,
621{
622 #[inline]
623 fn next_back(&mut self) -> Option<Self::Item> {
624 self.inner.next_back().map(|word| Id::from(word.get()))
625 }
626}
627
628impl<W: LayoutWord, Id> FusedIterator for IdSlice<'_, W, Id> where Id: From<W::Index> {}
629
630#[cfg(kani)]
631mod proofs;