Skip to main content

oxgraph_layout_util/
lib.rs

1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Three responsibilities live here, one namespace each:
4//!
5//! - **Index/word vocabulary** (crate root: [`LayoutIndex`], [`LayoutWord`],
6//!   [`LayoutSnapshotWord`], [`SnapshotWidth`]): the single sealed set of dense index widths,
7//!   native/little-endian storage words, and the width-to-LE-word bijection shared by every layout
8//!   and snapshot crate. CSR and BCSR add only thin section-kind-bearing sub-traits on top.
9//! - **Build-time** ([`build`]): validate dense IDs against a known count, convert `usize` slots
10//!   back into a typed index width, flatten per-bucket payloads into CSR-style `(offsets, items)`
11//!   pairs, and lower native index slices into explicit little-endian words.
12//! - **Read-time** ([`integrity`]): walk borrowed offset arrays at view-open time and convert
13//!   already-validated indexes infallibly.
14//!
15//! The build and integrity helpers are deliberately not re-exported at the
16//! crate root: the root namespace carries only the vocabulary every layer is
17//! parameterized over, while builder and validation internals stay behind
18//! their module names.
19//!
20//! [`LocalId`] is the one generic local-handle newtype every layout crate
21//! aliases for its node/edge/vertex/hyperedge/incidence identities, and
22//! [`IdSlice`] is the one slice-to-handle iterator they all reuse.
23//!
24//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
25//! semantics. No dependency on any other `oxgraph` crate.
26// kani-skip: helpers loop over arbitrary slice lengths and allocate
27// variable-sized buffers; proofs exercise the algebraic contract on bounded
28// fixtures.
29#![no_std]
30
31#[cfg(any(feature = "alloc", kani))]
32extern crate alloc;
33
34#[cfg(kani)]
35extern crate kani;
36
37use core::{fmt, hash::Hash, iter::FusedIterator, marker::PhantomData};
38
39use zerocopy::{
40    FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned,
41    byteorder::{LE, U16, U32, U64},
42};
43
44pub mod build;
45pub mod integrity;
46
47// ---------------------------------------------------------------------------
48// CRC-32C (Castagnoli)
49// ---------------------------------------------------------------------------
50
51/// Lookup table for the byte-at-a-time reflected CRC-32C computation.
52///
53/// Generated at compile time from the reflected Castagnoli polynomial
54/// `0x82F6_3B78` (bit-reversal of `0x1EDC_6F41`).
55const CRC32C_TABLE: [u32; 256] = build_crc32c_table();
56
57/// Builds the 256-entry reflected CRC-32C table at compile time.
58const fn build_crc32c_table() -> [u32; 256] {
59    let mut table = [0u32; 256];
60    let mut index: usize = 0;
61    let mut seed: u32 = 0;
62    while index < 256 {
63        let mut crc = seed;
64        let mut bit = 0;
65        while bit < 8 {
66            let mask = (crc & 1).wrapping_neg();
67            crc = (crc >> 1) ^ (0x82F6_3B78 & mask);
68            bit += 1;
69        }
70        table[index] = crc;
71        index += 1;
72        seed += 1;
73    }
74    table
75}
76
77/// Continues a CRC-32C (Castagnoli, polynomial `0x1EDC_6F41`) checksum over
78/// `bytes`, seeded with the result of a prior call (seed `0` starts a fresh
79/// checksum).
80///
81/// This is a portable, table-driven software implementation provided so
82/// `no_std` layout crates can produce checksummed snapshot containers without
83/// a `std`-only CRC dependency. It satisfies the continuation law
84/// `crc32c_append(crc32c_append(0, a), b) == crc32c_append(0, ab)`, matching
85/// the `crc32c` crate's `crc32c_append`, and `crc32c_append(0, b"") == 0`.
86/// `std` consumers on hot write paths may prefer a hardware-accelerated
87/// implementation (e.g. the `crc32c` crate) — any continuation-style CRC-32C
88/// is byte-for-byte interchangeable with this one.
89///
90/// # Performance
91///
92/// This function is `O(bytes.len())` (one table lookup per byte; no
93/// allocation).
94#[must_use]
95pub fn crc32c_append(crc: u32, bytes: &[u8]) -> u32 {
96    let mut state = !crc;
97    for &byte in bytes {
98        let index = usize::from(state.to_le_bytes()[0] ^ byte);
99        state = (state >> 8) ^ CRC32C_TABLE[index];
100    }
101    !state
102}
103
104/// Sealed module preventing external types from satisfying the in-crate
105/// index/word traits.
106mod sealed {
107    /// Seals [`super::LayoutIndex`] to the supported unsigned index widths.
108    pub trait LayoutIndex {}
109
110    /// Seals [`super::ZerocopyWord`] to in-tree native and little-endian words.
111    pub trait ZerocopyWord {}
112
113    /// Seals [`super::LayoutSnapshotWord`] to little-endian storage words.
114    pub trait LayoutSnapshotWord {}
115
116    /// Seals [`super::SnapshotWidth`] to the persisted unsigned widths.
117    pub trait SnapshotWidth {}
118
119    /// Seals [`super::Axis`] to the in-tree local-handle axis markers.
120    pub trait Axis {}
121}
122
123// ---------------------------------------------------------------------------
124// Index / word vocabulary
125// ---------------------------------------------------------------------------
126
127/// Unsigned dense ID width usable by graph and hypergraph layouts and builders.
128///
129/// This is the single index-width contract for the whole substrate: it merges
130/// what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously duplicated as
131/// `CsrIndex`/`BcsrIndex`. `usize` is included for native build-time indices;
132/// persisted widths additionally implement [`SnapshotWidth`].
133///
134/// # Performance
135///
136/// Implementations perform checked conversions in `O(1)`.
137pub trait LayoutIndex:
138    sealed::LayoutIndex + Copy + Eq + Ord + fmt::Debug + fmt::Display + Hash + Sized
139{
140    /// The additive identity for this width (the zero offset / first slot).
141    const ZERO: Self;
142
143    /// Converts this ID to `usize` when representable on the current target.
144    ///
145    /// # Performance
146    ///
147    /// This function is `O(1)`.
148    fn to_usize(self) -> Option<usize>;
149
150    /// Converts a `usize` into this ID width when representable.
151    ///
152    /// # Performance
153    ///
154    /// This function is `O(1)`.
155    fn from_usize(value: usize) -> Option<Self>;
156}
157
158/// Implements [`LayoutIndex`] for one unsigned width.
159macro_rules! impl_layout_index {
160    ($index:ty) => {
161        impl sealed::LayoutIndex for $index {}
162
163        impl LayoutIndex for $index {
164            const ZERO: Self = 0;
165
166            fn to_usize(self) -> Option<usize> {
167                usize::try_from(self).ok()
168            }
169
170            fn from_usize(value: usize) -> Option<Self> {
171                Self::try_from(value).ok()
172            }
173        }
174    };
175}
176
177impl_layout_index!(u16);
178impl_layout_index!(u32);
179impl_layout_index!(u64);
180
181impl sealed::LayoutIndex for usize {}
182
183impl LayoutIndex for usize {
184    const ZERO: Self = 0;
185
186    fn to_usize(self) -> Option<usize> {
187        Some(self)
188    }
189
190    fn from_usize(value: usize) -> Option<Self> {
191        Some(value)
192    }
193}
194
195/// Borrowed offset or value word usable by offset-integrity primitives.
196///
197/// Sealed: native unsigned integers and little-endian storage words opt in via
198/// the in-tree macros below. External crates cannot satisfy this trait.
199///
200/// # Performance
201///
202/// Reading a word is expected to be `O(1)`.
203pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
204    /// Reads this word's value as `usize`, or returns `None` when the value
205    /// does not fit in `usize` on the current target.
206    ///
207    /// # Performance
208    ///
209    /// This method is `O(1)`.
210    fn read_as_usize(self) -> Option<usize>;
211}
212
213/// Implements [`ZerocopyWord`] for one native unsigned integer type.
214macro_rules! impl_native_zerocopy_word {
215    ($word:ty) => {
216        impl sealed::ZerocopyWord for $word {}
217
218        impl ZerocopyWord for $word {
219            fn read_as_usize(self) -> Option<usize> {
220                usize::try_from(self).ok()
221            }
222        }
223    };
224}
225
226impl_native_zerocopy_word!(u16);
227impl_native_zerocopy_word!(u32);
228impl_native_zerocopy_word!(u64);
229impl_native_zerocopy_word!(usize);
230
231/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
232macro_rules! impl_le_zerocopy_word {
233    ($word:ty) => {
234        impl sealed::ZerocopyWord for $word {}
235
236        impl ZerocopyWord for $word {
237            fn read_as_usize(self) -> Option<usize> {
238                usize::try_from(<$word>::get(self)).ok()
239            }
240        }
241    };
242}
243
244impl_le_zerocopy_word!(U16<LE>);
245impl_le_zerocopy_word!(U32<LE>);
246impl_le_zerocopy_word!(U64<LE>);
247
248/// A native-host or little-endian word carrying a typed dense [`LayoutIndex`].
249///
250/// This merges what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously
251/// duplicated as `CsrWord`/`BcsrWord`. The associated [`LayoutWord::Index`]
252/// recovers the logical index value from either a native word (identity) or a
253/// little-endian storage word (byte-order conversion), so a single [`IdSlice`]
254/// drives both build-path and view-path iteration.
255///
256/// # Performance
257///
258/// [`LayoutWord::get`] is expected to be `O(1)`.
259pub trait LayoutWord: Copy + ZerocopyWord {
260    /// Logical dense index recovered from this word.
261    type Index: LayoutIndex;
262
263    /// Reads the logical index value out of this word.
264    ///
265    /// # Performance
266    ///
267    /// This method is `O(1)`.
268    fn get(self) -> Self::Index;
269}
270
271/// Implements [`LayoutWord`] for one native unsigned integer (identity index).
272macro_rules! impl_native_layout_word {
273    ($word:ty) => {
274        impl LayoutWord for $word {
275            type Index = $word;
276
277            fn get(self) -> Self::Index {
278                self
279            }
280        }
281    };
282}
283
284impl_native_layout_word!(u16);
285impl_native_layout_word!(u32);
286impl_native_layout_word!(u64);
287impl_native_layout_word!(usize);
288
289/// Implements [`LayoutWord`] for one little-endian word over a native index.
290macro_rules! impl_le_layout_word {
291    ($word:ty, $index:ty) => {
292        impl LayoutWord for $word {
293            type Index = $index;
294
295            fn get(self) -> Self::Index {
296                <$word>::get(self)
297            }
298        }
299    };
300}
301
302impl_le_layout_word!(U16<LE>, u16);
303impl_le_layout_word!(U32<LE>, u32);
304impl_le_layout_word!(U64<LE>, u64);
305
306/// A little-endian storage word usable in persisted snapshot payloads.
307///
308/// Sealed marker over [`LayoutWord`] plus the zerocopy byte-view bounds,
309/// including [`Unaligned`] — persisted words are byte-aligned by design, so
310/// generic wire records composed of them stay padding-free. Only the explicit
311/// little-endian words implement it — native integers are excluded so
312/// persisted payloads always carry a defined byte order.
313///
314/// # Performance
315///
316/// `perf: unspecified`; this is a marker trait.
317pub trait LayoutSnapshotWord:
318    sealed::LayoutSnapshotWord
319    + LayoutWord
320    + FromBytes
321    + Immutable
322    + IntoBytes
323    + KnownLayout
324    + Unaligned
325{
326}
327
328/// Implements [`LayoutSnapshotWord`] for one little-endian storage word.
329macro_rules! impl_layout_snapshot_word {
330    ($word:ty) => {
331        impl sealed::LayoutSnapshotWord for $word {}
332
333        impl LayoutSnapshotWord for $word {}
334    };
335}
336
337impl_layout_snapshot_word!(U16<LE>);
338impl_layout_snapshot_word!(U32<LE>);
339impl_layout_snapshot_word!(U64<LE>);
340
341/// A persisted unsigned width with its little-endian storage word.
342///
343/// This is the single width-to-LE-word bijection shared by `oxgraph-csr`,
344/// `oxgraph-hyper-bcsr`, and any future layout crate. `usize` deliberately does
345/// not implement it: persisted snapshots are fixed-width.
346///
347/// # Performance
348///
349/// [`SnapshotWidth::to_le_word`] and [`SnapshotWidth::from_le_word`] are `O(1)`.
350pub trait SnapshotWidth: sealed::SnapshotWidth + LayoutIndex {
351    /// Two-bit width discriminant carried in a section kind's low bits:
352    /// `0b00` = `u16`, `0b01` = `u32`, `0b10` = `u64` (`0b11` reserved).
353    ///
354    /// Layout crates declare 4-aligned `SNAPSHOT_KIND_*_BASE` constants and
355    /// derive each persisted section kind as `BASE | WIDTH_CODE`, so one base
356    /// constant covers every persisted width.
357    const WIDTH_CODE: u32;
358
359    /// Little-endian storage word for this width.
360    type LittleEndianWord: LayoutSnapshotWord<Index = Self>;
361
362    /// Lowers this logical index into its little-endian storage word.
363    ///
364    /// # Performance
365    ///
366    /// This function is `O(1)`.
367    fn to_le_word(self) -> Self::LittleEndianWord;
368
369    /// Recovers this logical index from a little-endian storage word.
370    ///
371    /// # Performance
372    ///
373    /// This function is `O(1)`.
374    fn from_le_word(word: Self::LittleEndianWord) -> Self;
375}
376
377/// Implements [`SnapshotWidth`] for one width and its little-endian word.
378macro_rules! impl_snapshot_width {
379    ($index:ty, $word:ty, $width_code:expr) => {
380        impl sealed::SnapshotWidth for $index {}
381
382        impl SnapshotWidth for $index {
383            const WIDTH_CODE: u32 = $width_code;
384
385            type LittleEndianWord = $word;
386
387            fn to_le_word(self) -> Self::LittleEndianWord {
388                <$word>::new(self)
389            }
390
391            fn from_le_word(word: Self::LittleEndianWord) -> Self {
392                <$word>::get(word)
393            }
394        }
395    };
396}
397
398impl_snapshot_width!(u16, U16<LE>, 0b00);
399impl_snapshot_width!(u32, U32<LE>, 0b01);
400impl_snapshot_width!(u64, U64<LE>, 0b10);
401
402// ---------------------------------------------------------------------------
403// Local-handle newtype + slice iterator
404// ---------------------------------------------------------------------------
405
406/// Marker for a local-handle axis (node, edge, vertex, hyperedge, incidence).
407///
408/// Sealed: only the in-tree axis markers below implement it, so [`LocalId`]
409/// cannot be branded with an arbitrary external type.
410///
411/// # Performance
412///
413/// `perf: unspecified`; this is a marker trait.
414pub trait Axis: sealed::Axis {}
415
416/// Defines one zero-sized local-handle axis marker.
417macro_rules! define_axis {
418    ($(#[$doc:meta])* $name:ident) => {
419        $(#[$doc])*
420        #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
421        pub struct $name;
422
423        impl sealed::Axis for $name {}
424
425        impl Axis for $name {}
426    };
427}
428
429define_axis!(
430    /// Element axis for binary graphs (a node).
431    NodeAxis
432);
433define_axis!(
434    /// Relation axis for binary graphs (an edge).
435    EdgeAxis
436);
437define_axis!(
438    /// Element axis for hypergraphs (a vertex).
439    VertexAxis
440);
441define_axis!(
442    /// Relation axis for hypergraphs (a hyperedge).
443    HyperedgeAxis
444);
445define_axis!(
446    /// Incidence axis (a participant / endpoint).
447    IncidenceAxis
448);
449
450/// A dense local handle: an axis-branded index value.
451///
452/// Every layout crate aliases this for its node/edge/vertex/hyperedge/incidence
453/// identities so a built graph and its borrowed snapshot view yield the same
454/// handle type. The `Axis` brand is a zero-sized `PhantomData<fn() -> A>` so the
455/// handle stays `Copy`, `Send`, and `Sync` regardless of the marker, and
456/// satisfies the `TopologyId` blanket bound when `Width` does.
457///
458/// # Performance
459///
460/// Copy, compare, order, hash, and debug-format are `O(1)`.
461pub struct LocalId<A, Width> {
462    /// The raw dense index value.
463    value: Width,
464    /// Zero-sized axis brand; `fn() -> A` keeps the handle `Send`/`Sync`.
465    axis: PhantomData<fn() -> A>,
466}
467
468impl<A, Width> LocalId<A, Width> {
469    /// Wraps a raw index value as an axis-branded handle.
470    ///
471    /// # Performance
472    ///
473    /// This function is `O(1)`.
474    #[inline]
475    #[must_use]
476    pub const fn new(value: Width) -> Self {
477        Self {
478            value,
479            axis: PhantomData,
480        }
481    }
482}
483
484impl<A, Width: Copy> LocalId<A, Width> {
485    /// Returns the raw index value of this handle.
486    ///
487    /// # Performance
488    ///
489    /// This function is `O(1)`.
490    #[inline]
491    #[must_use]
492    pub const fn get(self) -> Width {
493        self.value
494    }
495}
496
497impl<A, Width> From<Width> for LocalId<A, Width> {
498    #[inline]
499    fn from(value: Width) -> Self {
500        Self::new(value)
501    }
502}
503
504impl<A, Width: Clone> Clone for LocalId<A, Width> {
505    #[inline]
506    fn clone(&self) -> Self {
507        Self {
508            value: self.value.clone(),
509            axis: PhantomData,
510        }
511    }
512}
513
514impl<A, Width: Copy> Copy for LocalId<A, Width> {}
515
516impl<A, Width: PartialEq> PartialEq for LocalId<A, Width> {
517    #[inline]
518    fn eq(&self, other: &Self) -> bool {
519        self.value == other.value
520    }
521}
522
523impl<A, Width: Eq> Eq for LocalId<A, Width> {}
524
525impl<A, Width: PartialOrd> PartialOrd for LocalId<A, Width> {
526    #[inline]
527    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
528        self.value.partial_cmp(&other.value)
529    }
530}
531
532impl<A, Width: Ord> Ord for LocalId<A, Width> {
533    #[inline]
534    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
535        self.value.cmp(&other.value)
536    }
537}
538
539impl<A, Width: Hash> Hash for LocalId<A, Width> {
540    #[inline]
541    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
542        self.value.hash(state);
543    }
544}
545
546impl<A, Width: fmt::Debug> fmt::Debug for LocalId<A, Width> {
547    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
548        formatter.debug_tuple("LocalId").field(&self.value).finish()
549    }
550}
551
552impl<A, Width: fmt::Display> fmt::Display for LocalId<A, Width> {
553    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
554        self.value.fmt(formatter)
555    }
556}
557
558/// Iterator that maps a borrowed slice of [`LayoutWord`]s into axis-branded
559/// handles, recovering each logical index and wrapping it with `Id::from`.
560///
561/// This is the single slice-to-handle iterator shared by every layout crate's
562/// build-path (native words) and view-path (little-endian words).
563///
564/// # Performance
565///
566/// Advancing is `O(1)`; the iterator borrows and never allocates.
567#[derive(Clone, Debug)]
568pub struct IdSlice<'view, W: LayoutWord, Id> {
569    /// Borrowed iterator over the backing word slice.
570    inner: core::slice::Iter<'view, W>,
571    /// Zero-sized brand for the produced handle type.
572    id: PhantomData<fn() -> Id>,
573}
574
575impl<'view, W: LayoutWord, Id> IdSlice<'view, W, Id> {
576    /// Creates an [`IdSlice`] over a borrowed word slice.
577    ///
578    /// # Performance
579    ///
580    /// This function is `O(1)`.
581    #[inline]
582    #[must_use]
583    pub fn new(slice: &'view [W]) -> Self {
584        Self {
585            inner: slice.iter(),
586            id: PhantomData,
587        }
588    }
589}
590
591impl<W: LayoutWord, Id> Iterator for IdSlice<'_, W, Id>
592where
593    Id: From<W::Index>,
594{
595    type Item = Id;
596
597    #[inline]
598    fn next(&mut self) -> Option<Self::Item> {
599        self.inner.next().map(|word| Id::from(word.get()))
600    }
601
602    #[inline]
603    fn size_hint(&self) -> (usize, Option<usize>) {
604        self.inner.size_hint()
605    }
606}
607
608impl<W: LayoutWord, Id> ExactSizeIterator for IdSlice<'_, W, Id>
609where
610    Id: From<W::Index>,
611{
612    #[inline]
613    fn len(&self) -> usize {
614        self.inner.len()
615    }
616}
617
618impl<W: LayoutWord, Id> DoubleEndedIterator for IdSlice<'_, W, Id>
619where
620    Id: From<W::Index>,
621{
622    #[inline]
623    fn next_back(&mut self) -> Option<Self::Item> {
624        self.inner.next_back().map(|word| Id::from(word.get()))
625    }
626}
627
628impl<W: LayoutWord, Id> FusedIterator for IdSlice<'_, W, Id> where Id: From<W::Index> {}
629
630#[cfg(kani)]
631mod proofs;