Skip to main content

oxgraph_layout_util/
lib.rs

1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Three responsibilities live here, one namespace each:
4//!
5//! - **Index/word vocabulary** (crate root: [`LayoutIndex`], [`LayoutWord`],
6//!   [`LayoutSnapshotWord`], [`SnapshotWidth`]): the single sealed set of dense index widths,
7//!   native/little-endian storage words, and the width-to-LE-word bijection shared by every layout
8//!   and snapshot crate. CSR and BCSR add only thin section-kind-bearing sub-traits on top.
9//! - **Build-time** ([`build`]): validate dense IDs against a known count, convert `usize` slots
10//!   back into a typed index width, flatten per-bucket payloads into CSR-style `(offsets, items)`
11//!   pairs, and lower native index slices into explicit little-endian words.
12//! - **Read-time** ([`integrity`]): walk borrowed offset arrays at view-open time and convert
13//!   already-validated indexes infallibly.
14//!
15//! The build and integrity helpers are deliberately not re-exported at the
16//! crate root: the root namespace carries only the vocabulary every layer is
17//! parameterized over, while builder and validation internals stay behind
18//! their module names.
19//!
20//! [`LocalId`] is the one generic local-handle newtype every layout crate
21//! aliases for its node/edge/vertex/hyperedge/incidence identities, and
22//! [`IdSlice`] is the one slice-to-handle iterator they all reuse.
23//!
24//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
25//! semantics. No dependency on any other `oxgraph` crate.
26// kani-skip: helpers loop over arbitrary slice lengths and allocate
27// variable-sized buffers; proofs exercise the algebraic contract on bounded
28// fixtures.
29#![no_std]
30
31#[cfg(any(feature = "alloc", kani))]
32extern crate alloc;
33
34#[cfg(kani)]
35extern crate kani;
36
37use core::{fmt, hash::Hash, iter::FusedIterator, marker::PhantomData};
38
39use zerocopy::{
40    FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned,
41    byteorder::{LE, U16, U32, U64},
42};
43
44pub mod build;
45pub mod integrity;
46#[cfg(feature = "alloc")]
47pub mod keys;
48
49// ---------------------------------------------------------------------------
50// CRC-32C (Castagnoli)
51// ---------------------------------------------------------------------------
52
53/// Lookup table for the byte-at-a-time reflected CRC-32C computation.
54///
55/// Generated at compile time from the reflected Castagnoli polynomial
56/// `0x82F6_3B78` (bit-reversal of `0x1EDC_6F41`).
57const CRC32C_TABLE: [u32; 256] = build_crc32c_table();
58
59/// Builds the 256-entry reflected CRC-32C table at compile time.
60const fn build_crc32c_table() -> [u32; 256] {
61    let mut table = [0u32; 256];
62    let mut index: usize = 0;
63    let mut seed: u32 = 0;
64    while index < 256 {
65        let mut crc = seed;
66        let mut bit = 0;
67        while bit < 8 {
68            let mask = (crc & 1).wrapping_neg();
69            crc = (crc >> 1) ^ (0x82F6_3B78 & mask);
70            bit += 1;
71        }
72        table[index] = crc;
73        index += 1;
74        seed += 1;
75    }
76    table
77}
78
79/// Continues a CRC-32C (Castagnoli, polynomial `0x1EDC_6F41`) checksum over
80/// `bytes`, seeded with the result of a prior call (seed `0` starts a fresh
81/// checksum).
82///
83/// This is a portable, table-driven software implementation provided so
84/// `no_std` layout crates can produce checksummed snapshot containers without
85/// a `std`-only CRC dependency. It satisfies the continuation law
86/// `crc32c_append(crc32c_append(0, a), b) == crc32c_append(0, ab)`, matching
87/// the `crc32c` crate's `crc32c_append`, and `crc32c_append(0, b"") == 0`.
88/// `std` consumers on hot write paths may prefer a hardware-accelerated
89/// implementation (e.g. the `crc32c` crate) — any continuation-style CRC-32C
90/// is byte-for-byte interchangeable with this one.
91///
92/// # Performance
93///
94/// This function is `O(bytes.len())` (one table lookup per byte; no
95/// allocation).
96#[must_use]
97pub fn crc32c_append(crc: u32, bytes: &[u8]) -> u32 {
98    let mut state = !crc;
99    for &byte in bytes {
100        let index = usize::from(state.to_le_bytes()[0] ^ byte);
101        state = (state >> 8) ^ CRC32C_TABLE[index];
102    }
103    !state
104}
105
106/// Sealed module preventing external types from satisfying the in-crate
107/// index/word traits.
108mod sealed {
109    /// Seals [`super::LayoutIndex`] to the supported unsigned index widths.
110    pub trait LayoutIndex {}
111
112    /// Seals [`super::ZerocopyWord`] to in-tree native and little-endian words.
113    pub trait ZerocopyWord {}
114
115    /// Seals [`super::LayoutSnapshotWord`] to little-endian storage words.
116    pub trait LayoutSnapshotWord {}
117
118    /// Seals [`super::SnapshotWidth`] to the persisted unsigned widths.
119    pub trait SnapshotWidth {}
120
121    /// Seals [`super::Axis`] to the in-tree local-handle axis markers.
122    pub trait Axis {}
123}
124
125// ---------------------------------------------------------------------------
126// Index / word vocabulary
127// ---------------------------------------------------------------------------
128
129/// Unsigned dense ID width usable by graph and hypergraph layouts and builders.
130///
131/// This is the single index-width contract for the whole substrate: it merges
132/// what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously duplicated as
133/// `CsrIndex`/`BcsrIndex`. `usize` is included for native build-time indices;
134/// persisted widths additionally implement [`SnapshotWidth`].
135///
136/// # Performance
137///
138/// Implementations perform checked conversions in `O(1)`.
139pub trait LayoutIndex:
140    sealed::LayoutIndex + Copy + Eq + Ord + fmt::Debug + fmt::Display + Hash + Sized
141{
142    /// The additive identity for this width (the zero offset / first slot).
143    const ZERO: Self;
144
145    /// Converts this ID to `usize` when representable on the current target.
146    ///
147    /// # Performance
148    ///
149    /// This function is `O(1)`.
150    fn to_usize(self) -> Option<usize>;
151
152    /// Converts a `usize` into this ID width when representable.
153    ///
154    /// # Performance
155    ///
156    /// This function is `O(1)`.
157    fn from_usize(value: usize) -> Option<Self>;
158}
159
160/// Implements [`LayoutIndex`] for one unsigned width.
161macro_rules! impl_layout_index {
162    ($index:ty) => {
163        impl sealed::LayoutIndex for $index {}
164
165        impl LayoutIndex for $index {
166            const ZERO: Self = 0;
167
168            fn to_usize(self) -> Option<usize> {
169                usize::try_from(self).ok()
170            }
171
172            fn from_usize(value: usize) -> Option<Self> {
173                Self::try_from(value).ok()
174            }
175        }
176    };
177}
178
179impl_layout_index!(u16);
180impl_layout_index!(u32);
181impl_layout_index!(u64);
182
183impl sealed::LayoutIndex for usize {}
184
185impl LayoutIndex for usize {
186    const ZERO: Self = 0;
187
188    fn to_usize(self) -> Option<usize> {
189        Some(self)
190    }
191
192    fn from_usize(value: usize) -> Option<Self> {
193        Some(value)
194    }
195}
196
197/// Borrowed offset or value word usable by offset-integrity primitives.
198///
199/// Sealed: native unsigned integers and little-endian storage words opt in via
200/// the in-tree macros below. External crates cannot satisfy this trait.
201///
202/// # Performance
203///
204/// Reading a word is expected to be `O(1)`.
205pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
206    /// Reads this word's value as `usize`, or returns `None` when the value
207    /// does not fit in `usize` on the current target.
208    ///
209    /// # Performance
210    ///
211    /// This method is `O(1)`.
212    fn read_as_usize(self) -> Option<usize>;
213}
214
215/// Implements [`ZerocopyWord`] for one native unsigned integer type.
216macro_rules! impl_native_zerocopy_word {
217    ($word:ty) => {
218        impl sealed::ZerocopyWord for $word {}
219
220        impl ZerocopyWord for $word {
221            fn read_as_usize(self) -> Option<usize> {
222                usize::try_from(self).ok()
223            }
224        }
225    };
226}
227
228impl_native_zerocopy_word!(u16);
229impl_native_zerocopy_word!(u32);
230impl_native_zerocopy_word!(u64);
231impl_native_zerocopy_word!(usize);
232
233/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
234macro_rules! impl_le_zerocopy_word {
235    ($word:ty) => {
236        impl sealed::ZerocopyWord for $word {}
237
238        impl ZerocopyWord for $word {
239            fn read_as_usize(self) -> Option<usize> {
240                usize::try_from(<$word>::get(self)).ok()
241            }
242        }
243    };
244}
245
246impl_le_zerocopy_word!(U16<LE>);
247impl_le_zerocopy_word!(U32<LE>);
248impl_le_zerocopy_word!(U64<LE>);
249
250/// A native-host or little-endian word carrying a typed dense [`LayoutIndex`].
251///
252/// This merges what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously
253/// duplicated as `CsrWord`/`BcsrWord`. The associated [`LayoutWord::Index`]
254/// recovers the logical index value from either a native word (identity) or a
255/// little-endian storage word (byte-order conversion), so a single [`IdSlice`]
256/// drives both build-path and view-path iteration.
257///
258/// # Performance
259///
260/// [`LayoutWord::get`] is expected to be `O(1)`.
261pub trait LayoutWord: Copy + ZerocopyWord {
262    /// Logical dense index recovered from this word.
263    type Index: LayoutIndex;
264
265    /// Reads the logical index value out of this word.
266    ///
267    /// # Performance
268    ///
269    /// This method is `O(1)`.
270    fn get(self) -> Self::Index;
271}
272
273/// Implements [`LayoutWord`] for one native unsigned integer (identity index).
274macro_rules! impl_native_layout_word {
275    ($word:ty) => {
276        impl LayoutWord for $word {
277            type Index = $word;
278
279            fn get(self) -> Self::Index {
280                self
281            }
282        }
283    };
284}
285
286impl_native_layout_word!(u16);
287impl_native_layout_word!(u32);
288impl_native_layout_word!(u64);
289impl_native_layout_word!(usize);
290
291/// Implements [`LayoutWord`] for one little-endian word over a native index.
292macro_rules! impl_le_layout_word {
293    ($word:ty, $index:ty) => {
294        impl LayoutWord for $word {
295            type Index = $index;
296
297            fn get(self) -> Self::Index {
298                <$word>::get(self)
299            }
300        }
301    };
302}
303
304impl_le_layout_word!(U16<LE>, u16);
305impl_le_layout_word!(U32<LE>, u32);
306impl_le_layout_word!(U64<LE>, u64);
307
308/// A little-endian storage word usable in persisted snapshot payloads.
309///
310/// Sealed marker over [`LayoutWord`] plus the zerocopy byte-view bounds,
311/// including [`Unaligned`] — persisted words are byte-aligned by design, so
312/// generic wire records composed of them stay padding-free. Only the explicit
313/// little-endian words implement it — native integers are excluded so
314/// persisted payloads always carry a defined byte order.
315///
316/// # Performance
317///
318/// `perf: unspecified`; this is a marker trait.
319pub trait LayoutSnapshotWord:
320    sealed::LayoutSnapshotWord
321    + LayoutWord
322    + FromBytes
323    + Immutable
324    + IntoBytes
325    + KnownLayout
326    + Unaligned
327{
328}
329
330/// Implements [`LayoutSnapshotWord`] for one little-endian storage word.
331macro_rules! impl_layout_snapshot_word {
332    ($word:ty) => {
333        impl sealed::LayoutSnapshotWord for $word {}
334
335        impl LayoutSnapshotWord for $word {}
336    };
337}
338
339impl_layout_snapshot_word!(U16<LE>);
340impl_layout_snapshot_word!(U32<LE>);
341impl_layout_snapshot_word!(U64<LE>);
342
343/// A persisted unsigned width with its little-endian storage word.
344///
345/// This is the single width-to-LE-word bijection shared by `oxgraph-csr`,
346/// `oxgraph-hyper-bcsr`, and any future layout crate. `usize` deliberately does
347/// not implement it: persisted snapshots are fixed-width.
348///
349/// # Performance
350///
351/// [`SnapshotWidth::to_le_word`] and [`SnapshotWidth::from_le_word`] are `O(1)`.
352pub trait SnapshotWidth: sealed::SnapshotWidth + LayoutIndex {
353    /// Two-bit width discriminant carried in a section kind's low bits:
354    /// `0b00` = `u16`, `0b01` = `u32`, `0b10` = `u64` (`0b11` reserved).
355    ///
356    /// Layout crates declare 4-aligned `SNAPSHOT_KIND_*_BASE` constants and
357    /// derive each persisted section kind as `BASE | WIDTH_CODE`, so one base
358    /// constant covers every persisted width.
359    const WIDTH_CODE: u32;
360
361    /// Little-endian storage word for this width.
362    type LittleEndianWord: LayoutSnapshotWord<Index = Self>;
363
364    /// Lowers this logical index into its little-endian storage word.
365    ///
366    /// # Performance
367    ///
368    /// This function is `O(1)`.
369    fn to_le_word(self) -> Self::LittleEndianWord;
370
371    /// Recovers this logical index from a little-endian storage word.
372    ///
373    /// # Performance
374    ///
375    /// This function is `O(1)`.
376    fn from_le_word(word: Self::LittleEndianWord) -> Self;
377}
378
379/// Implements [`SnapshotWidth`] for one width and its little-endian word.
380macro_rules! impl_snapshot_width {
381    ($index:ty, $word:ty, $width_code:expr) => {
382        impl sealed::SnapshotWidth for $index {}
383
384        impl SnapshotWidth for $index {
385            const WIDTH_CODE: u32 = $width_code;
386
387            type LittleEndianWord = $word;
388
389            fn to_le_word(self) -> Self::LittleEndianWord {
390                <$word>::new(self)
391            }
392
393            fn from_le_word(word: Self::LittleEndianWord) -> Self {
394                <$word>::get(word)
395            }
396        }
397    };
398}
399
400impl_snapshot_width!(u16, U16<LE>, 0b00);
401impl_snapshot_width!(u32, U32<LE>, 0b01);
402impl_snapshot_width!(u64, U64<LE>, 0b10);
403
404// ---------------------------------------------------------------------------
405// Local-handle newtype + slice iterator
406// ---------------------------------------------------------------------------
407
408/// Marker for a local-handle axis (node, edge, vertex, hyperedge, incidence).
409///
410/// Sealed: only the in-tree axis markers below implement it, so [`LocalId`]
411/// cannot be branded with an arbitrary external type.
412///
413/// # Performance
414///
415/// `perf: unspecified`; this is a marker trait.
416pub trait Axis: sealed::Axis {}
417
418/// Defines one zero-sized local-handle axis marker.
419macro_rules! define_axis {
420    ($(#[$doc:meta])* $name:ident) => {
421        $(#[$doc])*
422        #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
423        pub struct $name;
424
425        impl sealed::Axis for $name {}
426
427        impl Axis for $name {}
428    };
429}
430
431define_axis!(
432    /// Element axis for binary graphs (a node).
433    NodeAxis
434);
435define_axis!(
436    /// Relation axis for binary graphs (an edge).
437    EdgeAxis
438);
439define_axis!(
440    /// Element axis for hypergraphs (a vertex).
441    VertexAxis
442);
443define_axis!(
444    /// Relation axis for hypergraphs (a hyperedge).
445    HyperedgeAxis
446);
447define_axis!(
448    /// Incidence axis (a participant / endpoint).
449    IncidenceAxis
450);
451
452/// A dense local handle: an axis-branded index value.
453///
454/// Every layout crate aliases this for its node/edge/vertex/hyperedge/incidence
455/// identities so a built graph and its borrowed snapshot view yield the same
456/// handle type. The `Axis` brand is a zero-sized `PhantomData<fn() -> A>` so the
457/// handle stays `Copy`, `Send`, and `Sync` regardless of the marker, and
458/// satisfies the `TopologyId` blanket bound when `Width` does.
459///
460/// # Performance
461///
462/// Copy, compare, order, hash, and debug-format are `O(1)`.
463pub struct LocalId<A, Width> {
464    /// The raw dense index value.
465    value: Width,
466    /// Zero-sized axis brand; `fn() -> A` keeps the handle `Send`/`Sync`.
467    axis: PhantomData<fn() -> A>,
468}
469
470impl<A, Width> LocalId<A, Width> {
471    /// Wraps a raw index value as an axis-branded handle.
472    ///
473    /// # Performance
474    ///
475    /// This function is `O(1)`.
476    #[inline]
477    #[must_use]
478    pub const fn new(value: Width) -> Self {
479        Self {
480            value,
481            axis: PhantomData,
482        }
483    }
484}
485
486impl<A, Width: Copy> LocalId<A, Width> {
487    /// Returns the raw index value of this handle.
488    ///
489    /// # Performance
490    ///
491    /// This function is `O(1)`.
492    #[inline]
493    #[must_use]
494    pub const fn get(self) -> Width {
495        self.value
496    }
497}
498
499impl<A, Width> From<Width> for LocalId<A, Width> {
500    #[inline]
501    fn from(value: Width) -> Self {
502        Self::new(value)
503    }
504}
505
506impl<A, Width: Clone> Clone for LocalId<A, Width> {
507    #[inline]
508    fn clone(&self) -> Self {
509        Self {
510            value: self.value.clone(),
511            axis: PhantomData,
512        }
513    }
514}
515
516impl<A, Width: Copy> Copy for LocalId<A, Width> {}
517
518impl<A, Width: PartialEq> PartialEq for LocalId<A, Width> {
519    #[inline]
520    fn eq(&self, other: &Self) -> bool {
521        self.value == other.value
522    }
523}
524
525impl<A, Width: Eq> Eq for LocalId<A, Width> {}
526
527impl<A, Width: PartialOrd> PartialOrd for LocalId<A, Width> {
528    #[inline]
529    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
530        self.value.partial_cmp(&other.value)
531    }
532}
533
534impl<A, Width: Ord> Ord for LocalId<A, Width> {
535    #[inline]
536    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
537        self.value.cmp(&other.value)
538    }
539}
540
541impl<A, Width: Hash> Hash for LocalId<A, Width> {
542    #[inline]
543    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
544        self.value.hash(state);
545    }
546}
547
548impl<A, Width: fmt::Debug> fmt::Debug for LocalId<A, Width> {
549    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
550        formatter.debug_tuple("LocalId").field(&self.value).finish()
551    }
552}
553
554impl<A, Width: fmt::Display> fmt::Display for LocalId<A, Width> {
555    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
556        self.value.fmt(formatter)
557    }
558}
559
560/// Iterator that maps a borrowed slice of [`LayoutWord`]s into axis-branded
561/// handles, recovering each logical index and wrapping it with `Id::from`.
562///
563/// This is the single slice-to-handle iterator shared by every layout crate's
564/// build-path (native words) and view-path (little-endian words).
565///
566/// # Performance
567///
568/// Advancing is `O(1)`; the iterator borrows and never allocates.
569#[derive(Clone, Debug)]
570pub struct IdSlice<'view, W: LayoutWord, Id> {
571    /// Borrowed iterator over the backing word slice.
572    inner: core::slice::Iter<'view, W>,
573    /// Zero-sized brand for the produced handle type.
574    id: PhantomData<fn() -> Id>,
575}
576
577impl<'view, W: LayoutWord, Id> IdSlice<'view, W, Id> {
578    /// Creates an [`IdSlice`] over a borrowed word slice.
579    ///
580    /// # Performance
581    ///
582    /// This function is `O(1)`.
583    #[inline]
584    #[must_use]
585    pub fn new(slice: &'view [W]) -> Self {
586        Self {
587            inner: slice.iter(),
588            id: PhantomData,
589        }
590    }
591}
592
593impl<W: LayoutWord, Id> Iterator for IdSlice<'_, W, Id>
594where
595    Id: From<W::Index>,
596{
597    type Item = Id;
598
599    #[inline]
600    fn next(&mut self) -> Option<Self::Item> {
601        self.inner.next().map(|word| Id::from(word.get()))
602    }
603
604    #[inline]
605    fn size_hint(&self) -> (usize, Option<usize>) {
606        self.inner.size_hint()
607    }
608}
609
610impl<W: LayoutWord, Id> ExactSizeIterator for IdSlice<'_, W, Id>
611where
612    Id: From<W::Index>,
613{
614    #[inline]
615    fn len(&self) -> usize {
616        self.inner.len()
617    }
618}
619
620impl<W: LayoutWord, Id> DoubleEndedIterator for IdSlice<'_, W, Id>
621where
622    Id: From<W::Index>,
623{
624    #[inline]
625    fn next_back(&mut self) -> Option<Self::Item> {
626        self.inner.next_back().map(|word| Id::from(word.get()))
627    }
628}
629
630impl<W: LayoutWord, Id> FusedIterator for IdSlice<'_, W, Id> where Id: From<W::Index> {}
631
632#[cfg(kani)]
633mod proofs;