oxgraph_layout_util/lib.rs
1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Three responsibilities live here, one namespace each:
4//!
5//! - **Index/word vocabulary** (crate root: [`LayoutIndex`], [`LayoutWord`],
6//! [`LayoutSnapshotWord`], [`SnapshotWidth`]): the single sealed set of dense index widths,
7//! native/little-endian storage words, and the width-to-LE-word bijection shared by every layout
8//! and snapshot crate. CSR and BCSR add only thin section-kind-bearing sub-traits on top.
9//! - **Build-time** ([`build`]): validate dense IDs against a known count, convert `usize` slots
10//! back into a typed index width, flatten per-bucket payloads into CSR-style `(offsets, items)`
11//! pairs, and lower native index slices into explicit little-endian words.
12//! - **Read-time** ([`integrity`]): walk borrowed offset arrays at view-open time and convert
13//! already-validated indexes infallibly.
14//!
15//! The build and integrity helpers are deliberately not re-exported at the
16//! crate root: the root namespace carries only the vocabulary every layer is
17//! parameterized over, while builder and validation internals stay behind
18//! their module names.
19//!
20//! [`LocalId`] is the one generic local-handle newtype every layout crate
21//! aliases for its node/edge/vertex/hyperedge/incidence identities, and
22//! [`IdSlice`] is the one slice-to-handle iterator they all reuse.
23//!
24//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
25//! semantics. No dependency on any other `oxgraph` crate.
26// kani-skip: helpers loop over arbitrary slice lengths and allocate
27// variable-sized buffers; proofs exercise the algebraic contract on bounded
28// fixtures.
29#![no_std]
30
31#[cfg(any(feature = "alloc", kani))]
32extern crate alloc;
33
34#[cfg(kani)]
35extern crate kani;
36
37use core::{fmt, hash::Hash, iter::FusedIterator, marker::PhantomData};
38
39use zerocopy::{
40 FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned,
41 byteorder::{LE, U16, U32, U64},
42};
43
44pub mod build;
45pub mod integrity;
46#[cfg(feature = "alloc")]
47pub mod keys;
48
49// ---------------------------------------------------------------------------
50// CRC-32C (Castagnoli)
51// ---------------------------------------------------------------------------
52
53/// Lookup table for the byte-at-a-time reflected CRC-32C computation.
54///
55/// Generated at compile time from the reflected Castagnoli polynomial
56/// `0x82F6_3B78` (bit-reversal of `0x1EDC_6F41`).
57const CRC32C_TABLE: [u32; 256] = build_crc32c_table();
58
59/// Builds the 256-entry reflected CRC-32C table at compile time.
60const fn build_crc32c_table() -> [u32; 256] {
61 let mut table = [0u32; 256];
62 let mut index: usize = 0;
63 let mut seed: u32 = 0;
64 while index < 256 {
65 let mut crc = seed;
66 let mut bit = 0;
67 while bit < 8 {
68 let mask = (crc & 1).wrapping_neg();
69 crc = (crc >> 1) ^ (0x82F6_3B78 & mask);
70 bit += 1;
71 }
72 table[index] = crc;
73 index += 1;
74 seed += 1;
75 }
76 table
77}
78
79/// Continues a CRC-32C (Castagnoli, polynomial `0x1EDC_6F41`) checksum over
80/// `bytes`, seeded with the result of a prior call (seed `0` starts a fresh
81/// checksum).
82///
83/// This is a portable, table-driven software implementation provided so
84/// `no_std` layout crates can produce checksummed snapshot containers without
85/// a `std`-only CRC dependency. It satisfies the continuation law
86/// `crc32c_append(crc32c_append(0, a), b) == crc32c_append(0, ab)`, matching
87/// the `crc32c` crate's `crc32c_append`, and `crc32c_append(0, b"") == 0`.
88/// `std` consumers on hot write paths may prefer a hardware-accelerated
89/// implementation (e.g. the `crc32c` crate) — any continuation-style CRC-32C
90/// is byte-for-byte interchangeable with this one.
91///
92/// # Performance
93///
94/// This function is `O(bytes.len())` (one table lookup per byte; no
95/// allocation).
96#[must_use]
97pub fn crc32c_append(crc: u32, bytes: &[u8]) -> u32 {
98 let mut state = !crc;
99 for &byte in bytes {
100 let index = usize::from(state.to_le_bytes()[0] ^ byte);
101 state = (state >> 8) ^ CRC32C_TABLE[index];
102 }
103 !state
104}
105
106/// Sealed module preventing external types from satisfying the in-crate
107/// index/word traits.
108mod sealed {
109 /// Seals [`super::LayoutIndex`] to the supported unsigned index widths.
110 pub trait LayoutIndex {}
111
112 /// Seals [`super::ZerocopyWord`] to in-tree native and little-endian words.
113 pub trait ZerocopyWord {}
114
115 /// Seals [`super::LayoutSnapshotWord`] to little-endian storage words.
116 pub trait LayoutSnapshotWord {}
117
118 /// Seals [`super::SnapshotWidth`] to the persisted unsigned widths.
119 pub trait SnapshotWidth {}
120
121 /// Seals [`super::Axis`] to the in-tree local-handle axis markers.
122 pub trait Axis {}
123}
124
125// ---------------------------------------------------------------------------
126// Index / word vocabulary
127// ---------------------------------------------------------------------------
128
129/// Unsigned dense ID width usable by graph and hypergraph layouts and builders.
130///
131/// This is the single index-width contract for the whole substrate: it merges
132/// what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously duplicated as
133/// `CsrIndex`/`BcsrIndex`. `usize` is included for native build-time indices;
134/// persisted widths additionally implement [`SnapshotWidth`].
135///
136/// # Performance
137///
138/// Implementations perform checked conversions in `O(1)`.
139pub trait LayoutIndex:
140 sealed::LayoutIndex + Copy + Eq + Ord + fmt::Debug + fmt::Display + Hash + Sized
141{
142 /// The additive identity for this width (the zero offset / first slot).
143 const ZERO: Self;
144
145 /// Converts this ID to `usize` when representable on the current target.
146 ///
147 /// # Performance
148 ///
149 /// This function is `O(1)`.
150 fn to_usize(self) -> Option<usize>;
151
152 /// Converts a `usize` into this ID width when representable.
153 ///
154 /// # Performance
155 ///
156 /// This function is `O(1)`.
157 fn from_usize(value: usize) -> Option<Self>;
158}
159
160/// Implements [`LayoutIndex`] for one unsigned width.
161macro_rules! impl_layout_index {
162 ($index:ty) => {
163 impl sealed::LayoutIndex for $index {}
164
165 impl LayoutIndex for $index {
166 const ZERO: Self = 0;
167
168 fn to_usize(self) -> Option<usize> {
169 usize::try_from(self).ok()
170 }
171
172 fn from_usize(value: usize) -> Option<Self> {
173 Self::try_from(value).ok()
174 }
175 }
176 };
177}
178
179impl_layout_index!(u16);
180impl_layout_index!(u32);
181impl_layout_index!(u64);
182
183impl sealed::LayoutIndex for usize {}
184
185impl LayoutIndex for usize {
186 const ZERO: Self = 0;
187
188 fn to_usize(self) -> Option<usize> {
189 Some(self)
190 }
191
192 fn from_usize(value: usize) -> Option<Self> {
193 Some(value)
194 }
195}
196
197/// Borrowed offset or value word usable by offset-integrity primitives.
198///
199/// Sealed: native unsigned integers and little-endian storage words opt in via
200/// the in-tree macros below. External crates cannot satisfy this trait.
201///
202/// # Performance
203///
204/// Reading a word is expected to be `O(1)`.
205pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
206 /// Reads this word's value as `usize`, or returns `None` when the value
207 /// does not fit in `usize` on the current target.
208 ///
209 /// # Performance
210 ///
211 /// This method is `O(1)`.
212 fn read_as_usize(self) -> Option<usize>;
213}
214
215/// Implements [`ZerocopyWord`] for one native unsigned integer type.
216macro_rules! impl_native_zerocopy_word {
217 ($word:ty) => {
218 impl sealed::ZerocopyWord for $word {}
219
220 impl ZerocopyWord for $word {
221 fn read_as_usize(self) -> Option<usize> {
222 usize::try_from(self).ok()
223 }
224 }
225 };
226}
227
228impl_native_zerocopy_word!(u16);
229impl_native_zerocopy_word!(u32);
230impl_native_zerocopy_word!(u64);
231impl_native_zerocopy_word!(usize);
232
233/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
234macro_rules! impl_le_zerocopy_word {
235 ($word:ty) => {
236 impl sealed::ZerocopyWord for $word {}
237
238 impl ZerocopyWord for $word {
239 fn read_as_usize(self) -> Option<usize> {
240 usize::try_from(<$word>::get(self)).ok()
241 }
242 }
243 };
244}
245
246impl_le_zerocopy_word!(U16<LE>);
247impl_le_zerocopy_word!(U32<LE>);
248impl_le_zerocopy_word!(U64<LE>);
249
250/// A native-host or little-endian word carrying a typed dense [`LayoutIndex`].
251///
252/// This merges what `oxgraph-csr` and `oxgraph-hyper-bcsr` previously
253/// duplicated as `CsrWord`/`BcsrWord`. The associated [`LayoutWord::Index`]
254/// recovers the logical index value from either a native word (identity) or a
255/// little-endian storage word (byte-order conversion), so a single [`IdSlice`]
256/// drives both build-path and view-path iteration.
257///
258/// # Performance
259///
260/// [`LayoutWord::get`] is expected to be `O(1)`.
261pub trait LayoutWord: Copy + ZerocopyWord {
262 /// Logical dense index recovered from this word.
263 type Index: LayoutIndex;
264
265 /// Reads the logical index value out of this word.
266 ///
267 /// # Performance
268 ///
269 /// This method is `O(1)`.
270 fn get(self) -> Self::Index;
271}
272
273/// Implements [`LayoutWord`] for one native unsigned integer (identity index).
274macro_rules! impl_native_layout_word {
275 ($word:ty) => {
276 impl LayoutWord for $word {
277 type Index = $word;
278
279 fn get(self) -> Self::Index {
280 self
281 }
282 }
283 };
284}
285
286impl_native_layout_word!(u16);
287impl_native_layout_word!(u32);
288impl_native_layout_word!(u64);
289impl_native_layout_word!(usize);
290
291/// Implements [`LayoutWord`] for one little-endian word over a native index.
292macro_rules! impl_le_layout_word {
293 ($word:ty, $index:ty) => {
294 impl LayoutWord for $word {
295 type Index = $index;
296
297 fn get(self) -> Self::Index {
298 <$word>::get(self)
299 }
300 }
301 };
302}
303
304impl_le_layout_word!(U16<LE>, u16);
305impl_le_layout_word!(U32<LE>, u32);
306impl_le_layout_word!(U64<LE>, u64);
307
308/// A little-endian storage word usable in persisted snapshot payloads.
309///
310/// Sealed marker over [`LayoutWord`] plus the zerocopy byte-view bounds,
311/// including [`Unaligned`] — persisted words are byte-aligned by design, so
312/// generic wire records composed of them stay padding-free. Only the explicit
313/// little-endian words implement it — native integers are excluded so
314/// persisted payloads always carry a defined byte order.
315///
316/// # Performance
317///
318/// `perf: unspecified`; this is a marker trait.
319pub trait LayoutSnapshotWord:
320 sealed::LayoutSnapshotWord
321 + LayoutWord
322 + FromBytes
323 + Immutable
324 + IntoBytes
325 + KnownLayout
326 + Unaligned
327{
328}
329
330/// Implements [`LayoutSnapshotWord`] for one little-endian storage word.
331macro_rules! impl_layout_snapshot_word {
332 ($word:ty) => {
333 impl sealed::LayoutSnapshotWord for $word {}
334
335 impl LayoutSnapshotWord for $word {}
336 };
337}
338
339impl_layout_snapshot_word!(U16<LE>);
340impl_layout_snapshot_word!(U32<LE>);
341impl_layout_snapshot_word!(U64<LE>);
342
343/// A persisted unsigned width with its little-endian storage word.
344///
345/// This is the single width-to-LE-word bijection shared by `oxgraph-csr`,
346/// `oxgraph-hyper-bcsr`, and any future layout crate. `usize` deliberately does
347/// not implement it: persisted snapshots are fixed-width.
348///
349/// # Performance
350///
351/// [`SnapshotWidth::to_le_word`] and [`SnapshotWidth::from_le_word`] are `O(1)`.
352pub trait SnapshotWidth: sealed::SnapshotWidth + LayoutIndex {
353 /// Two-bit width discriminant carried in a section kind's low bits:
354 /// `0b00` = `u16`, `0b01` = `u32`, `0b10` = `u64` (`0b11` reserved).
355 ///
356 /// Layout crates declare 4-aligned `SNAPSHOT_KIND_*_BASE` constants and
357 /// derive each persisted section kind as `BASE | WIDTH_CODE`, so one base
358 /// constant covers every persisted width.
359 const WIDTH_CODE: u32;
360
361 /// Little-endian storage word for this width.
362 type LittleEndianWord: LayoutSnapshotWord<Index = Self>;
363
364 /// Lowers this logical index into its little-endian storage word.
365 ///
366 /// # Performance
367 ///
368 /// This function is `O(1)`.
369 fn to_le_word(self) -> Self::LittleEndianWord;
370
371 /// Recovers this logical index from a little-endian storage word.
372 ///
373 /// # Performance
374 ///
375 /// This function is `O(1)`.
376 fn from_le_word(word: Self::LittleEndianWord) -> Self;
377}
378
379/// Implements [`SnapshotWidth`] for one width and its little-endian word.
380macro_rules! impl_snapshot_width {
381 ($index:ty, $word:ty, $width_code:expr) => {
382 impl sealed::SnapshotWidth for $index {}
383
384 impl SnapshotWidth for $index {
385 const WIDTH_CODE: u32 = $width_code;
386
387 type LittleEndianWord = $word;
388
389 fn to_le_word(self) -> Self::LittleEndianWord {
390 <$word>::new(self)
391 }
392
393 fn from_le_word(word: Self::LittleEndianWord) -> Self {
394 <$word>::get(word)
395 }
396 }
397 };
398}
399
400impl_snapshot_width!(u16, U16<LE>, 0b00);
401impl_snapshot_width!(u32, U32<LE>, 0b01);
402impl_snapshot_width!(u64, U64<LE>, 0b10);
403
404// ---------------------------------------------------------------------------
405// Local-handle newtype + slice iterator
406// ---------------------------------------------------------------------------
407
408/// Marker for a local-handle axis (node, edge, vertex, hyperedge, incidence).
409///
410/// Sealed: only the in-tree axis markers below implement it, so [`LocalId`]
411/// cannot be branded with an arbitrary external type.
412///
413/// # Performance
414///
415/// `perf: unspecified`; this is a marker trait.
416pub trait Axis: sealed::Axis {}
417
418/// Defines one zero-sized local-handle axis marker.
419macro_rules! define_axis {
420 ($(#[$doc:meta])* $name:ident) => {
421 $(#[$doc])*
422 #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
423 pub struct $name;
424
425 impl sealed::Axis for $name {}
426
427 impl Axis for $name {}
428 };
429}
430
431define_axis!(
432 /// Element axis for binary graphs (a node).
433 NodeAxis
434);
435define_axis!(
436 /// Relation axis for binary graphs (an edge).
437 EdgeAxis
438);
439define_axis!(
440 /// Element axis for hypergraphs (a vertex).
441 VertexAxis
442);
443define_axis!(
444 /// Relation axis for hypergraphs (a hyperedge).
445 HyperedgeAxis
446);
447define_axis!(
448 /// Incidence axis (a participant / endpoint).
449 IncidenceAxis
450);
451
452/// A dense local handle: an axis-branded index value.
453///
454/// Every layout crate aliases this for its node/edge/vertex/hyperedge/incidence
455/// identities so a built graph and its borrowed snapshot view yield the same
456/// handle type. The `Axis` brand is a zero-sized `PhantomData<fn() -> A>` so the
457/// handle stays `Copy`, `Send`, and `Sync` regardless of the marker, and
458/// satisfies the `TopologyId` blanket bound when `Width` does.
459///
460/// # Performance
461///
462/// Copy, compare, order, hash, and debug-format are `O(1)`.
463pub struct LocalId<A, Width> {
464 /// The raw dense index value.
465 value: Width,
466 /// Zero-sized axis brand; `fn() -> A` keeps the handle `Send`/`Sync`.
467 axis: PhantomData<fn() -> A>,
468}
469
470impl<A, Width> LocalId<A, Width> {
471 /// Wraps a raw index value as an axis-branded handle.
472 ///
473 /// # Performance
474 ///
475 /// This function is `O(1)`.
476 #[inline]
477 #[must_use]
478 pub const fn new(value: Width) -> Self {
479 Self {
480 value,
481 axis: PhantomData,
482 }
483 }
484}
485
486impl<A, Width: Copy> LocalId<A, Width> {
487 /// Returns the raw index value of this handle.
488 ///
489 /// # Performance
490 ///
491 /// This function is `O(1)`.
492 #[inline]
493 #[must_use]
494 pub const fn get(self) -> Width {
495 self.value
496 }
497}
498
499impl<A, Width> From<Width> for LocalId<A, Width> {
500 #[inline]
501 fn from(value: Width) -> Self {
502 Self::new(value)
503 }
504}
505
506impl<A, Width: Clone> Clone for LocalId<A, Width> {
507 #[inline]
508 fn clone(&self) -> Self {
509 Self {
510 value: self.value.clone(),
511 axis: PhantomData,
512 }
513 }
514}
515
516impl<A, Width: Copy> Copy for LocalId<A, Width> {}
517
518impl<A, Width: PartialEq> PartialEq for LocalId<A, Width> {
519 #[inline]
520 fn eq(&self, other: &Self) -> bool {
521 self.value == other.value
522 }
523}
524
525impl<A, Width: Eq> Eq for LocalId<A, Width> {}
526
527impl<A, Width: PartialOrd> PartialOrd for LocalId<A, Width> {
528 #[inline]
529 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
530 self.value.partial_cmp(&other.value)
531 }
532}
533
534impl<A, Width: Ord> Ord for LocalId<A, Width> {
535 #[inline]
536 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
537 self.value.cmp(&other.value)
538 }
539}
540
541impl<A, Width: Hash> Hash for LocalId<A, Width> {
542 #[inline]
543 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
544 self.value.hash(state);
545 }
546}
547
548impl<A, Width: fmt::Debug> fmt::Debug for LocalId<A, Width> {
549 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
550 formatter.debug_tuple("LocalId").field(&self.value).finish()
551 }
552}
553
554impl<A, Width: fmt::Display> fmt::Display for LocalId<A, Width> {
555 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
556 self.value.fmt(formatter)
557 }
558}
559
560/// Iterator that maps a borrowed slice of [`LayoutWord`]s into axis-branded
561/// handles, recovering each logical index and wrapping it with `Id::from`.
562///
563/// This is the single slice-to-handle iterator shared by every layout crate's
564/// build-path (native words) and view-path (little-endian words).
565///
566/// # Performance
567///
568/// Advancing is `O(1)`; the iterator borrows and never allocates.
569#[derive(Clone, Debug)]
570pub struct IdSlice<'view, W: LayoutWord, Id> {
571 /// Borrowed iterator over the backing word slice.
572 inner: core::slice::Iter<'view, W>,
573 /// Zero-sized brand for the produced handle type.
574 id: PhantomData<fn() -> Id>,
575}
576
577impl<'view, W: LayoutWord, Id> IdSlice<'view, W, Id> {
578 /// Creates an [`IdSlice`] over a borrowed word slice.
579 ///
580 /// # Performance
581 ///
582 /// This function is `O(1)`.
583 #[inline]
584 #[must_use]
585 pub fn new(slice: &'view [W]) -> Self {
586 Self {
587 inner: slice.iter(),
588 id: PhantomData,
589 }
590 }
591}
592
593impl<W: LayoutWord, Id> Iterator for IdSlice<'_, W, Id>
594where
595 Id: From<W::Index>,
596{
597 type Item = Id;
598
599 #[inline]
600 fn next(&mut self) -> Option<Self::Item> {
601 self.inner.next().map(|word| Id::from(word.get()))
602 }
603
604 #[inline]
605 fn size_hint(&self) -> (usize, Option<usize>) {
606 self.inner.size_hint()
607 }
608}
609
610impl<W: LayoutWord, Id> ExactSizeIterator for IdSlice<'_, W, Id>
611where
612 Id: From<W::Index>,
613{
614 #[inline]
615 fn len(&self) -> usize {
616 self.inner.len()
617 }
618}
619
620impl<W: LayoutWord, Id> DoubleEndedIterator for IdSlice<'_, W, Id>
621where
622 Id: From<W::Index>,
623{
624 #[inline]
625 fn next_back(&mut self) -> Option<Self::Item> {
626 self.inner.next_back().map(|word| Id::from(word.get()))
627 }
628}
629
630impl<W: LayoutWord, Id> FusedIterator for IdSlice<'_, W, Id> where Id: From<W::Index> {}
631
632#[cfg(kani)]
633mod proofs;