Skip to main content

oxgraph_layout_util/
lib.rs

1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Two responsibilities live here:
4//!
5//! - **Build-time** ([`BuildIndex`], [`id_to_slot`], [`slot_or_max`], [`index_from_usize`],
6//!   [`build_offset_index`]): validate dense IDs against a known count, convert `usize` slots back
7//!   into a typed index width, and flatten per-bucket payloads into CSR-style `(offsets, items)`
8//!   pairs. Used by `oxgraph-graph-build` and `oxgraph-hyper-build`.
9//! - **Read-time** ([`ZerocopyWord`], [`OffsetIntegrityIssue`], [`check_offsets_monotonic`],
10//!   [`check_value_range`], [`check_offset_section`]): walk borrowed offset arrays at view-open
11//!   time to enforce length matches `count + 1`, first offset is zero, offsets non-decreasing, the
12//!   final offset matches the value-array length, and every value fits in the dense bound. Used by
13//!   `oxgraph-csr` and `oxgraph-hyper-bcsr`.
14//!
15//! Helpers return small typed data enums ([`IdOutOfBounds`],
16//! [`OffsetOverflow`], [`OffsetIntegrityIssue`]) instead of crate-specific
17//! error types. Callers map the issue to their own typed error at the
18//! boundary.
19//!
20//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
21//! semantics. No dependency on any other `oxgraph` crate.
22// kani-skip: helpers loop over arbitrary slice lengths and allocate
23// variable-sized buffers; proofs exercise the algebraic contract on bounded
24// fixtures.
25#![no_std]
26
27extern crate alloc;
28
29#[cfg(kani)]
30extern crate kani;
31
32use alloc::vec::Vec;
33use core::{error::Error, fmt, hash::Hash};
34
35use zerocopy::byteorder::{LE, U16, U32, U64};
36
37/// Sealed module preventing external types from satisfying the in-crate
38/// build/zerocopy traits.
39mod sealed {
40    /// Seals [`super::BuildIndex`] to the supported unsigned index widths.
41    pub trait BuildIndex {}
42
43    /// Seals [`super::ZerocopyWord`] to in-tree CSR and BCSR word types.
44    pub trait ZerocopyWord {}
45}
46
47// ---------------------------------------------------------------------------
48// Build-time primitives
49// ---------------------------------------------------------------------------
50
51/// Unsigned dense ID width usable by graph and hypergraph builders.
52///
53/// # Performance
54///
55/// Implementations perform checked conversions in `O(1)`.
56pub trait BuildIndex: sealed::BuildIndex + Copy + Eq + Ord + fmt::Debug + Hash {
57    /// Converts this ID to `usize` when representable on the current target.
58    ///
59    /// # Performance
60    ///
61    /// This function is `O(1)`.
62    fn to_usize(self) -> Option<usize>;
63
64    /// Converts a `usize` into this ID width when representable.
65    ///
66    /// # Performance
67    ///
68    /// This function is `O(1)`.
69    fn from_usize(value: usize) -> Option<Self>;
70}
71
72/// Implements [`BuildIndex`] for one unsigned width.
73macro_rules! impl_build_index {
74    ($index:ty) => {
75        impl sealed::BuildIndex for $index {}
76
77        impl BuildIndex for $index {
78            fn to_usize(self) -> Option<usize> {
79                usize::try_from(self).ok()
80            }
81
82            fn from_usize(value: usize) -> Option<Self> {
83                Self::try_from(value).ok()
84            }
85        }
86    };
87}
88
89impl_build_index!(u16);
90impl_build_index!(u32);
91impl_build_index!(u64);
92
93impl sealed::BuildIndex for usize {}
94
95impl BuildIndex for usize {
96    fn to_usize(self) -> Option<usize> {
97        Some(self)
98    }
99
100    fn from_usize(value: usize) -> Option<Self> {
101        Some(value)
102    }
103}
104
105/// Reasons an ID failed dense bounds validation.
106///
107/// # Performance
108///
109/// Formatting is `O(message length)`.
110#[derive(Clone, Copy, Debug, Eq, PartialEq)]
111#[non_exhaustive]
112pub enum IdOutOfBounds {
113    /// The ID's value did not fit in `usize` on the current target.
114    UsizeOverflow,
115    /// The ID's slot was greater than or equal to the dense count.
116    OutOfRange {
117        /// Slot derived from the ID.
118        slot: usize,
119        /// Exclusive upper bound for the slot.
120        count: usize,
121    },
122}
123
124impl fmt::Display for IdOutOfBounds {
125    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
126        match self {
127            Self::UsizeOverflow => formatter.write_str("ID value did not fit in usize"),
128            Self::OutOfRange { slot, count } => write!(
129                formatter,
130                "ID slot {slot} is not less than the dense bound {count}"
131            ),
132        }
133    }
134}
135
136impl Error for IdOutOfBounds {}
137
138/// Reasons a `usize` could not be represented in a target index width during
139/// builder offset construction.
140///
141/// # Performance
142///
143/// Formatting is `O(message length)`.
144#[derive(Clone, Copy, Debug, Eq, PartialEq)]
145#[non_exhaustive]
146pub enum OffsetOverflow {
147    /// A `usize` value did not fit in the target [`BuildIndex`] width.
148    IndexOverflow {
149        /// Value that did not fit.
150        value: usize,
151    },
152}
153
154impl fmt::Display for OffsetOverflow {
155    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
156        match self {
157            Self::IndexOverflow { value } => {
158                write!(
159                    formatter,
160                    "value {value} does not fit in the target index width"
161                )
162            }
163        }
164    }
165}
166
167impl Error for OffsetOverflow {}
168
169/// Validates that `id`'s `usize` representation is less than `count`.
170///
171/// # Errors
172///
173/// Returns [`IdOutOfBounds::UsizeOverflow`] when the ID does not fit in
174/// `usize` on the current target, and [`IdOutOfBounds::OutOfRange`] when the
175/// slot is not less than `count`.
176///
177/// # Performance
178///
179/// This function is `O(1)`.
180#[inline]
181pub fn id_to_slot<I: BuildIndex>(id: I, count: usize) -> Result<usize, IdOutOfBounds> {
182    let slot = id.to_usize().ok_or(IdOutOfBounds::UsizeOverflow)?;
183    if slot < count {
184        Ok(slot)
185    } else {
186        Err(IdOutOfBounds::OutOfRange { slot, count })
187    }
188}
189
190/// Returns `id`'s `usize` representation, or `usize::MAX` when it does not
191/// fit in `usize` on the current target.
192///
193/// Used for fallback display and for slot lookups guarded elsewhere by
194/// [`id_to_slot`]. The `usize::MAX` sentinel is safe because callers compare
195/// against an upper bound less than `usize::MAX` before indexing.
196///
197/// # Performance
198///
199/// This function is `O(1)`.
200#[inline]
201#[must_use]
202pub fn slot_or_max<I: BuildIndex>(id: I) -> usize {
203    id.to_usize().unwrap_or(usize::MAX)
204}
205
206/// Converts a `usize` value into the target index width.
207///
208/// # Errors
209///
210/// Returns [`OffsetOverflow::IndexOverflow`] when `value` does not fit.
211///
212/// # Performance
213///
214/// This function is `O(1)`.
215#[inline]
216pub fn index_from_usize<O: BuildIndex>(value: usize) -> Result<O, OffsetOverflow> {
217    O::from_usize(value).ok_or(OffsetOverflow::IndexOverflow { value })
218}
219
220/// Flattens per-bucket payloads into a `(offsets, items)` pair.
221///
222/// The returned `offsets` vector has length `buckets.len() + 1`. `offsets[0]`
223/// is zero, `offsets[i + 1] - offsets[i]` equals the i-th bucket's length, and
224/// `offsets[buckets.len()]` equals `items.len()`. Items appear in input order
225/// within each bucket and buckets are concatenated in input order.
226///
227/// # Errors
228///
229/// Returns [`OffsetOverflow::IndexOverflow`] when any cumulative offset does
230/// not fit in the target index width.
231///
232/// # Performance
233///
234/// This function is `O(n)` where `n` is the total item count across all
235/// buckets. Allocation matches a single-pass extend-and-grow; no second pass
236/// is performed.
237pub fn build_offset_index<O, T>(buckets: Vec<Vec<T>>) -> Result<(Vec<O>, Vec<T>), OffsetOverflow>
238where
239    O: BuildIndex,
240{
241    let mut offsets = Vec::with_capacity(buckets.len() + 1);
242    let mut items: Vec<T> = Vec::new();
243    offsets.push(index_from_usize::<O>(0)?);
244    for bucket in buckets {
245        items.extend(bucket);
246        offsets.push(index_from_usize::<O>(items.len())?);
247    }
248    Ok((offsets, items))
249}
250
251// ---------------------------------------------------------------------------
252// Read-time offset-integrity primitives
253// ---------------------------------------------------------------------------
254
255/// Borrowed offset or value word usable by offset-integrity primitives.
256///
257/// Sealed: both `CsrWord` (in `oxgraph-csr`) and `BcsrWord` (in
258/// `oxgraph-hyper-bcsr`) opt in via in-tree `impl ZerocopyWord for ...`
259/// blocks. External crates cannot satisfy this trait.
260///
261/// # Performance
262///
263/// Reading a word is expected to be `O(1)`.
264pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
265    /// Reads this word's value as `usize`, or returns `None` when the value
266    /// does not fit in `usize` on the current target.
267    ///
268    /// # Performance
269    ///
270    /// This method is `O(1)`.
271    fn read_as_usize(self) -> Option<usize>;
272}
273
274/// Implements [`ZerocopyWord`] for one native unsigned integer type.
275macro_rules! impl_native_zerocopy_word {
276    ($word:ty) => {
277        impl sealed::ZerocopyWord for $word {}
278
279        impl ZerocopyWord for $word {
280            fn read_as_usize(self) -> Option<usize> {
281                usize::try_from(self).ok()
282            }
283        }
284    };
285}
286
287impl_native_zerocopy_word!(u16);
288impl_native_zerocopy_word!(u32);
289impl_native_zerocopy_word!(u64);
290impl_native_zerocopy_word!(usize);
291
292/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
293macro_rules! impl_le_zerocopy_word {
294    ($word:ty) => {
295        impl sealed::ZerocopyWord for $word {}
296
297        impl ZerocopyWord for $word {
298            fn read_as_usize(self) -> Option<usize> {
299                usize::try_from(<$word>::get(self)).ok()
300            }
301        }
302    };
303}
304
305impl_le_zerocopy_word!(U16<LE>);
306impl_le_zerocopy_word!(U32<LE>);
307impl_le_zerocopy_word!(U64<LE>);
308
309/// Reasons a borrowed offset or value array failed structural validation.
310///
311/// # Performance
312///
313/// Formatting is `O(message length)`.
314#[derive(Clone, Copy, Debug, Eq, PartialEq)]
315#[non_exhaustive]
316pub enum OffsetIntegrityIssue {
317    /// Length did not match `count + 1`.
318    Length {
319        /// Expected length (`count + 1`).
320        expected: usize,
321        /// Observed length.
322        actual: usize,
323    },
324    /// `offsets[0]` was not zero.
325    FirstNonZero {
326        /// Observed first offset.
327        actual: usize,
328    },
329    /// `offsets[index] < offsets[index - 1]`.
330    NonMonotonic {
331        /// Index where monotonicity failed.
332        index: usize,
333        /// Offset at `index - 1`.
334        previous: usize,
335        /// Offset at `index`.
336        actual: usize,
337    },
338    /// `offsets[count]` did not match `value_len`.
339    FinalMismatch {
340        /// Observed final offset.
341        final_offset: usize,
342        /// Length of the values array.
343        value_len: usize,
344    },
345    /// A value at `index` was not less than the dense `bound`.
346    ValueOutOfRange {
347        /// Index of the offending value.
348        index: usize,
349        /// Observed value.
350        value: usize,
351        /// Exclusive upper bound.
352        bound: usize,
353    },
354    /// A word's value at `index` did not fit in `usize` on the current target.
355    UsizeOverflow {
356        /// Slice position of the offending word.
357        index: usize,
358    },
359}
360
361impl fmt::Display for OffsetIntegrityIssue {
362    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
363        match self {
364            Self::Length { expected, actual } => write!(
365                formatter,
366                "offsets length {actual} does not match expected {expected}"
367            ),
368            Self::FirstNonZero { actual } => {
369                write!(formatter, "first offset {actual} must be zero")
370            }
371            Self::NonMonotonic {
372                index,
373                previous,
374                actual,
375            } => write!(
376                formatter,
377                "offsets[{index}] = {actual} is less than offsets[{}] = {previous}",
378                index - 1
379            ),
380            Self::FinalMismatch {
381                final_offset,
382                value_len,
383            } => write!(
384                formatter,
385                "final offset {final_offset} does not match values length {value_len}"
386            ),
387            Self::ValueOutOfRange {
388                index,
389                value,
390                bound,
391            } => write!(
392                formatter,
393                "values[{index}] = {value} is not less than bound {bound}"
394            ),
395            Self::UsizeOverflow { index } => write!(
396                formatter,
397                "word at slice index {index} did not fit in usize"
398            ),
399        }
400    }
401}
402
403impl Error for OffsetIntegrityIssue {}
404
405/// Verifies `offsets[0] == 0` and that `offsets` is non-decreasing.
406///
407/// # Errors
408///
409/// Returns [`OffsetIntegrityIssue::FirstNonZero`] when the first offset is
410/// non-zero, [`OffsetIntegrityIssue::NonMonotonic`] when an offset decreases
411/// from the previous, and [`OffsetIntegrityIssue::UsizeOverflow`] when a
412/// word's value does not fit in `usize`.
413///
414/// # Performance
415///
416/// This function is `O(offsets.len())`.
417pub fn check_offsets_monotonic<W: ZerocopyWord>(offsets: &[W]) -> Result<(), OffsetIntegrityIssue> {
418    let mut previous: usize = 0;
419    for (index, word) in offsets.iter().copied().enumerate() {
420        let offset = word
421            .read_as_usize()
422            .ok_or(OffsetIntegrityIssue::UsizeOverflow { index })?;
423        if index == 0 {
424            if offset != 0 {
425                return Err(OffsetIntegrityIssue::FirstNonZero { actual: offset });
426            }
427        } else if offset < previous {
428            return Err(OffsetIntegrityIssue::NonMonotonic {
429                index,
430                previous,
431                actual: offset,
432            });
433        }
434        previous = offset;
435    }
436    Ok(())
437}
438
439/// Verifies every value in `values` is less than `bound`.
440///
441/// # Errors
442///
443/// Returns [`OffsetIntegrityIssue::ValueOutOfRange`] when a value is at or
444/// above `bound`, and [`OffsetIntegrityIssue::UsizeOverflow`] when a word's
445/// value does not fit in `usize`.
446///
447/// # Performance
448///
449/// This function is `O(values.len())`.
450pub fn check_value_range<W: ZerocopyWord>(
451    values: &[W],
452    bound: usize,
453) -> Result<(), OffsetIntegrityIssue> {
454    for (index, word) in values.iter().copied().enumerate() {
455        let value = word
456            .read_as_usize()
457            .ok_or(OffsetIntegrityIssue::UsizeOverflow { index })?;
458        if value >= bound {
459            return Err(OffsetIntegrityIssue::ValueOutOfRange {
460                index,
461                value,
462                bound,
463            });
464        }
465    }
466    Ok(())
467}
468
469/// Validates one offset section against `expected_count` rows and a backing
470/// values array of length `value_len`.
471///
472/// Performs four checks: length matches `expected_count + 1`, first offset is
473/// zero, offsets are non-decreasing, and the final offset matches `value_len`.
474///
475/// # Errors
476///
477/// Returns the first [`OffsetIntegrityIssue`] encountered.
478///
479/// # Performance
480///
481/// This function is `O(offsets.len())`.
482pub fn check_offset_section<W: ZerocopyWord>(
483    offsets: &[W],
484    expected_count: usize,
485    value_len: usize,
486) -> Result<(), OffsetIntegrityIssue> {
487    let Some(expected) = expected_count.checked_add(1) else {
488        return Err(OffsetIntegrityIssue::Length {
489            expected: usize::MAX,
490            actual: offsets.len(),
491        });
492    };
493    if offsets.len() != expected {
494        return Err(OffsetIntegrityIssue::Length {
495            expected,
496            actual: offsets.len(),
497        });
498    }
499    check_offsets_monotonic(offsets)?;
500    let final_index = offsets.len() - 1;
501    let final_offset = offsets[final_index]
502        .read_as_usize()
503        .ok_or(OffsetIntegrityIssue::UsizeOverflow { index: final_index })?;
504    if final_offset != value_len {
505        return Err(OffsetIntegrityIssue::FinalMismatch {
506            final_offset,
507            value_len,
508        });
509    }
510    Ok(())
511}
512
513#[cfg(kani)]
514mod proofs;