oxgraph_layout_util/lib.rs
1//! Shared layout primitives for `OxGraph` graph and hypergraph crates.
2//!
3//! Two responsibilities live here:
4//!
5//! - **Build-time** ([`BuildIndex`], [`id_to_slot`], [`slot_or_max`], [`index_from_usize`],
6//! [`build_offset_index`]): validate dense IDs against a known count, convert `usize` slots back
7//! into a typed index width, and flatten per-bucket payloads into CSR-style `(offsets, items)`
8//! pairs. Used by `oxgraph-graph-build` and `oxgraph-hyper-build`.
9//! - **Read-time** ([`ZerocopyWord`], [`OffsetIntegrityIssue`], [`check_offsets_monotonic`],
10//! [`check_value_range`], [`check_offset_section`]): walk borrowed offset arrays at view-open
11//! time to enforce length matches `count + 1`, first offset is zero, offsets non-decreasing, the
12//! final offset matches the value-array length, and every value fits in the dense bound. Used by
13//! `oxgraph-csr` and `oxgraph-hyper-bcsr`.
14//!
15//! Helpers return small typed data enums ([`IdOutOfBounds`],
16//! [`OffsetOverflow`], [`OffsetIntegrityIssue`]) instead of crate-specific
17//! error types. Callers map the issue to their own typed error at the
18//! boundary.
19//!
20//! `no_std + alloc` (build-time primitives need `Vec`). No public domain
21//! semantics. No dependency on any other `oxgraph` crate.
22// kani-skip: helpers loop over arbitrary slice lengths and allocate
23// variable-sized buffers; proofs exercise the algebraic contract on bounded
24// fixtures.
25#![no_std]
26
27extern crate alloc;
28
29#[cfg(kani)]
30extern crate kani;
31
32use alloc::vec::Vec;
33use core::{error::Error, fmt, hash::Hash};
34
35use zerocopy::byteorder::{LE, U16, U32, U64};
36
37/// Sealed module preventing external types from satisfying the in-crate
38/// build/zerocopy traits.
39mod sealed {
40 /// Seals [`super::BuildIndex`] to the supported unsigned index widths.
41 pub trait BuildIndex {}
42
43 /// Seals [`super::ZerocopyWord`] to in-tree CSR and BCSR word types.
44 pub trait ZerocopyWord {}
45}
46
47// ---------------------------------------------------------------------------
48// Build-time primitives
49// ---------------------------------------------------------------------------
50
51/// Unsigned dense ID width usable by graph and hypergraph builders.
52///
53/// # Performance
54///
55/// Implementations perform checked conversions in `O(1)`.
56pub trait BuildIndex: sealed::BuildIndex + Copy + Eq + Ord + fmt::Debug + Hash {
57 /// Converts this ID to `usize` when representable on the current target.
58 ///
59 /// # Performance
60 ///
61 /// This function is `O(1)`.
62 fn to_usize(self) -> Option<usize>;
63
64 /// Converts a `usize` into this ID width when representable.
65 ///
66 /// # Performance
67 ///
68 /// This function is `O(1)`.
69 fn from_usize(value: usize) -> Option<Self>;
70}
71
72/// Implements [`BuildIndex`] for one unsigned width.
73macro_rules! impl_build_index {
74 ($index:ty) => {
75 impl sealed::BuildIndex for $index {}
76
77 impl BuildIndex for $index {
78 fn to_usize(self) -> Option<usize> {
79 usize::try_from(self).ok()
80 }
81
82 fn from_usize(value: usize) -> Option<Self> {
83 Self::try_from(value).ok()
84 }
85 }
86 };
87}
88
89impl_build_index!(u16);
90impl_build_index!(u32);
91impl_build_index!(u64);
92
93impl sealed::BuildIndex for usize {}
94
95impl BuildIndex for usize {
96 fn to_usize(self) -> Option<usize> {
97 Some(self)
98 }
99
100 fn from_usize(value: usize) -> Option<Self> {
101 Some(value)
102 }
103}
104
105/// Reasons an ID failed dense bounds validation.
106///
107/// # Performance
108///
109/// Formatting is `O(message length)`.
110#[derive(Clone, Copy, Debug, Eq, PartialEq)]
111#[non_exhaustive]
112pub enum IdOutOfBounds {
113 /// The ID's value did not fit in `usize` on the current target.
114 UsizeOverflow,
115 /// The ID's slot was greater than or equal to the dense count.
116 OutOfRange {
117 /// Slot derived from the ID.
118 slot: usize,
119 /// Exclusive upper bound for the slot.
120 count: usize,
121 },
122}
123
124impl fmt::Display for IdOutOfBounds {
125 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
126 match self {
127 Self::UsizeOverflow => formatter.write_str("ID value did not fit in usize"),
128 Self::OutOfRange { slot, count } => write!(
129 formatter,
130 "ID slot {slot} is not less than the dense bound {count}"
131 ),
132 }
133 }
134}
135
136impl Error for IdOutOfBounds {}
137
138/// Reasons a `usize` could not be represented in a target index width during
139/// builder offset construction.
140///
141/// # Performance
142///
143/// Formatting is `O(message length)`.
144#[derive(Clone, Copy, Debug, Eq, PartialEq)]
145#[non_exhaustive]
146pub enum OffsetOverflow {
147 /// A `usize` value did not fit in the target [`BuildIndex`] width.
148 IndexOverflow {
149 /// Value that did not fit.
150 value: usize,
151 },
152}
153
154impl fmt::Display for OffsetOverflow {
155 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
156 match self {
157 Self::IndexOverflow { value } => {
158 write!(
159 formatter,
160 "value {value} does not fit in the target index width"
161 )
162 }
163 }
164 }
165}
166
167impl Error for OffsetOverflow {}
168
169/// Validates that `id`'s `usize` representation is less than `count`.
170///
171/// # Errors
172///
173/// Returns [`IdOutOfBounds::UsizeOverflow`] when the ID does not fit in
174/// `usize` on the current target, and [`IdOutOfBounds::OutOfRange`] when the
175/// slot is not less than `count`.
176///
177/// # Performance
178///
179/// This function is `O(1)`.
180#[inline]
181pub fn id_to_slot<I: BuildIndex>(id: I, count: usize) -> Result<usize, IdOutOfBounds> {
182 let slot = id.to_usize().ok_or(IdOutOfBounds::UsizeOverflow)?;
183 if slot < count {
184 Ok(slot)
185 } else {
186 Err(IdOutOfBounds::OutOfRange { slot, count })
187 }
188}
189
190/// Returns `id`'s `usize` representation, or `usize::MAX` when it does not
191/// fit in `usize` on the current target.
192///
193/// Used for fallback display and for slot lookups guarded elsewhere by
194/// [`id_to_slot`]. The `usize::MAX` sentinel is safe because callers compare
195/// against an upper bound less than `usize::MAX` before indexing.
196///
197/// # Performance
198///
199/// This function is `O(1)`.
200#[inline]
201#[must_use]
202pub fn slot_or_max<I: BuildIndex>(id: I) -> usize {
203 id.to_usize().unwrap_or(usize::MAX)
204}
205
206/// Converts a `usize` value into the target index width.
207///
208/// # Errors
209///
210/// Returns [`OffsetOverflow::IndexOverflow`] when `value` does not fit.
211///
212/// # Performance
213///
214/// This function is `O(1)`.
215#[inline]
216pub fn index_from_usize<O: BuildIndex>(value: usize) -> Result<O, OffsetOverflow> {
217 O::from_usize(value).ok_or(OffsetOverflow::IndexOverflow { value })
218}
219
220/// Flattens per-bucket payloads into a `(offsets, items)` pair.
221///
222/// The returned `offsets` vector has length `buckets.len() + 1`. `offsets[0]`
223/// is zero, `offsets[i + 1] - offsets[i]` equals the i-th bucket's length, and
224/// `offsets[buckets.len()]` equals `items.len()`. Items appear in input order
225/// within each bucket and buckets are concatenated in input order.
226///
227/// # Errors
228///
229/// Returns [`OffsetOverflow::IndexOverflow`] when any cumulative offset does
230/// not fit in the target index width.
231///
232/// # Performance
233///
234/// This function is `O(n)` where `n` is the total item count across all
235/// buckets. Allocation matches a single-pass extend-and-grow; no second pass
236/// is performed.
237pub fn build_offset_index<O, T>(buckets: Vec<Vec<T>>) -> Result<(Vec<O>, Vec<T>), OffsetOverflow>
238where
239 O: BuildIndex,
240{
241 let mut offsets = Vec::with_capacity(buckets.len() + 1);
242 let mut items: Vec<T> = Vec::new();
243 offsets.push(index_from_usize::<O>(0)?);
244 for bucket in buckets {
245 items.extend(bucket);
246 offsets.push(index_from_usize::<O>(items.len())?);
247 }
248 Ok((offsets, items))
249}
250
251// ---------------------------------------------------------------------------
252// Read-time offset-integrity primitives
253// ---------------------------------------------------------------------------
254
255/// Borrowed offset or value word usable by offset-integrity primitives.
256///
257/// Sealed: both `CsrWord` (in `oxgraph-csr`) and `BcsrWord` (in
258/// `oxgraph-hyper-bcsr`) opt in via in-tree `impl ZerocopyWord for ...`
259/// blocks. External crates cannot satisfy this trait.
260///
261/// # Performance
262///
263/// Reading a word is expected to be `O(1)`.
264pub trait ZerocopyWord: sealed::ZerocopyWord + Copy {
265 /// Reads this word's value as `usize`, or returns `None` when the value
266 /// does not fit in `usize` on the current target.
267 ///
268 /// # Performance
269 ///
270 /// This method is `O(1)`.
271 fn read_as_usize(self) -> Option<usize>;
272}
273
274/// Implements [`ZerocopyWord`] for one native unsigned integer type.
275macro_rules! impl_native_zerocopy_word {
276 ($word:ty) => {
277 impl sealed::ZerocopyWord for $word {}
278
279 impl ZerocopyWord for $word {
280 fn read_as_usize(self) -> Option<usize> {
281 usize::try_from(self).ok()
282 }
283 }
284 };
285}
286
287impl_native_zerocopy_word!(u16);
288impl_native_zerocopy_word!(u32);
289impl_native_zerocopy_word!(u64);
290impl_native_zerocopy_word!(usize);
291
292/// Implements [`ZerocopyWord`] for one little-endian zerocopy storage word.
293macro_rules! impl_le_zerocopy_word {
294 ($word:ty) => {
295 impl sealed::ZerocopyWord for $word {}
296
297 impl ZerocopyWord for $word {
298 fn read_as_usize(self) -> Option<usize> {
299 usize::try_from(<$word>::get(self)).ok()
300 }
301 }
302 };
303}
304
305impl_le_zerocopy_word!(U16<LE>);
306impl_le_zerocopy_word!(U32<LE>);
307impl_le_zerocopy_word!(U64<LE>);
308
309/// Reasons a borrowed offset or value array failed structural validation.
310///
311/// # Performance
312///
313/// Formatting is `O(message length)`.
314#[derive(Clone, Copy, Debug, Eq, PartialEq)]
315#[non_exhaustive]
316pub enum OffsetIntegrityIssue {
317 /// Length did not match `count + 1`.
318 Length {
319 /// Expected length (`count + 1`).
320 expected: usize,
321 /// Observed length.
322 actual: usize,
323 },
324 /// `offsets[0]` was not zero.
325 FirstNonZero {
326 /// Observed first offset.
327 actual: usize,
328 },
329 /// `offsets[index] < offsets[index - 1]`.
330 NonMonotonic {
331 /// Index where monotonicity failed.
332 index: usize,
333 /// Offset at `index - 1`.
334 previous: usize,
335 /// Offset at `index`.
336 actual: usize,
337 },
338 /// `offsets[count]` did not match `value_len`.
339 FinalMismatch {
340 /// Observed final offset.
341 final_offset: usize,
342 /// Length of the values array.
343 value_len: usize,
344 },
345 /// A value at `index` was not less than the dense `bound`.
346 ValueOutOfRange {
347 /// Index of the offending value.
348 index: usize,
349 /// Observed value.
350 value: usize,
351 /// Exclusive upper bound.
352 bound: usize,
353 },
354 /// A word's value at `index` did not fit in `usize` on the current target.
355 UsizeOverflow {
356 /// Slice position of the offending word.
357 index: usize,
358 },
359}
360
361impl fmt::Display for OffsetIntegrityIssue {
362 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
363 match self {
364 Self::Length { expected, actual } => write!(
365 formatter,
366 "offsets length {actual} does not match expected {expected}"
367 ),
368 Self::FirstNonZero { actual } => {
369 write!(formatter, "first offset {actual} must be zero")
370 }
371 Self::NonMonotonic {
372 index,
373 previous,
374 actual,
375 } => write!(
376 formatter,
377 "offsets[{index}] = {actual} is less than offsets[{}] = {previous}",
378 index - 1
379 ),
380 Self::FinalMismatch {
381 final_offset,
382 value_len,
383 } => write!(
384 formatter,
385 "final offset {final_offset} does not match values length {value_len}"
386 ),
387 Self::ValueOutOfRange {
388 index,
389 value,
390 bound,
391 } => write!(
392 formatter,
393 "values[{index}] = {value} is not less than bound {bound}"
394 ),
395 Self::UsizeOverflow { index } => write!(
396 formatter,
397 "word at slice index {index} did not fit in usize"
398 ),
399 }
400 }
401}
402
403impl Error for OffsetIntegrityIssue {}
404
405/// Verifies `offsets[0] == 0` and that `offsets` is non-decreasing.
406///
407/// # Errors
408///
409/// Returns [`OffsetIntegrityIssue::FirstNonZero`] when the first offset is
410/// non-zero, [`OffsetIntegrityIssue::NonMonotonic`] when an offset decreases
411/// from the previous, and [`OffsetIntegrityIssue::UsizeOverflow`] when a
412/// word's value does not fit in `usize`.
413///
414/// # Performance
415///
416/// This function is `O(offsets.len())`.
417pub fn check_offsets_monotonic<W: ZerocopyWord>(offsets: &[W]) -> Result<(), OffsetIntegrityIssue> {
418 let mut previous: usize = 0;
419 for (index, word) in offsets.iter().copied().enumerate() {
420 let offset = word
421 .read_as_usize()
422 .ok_or(OffsetIntegrityIssue::UsizeOverflow { index })?;
423 if index == 0 {
424 if offset != 0 {
425 return Err(OffsetIntegrityIssue::FirstNonZero { actual: offset });
426 }
427 } else if offset < previous {
428 return Err(OffsetIntegrityIssue::NonMonotonic {
429 index,
430 previous,
431 actual: offset,
432 });
433 }
434 previous = offset;
435 }
436 Ok(())
437}
438
439/// Verifies every value in `values` is less than `bound`.
440///
441/// # Errors
442///
443/// Returns [`OffsetIntegrityIssue::ValueOutOfRange`] when a value is at or
444/// above `bound`, and [`OffsetIntegrityIssue::UsizeOverflow`] when a word's
445/// value does not fit in `usize`.
446///
447/// # Performance
448///
449/// This function is `O(values.len())`.
450pub fn check_value_range<W: ZerocopyWord>(
451 values: &[W],
452 bound: usize,
453) -> Result<(), OffsetIntegrityIssue> {
454 for (index, word) in values.iter().copied().enumerate() {
455 let value = word
456 .read_as_usize()
457 .ok_or(OffsetIntegrityIssue::UsizeOverflow { index })?;
458 if value >= bound {
459 return Err(OffsetIntegrityIssue::ValueOutOfRange {
460 index,
461 value,
462 bound,
463 });
464 }
465 }
466 Ok(())
467}
468
469/// Validates one offset section against `expected_count` rows and a backing
470/// values array of length `value_len`.
471///
472/// Performs four checks: length matches `expected_count + 1`, first offset is
473/// zero, offsets are non-decreasing, and the final offset matches `value_len`.
474///
475/// # Errors
476///
477/// Returns the first [`OffsetIntegrityIssue`] encountered.
478///
479/// # Performance
480///
481/// This function is `O(offsets.len())`.
482pub fn check_offset_section<W: ZerocopyWord>(
483 offsets: &[W],
484 expected_count: usize,
485 value_len: usize,
486) -> Result<(), OffsetIntegrityIssue> {
487 let Some(expected) = expected_count.checked_add(1) else {
488 return Err(OffsetIntegrityIssue::Length {
489 expected: usize::MAX,
490 actual: offsets.len(),
491 });
492 };
493 if offsets.len() != expected {
494 return Err(OffsetIntegrityIssue::Length {
495 expected,
496 actual: offsets.len(),
497 });
498 }
499 check_offsets_monotonic(offsets)?;
500 let final_index = offsets.len() - 1;
501 let final_offset = offsets[final_index]
502 .read_as_usize()
503 .ok_or(OffsetIntegrityIssue::UsizeOverflow { index: final_index })?;
504 if final_offset != value_len {
505 return Err(OffsetIntegrityIssue::FinalMismatch {
506 final_offset,
507 value_len,
508 });
509 }
510 Ok(())
511}
512
513#[cfg(kani)]
514mod proofs;