oxgraph_snapshot/container.rs
1//! Topology-agnostic snapshot container: format constants, byte-level header
2//! and section table, validation, reader, no-`alloc` planner, and the
3//! `alloc`-gated owning builder.
4//!
5//! All public types are re-exported through the crate root; consumers should
6//! depend on the crate-level paths rather than reaching in here.
7//!
8//! When the container ever graduates to a separate `topology-snapshot` crate
9//! the whole module moves wholesale, and the crate root becomes a shim of
10//! `pub use topology_snapshot::*`.
11
12#[cfg(feature = "alloc")]
13use alloc::{vec, vec::Vec};
14use core::fmt;
15
16use oxgraph_layout_util::SnapshotWidth;
17use zerocopy::{
18 FromBytes, Immutable, IntoBytes, KnownLayout,
19 byteorder::{LE, U32, U64},
20};
21
22use crate::container_error::{PlanError, SectionBindError, SectionViewError, SnapshotError};
23
24/// Magic bytes identifying the topology snapshot container format.
25///
26/// Producers MUST write these eight bytes at offset 0; readers MUST reject
27/// snapshots whose first eight bytes differ.
28///
29/// # Performance
30///
31/// `perf: unspecified`; this is a compile-time constant.
32pub const FORMAT_MAGIC: [u8; 8] = *b"OXGTOPO\0";
33
34/// Format major version this library reads and writes.
35///
36/// A snapshot whose `format_major` field does not equal this constant is
37/// rejected at open time. Major bumps are permitted to break compatibility
38/// in arbitrary ways.
39///
40/// # Performance
41///
42/// `perf: unspecified`; this is a compile-time constant.
43pub const FORMAT_MAJOR: u32 = 1;
44
45/// Format minor version written by this library's builder.
46///
47/// Minor bumps are reserved for backward-compatible additions (e.g. enabling
48/// previously reserved bits or fields). Producers using this library will
49/// emit this value unconditionally.
50///
51/// # Performance
52///
53/// `perf: unspecified`; this is a compile-time constant.
54pub const FORMAT_MINOR: u32 = 0;
55
56/// Highest format minor version this library can read.
57///
58/// Snapshots with `format_minor > MAX_SUPPORTED_MINOR` are rejected at open
59/// time. v1 is intentionally strict; raising this value is a deliberate
60/// per-minor decision once the new minor is proven safely readable here.
61///
62/// # Performance
63///
64/// `perf: unspecified`; this is a compile-time constant.
65pub const MAX_SUPPORTED_MINOR: u32 = 0;
66
67/// Size of the snapshot header in bytes.
68///
69/// # Performance
70///
71/// `perf: unspecified`; this is a compile-time constant.
72pub const HEADER_SIZE: usize = 32;
73
74/// Size of one section table entry in bytes.
75///
76/// # Performance
77///
78/// `perf: unspecified`; this is a compile-time constant.
79pub const SECTION_ENTRY_SIZE: usize = 32;
80
81/// Maximum permitted `alignment_log2` value (2^12 = 4 KiB, page-friendly).
82///
83/// # Performance
84///
85/// `perf: unspecified`; this is a compile-time constant.
86pub const MAX_ALIGNMENT_LOG2: u8 = 12;
87
88/// Maximum permitted section count for v1 snapshots.
89///
90/// Bounds the duplicate-kind detection in `O(s^2)` validation and keeps
91/// kani proofs tractable. Future minors may raise this if validation moves
92/// to a sorted-by-kind side index.
93///
94/// # Performance
95///
96/// `perf: unspecified`; this is a compile-time constant.
97pub const MAX_SECTION_COUNT: u32 = 1024;
98
99/// `HEADER_SIZE` rendered as a `u32` for header-field comparisons.
100const HEADER_SIZE_U32: u32 = 32;
101
102/// Typed wrapper over a section's opaque `u32` kind tag.
103///
104/// The container still treats the value opaquely, but [`SectionKind`] plus the
105/// [`kinds`] band registry give the wire format a single documented authority
106/// over the kind namespace. Layout crates declare their kind constants inside
107/// the band the registry reserves for them so that distinct subsystems cannot
108/// silently collide on a value.
109///
110/// # Performance
111///
112/// All methods are `O(1)`.
113#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
114pub struct SectionKind(u32);
115
116impl SectionKind {
117 /// Wraps a raw kind tag.
118 ///
119 /// # Performance
120 ///
121 /// This function is `O(1)`.
122 #[must_use]
123 pub const fn new(value: u32) -> Self {
124 Self(value)
125 }
126
127 /// Returns the raw kind tag.
128 ///
129 /// # Performance
130 ///
131 /// This function is `O(1)`.
132 #[must_use]
133 pub const fn get(self) -> u32 {
134 self.0
135 }
136}
137
138impl From<u32> for SectionKind {
139 fn from(value: u32) -> Self {
140 Self(value)
141 }
142}
143
144impl fmt::Display for SectionKind {
145 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
146 write!(formatter, "{:#06x}", self.0)
147 }
148}
149
150/// Section-kind band allocation registry: the single documented authority for
151/// who owns which range of the opaque `u32` kind namespace.
152///
153/// The container assigns no semantics to kinds, but every in-tree layer
154/// declares its `SNAPSHOT_KIND_*` constants inside the band reserved here, and
155/// the bands are mutually exclusive, so distinct subsystems cannot collide.
156/// Each band is a half-open `[start, end)` range of raw kind values.
157///
158/// # Performance
159///
160/// `perf: unspecified`; these are compile-time constants.
161pub mod kinds {
162 use core::ops::Range;
163
164 /// CSR graph layout sections (offsets/targets, all widths).
165 pub const CSR_BAND: Range<u32> = 0x0001..0x0020;
166 /// Bipartite-CSR hypergraph layout sections (all widths).
167 pub const BCSR_BAND: Range<u32> = 0x0020..0x0100;
168 /// Property and identity-map sections (all widths).
169 pub const PROPERTY_BAND: Range<u32> = 0x0100..0x0200;
170 /// `PostgreSQL` engine sections, including the inbound CSC layout.
171 pub const POSTGRES_BAND: Range<u32> = 0x0200..0x0300;
172 /// Embedded `OxGraph` database state sections.
173 pub const DATABASE_BAND: Range<u32> = 0x0300..0x0400;
174 /// Application/custom sections; the container reserves nothing here.
175 pub const CUSTOM_BASE: u32 = 0x0400;
176
177 /// Returns whether `kind` falls within the half-open `band`.
178 ///
179 /// Layout crates use this in `const`-checked tests to prove their kind
180 /// constants stay inside their reserved band.
181 ///
182 /// # Performance
183 ///
184 /// This function is `O(1)`.
185 #[must_use]
186 pub const fn in_band(kind: u32, band: Range<u32>) -> bool {
187 kind >= band.start && kind < band.end
188 }
189}
190
191/// Converts a checked `u64` into `usize`, asserting in debug mode that the
192/// value already fits because validation enforced an earlier bound.
193///
194/// # Panics
195///
196/// Panics via `unreachable!()` only on a target where `usize` is narrower
197/// than `u64` AND the caller has supplied a value that was not first vetted
198/// by the snapshot's `Layout` validation pass (which surfaces the failure
199/// as [`SnapshotError::UsizeOverflow`] before any `_validated` call).
200///
201/// # Performance
202///
203/// This function is `O(1)`.
204fn u64_to_usize_validated(value: u64) -> usize {
205 match usize::try_from(value) {
206 Ok(converted) => converted,
207 Err(_error) => unreachable!("validated u64 must fit usize on this target"),
208 }
209}
210
211/// Byte-level snapshot header.
212///
213/// Layout is `#[repr(C)]` with all multi-byte fields stored as zerocopy's
214/// unaligned little-endian wrappers. The struct itself has alignment 1, so
215/// it can be borrowed from any byte slice that is at least `HEADER_SIZE`
216/// long without an alignment check.
217#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
218#[repr(C)]
219struct RawHeader {
220 /// Magic bytes; must equal [`FORMAT_MAGIC`].
221 magic: [u8; 8],
222 /// Format major version.
223 format_major: U32<LE>,
224 /// Format minor version.
225 format_minor: U32<LE>,
226 /// Header size in bytes; v1.0 mandates `HEADER_SIZE`.
227 header_size: U32<LE>,
228 /// Number of section table entries.
229 section_count: U32<LE>,
230 /// Reserved; must be zero.
231 reserved: [u8; 8],
232}
233
234/// Parses the fixed header from the start of `bytes`.
235///
236/// # Errors
237///
238/// Returns [`SnapshotError::TruncatedHeader`] when fewer than [`HEADER_SIZE`]
239/// bytes are provided. Header field validation is performed separately in
240/// [`validate_magic_versions_reserved`].
241///
242/// # Performance
243///
244/// This function is `O(1)`.
245fn parse_header(bytes: &[u8]) -> Result<(&RawHeader, &[u8]), SnapshotError> {
246 if bytes.len() < HEADER_SIZE {
247 return Err(SnapshotError::TruncatedHeader {
248 needed: HEADER_SIZE,
249 actual: bytes.len(),
250 });
251 }
252
253 match RawHeader::ref_from_prefix(bytes) {
254 Ok((header, rest)) => Ok((header, rest)),
255 Err(_error) => Err(SnapshotError::MalformedHeader),
256 }
257}
258
259/// Validates header magic, version, header size, and reserved bytes.
260///
261/// # Errors
262///
263/// Returns [`SnapshotError`] for any header-level invariant violation.
264///
265/// # Performance
266///
267/// This function is `O(1)`.
268fn validate_magic_versions_reserved(header: &RawHeader) -> Result<(), SnapshotError> {
269 if header.magic != FORMAT_MAGIC {
270 return Err(SnapshotError::BadMagic {
271 actual: header.magic,
272 });
273 }
274
275 let major = header.format_major.get();
276 if major != FORMAT_MAJOR {
277 return Err(SnapshotError::FormatMajorMismatch {
278 actual: major,
279 supported: FORMAT_MAJOR,
280 });
281 }
282
283 let minor = header.format_minor.get();
284 if minor > MAX_SUPPORTED_MINOR {
285 return Err(SnapshotError::FormatMinorTooNew {
286 actual: minor,
287 max_supported: MAX_SUPPORTED_MINOR,
288 });
289 }
290
291 let header_size = header.header_size.get();
292 if header_size != HEADER_SIZE_U32 {
293 return Err(SnapshotError::HeaderSizeMismatch {
294 actual: header_size,
295 expected: HEADER_SIZE_U32,
296 });
297 }
298
299 if header.reserved != [0; 8] {
300 return Err(SnapshotError::NonZeroHeaderReserved);
301 }
302
303 Ok(())
304}
305
306/// Byte-level section table entry.
307///
308/// Layout is `#[repr(C)]` with unaligned little-endian fields, mirroring
309/// [`RawHeader`]'s alignment policy.
310#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
311#[repr(C)]
312struct RawSectionEntry {
313 /// Byte offset of the section payload from the start of the snapshot.
314 offset: U64<LE>,
315 /// Byte length of the section payload.
316 length: U64<LE>,
317 /// Opaque section kind; the container assigns no semantics.
318 kind: U32<LE>,
319 /// Opaque section version; consumers interpret per kind.
320 version: U32<LE>,
321 /// Reserved bytes for a future per-section CRC32C; must be zero in v1.
322 reserved_checksum: [u8; 4],
323 /// `log2` of the producer's chosen payload alignment; v1 cap is 12.
324 alignment_log2: u8,
325 /// Reserved flag bits; must be zero in v1.
326 flags: u8,
327 /// Trailing reserved bytes; must be zero in v1.
328 reserved: [u8; 2],
329}
330
331/// Borrowed view of one validated section in a snapshot.
332///
333/// A `Section` carries the section's byte payload along with its declared
334/// metadata. Payload bytes are bounds- and overlap-checked at snapshot open
335/// time. Typed-slice access via [`Section::try_as_slice`] verifies the
336/// actual borrowed pointer's alignment at the call site.
337///
338/// # Performance
339///
340/// All methods are `O(1)` or `O(payload.len())` for typed conversions.
341#[derive(Clone, Copy, Debug)]
342pub struct Section<'view> {
343 /// Borrowed payload bytes.
344 payload: &'view [u8],
345 /// Section kind, as recorded in the section entry.
346 kind: u32,
347 /// Section version, as recorded in the section entry.
348 version: u32,
349 /// `log2` of the declared payload alignment.
350 alignment_log2: u8,
351}
352
353impl<'view> Section<'view> {
354 /// Constructs a [`Section`] from a previously validated entry.
355 ///
356 /// # Performance
357 ///
358 /// This function is `O(1)`.
359 #[must_use]
360 fn from_entry(bytes: &'view [u8], entry: &RawSectionEntry) -> Self {
361 let offset = u64_to_usize_validated(entry.offset.get());
362 let length = u64_to_usize_validated(entry.length.get());
363 Self {
364 payload: &bytes[offset..offset + length],
365 kind: entry.kind.get(),
366 version: entry.version.get(),
367 alignment_log2: entry.alignment_log2,
368 }
369 }
370
371 /// Returns the section's opaque kind identifier.
372 ///
373 /// # Performance
374 ///
375 /// This method is `O(1)`.
376 #[must_use]
377 pub const fn kind(&self) -> u32 {
378 self.kind
379 }
380
381 /// Returns the section's opaque version identifier.
382 ///
383 /// # Performance
384 ///
385 /// This method is `O(1)`.
386 #[must_use]
387 pub const fn version(&self) -> u32 {
388 self.version
389 }
390
391 /// Returns the alignment the producer declared for this payload.
392 ///
393 /// This is metadata recorded at build time, not a guarantee about the
394 /// actual borrowed pointer. Callers that intend to interpret the payload
395 /// as a typed slice should prefer [`Section::try_as_slice`], which
396 /// checks the actual payload pointer.
397 ///
398 /// # Performance
399 ///
400 /// This method is `O(1)`.
401 #[must_use]
402 pub const fn declared_alignment(&self) -> usize {
403 1usize << self.alignment_log2
404 }
405
406 /// Returns the section's borrowed payload bytes.
407 ///
408 /// # Performance
409 ///
410 /// This method is `O(1)`.
411 #[must_use]
412 pub const fn bytes(&self) -> &'view [u8] {
413 self.payload
414 }
415
416 /// Borrows the payload as a typed slice of `T`.
417 ///
418 /// Errors if (a) `payload.len()` is not a multiple of
419 /// `core::mem::size_of::<T>()` or (b) the payload's actual base address
420 /// does not satisfy `core::mem::align_of::<T>()`. The producer's
421 /// declared `alignment_log2` is not consulted; the actual borrowed
422 /// pointer is checked directly so that mmap'd or sub-sliced inputs
423 /// cannot bypass the check.
424 ///
425 /// # Errors
426 ///
427 /// Returns [`SectionViewError`] when the payload cannot be borrowed
428 /// as `&[T]` without copying.
429 ///
430 /// # Performance
431 ///
432 /// This method is `O(1)` modulo the bounds and alignment checks; it
433 /// performs no allocation and no per-element work.
434 pub fn try_as_slice<T>(&self) -> Result<&'view [T], SectionViewError>
435 where
436 T: zerocopy::FromBytes + zerocopy::Immutable + zerocopy::KnownLayout,
437 {
438 let elem_size = core::mem::size_of::<T>();
439 let length = self.payload.len();
440
441 if elem_size == 0 {
442 return Err(SectionViewError::ZeroSizedType);
443 }
444
445 if !length.is_multiple_of(elem_size) {
446 return Err(SectionViewError::LengthNotMultipleOfSize { length, elem_size });
447 }
448
449 let required = core::mem::align_of::<T>();
450 let ptr_addr = self.payload.as_ptr().addr();
451 if !ptr_addr.is_multiple_of(required) {
452 return Err(SectionViewError::AlignmentMismatch { ptr_addr, required });
453 }
454
455 let count = length / elem_size;
456 match <[T]>::ref_from_bytes_with_elems(self.payload, count) {
457 Ok(slice) => Ok(slice),
458 Err(_error) => Err(SectionViewError::AlignmentMismatch { ptr_addr, required }),
459 }
460 }
461}
462
463/// Validation depth applied at snapshot open time.
464///
465/// Validation responsibilities are layered. Header-only validation is not a
466/// member of this enum; callers wanting it should use
467/// [`HeaderOnlySnapshot::open`] instead, so the type system distinguishes a
468/// section-bearing handle from one whose section table has not been
469/// validated.
470///
471/// - [`SectionTable`](Self::SectionTable) parses the section table and per-entry self-consistency
472/// (alignment bound, reserved bytes zero, flags zero).
473/// - [`Layout`](Self::Layout) is the default; it adds payload bounds, monotonic-offset enforcement,
474/// and duplicate-kind detection.
475///
476/// Topology-level validation (CSR offset monotonicity, hypergraph role
477/// consistency, etc.) is the consumer's responsibility — the container
478/// has no kind registry and cannot validate semantics it does not know.
479///
480/// # Performance
481///
482/// `perf: unspecified`; this is a metadata enum.
483#[non_exhaustive]
484#[derive(Clone, Copy, Debug, Eq, PartialEq)]
485pub enum ValidationLevel {
486 /// Validate header and section table self-consistency.
487 SectionTable,
488 /// Validate header, section table, and full payload layout.
489 Layout,
490}
491
492/// Returns the first `kind` value that appears more than once in `items`.
493///
494/// Shared by the reader's duplicate-kind walk and [`SnapshotPlan::new`] so the
495/// two paths cannot drift. `kind_of` projects each item to its kind tag.
496///
497/// # Performance
498///
499/// This function is `O(items.len()^2)`.
500fn first_duplicate_kind<T>(items: &[T], kind_of: impl Fn(&T) -> u32) -> Option<u32> {
501 for (index, item) in items.iter().enumerate() {
502 let kind = kind_of(item);
503 if items[..index].iter().any(|prior| kind_of(prior) == kind) {
504 return Some(kind);
505 }
506 }
507 None
508}
509
510/// Walks the section table once and checks all v1 invariants.
511///
512/// `bytes` is the entire snapshot byte slice; `entries` is the parsed
513/// section table; `level` controls how deep the walk goes. Header-level
514/// invariants are presumed already validated by the caller.
515///
516/// Per-entry self-consistency **and** payload bounds (`offset + length` does
517/// not overflow and stays within the snapshot) are enforced at every level, so
518/// every [`Section`] a [`Snapshot`] hands out is bounds-safe regardless of the
519/// requested [`ValidationLevel`]. [`ValidationLevel::Layout`] additionally
520/// enforces non-overlapping monotonic ordering and unique kinds.
521///
522/// # Errors
523///
524/// Returns [`SnapshotError`] for any per-entry, bounds, or layout violation.
525///
526/// # Performance
527///
528/// This function is `O(s)` for the always-run self-consistency and bounds walk
529/// and `O(s^2)` for the duplicate-kind walk at [`ValidationLevel::Layout`].
530fn validate_section_table(
531 bytes: &[u8],
532 entries: &[RawSectionEntry],
533 level: ValidationLevel,
534) -> Result<(), SnapshotError> {
535 let snapshot_len = bytes.len() as u64;
536
537 // Always-run: per-entry self-consistency plus payload-bounds safety. The
538 // bounds check guarantees `Section::from_entry`'s `bytes[offset..end]`
539 // slice is in range, so accessors are panic-free at SectionTable level too.
540 for entry in entries {
541 let kind = entry.kind.get();
542 if entry.reserved_checksum != [0; 4] {
543 return Err(SnapshotError::NonZeroEntryChecksum { kind });
544 }
545 if entry.flags != 0 {
546 return Err(SnapshotError::UnsupportedFlags {
547 kind,
548 flags: entry.flags,
549 });
550 }
551 if entry.reserved != [0; 2] {
552 return Err(SnapshotError::NonZeroEntryReserved { kind });
553 }
554 if entry.alignment_log2 > MAX_ALIGNMENT_LOG2 {
555 return Err(SnapshotError::AlignmentLog2TooLarge {
556 kind,
557 alignment_log2: entry.alignment_log2,
558 });
559 }
560 let offset = entry.offset.get();
561 let length = entry.length.get();
562 let end = offset
563 .checked_add(length)
564 .ok_or(SnapshotError::SectionRangeOverflow { kind })?;
565 if end > snapshot_len {
566 return Err(SnapshotError::SectionOutOfBounds {
567 kind,
568 offset,
569 length,
570 snapshot_len,
571 });
572 }
573 }
574
575 if matches!(level, ValidationLevel::SectionTable) {
576 return Ok(());
577 }
578
579 // Layout-only: non-overlapping monotonic ordering (sections start at or
580 // after the end of the header+table and never overlap a predecessor).
581 let header_plus_table = (HEADER_SIZE as u64)
582 .checked_add((entries.len() as u64).saturating_mul(SECTION_ENTRY_SIZE as u64))
583 .ok_or(SnapshotError::SectionRangeOverflow { kind: 0 })?;
584 let mut prev_end = header_plus_table;
585 for (index, entry) in entries.iter().enumerate() {
586 let offset = entry.offset.get();
587 // `end` cannot overflow: the always-run walk above already proved it.
588 let end = offset.saturating_add(entry.length.get());
589 if offset < prev_end {
590 return Err(SnapshotError::UnsortedSectionTable { index });
591 }
592 prev_end = end;
593 }
594
595 // Layout-only: duplicate-kind detection, shared with the writer.
596 if let Some(kind) = first_duplicate_kind(entries, |entry| entry.kind.get()) {
597 return Err(SnapshotError::DuplicateKind { kind });
598 }
599
600 Ok(())
601}
602
603/// Header-only handle to a snapshot's bytes.
604///
605/// `HeaderOnlySnapshot` is the typestate-distinct counterpart to
606/// [`Snapshot`]: it validates only the fixed header (magic, format
607/// versions, header size, reserved bytes) and exposes the format
608/// versions, but it deliberately does not parse or expose the section
609/// table. Callers who only need to inspect format compatibility (e.g.,
610/// to decide whether the snapshot is readable at all) should use this
611/// type rather than asking [`Snapshot`] to skip section validation.
612///
613/// # Performance
614///
615/// [`HeaderOnlySnapshot::open`] is `O(1)` — it does not walk the section
616/// table or payload region. Subsequent accessors are `O(1)`.
617#[derive(Clone, Copy, Debug)]
618pub struct HeaderOnlySnapshot<'view> {
619 /// Borrowed snapshot bytes.
620 bytes: &'view [u8],
621 /// Format major version recorded in the header.
622 format_major: u32,
623 /// Format minor version recorded in the header.
624 format_minor: u32,
625}
626
627impl<'view> HeaderOnlySnapshot<'view> {
628 /// Opens `bytes` as a header-validated snapshot handle.
629 ///
630 /// Validates the magic bytes, format major and minor, header size, and
631 /// reserved bytes only. The section table and payload region are not
632 /// inspected and may still be malformed.
633 ///
634 /// # Errors
635 ///
636 /// Returns [`SnapshotError`] for any header-level invariant violation.
637 ///
638 /// # Performance
639 ///
640 /// This function is `O(1)`.
641 pub fn open(bytes: &'view [u8]) -> Result<Self, SnapshotError> {
642 let (header, _after_header) = parse_header(bytes)?;
643 validate_magic_versions_reserved(header)?;
644 Ok(Self {
645 bytes,
646 format_major: header.format_major.get(),
647 format_minor: header.format_minor.get(),
648 })
649 }
650
651 /// Returns the borrowed snapshot bytes.
652 ///
653 /// # Performance
654 ///
655 /// This method is `O(1)`.
656 #[must_use]
657 pub const fn bytes(&self) -> &'view [u8] {
658 self.bytes
659 }
660
661 /// Returns the format major version recorded in the snapshot header.
662 ///
663 /// # Performance
664 ///
665 /// This method is `O(1)`.
666 #[must_use]
667 pub const fn format_major(&self) -> u32 {
668 self.format_major
669 }
670
671 /// Returns the format minor version recorded in the snapshot header.
672 ///
673 /// # Performance
674 ///
675 /// This method is `O(1)`.
676 #[must_use]
677 pub const fn format_minor(&self) -> u32 {
678 self.format_minor
679 }
680}
681
682/// Validated, borrowed handle to a snapshot's bytes and section table.
683///
684/// A `Snapshot` is constructed via [`Snapshot::open`] (default
685/// [`ValidationLevel::Layout`]) or [`Snapshot::open_with`]. The handle
686/// itself is `Copy` and trivially cheap to pass; cloning it does not
687/// re-validate.
688///
689/// For header-only inspection without parsing the section table, use
690/// [`HeaderOnlySnapshot`] instead — `Snapshot` always carries a validated
691/// section table.
692///
693/// # Performance
694///
695/// Open is `O(s^2)` for `s` sections at [`ValidationLevel::Layout`] due to
696/// duplicate-kind detection, otherwise `O(s)`. Subsequent reads are `O(1)`
697/// to `O(s)` per call. No allocation occurs.
698#[derive(Clone, Copy, Debug)]
699pub struct Snapshot<'view> {
700 /// Borrowed snapshot bytes.
701 bytes: &'view [u8],
702 /// Format major version recorded in the header.
703 format_major: u32,
704 /// Format minor version recorded in the header.
705 format_minor: u32,
706 /// Borrowed, validated section table entries.
707 entries: &'view [RawSectionEntry],
708}
709
710impl<'view> Snapshot<'view> {
711 /// Opens `bytes` as a validated snapshot at [`ValidationLevel::Layout`].
712 ///
713 /// # Errors
714 ///
715 /// Returns [`SnapshotError`] for any header, section table, or layout
716 /// invariant violation.
717 ///
718 /// # Performance
719 ///
720 /// `O(s^2)` for `s` section entries.
721 pub fn open(bytes: &'view [u8]) -> Result<Self, SnapshotError> {
722 Self::open_with(bytes, ValidationLevel::Layout)
723 }
724
725 /// Opens `bytes` as a snapshot validated at the requested level.
726 ///
727 /// `level` selects between [`ValidationLevel::SectionTable`] (per-entry
728 /// self-consistency only) and [`ValidationLevel::Layout`] (full
729 /// payload-bounds and duplicate-kind walk). Header-only validation is
730 /// deliberately not selectable here; callers wanting it should use
731 /// [`HeaderOnlySnapshot::open`].
732 ///
733 /// # Errors
734 ///
735 /// Returns [`SnapshotError`] for any invariant violation visible at
736 /// the requested level.
737 ///
738 /// # Performance
739 ///
740 /// `O(s)` at [`ValidationLevel::SectionTable`], `O(s^2)` at
741 /// [`ValidationLevel::Layout`].
742 pub fn open_with(bytes: &'view [u8], level: ValidationLevel) -> Result<Self, SnapshotError> {
743 let (header, after_header) = parse_header(bytes)?;
744 validate_magic_versions_reserved(header)?;
745
746 let format_major = header.format_major.get();
747 let format_minor = header.format_minor.get();
748
749 let section_count = header.section_count.get();
750 if section_count > MAX_SECTION_COUNT {
751 return Err(SnapshotError::SectionCountTooLarge {
752 count: section_count,
753 max: MAX_SECTION_COUNT,
754 });
755 }
756 let Ok(section_count_usize) = usize::try_from(section_count) else {
757 return Err(SnapshotError::UsizeOverflow {
758 value: u64::from(section_count),
759 });
760 };
761 let Some(table_len) = section_count_usize.checked_mul(SECTION_ENTRY_SIZE) else {
762 return Err(SnapshotError::SectionCountTooLarge {
763 count: section_count,
764 max: MAX_SECTION_COUNT,
765 });
766 };
767 if after_header.len() < table_len {
768 return Err(SnapshotError::TruncatedSectionTable {
769 needed: table_len,
770 actual: after_header.len(),
771 });
772 }
773
774 let table_bytes = &after_header[..table_len];
775 let entries =
776 <[RawSectionEntry]>::ref_from_bytes_with_elems(table_bytes, section_count_usize)
777 .map_err(|_error| SnapshotError::MalformedSectionTable)?;
778
779 validate_section_table(bytes, entries, level)?;
780
781 Ok(Self {
782 bytes,
783 format_major,
784 format_minor,
785 entries,
786 })
787 }
788
789 /// Returns the format major version recorded in the snapshot header.
790 ///
791 /// # Performance
792 ///
793 /// This method is `O(1)`.
794 #[must_use]
795 pub const fn format_major(&self) -> u32 {
796 self.format_major
797 }
798
799 /// Returns the format minor version recorded in the snapshot header.
800 ///
801 /// # Performance
802 ///
803 /// This method is `O(1)`.
804 #[must_use]
805 pub const fn format_minor(&self) -> u32 {
806 self.format_minor
807 }
808
809 /// Returns the number of validated sections.
810 ///
811 /// # Performance
812 ///
813 /// This method is `O(1)`.
814 #[must_use]
815 pub const fn section_count(&self) -> usize {
816 self.entries.len()
817 }
818
819 /// Returns an iterator over all validated sections.
820 ///
821 /// # Performance
822 ///
823 /// Constructing the iterator is `O(1)`; advancing it is `O(1)` per step.
824 #[must_use]
825 pub fn sections(&self) -> SectionIter<'view> {
826 SectionIter {
827 bytes: self.bytes,
828 entries: self.entries.iter(),
829 }
830 }
831
832 /// Returns the section with the given `kind`, when present.
833 ///
834 /// # Performance
835 ///
836 /// This method is `O(s)` for `s` section entries.
837 #[must_use]
838 pub fn section(&self, kind: u32) -> Option<Section<'view>> {
839 let bytes = self.bytes;
840 self.entries.iter().find_map(|entry| {
841 if entry.kind.get() == kind {
842 Some(Section::from_entry(bytes, entry))
843 } else {
844 None
845 }
846 })
847 }
848
849 /// Binds a width-typed section by kind and version in one step.
850 ///
851 /// Looks up the section, checks its version against `expected_version`, and
852 /// borrows the payload as `&[W::LittleEndianWord]`. This is the single
853 /// section-open primitive every layout crate reuses instead of
854 /// re-implementing the lookup/version/typed-view sequence with its own error
855 /// variants; callers map [`SectionBindError`] into their own typed error at
856 /// the boundary.
857 ///
858 /// # Errors
859 ///
860 /// Returns [`SectionBindError::Missing`] when no section has `kind`,
861 /// [`SectionBindError::VersionMismatch`] when the recorded version differs,
862 /// and [`SectionBindError::View`] when the payload cannot be borrowed as the
863 /// requested little-endian word.
864 ///
865 /// # Performance
866 ///
867 /// This method is `O(s)` for `s` section entries plus the typed-view checks.
868 pub fn typed_section<W>(
869 &self,
870 kind: u32,
871 expected_version: u32,
872 ) -> Result<&'view [W::LittleEndianWord], SectionBindError>
873 where
874 W: SnapshotWidth,
875 {
876 let section = self
877 .section(kind)
878 .ok_or(SectionBindError::Missing { kind })?;
879 if section.version() != expected_version {
880 return Err(SectionBindError::VersionMismatch {
881 kind,
882 expected: expected_version,
883 actual: section.version(),
884 });
885 }
886 section
887 .try_as_slice::<W::LittleEndianWord>()
888 .map_err(|error| SectionBindError::View { kind, error })
889 }
890}
891
892/// Iterator over a snapshot's validated sections.
893///
894/// Yields each [`Section`] in section-table order. The iterator does not
895/// allocate and borrows from the snapshot's underlying byte slice.
896///
897/// # Performance
898///
899/// Advancing the iterator is `O(1)` per step.
900#[derive(Clone, Debug)]
901pub struct SectionIter<'view> {
902 /// Borrowed snapshot bytes.
903 bytes: &'view [u8],
904 /// Remaining section table entries to yield.
905 entries: core::slice::Iter<'view, RawSectionEntry>,
906}
907
908impl<'view> Iterator for SectionIter<'view> {
909 type Item = Section<'view>;
910
911 fn next(&mut self) -> Option<Self::Item> {
912 self.entries
913 .next()
914 .map(|entry| Section::from_entry(self.bytes, entry))
915 }
916
917 fn size_hint(&self) -> (usize, Option<usize>) {
918 self.entries.size_hint()
919 }
920}
921
922impl ExactSizeIterator for SectionIter<'_> {
923 fn len(&self) -> usize {
924 self.entries.len()
925 }
926}
927
928/// Description of one section to include in a snapshot.
929///
930/// Every field is opaque to the encoder. `kind` and `version` are passed
931/// through unchanged; `alignment_log2` controls payload alignment relative
932/// to the snapshot's start; `payload` is the section's raw bytes.
933///
934/// # Performance
935///
936/// `perf: unspecified`; this is a metadata struct.
937#[derive(Clone, Copy, Debug)]
938pub struct PendingSection<'a> {
939 /// Section kind to record in the entry.
940 pub kind: u32,
941 /// Section version to record in the entry.
942 pub version: u32,
943 /// `log2` of the requested payload alignment; capped at
944 /// [`MAX_ALIGNMENT_LOG2`].
945 pub alignment_log2: u8,
946 /// Section payload bytes.
947 pub payload: &'a [u8],
948}
949
950/// Validated plan that can compute its encoded length and write itself.
951///
952/// `SnapshotPlan` performs all duplicate-kind, alignment, and count checks
953/// at construction. After construction, [`encoded_len`](Self::encoded_len)
954/// and [`write_into`](Self::write_into) are guaranteed to succeed for any
955/// caller-supplied buffer that is at least `encoded_len()` bytes long.
956///
957/// # Performance
958///
959/// Construction is `O(s^2)` for `s` sections (duplicate-kind check).
960/// `encoded_len` and `write_into` are `O(s + total payload bytes)`.
961#[derive(Clone, Copy, Debug)]
962pub struct SnapshotPlan<'a> {
963 /// Borrowed pending section descriptors, in declaration order.
964 sections: &'a [PendingSection<'a>],
965}
966
967impl<'a> SnapshotPlan<'a> {
968 /// Validates a slice of pending sections and constructs a plan.
969 ///
970 /// # Errors
971 ///
972 /// Returns [`PlanError`] when alignment is too large, too many sections
973 /// are supplied, or duplicate kinds are present.
974 ///
975 /// # Performance
976 ///
977 /// This function is `O(s^2)` for `s` sections.
978 pub fn new(sections: &'a [PendingSection<'a>]) -> Result<Self, PlanError> {
979 if sections.len() > MAX_SECTION_COUNT as usize {
980 return Err(PlanError::TooManySections {
981 count: sections.len(),
982 });
983 }
984
985 for section in sections {
986 if section.alignment_log2 > MAX_ALIGNMENT_LOG2 {
987 return Err(PlanError::AlignmentTooLarge {
988 alignment_log2: section.alignment_log2,
989 });
990 }
991 }
992 if let Some(kind) = first_duplicate_kind(sections, |section| section.kind) {
993 return Err(PlanError::DuplicateKind { kind });
994 }
995
996 Ok(Self { sections })
997 }
998
999 /// Returns the number of sections in this plan.
1000 ///
1001 /// # Performance
1002 ///
1003 /// This method is `O(1)`.
1004 #[must_use]
1005 pub const fn section_count(&self) -> usize {
1006 self.sections.len()
1007 }
1008
1009 /// Computes the total bytes the encoded snapshot will occupy.
1010 ///
1011 /// # Errors
1012 ///
1013 /// Returns [`PlanError::PayloadOverflow`] when offset arithmetic
1014 /// exceeds `usize` or `u64` representable values.
1015 ///
1016 /// # Performance
1017 ///
1018 /// This function is `O(s)` for `s` sections.
1019 pub fn encoded_len(&self) -> Result<usize, PlanError> {
1020 let table_len = self
1021 .sections
1022 .len()
1023 .checked_mul(SECTION_ENTRY_SIZE)
1024 .ok_or(PlanError::PayloadOverflow)?;
1025 let mut total = HEADER_SIZE
1026 .checked_add(table_len)
1027 .ok_or(PlanError::PayloadOverflow)?;
1028
1029 for section in self.sections {
1030 total = align_up_checked(total, section.alignment_log2)?;
1031 total = total
1032 .checked_add(section.payload.len())
1033 .ok_or(PlanError::PayloadOverflow)?;
1034 }
1035
1036 u64::try_from(total).map_err(|_error| PlanError::PayloadOverflow)?;
1037 Ok(total)
1038 }
1039
1040 /// Writes the encoded snapshot into `out` and returns the number of
1041 /// bytes written.
1042 ///
1043 /// Padding bytes between the section table and each section payload
1044 /// are zero-filled deterministically; the resulting bytes are stable
1045 /// for any logical input.
1046 ///
1047 /// # Errors
1048 ///
1049 /// Returns [`PlanError::BufferTooSmall`] when `out.len()` is less than
1050 /// [`encoded_len`](Self::encoded_len) or [`PlanError::PayloadOverflow`]
1051 /// when offset arithmetic overflows during the write walk.
1052 ///
1053 /// # Performance
1054 ///
1055 /// This function is `O(s + total payload bytes)`.
1056 pub fn write_into(&self, out: &mut [u8]) -> Result<usize, PlanError> {
1057 let needed = self.encoded_len()?;
1058 if out.len() < needed {
1059 return Err(PlanError::BufferTooSmall {
1060 needed,
1061 actual: out.len(),
1062 });
1063 }
1064
1065 let prefix = &mut out[..needed];
1066 prefix.fill(0);
1067
1068 let section_count_u32 = match u32::try_from(self.sections.len()) {
1069 Ok(value) => value,
1070 Err(_error) => {
1071 return Err(PlanError::TooManySections {
1072 count: self.sections.len(),
1073 });
1074 }
1075 };
1076 let header = RawHeader {
1077 magic: FORMAT_MAGIC,
1078 format_major: U32::new(FORMAT_MAJOR),
1079 format_minor: U32::new(FORMAT_MINOR),
1080 header_size: U32::new(HEADER_SIZE_U32),
1081 section_count: U32::new(section_count_u32),
1082 reserved: [0; 8],
1083 };
1084 prefix[..HEADER_SIZE].copy_from_slice(header.as_bytes());
1085
1086 let table_start = HEADER_SIZE;
1087 let payload_start = table_start
1088 .checked_add(
1089 self.sections
1090 .len()
1091 .checked_mul(SECTION_ENTRY_SIZE)
1092 .ok_or(PlanError::PayloadOverflow)?,
1093 )
1094 .ok_or(PlanError::PayloadOverflow)?;
1095 let mut cursor = payload_start;
1096
1097 for (index, section) in self.sections.iter().enumerate() {
1098 cursor = align_up_checked(cursor, section.alignment_log2)?;
1099 let payload_end = cursor
1100 .checked_add(section.payload.len())
1101 .ok_or(PlanError::PayloadOverflow)?;
1102
1103 let offset_u64 = u64::try_from(cursor).map_err(|_error| PlanError::PayloadOverflow)?;
1104 let length_u64 = u64::try_from(section.payload.len())
1105 .map_err(|_error| PlanError::PayloadOverflow)?;
1106 let entry = RawSectionEntry {
1107 offset: U64::new(offset_u64),
1108 length: U64::new(length_u64),
1109 kind: U32::new(section.kind),
1110 version: U32::new(section.version),
1111 reserved_checksum: [0; 4],
1112 alignment_log2: section.alignment_log2,
1113 flags: 0,
1114 reserved: [0; 2],
1115 };
1116 let entry_offset = table_start
1117 .checked_add(
1118 index
1119 .checked_mul(SECTION_ENTRY_SIZE)
1120 .ok_or(PlanError::PayloadOverflow)?,
1121 )
1122 .ok_or(PlanError::PayloadOverflow)?;
1123 prefix[entry_offset..entry_offset + SECTION_ENTRY_SIZE]
1124 .copy_from_slice(entry.as_bytes());
1125
1126 prefix[cursor..payload_end].copy_from_slice(section.payload);
1127 cursor = payload_end;
1128 }
1129
1130 Ok(needed)
1131 }
1132}
1133
1134/// Rounds `value` up to the next multiple of `1 << alignment_log2`.
1135///
1136/// # Errors
1137///
1138/// Returns [`PlanError::PayloadOverflow`] on `usize` overflow.
1139///
1140/// # Performance
1141///
1142/// This function is `O(1)`.
1143fn align_up_checked(value: usize, alignment_log2: u8) -> Result<usize, PlanError> {
1144 let alignment = 1usize << alignment_log2;
1145 let mask = alignment - 1;
1146 let added = value.checked_add(mask).ok_or(PlanError::PayloadOverflow)?;
1147 Ok(added & !mask)
1148}
1149
1150/// One owned section pending in a [`SnapshotBuilder`].
1151#[cfg(feature = "alloc")]
1152#[derive(Clone, Debug)]
1153struct OwnedSection {
1154 /// Section kind.
1155 kind: u32,
1156 /// Section version.
1157 version: u32,
1158 /// Declared payload alignment as `log2`.
1159 alignment_log2: u8,
1160 /// Owned payload bytes.
1161 payload: Vec<u8>,
1162}
1163
1164/// Owning snapshot builder that produces a `Vec<u8>` on finish.
1165///
1166/// The builder rejects duplicate kinds, alignment overflows, and section
1167/// count overflows at `add_section*` time, so the only failure that can
1168/// reach [`SnapshotBuilder::finish`] is [`PlanError::PayloadOverflow`] —
1169/// when the cumulative encoded length would overflow `u64` or `usize`.
1170///
1171/// # Performance
1172///
1173/// `add_section*` methods are `O(s)` for the in-progress section count.
1174/// [`finish`](Self::finish) is `O(s + total payload bytes)`.
1175#[cfg(feature = "alloc")]
1176#[derive(Clone, Debug, Default)]
1177#[must_use]
1178pub struct SnapshotBuilder {
1179 /// Owned, in-order sections.
1180 sections: Vec<OwnedSection>,
1181}
1182
1183#[cfg(feature = "alloc")]
1184impl SnapshotBuilder {
1185 /// Constructs an empty builder.
1186 ///
1187 /// # Performance
1188 ///
1189 /// This function is `O(1)`.
1190 pub fn new() -> Self {
1191 Self::default()
1192 }
1193
1194 /// Appends a section with the given metadata and owned payload.
1195 ///
1196 /// # Errors
1197 ///
1198 /// Returns [`PlanError`] when `alignment_log2` is too large, when the
1199 /// builder already holds the maximum permitted section count, or when
1200 /// `kind` collides with an earlier section.
1201 ///
1202 /// # Performance
1203 ///
1204 /// This method is `O(s)` for the in-progress section count.
1205 pub fn add_section(
1206 &mut self,
1207 kind: u32,
1208 version: u32,
1209 alignment_log2: u8,
1210 payload: Vec<u8>,
1211 ) -> Result<&mut Self, PlanError> {
1212 if alignment_log2 > MAX_ALIGNMENT_LOG2 {
1213 return Err(PlanError::AlignmentTooLarge { alignment_log2 });
1214 }
1215 if self.sections.len() >= MAX_SECTION_COUNT as usize {
1216 return Err(PlanError::TooManySections {
1217 count: self.sections.len() + 1,
1218 });
1219 }
1220 for prior in &self.sections {
1221 if prior.kind == kind {
1222 return Err(PlanError::DuplicateKind { kind });
1223 }
1224 }
1225 self.sections.push(OwnedSection {
1226 kind,
1227 version,
1228 alignment_log2,
1229 payload,
1230 });
1231 Ok(self)
1232 }
1233
1234 /// Appends a section whose alignment is derived from `T`.
1235 ///
1236 /// The payload is copied via [`zerocopy::IntoBytes`].
1237 ///
1238 /// # Errors
1239 ///
1240 /// Returns [`PlanError`] for the same reasons as
1241 /// [`add_section`](Self::add_section), plus
1242 /// [`PlanError::AlignmentTooLarge`] when `align_of::<T>()` exceeds
1243 /// the v1 cap.
1244 ///
1245 /// # Performance
1246 ///
1247 /// This method is `O(s + payload.len() * size_of::<T>())`.
1248 pub fn add_section_typed<T>(
1249 &mut self,
1250 kind: u32,
1251 version: u32,
1252 payload: &[T],
1253 ) -> Result<&mut Self, PlanError>
1254 where
1255 T: zerocopy::IntoBytes + zerocopy::Immutable,
1256 {
1257 let alignment = core::mem::align_of::<T>();
1258 let alignment_log2 = match u8::try_from(alignment.trailing_zeros()) {
1259 Ok(value) => value,
1260 Err(_error) => {
1261 return Err(PlanError::AlignmentTooLarge {
1262 alignment_log2: u8::MAX,
1263 });
1264 }
1265 };
1266 if alignment_log2 > MAX_ALIGNMENT_LOG2 {
1267 return Err(PlanError::AlignmentTooLarge { alignment_log2 });
1268 }
1269 let bytes = payload.as_bytes().to_vec();
1270 self.add_section(kind, version, alignment_log2, bytes)
1271 }
1272
1273 /// Appends a section containing explicit little-endian typed words.
1274 ///
1275 /// Prefer [`add_section_widths`](Self::add_section_widths), which takes a
1276 /// native index slice and lowers it through `slice_to_le`, enforcing the
1277 /// little-endian guarantee in the type system. This method exists for
1278 /// callers that already hold portable byteorder words such as
1279 /// `zerocopy::byteorder::U32<LE>`; the payload is copied via
1280 /// [`zerocopy::IntoBytes`] and the alignment is derived from `T`.
1281 ///
1282 /// # Errors
1283 ///
1284 /// Returns [`PlanError`] for the same reasons as
1285 /// [`add_section_typed`](Self::add_section_typed).
1286 ///
1287 /// # Performance
1288 ///
1289 /// This method is `O(s + payload.len() * size_of::<T>())`.
1290 pub fn add_section_little_endian<T>(
1291 &mut self,
1292 kind: u32,
1293 version: u32,
1294 payload: &[T],
1295 ) -> Result<&mut Self, PlanError>
1296 where
1297 T: zerocopy::IntoBytes + zerocopy::Immutable,
1298 {
1299 self.add_section_typed(kind, version, payload)
1300 }
1301
1302 /// Appends a section from a native-width index slice, lowering it to its
1303 /// explicit little-endian storage words first.
1304 ///
1305 /// Convenience wrapper that calls `oxgraph_layout_util::slice_to_le` and
1306 /// then [`add_section_little_endian`](Self::add_section_little_endian), so
1307 /// exporters can pass native `&[u32]`-style slices without converting by
1308 /// hand. Requires the `alloc` feature (it allocates the converted words).
1309 ///
1310 /// # Errors
1311 ///
1312 /// Returns [`PlanError`] for the same reasons as
1313 /// [`add_section_typed`](Self::add_section_typed).
1314 ///
1315 /// # Performance
1316 ///
1317 /// This method is `O(s + payload.len())` plus one allocation for the
1318 /// converted words.
1319 pub fn add_section_widths<W>(
1320 &mut self,
1321 kind: u32,
1322 version: u32,
1323 values: &[W],
1324 ) -> Result<&mut Self, PlanError>
1325 where
1326 W: SnapshotWidth,
1327 {
1328 let words = oxgraph_layout_util::slice_to_le(values);
1329 self.add_section_little_endian(kind, version, &words)
1330 }
1331
1332 /// Returns the number of pending sections.
1333 ///
1334 /// # Performance
1335 ///
1336 /// This method is `O(1)`.
1337 #[must_use]
1338 pub const fn section_count(&self) -> usize {
1339 self.sections.len()
1340 }
1341
1342 /// Encodes the pending sections into an owned snapshot byte vector.
1343 ///
1344 /// The builder enforces per-insert invariants
1345 /// ([`PlanError::AlignmentTooLarge`], [`PlanError::TooManySections`],
1346 /// [`PlanError::DuplicateKind`]) on `add_section*`, so this method
1347 /// only fails when the cumulative payload arithmetic overflows
1348 /// (`u64`/`usize`), surfaced as [`PlanError::PayloadOverflow`].
1349 ///
1350 /// # Errors
1351 ///
1352 /// Returns [`PlanError::PayloadOverflow`] when the total encoded length
1353 /// would overflow `u64` or `usize`. All other [`PlanError`] variants
1354 /// are caught at `add_section*` time and cannot reach this method.
1355 ///
1356 /// # Performance
1357 ///
1358 /// This method is `O(s + total payload bytes)`.
1359 pub fn finish(self) -> Result<Vec<u8>, PlanError> {
1360 let pending: Vec<PendingSection<'_>> = self
1361 .sections
1362 .iter()
1363 .map(|section| PendingSection {
1364 kind: section.kind,
1365 version: section.version,
1366 alignment_log2: section.alignment_log2,
1367 payload: section.payload.as_slice(),
1368 })
1369 .collect();
1370
1371 let plan = SnapshotPlan::new(&pending)?;
1372 let needed = plan.encoded_len()?;
1373 let mut out = vec![0u8; needed];
1374 plan.write_into(&mut out)?;
1375 Ok(out)
1376 }
1377}