Skip to main content

oxgraph_snapshot/
container.rs

1//! Topology-agnostic snapshot container: format constants, byte-level header
2//! and section table, validation, reader, no-`alloc` planner, and the
3//! `alloc`-gated owning builder.
4//!
5//! All public types are re-exported through the crate root; consumers should
6//! depend on the crate-level paths rather than reaching in here.
7//!
8//! When the container ever graduates to a separate `topology-snapshot` crate
9//! the whole module moves wholesale, and the crate root becomes a shim of
10//! `pub use topology_snapshot::*`.
11
12#[cfg(feature = "alloc")]
13use alloc::{vec, vec::Vec};
14use core::fmt;
15
16use oxgraph_layout_util::SnapshotWidth;
17use zerocopy::{
18    FromBytes, Immutable, IntoBytes, KnownLayout,
19    byteorder::{LE, U32, U64},
20};
21
22use crate::container_error::{PlanError, SectionBindError, SectionViewError, SnapshotError};
23
24/// Magic bytes identifying the topology snapshot container format.
25///
26/// Producers MUST write these eight bytes at offset 0; readers MUST reject
27/// snapshots whose first eight bytes differ.
28///
29/// # Performance
30///
31/// `perf: unspecified`; this is a compile-time constant.
32pub const FORMAT_MAGIC: [u8; 8] = *b"OXGTOPO\0";
33
34/// Format major version this library reads and writes.
35///
36/// A snapshot whose `format_major` field does not equal this constant is
37/// rejected at open time. Major bumps are permitted to break compatibility
38/// in arbitrary ways.
39///
40/// # Performance
41///
42/// `perf: unspecified`; this is a compile-time constant.
43pub const FORMAT_MAJOR: u32 = 1;
44
45/// Format minor version written by this library's builder.
46///
47/// Minor bumps are reserved for backward-compatible additions (e.g. enabling
48/// previously reserved bits or fields). Producers using this library will
49/// emit this value unconditionally.
50///
51/// # Performance
52///
53/// `perf: unspecified`; this is a compile-time constant.
54pub const FORMAT_MINOR: u32 = 0;
55
56/// Highest format minor version this library can read.
57///
58/// Snapshots with `format_minor > MAX_SUPPORTED_MINOR` are rejected at open
59/// time. v1 is intentionally strict; raising this value is a deliberate
60/// per-minor decision once the new minor is proven safely readable here.
61///
62/// # Performance
63///
64/// `perf: unspecified`; this is a compile-time constant.
65pub const MAX_SUPPORTED_MINOR: u32 = 0;
66
67/// Size of the snapshot header in bytes.
68///
69/// # Performance
70///
71/// `perf: unspecified`; this is a compile-time constant.
72pub const HEADER_SIZE: usize = 32;
73
74/// Size of one section table entry in bytes.
75///
76/// # Performance
77///
78/// `perf: unspecified`; this is a compile-time constant.
79pub const SECTION_ENTRY_SIZE: usize = 32;
80
81/// Maximum permitted `alignment_log2` value (2^12 = 4 KiB, page-friendly).
82///
83/// # Performance
84///
85/// `perf: unspecified`; this is a compile-time constant.
86pub const MAX_ALIGNMENT_LOG2: u8 = 12;
87
88/// Maximum permitted section count for v1 snapshots.
89///
90/// Bounds the duplicate-kind detection in `O(s^2)` validation and keeps
91/// kani proofs tractable. Future minors may raise this if validation moves
92/// to a sorted-by-kind side index.
93///
94/// # Performance
95///
96/// `perf: unspecified`; this is a compile-time constant.
97pub const MAX_SECTION_COUNT: u32 = 1024;
98
99/// `HEADER_SIZE` rendered as a `u32` for header-field comparisons.
100const HEADER_SIZE_U32: u32 = 32;
101
102/// Typed wrapper over a section's opaque `u32` kind tag.
103///
104/// The container still treats the value opaquely, but [`SectionKind`] plus the
105/// [`kinds`] band registry give the wire format a single documented authority
106/// over the kind namespace. Layout crates declare their kind constants inside
107/// the band the registry reserves for them so that distinct subsystems cannot
108/// silently collide on a value.
109///
110/// # Performance
111///
112/// All methods are `O(1)`.
113#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
114pub struct SectionKind(u32);
115
116impl SectionKind {
117    /// Wraps a raw kind tag.
118    ///
119    /// # Performance
120    ///
121    /// This function is `O(1)`.
122    #[must_use]
123    pub const fn new(value: u32) -> Self {
124        Self(value)
125    }
126
127    /// Returns the raw kind tag.
128    ///
129    /// # Performance
130    ///
131    /// This function is `O(1)`.
132    #[must_use]
133    pub const fn get(self) -> u32 {
134        self.0
135    }
136}
137
138impl From<u32> for SectionKind {
139    fn from(value: u32) -> Self {
140        Self(value)
141    }
142}
143
144impl fmt::Display for SectionKind {
145    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
146        write!(formatter, "{:#06x}", self.0)
147    }
148}
149
150/// Section-kind band allocation registry: the single documented authority for
151/// who owns which range of the opaque `u32` kind namespace.
152///
153/// The container assigns no semantics to kinds, but every in-tree layer
154/// declares its `SNAPSHOT_KIND_*` constants inside the band reserved here, and
155/// the bands are mutually exclusive, so distinct subsystems cannot collide.
156/// Each band is a half-open `[start, end)` range of raw kind values.
157///
158/// # Performance
159///
160/// `perf: unspecified`; these are compile-time constants.
161pub mod kinds {
162    use core::ops::Range;
163
164    /// CSR graph layout sections (offsets/targets, all widths).
165    pub const CSR_BAND: Range<u32> = 0x0001..0x0020;
166    /// Bipartite-CSR hypergraph layout sections (all widths).
167    pub const BCSR_BAND: Range<u32> = 0x0020..0x0100;
168    /// Property and identity-map sections (all widths).
169    pub const PROPERTY_BAND: Range<u32> = 0x0100..0x0200;
170    /// `PostgreSQL` engine sections, including the inbound CSC layout.
171    pub const POSTGRES_BAND: Range<u32> = 0x0200..0x0300;
172    /// Embedded `OxGraph` database state sections.
173    pub const DATABASE_BAND: Range<u32> = 0x0300..0x0400;
174    /// Application/custom sections; the container reserves nothing here.
175    pub const CUSTOM_BASE: u32 = 0x0400;
176
177    /// Returns whether `kind` falls within the half-open `band`.
178    ///
179    /// Layout crates use this in `const`-checked tests to prove their kind
180    /// constants stay inside their reserved band.
181    ///
182    /// # Performance
183    ///
184    /// This function is `O(1)`.
185    #[must_use]
186    pub const fn in_band(kind: u32, band: Range<u32>) -> bool {
187        kind >= band.start && kind < band.end
188    }
189}
190
191/// Converts a checked `u64` into `usize`, asserting in debug mode that the
192/// value already fits because validation enforced an earlier bound.
193///
194/// # Panics
195///
196/// Panics via `unreachable!()` only on a target where `usize` is narrower
197/// than `u64` AND the caller has supplied a value that was not first vetted
198/// by the snapshot's `Layout` validation pass (which surfaces the failure
199/// as [`SnapshotError::UsizeOverflow`] before any `_validated` call).
200///
201/// # Performance
202///
203/// This function is `O(1)`.
204fn u64_to_usize_validated(value: u64) -> usize {
205    match usize::try_from(value) {
206        Ok(converted) => converted,
207        Err(_error) => unreachable!("validated u64 must fit usize on this target"),
208    }
209}
210
211/// Byte-level snapshot header.
212///
213/// Layout is `#[repr(C)]` with all multi-byte fields stored as zerocopy's
214/// unaligned little-endian wrappers. The struct itself has alignment 1, so
215/// it can be borrowed from any byte slice that is at least `HEADER_SIZE`
216/// long without an alignment check.
217#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
218#[repr(C)]
219struct RawHeader {
220    /// Magic bytes; must equal [`FORMAT_MAGIC`].
221    magic: [u8; 8],
222    /// Format major version.
223    format_major: U32<LE>,
224    /// Format minor version.
225    format_minor: U32<LE>,
226    /// Header size in bytes; v1.0 mandates `HEADER_SIZE`.
227    header_size: U32<LE>,
228    /// Number of section table entries.
229    section_count: U32<LE>,
230    /// Reserved; must be zero.
231    reserved: [u8; 8],
232}
233
234/// Parses the fixed header from the start of `bytes`.
235///
236/// # Errors
237///
238/// Returns [`SnapshotError::TruncatedHeader`] when fewer than [`HEADER_SIZE`]
239/// bytes are provided. Header field validation is performed separately in
240/// [`validate_magic_versions_reserved`].
241///
242/// # Performance
243///
244/// This function is `O(1)`.
245fn parse_header(bytes: &[u8]) -> Result<(&RawHeader, &[u8]), SnapshotError> {
246    if bytes.len() < HEADER_SIZE {
247        return Err(SnapshotError::TruncatedHeader {
248            needed: HEADER_SIZE,
249            actual: bytes.len(),
250        });
251    }
252
253    match RawHeader::ref_from_prefix(bytes) {
254        Ok((header, rest)) => Ok((header, rest)),
255        Err(_error) => Err(SnapshotError::MalformedHeader),
256    }
257}
258
259/// Validates header magic, version, header size, and reserved bytes.
260///
261/// # Errors
262///
263/// Returns [`SnapshotError`] for any header-level invariant violation.
264///
265/// # Performance
266///
267/// This function is `O(1)`.
268fn validate_magic_versions_reserved(header: &RawHeader) -> Result<(), SnapshotError> {
269    if header.magic != FORMAT_MAGIC {
270        return Err(SnapshotError::BadMagic {
271            actual: header.magic,
272        });
273    }
274
275    let major = header.format_major.get();
276    if major != FORMAT_MAJOR {
277        return Err(SnapshotError::FormatMajorMismatch {
278            actual: major,
279            supported: FORMAT_MAJOR,
280        });
281    }
282
283    let minor = header.format_minor.get();
284    if minor > MAX_SUPPORTED_MINOR {
285        return Err(SnapshotError::FormatMinorTooNew {
286            actual: minor,
287            max_supported: MAX_SUPPORTED_MINOR,
288        });
289    }
290
291    let header_size = header.header_size.get();
292    if header_size != HEADER_SIZE_U32 {
293        return Err(SnapshotError::HeaderSizeMismatch {
294            actual: header_size,
295            expected: HEADER_SIZE_U32,
296        });
297    }
298
299    if header.reserved != [0; 8] {
300        return Err(SnapshotError::NonZeroHeaderReserved);
301    }
302
303    Ok(())
304}
305
306/// Byte-level section table entry.
307///
308/// Layout is `#[repr(C)]` with unaligned little-endian fields, mirroring
309/// [`RawHeader`]'s alignment policy.
310#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
311#[repr(C)]
312struct RawSectionEntry {
313    /// Byte offset of the section payload from the start of the snapshot.
314    offset: U64<LE>,
315    /// Byte length of the section payload.
316    length: U64<LE>,
317    /// Opaque section kind; the container assigns no semantics.
318    kind: U32<LE>,
319    /// Opaque section version; consumers interpret per kind.
320    version: U32<LE>,
321    /// Reserved bytes for a future per-section CRC32C; must be zero in v1.
322    reserved_checksum: [u8; 4],
323    /// `log2` of the producer's chosen payload alignment; v1 cap is 12.
324    alignment_log2: u8,
325    /// Reserved flag bits; must be zero in v1.
326    flags: u8,
327    /// Trailing reserved bytes; must be zero in v1.
328    reserved: [u8; 2],
329}
330
331/// Borrowed view of one validated section in a snapshot.
332///
333/// A `Section` carries the section's byte payload along with its declared
334/// metadata. Payload bytes are bounds- and overlap-checked at snapshot open
335/// time. Typed-slice access via [`Section::try_as_slice`] verifies the
336/// actual borrowed pointer's alignment at the call site.
337///
338/// # Performance
339///
340/// All methods are `O(1)` or `O(payload.len())` for typed conversions.
341#[derive(Clone, Copy, Debug)]
342pub struct Section<'view> {
343    /// Borrowed payload bytes.
344    payload: &'view [u8],
345    /// Section kind, as recorded in the section entry.
346    kind: u32,
347    /// Section version, as recorded in the section entry.
348    version: u32,
349    /// `log2` of the declared payload alignment.
350    alignment_log2: u8,
351}
352
353impl<'view> Section<'view> {
354    /// Constructs a [`Section`] from a previously validated entry.
355    ///
356    /// # Performance
357    ///
358    /// This function is `O(1)`.
359    #[must_use]
360    fn from_entry(bytes: &'view [u8], entry: &RawSectionEntry) -> Self {
361        let offset = u64_to_usize_validated(entry.offset.get());
362        let length = u64_to_usize_validated(entry.length.get());
363        Self {
364            payload: &bytes[offset..offset + length],
365            kind: entry.kind.get(),
366            version: entry.version.get(),
367            alignment_log2: entry.alignment_log2,
368        }
369    }
370
371    /// Returns the section's opaque kind identifier.
372    ///
373    /// # Performance
374    ///
375    /// This method is `O(1)`.
376    #[must_use]
377    pub const fn kind(&self) -> u32 {
378        self.kind
379    }
380
381    /// Returns the section's opaque version identifier.
382    ///
383    /// # Performance
384    ///
385    /// This method is `O(1)`.
386    #[must_use]
387    pub const fn version(&self) -> u32 {
388        self.version
389    }
390
391    /// Returns the alignment the producer declared for this payload.
392    ///
393    /// This is metadata recorded at build time, not a guarantee about the
394    /// actual borrowed pointer. Callers that intend to interpret the payload
395    /// as a typed slice should prefer [`Section::try_as_slice`], which
396    /// checks the actual payload pointer.
397    ///
398    /// # Performance
399    ///
400    /// This method is `O(1)`.
401    #[must_use]
402    pub const fn declared_alignment(&self) -> usize {
403        1usize << self.alignment_log2
404    }
405
406    /// Returns the section's borrowed payload bytes.
407    ///
408    /// # Performance
409    ///
410    /// This method is `O(1)`.
411    #[must_use]
412    pub const fn bytes(&self) -> &'view [u8] {
413        self.payload
414    }
415
416    /// Borrows the payload as a typed slice of `T`.
417    ///
418    /// Errors if (a) `payload.len()` is not a multiple of
419    /// `core::mem::size_of::<T>()` or (b) the payload's actual base address
420    /// does not satisfy `core::mem::align_of::<T>()`. The producer's
421    /// declared `alignment_log2` is not consulted; the actual borrowed
422    /// pointer is checked directly so that mmap'd or sub-sliced inputs
423    /// cannot bypass the check.
424    ///
425    /// # Errors
426    ///
427    /// Returns [`SectionViewError`] when the payload cannot be borrowed
428    /// as `&[T]` without copying.
429    ///
430    /// # Performance
431    ///
432    /// This method is `O(1)` modulo the bounds and alignment checks; it
433    /// performs no allocation and no per-element work.
434    pub fn try_as_slice<T>(&self) -> Result<&'view [T], SectionViewError>
435    where
436        T: zerocopy::FromBytes + zerocopy::Immutable + zerocopy::KnownLayout,
437    {
438        let elem_size = core::mem::size_of::<T>();
439        let length = self.payload.len();
440
441        if elem_size == 0 {
442            return Err(SectionViewError::ZeroSizedType);
443        }
444
445        if !length.is_multiple_of(elem_size) {
446            return Err(SectionViewError::LengthNotMultipleOfSize { length, elem_size });
447        }
448
449        let required = core::mem::align_of::<T>();
450        let ptr_addr = self.payload.as_ptr().addr();
451        if !ptr_addr.is_multiple_of(required) {
452            return Err(SectionViewError::AlignmentMismatch { ptr_addr, required });
453        }
454
455        let count = length / elem_size;
456        match <[T]>::ref_from_bytes_with_elems(self.payload, count) {
457            Ok(slice) => Ok(slice),
458            Err(_error) => Err(SectionViewError::AlignmentMismatch { ptr_addr, required }),
459        }
460    }
461}
462
463/// Validation depth applied at snapshot open time.
464///
465/// Validation responsibilities are layered. Header-only validation is not a
466/// member of this enum; callers wanting it should use
467/// [`HeaderOnlySnapshot::open`] instead, so the type system distinguishes a
468/// section-bearing handle from one whose section table has not been
469/// validated.
470///
471/// - [`SectionTable`](Self::SectionTable) parses the section table and per-entry self-consistency
472///   (alignment bound, reserved bytes zero, flags zero).
473/// - [`Layout`](Self::Layout) is the default; it adds payload bounds, monotonic-offset enforcement,
474///   and duplicate-kind detection.
475///
476/// Topology-level validation (CSR offset monotonicity, hypergraph role
477/// consistency, etc.) is the consumer's responsibility — the container
478/// has no kind registry and cannot validate semantics it does not know.
479///
480/// # Performance
481///
482/// `perf: unspecified`; this is a metadata enum.
483#[non_exhaustive]
484#[derive(Clone, Copy, Debug, Eq, PartialEq)]
485pub enum ValidationLevel {
486    /// Validate header and section table self-consistency.
487    SectionTable,
488    /// Validate header, section table, and full payload layout.
489    Layout,
490}
491
492/// Returns the first `kind` value that appears more than once in `items`.
493///
494/// Shared by the reader's duplicate-kind walk and [`SnapshotPlan::new`] so the
495/// two paths cannot drift. `kind_of` projects each item to its kind tag.
496///
497/// # Performance
498///
499/// This function is `O(items.len()^2)`.
500fn first_duplicate_kind<T>(items: &[T], kind_of: impl Fn(&T) -> u32) -> Option<u32> {
501    for (index, item) in items.iter().enumerate() {
502        let kind = kind_of(item);
503        if items[..index].iter().any(|prior| kind_of(prior) == kind) {
504            return Some(kind);
505        }
506    }
507    None
508}
509
510/// Walks the section table once and checks all v1 invariants.
511///
512/// `bytes` is the entire snapshot byte slice; `entries` is the parsed
513/// section table; `level` controls how deep the walk goes. Header-level
514/// invariants are presumed already validated by the caller.
515///
516/// Per-entry self-consistency **and** payload bounds (`offset + length` does
517/// not overflow and stays within the snapshot) are enforced at every level, so
518/// every [`Section`] a [`Snapshot`] hands out is bounds-safe regardless of the
519/// requested [`ValidationLevel`]. [`ValidationLevel::Layout`] additionally
520/// enforces non-overlapping monotonic ordering and unique kinds.
521///
522/// # Errors
523///
524/// Returns [`SnapshotError`] for any per-entry, bounds, or layout violation.
525///
526/// # Performance
527///
528/// This function is `O(s)` for the always-run self-consistency and bounds walk
529/// and `O(s^2)` for the duplicate-kind walk at [`ValidationLevel::Layout`].
530fn validate_section_table(
531    bytes: &[u8],
532    entries: &[RawSectionEntry],
533    level: ValidationLevel,
534) -> Result<(), SnapshotError> {
535    let snapshot_len = bytes.len() as u64;
536
537    // Always-run: per-entry self-consistency plus payload-bounds safety. The
538    // bounds check guarantees `Section::from_entry`'s `bytes[offset..end]`
539    // slice is in range, so accessors are panic-free at SectionTable level too.
540    for entry in entries {
541        let kind = entry.kind.get();
542        if entry.reserved_checksum != [0; 4] {
543            return Err(SnapshotError::NonZeroEntryChecksum { kind });
544        }
545        if entry.flags != 0 {
546            return Err(SnapshotError::UnsupportedFlags {
547                kind,
548                flags: entry.flags,
549            });
550        }
551        if entry.reserved != [0; 2] {
552            return Err(SnapshotError::NonZeroEntryReserved { kind });
553        }
554        if entry.alignment_log2 > MAX_ALIGNMENT_LOG2 {
555            return Err(SnapshotError::AlignmentLog2TooLarge {
556                kind,
557                alignment_log2: entry.alignment_log2,
558            });
559        }
560        let offset = entry.offset.get();
561        let length = entry.length.get();
562        let end = offset
563            .checked_add(length)
564            .ok_or(SnapshotError::SectionRangeOverflow { kind })?;
565        if end > snapshot_len {
566            return Err(SnapshotError::SectionOutOfBounds {
567                kind,
568                offset,
569                length,
570                snapshot_len,
571            });
572        }
573    }
574
575    if matches!(level, ValidationLevel::SectionTable) {
576        return Ok(());
577    }
578
579    // Layout-only: non-overlapping monotonic ordering (sections start at or
580    // after the end of the header+table and never overlap a predecessor).
581    let header_plus_table = (HEADER_SIZE as u64)
582        .checked_add((entries.len() as u64).saturating_mul(SECTION_ENTRY_SIZE as u64))
583        .ok_or(SnapshotError::SectionRangeOverflow { kind: 0 })?;
584    let mut prev_end = header_plus_table;
585    for (index, entry) in entries.iter().enumerate() {
586        let offset = entry.offset.get();
587        // `end` cannot overflow: the always-run walk above already proved it.
588        let end = offset.saturating_add(entry.length.get());
589        if offset < prev_end {
590            return Err(SnapshotError::UnsortedSectionTable { index });
591        }
592        prev_end = end;
593    }
594
595    // Layout-only: duplicate-kind detection, shared with the writer.
596    if let Some(kind) = first_duplicate_kind(entries, |entry| entry.kind.get()) {
597        return Err(SnapshotError::DuplicateKind { kind });
598    }
599
600    Ok(())
601}
602
603/// Header-only handle to a snapshot's bytes.
604///
605/// `HeaderOnlySnapshot` is the typestate-distinct counterpart to
606/// [`Snapshot`]: it validates only the fixed header (magic, format
607/// versions, header size, reserved bytes) and exposes the format
608/// versions, but it deliberately does not parse or expose the section
609/// table. Callers who only need to inspect format compatibility (e.g.,
610/// to decide whether the snapshot is readable at all) should use this
611/// type rather than asking [`Snapshot`] to skip section validation.
612///
613/// # Performance
614///
615/// [`HeaderOnlySnapshot::open`] is `O(1)` — it does not walk the section
616/// table or payload region. Subsequent accessors are `O(1)`.
617#[derive(Clone, Copy, Debug)]
618pub struct HeaderOnlySnapshot<'view> {
619    /// Borrowed snapshot bytes.
620    bytes: &'view [u8],
621    /// Format major version recorded in the header.
622    format_major: u32,
623    /// Format minor version recorded in the header.
624    format_minor: u32,
625}
626
627impl<'view> HeaderOnlySnapshot<'view> {
628    /// Opens `bytes` as a header-validated snapshot handle.
629    ///
630    /// Validates the magic bytes, format major and minor, header size, and
631    /// reserved bytes only. The section table and payload region are not
632    /// inspected and may still be malformed.
633    ///
634    /// # Errors
635    ///
636    /// Returns [`SnapshotError`] for any header-level invariant violation.
637    ///
638    /// # Performance
639    ///
640    /// This function is `O(1)`.
641    pub fn open(bytes: &'view [u8]) -> Result<Self, SnapshotError> {
642        let (header, _after_header) = parse_header(bytes)?;
643        validate_magic_versions_reserved(header)?;
644        Ok(Self {
645            bytes,
646            format_major: header.format_major.get(),
647            format_minor: header.format_minor.get(),
648        })
649    }
650
651    /// Returns the borrowed snapshot bytes.
652    ///
653    /// # Performance
654    ///
655    /// This method is `O(1)`.
656    #[must_use]
657    pub const fn bytes(&self) -> &'view [u8] {
658        self.bytes
659    }
660
661    /// Returns the format major version recorded in the snapshot header.
662    ///
663    /// # Performance
664    ///
665    /// This method is `O(1)`.
666    #[must_use]
667    pub const fn format_major(&self) -> u32 {
668        self.format_major
669    }
670
671    /// Returns the format minor version recorded in the snapshot header.
672    ///
673    /// # Performance
674    ///
675    /// This method is `O(1)`.
676    #[must_use]
677    pub const fn format_minor(&self) -> u32 {
678        self.format_minor
679    }
680}
681
682/// Validated, borrowed handle to a snapshot's bytes and section table.
683///
684/// A `Snapshot` is constructed via [`Snapshot::open`] (default
685/// [`ValidationLevel::Layout`]) or [`Snapshot::open_with`]. The handle
686/// itself is `Copy` and trivially cheap to pass; cloning it does not
687/// re-validate.
688///
689/// For header-only inspection without parsing the section table, use
690/// [`HeaderOnlySnapshot`] instead — `Snapshot` always carries a validated
691/// section table.
692///
693/// # Performance
694///
695/// Open is `O(s^2)` for `s` sections at [`ValidationLevel::Layout`] due to
696/// duplicate-kind detection, otherwise `O(s)`. Subsequent reads are `O(1)`
697/// to `O(s)` per call. No allocation occurs.
698#[derive(Clone, Copy, Debug)]
699pub struct Snapshot<'view> {
700    /// Borrowed snapshot bytes.
701    bytes: &'view [u8],
702    /// Format major version recorded in the header.
703    format_major: u32,
704    /// Format minor version recorded in the header.
705    format_minor: u32,
706    /// Borrowed, validated section table entries.
707    entries: &'view [RawSectionEntry],
708}
709
710impl<'view> Snapshot<'view> {
711    /// Opens `bytes` as a validated snapshot at [`ValidationLevel::Layout`].
712    ///
713    /// # Errors
714    ///
715    /// Returns [`SnapshotError`] for any header, section table, or layout
716    /// invariant violation.
717    ///
718    /// # Performance
719    ///
720    /// `O(s^2)` for `s` section entries.
721    pub fn open(bytes: &'view [u8]) -> Result<Self, SnapshotError> {
722        Self::open_with(bytes, ValidationLevel::Layout)
723    }
724
725    /// Opens `bytes` as a snapshot validated at the requested level.
726    ///
727    /// `level` selects between [`ValidationLevel::SectionTable`] (per-entry
728    /// self-consistency only) and [`ValidationLevel::Layout`] (full
729    /// payload-bounds and duplicate-kind walk). Header-only validation is
730    /// deliberately not selectable here; callers wanting it should use
731    /// [`HeaderOnlySnapshot::open`].
732    ///
733    /// # Errors
734    ///
735    /// Returns [`SnapshotError`] for any invariant violation visible at
736    /// the requested level.
737    ///
738    /// # Performance
739    ///
740    /// `O(s)` at [`ValidationLevel::SectionTable`], `O(s^2)` at
741    /// [`ValidationLevel::Layout`].
742    pub fn open_with(bytes: &'view [u8], level: ValidationLevel) -> Result<Self, SnapshotError> {
743        let (header, after_header) = parse_header(bytes)?;
744        validate_magic_versions_reserved(header)?;
745
746        let format_major = header.format_major.get();
747        let format_minor = header.format_minor.get();
748
749        let section_count = header.section_count.get();
750        if section_count > MAX_SECTION_COUNT {
751            return Err(SnapshotError::SectionCountTooLarge {
752                count: section_count,
753                max: MAX_SECTION_COUNT,
754            });
755        }
756        let Ok(section_count_usize) = usize::try_from(section_count) else {
757            return Err(SnapshotError::UsizeOverflow {
758                value: u64::from(section_count),
759            });
760        };
761        let Some(table_len) = section_count_usize.checked_mul(SECTION_ENTRY_SIZE) else {
762            return Err(SnapshotError::SectionCountTooLarge {
763                count: section_count,
764                max: MAX_SECTION_COUNT,
765            });
766        };
767        if after_header.len() < table_len {
768            return Err(SnapshotError::TruncatedSectionTable {
769                needed: table_len,
770                actual: after_header.len(),
771            });
772        }
773
774        let table_bytes = &after_header[..table_len];
775        let entries =
776            <[RawSectionEntry]>::ref_from_bytes_with_elems(table_bytes, section_count_usize)
777                .map_err(|_error| SnapshotError::MalformedSectionTable)?;
778
779        validate_section_table(bytes, entries, level)?;
780
781        Ok(Self {
782            bytes,
783            format_major,
784            format_minor,
785            entries,
786        })
787    }
788
789    /// Returns the format major version recorded in the snapshot header.
790    ///
791    /// # Performance
792    ///
793    /// This method is `O(1)`.
794    #[must_use]
795    pub const fn format_major(&self) -> u32 {
796        self.format_major
797    }
798
799    /// Returns the format minor version recorded in the snapshot header.
800    ///
801    /// # Performance
802    ///
803    /// This method is `O(1)`.
804    #[must_use]
805    pub const fn format_minor(&self) -> u32 {
806        self.format_minor
807    }
808
809    /// Returns the number of validated sections.
810    ///
811    /// # Performance
812    ///
813    /// This method is `O(1)`.
814    #[must_use]
815    pub const fn section_count(&self) -> usize {
816        self.entries.len()
817    }
818
819    /// Returns an iterator over all validated sections.
820    ///
821    /// # Performance
822    ///
823    /// Constructing the iterator is `O(1)`; advancing it is `O(1)` per step.
824    #[must_use]
825    pub fn sections(&self) -> SectionIter<'view> {
826        SectionIter {
827            bytes: self.bytes,
828            entries: self.entries.iter(),
829        }
830    }
831
832    /// Returns the section with the given `kind`, when present.
833    ///
834    /// # Performance
835    ///
836    /// This method is `O(s)` for `s` section entries.
837    #[must_use]
838    pub fn section(&self, kind: u32) -> Option<Section<'view>> {
839        let bytes = self.bytes;
840        self.entries.iter().find_map(|entry| {
841            if entry.kind.get() == kind {
842                Some(Section::from_entry(bytes, entry))
843            } else {
844                None
845            }
846        })
847    }
848
849    /// Binds a width-typed section by kind and version in one step.
850    ///
851    /// Looks up the section, checks its version against `expected_version`, and
852    /// borrows the payload as `&[W::LittleEndianWord]`. This is the single
853    /// section-open primitive every layout crate reuses instead of
854    /// re-implementing the lookup/version/typed-view sequence with its own error
855    /// variants; callers map [`SectionBindError`] into their own typed error at
856    /// the boundary.
857    ///
858    /// # Errors
859    ///
860    /// Returns [`SectionBindError::Missing`] when no section has `kind`,
861    /// [`SectionBindError::VersionMismatch`] when the recorded version differs,
862    /// and [`SectionBindError::View`] when the payload cannot be borrowed as the
863    /// requested little-endian word.
864    ///
865    /// # Performance
866    ///
867    /// This method is `O(s)` for `s` section entries plus the typed-view checks.
868    pub fn typed_section<W>(
869        &self,
870        kind: u32,
871        expected_version: u32,
872    ) -> Result<&'view [W::LittleEndianWord], SectionBindError>
873    where
874        W: SnapshotWidth,
875    {
876        let section = self
877            .section(kind)
878            .ok_or(SectionBindError::Missing { kind })?;
879        if section.version() != expected_version {
880            return Err(SectionBindError::VersionMismatch {
881                kind,
882                expected: expected_version,
883                actual: section.version(),
884            });
885        }
886        section
887            .try_as_slice::<W::LittleEndianWord>()
888            .map_err(|error| SectionBindError::View { kind, error })
889    }
890}
891
892/// Iterator over a snapshot's validated sections.
893///
894/// Yields each [`Section`] in section-table order. The iterator does not
895/// allocate and borrows from the snapshot's underlying byte slice.
896///
897/// # Performance
898///
899/// Advancing the iterator is `O(1)` per step.
900#[derive(Clone, Debug)]
901pub struct SectionIter<'view> {
902    /// Borrowed snapshot bytes.
903    bytes: &'view [u8],
904    /// Remaining section table entries to yield.
905    entries: core::slice::Iter<'view, RawSectionEntry>,
906}
907
908impl<'view> Iterator for SectionIter<'view> {
909    type Item = Section<'view>;
910
911    fn next(&mut self) -> Option<Self::Item> {
912        self.entries
913            .next()
914            .map(|entry| Section::from_entry(self.bytes, entry))
915    }
916
917    fn size_hint(&self) -> (usize, Option<usize>) {
918        self.entries.size_hint()
919    }
920}
921
922impl ExactSizeIterator for SectionIter<'_> {
923    fn len(&self) -> usize {
924        self.entries.len()
925    }
926}
927
928/// Description of one section to include in a snapshot.
929///
930/// Every field is opaque to the encoder. `kind` and `version` are passed
931/// through unchanged; `alignment_log2` controls payload alignment relative
932/// to the snapshot's start; `payload` is the section's raw bytes.
933///
934/// # Performance
935///
936/// `perf: unspecified`; this is a metadata struct.
937#[derive(Clone, Copy, Debug)]
938pub struct PendingSection<'a> {
939    /// Section kind to record in the entry.
940    pub kind: u32,
941    /// Section version to record in the entry.
942    pub version: u32,
943    /// `log2` of the requested payload alignment; capped at
944    /// [`MAX_ALIGNMENT_LOG2`].
945    pub alignment_log2: u8,
946    /// Section payload bytes.
947    pub payload: &'a [u8],
948}
949
950/// Validated plan that can compute its encoded length and write itself.
951///
952/// `SnapshotPlan` performs all duplicate-kind, alignment, and count checks
953/// at construction. After construction, [`encoded_len`](Self::encoded_len)
954/// and [`write_into`](Self::write_into) are guaranteed to succeed for any
955/// caller-supplied buffer that is at least `encoded_len()` bytes long.
956///
957/// # Performance
958///
959/// Construction is `O(s^2)` for `s` sections (duplicate-kind check).
960/// `encoded_len` and `write_into` are `O(s + total payload bytes)`.
961#[derive(Clone, Copy, Debug)]
962pub struct SnapshotPlan<'a> {
963    /// Borrowed pending section descriptors, in declaration order.
964    sections: &'a [PendingSection<'a>],
965}
966
967impl<'a> SnapshotPlan<'a> {
968    /// Validates a slice of pending sections and constructs a plan.
969    ///
970    /// # Errors
971    ///
972    /// Returns [`PlanError`] when alignment is too large, too many sections
973    /// are supplied, or duplicate kinds are present.
974    ///
975    /// # Performance
976    ///
977    /// This function is `O(s^2)` for `s` sections.
978    pub fn new(sections: &'a [PendingSection<'a>]) -> Result<Self, PlanError> {
979        if sections.len() > MAX_SECTION_COUNT as usize {
980            return Err(PlanError::TooManySections {
981                count: sections.len(),
982            });
983        }
984
985        for section in sections {
986            if section.alignment_log2 > MAX_ALIGNMENT_LOG2 {
987                return Err(PlanError::AlignmentTooLarge {
988                    alignment_log2: section.alignment_log2,
989                });
990            }
991        }
992        if let Some(kind) = first_duplicate_kind(sections, |section| section.kind) {
993            return Err(PlanError::DuplicateKind { kind });
994        }
995
996        Ok(Self { sections })
997    }
998
999    /// Returns the number of sections in this plan.
1000    ///
1001    /// # Performance
1002    ///
1003    /// This method is `O(1)`.
1004    #[must_use]
1005    pub const fn section_count(&self) -> usize {
1006        self.sections.len()
1007    }
1008
1009    /// Computes the total bytes the encoded snapshot will occupy.
1010    ///
1011    /// # Errors
1012    ///
1013    /// Returns [`PlanError::PayloadOverflow`] when offset arithmetic
1014    /// exceeds `usize` or `u64` representable values.
1015    ///
1016    /// # Performance
1017    ///
1018    /// This function is `O(s)` for `s` sections.
1019    pub fn encoded_len(&self) -> Result<usize, PlanError> {
1020        let table_len = self
1021            .sections
1022            .len()
1023            .checked_mul(SECTION_ENTRY_SIZE)
1024            .ok_or(PlanError::PayloadOverflow)?;
1025        let mut total = HEADER_SIZE
1026            .checked_add(table_len)
1027            .ok_or(PlanError::PayloadOverflow)?;
1028
1029        for section in self.sections {
1030            total = align_up_checked(total, section.alignment_log2)?;
1031            total = total
1032                .checked_add(section.payload.len())
1033                .ok_or(PlanError::PayloadOverflow)?;
1034        }
1035
1036        u64::try_from(total).map_err(|_error| PlanError::PayloadOverflow)?;
1037        Ok(total)
1038    }
1039
1040    /// Writes the encoded snapshot into `out` and returns the number of
1041    /// bytes written.
1042    ///
1043    /// Padding bytes between the section table and each section payload
1044    /// are zero-filled deterministically; the resulting bytes are stable
1045    /// for any logical input.
1046    ///
1047    /// # Errors
1048    ///
1049    /// Returns [`PlanError::BufferTooSmall`] when `out.len()` is less than
1050    /// [`encoded_len`](Self::encoded_len) or [`PlanError::PayloadOverflow`]
1051    /// when offset arithmetic overflows during the write walk.
1052    ///
1053    /// # Performance
1054    ///
1055    /// This function is `O(s + total payload bytes)`.
1056    pub fn write_into(&self, out: &mut [u8]) -> Result<usize, PlanError> {
1057        let needed = self.encoded_len()?;
1058        if out.len() < needed {
1059            return Err(PlanError::BufferTooSmall {
1060                needed,
1061                actual: out.len(),
1062            });
1063        }
1064
1065        let prefix = &mut out[..needed];
1066        prefix.fill(0);
1067
1068        let section_count_u32 = match u32::try_from(self.sections.len()) {
1069            Ok(value) => value,
1070            Err(_error) => {
1071                return Err(PlanError::TooManySections {
1072                    count: self.sections.len(),
1073                });
1074            }
1075        };
1076        let header = RawHeader {
1077            magic: FORMAT_MAGIC,
1078            format_major: U32::new(FORMAT_MAJOR),
1079            format_minor: U32::new(FORMAT_MINOR),
1080            header_size: U32::new(HEADER_SIZE_U32),
1081            section_count: U32::new(section_count_u32),
1082            reserved: [0; 8],
1083        };
1084        prefix[..HEADER_SIZE].copy_from_slice(header.as_bytes());
1085
1086        let table_start = HEADER_SIZE;
1087        let payload_start = table_start
1088            .checked_add(
1089                self.sections
1090                    .len()
1091                    .checked_mul(SECTION_ENTRY_SIZE)
1092                    .ok_or(PlanError::PayloadOverflow)?,
1093            )
1094            .ok_or(PlanError::PayloadOverflow)?;
1095        let mut cursor = payload_start;
1096
1097        for (index, section) in self.sections.iter().enumerate() {
1098            cursor = align_up_checked(cursor, section.alignment_log2)?;
1099            let payload_end = cursor
1100                .checked_add(section.payload.len())
1101                .ok_or(PlanError::PayloadOverflow)?;
1102
1103            let offset_u64 = u64::try_from(cursor).map_err(|_error| PlanError::PayloadOverflow)?;
1104            let length_u64 = u64::try_from(section.payload.len())
1105                .map_err(|_error| PlanError::PayloadOverflow)?;
1106            let entry = RawSectionEntry {
1107                offset: U64::new(offset_u64),
1108                length: U64::new(length_u64),
1109                kind: U32::new(section.kind),
1110                version: U32::new(section.version),
1111                reserved_checksum: [0; 4],
1112                alignment_log2: section.alignment_log2,
1113                flags: 0,
1114                reserved: [0; 2],
1115            };
1116            let entry_offset = table_start
1117                .checked_add(
1118                    index
1119                        .checked_mul(SECTION_ENTRY_SIZE)
1120                        .ok_or(PlanError::PayloadOverflow)?,
1121                )
1122                .ok_or(PlanError::PayloadOverflow)?;
1123            prefix[entry_offset..entry_offset + SECTION_ENTRY_SIZE]
1124                .copy_from_slice(entry.as_bytes());
1125
1126            prefix[cursor..payload_end].copy_from_slice(section.payload);
1127            cursor = payload_end;
1128        }
1129
1130        Ok(needed)
1131    }
1132}
1133
1134/// Rounds `value` up to the next multiple of `1 << alignment_log2`.
1135///
1136/// # Errors
1137///
1138/// Returns [`PlanError::PayloadOverflow`] on `usize` overflow.
1139///
1140/// # Performance
1141///
1142/// This function is `O(1)`.
1143fn align_up_checked(value: usize, alignment_log2: u8) -> Result<usize, PlanError> {
1144    let alignment = 1usize << alignment_log2;
1145    let mask = alignment - 1;
1146    let added = value.checked_add(mask).ok_or(PlanError::PayloadOverflow)?;
1147    Ok(added & !mask)
1148}
1149
1150/// One owned section pending in a [`SnapshotBuilder`].
1151#[cfg(feature = "alloc")]
1152#[derive(Clone, Debug)]
1153struct OwnedSection {
1154    /// Section kind.
1155    kind: u32,
1156    /// Section version.
1157    version: u32,
1158    /// Declared payload alignment as `log2`.
1159    alignment_log2: u8,
1160    /// Owned payload bytes.
1161    payload: Vec<u8>,
1162}
1163
1164/// Owning snapshot builder that produces a `Vec<u8>` on finish.
1165///
1166/// The builder rejects duplicate kinds, alignment overflows, and section
1167/// count overflows at `add_section*` time, so the only failure that can
1168/// reach [`SnapshotBuilder::finish`] is [`PlanError::PayloadOverflow`] —
1169/// when the cumulative encoded length would overflow `u64` or `usize`.
1170///
1171/// # Performance
1172///
1173/// `add_section*` methods are `O(s)` for the in-progress section count.
1174/// [`finish`](Self::finish) is `O(s + total payload bytes)`.
1175#[cfg(feature = "alloc")]
1176#[derive(Clone, Debug, Default)]
1177#[must_use]
1178pub struct SnapshotBuilder {
1179    /// Owned, in-order sections.
1180    sections: Vec<OwnedSection>,
1181}
1182
1183#[cfg(feature = "alloc")]
1184impl SnapshotBuilder {
1185    /// Constructs an empty builder.
1186    ///
1187    /// # Performance
1188    ///
1189    /// This function is `O(1)`.
1190    pub fn new() -> Self {
1191        Self::default()
1192    }
1193
1194    /// Appends a section with the given metadata and owned payload.
1195    ///
1196    /// # Errors
1197    ///
1198    /// Returns [`PlanError`] when `alignment_log2` is too large, when the
1199    /// builder already holds the maximum permitted section count, or when
1200    /// `kind` collides with an earlier section.
1201    ///
1202    /// # Performance
1203    ///
1204    /// This method is `O(s)` for the in-progress section count.
1205    pub fn add_section(
1206        &mut self,
1207        kind: u32,
1208        version: u32,
1209        alignment_log2: u8,
1210        payload: Vec<u8>,
1211    ) -> Result<&mut Self, PlanError> {
1212        if alignment_log2 > MAX_ALIGNMENT_LOG2 {
1213            return Err(PlanError::AlignmentTooLarge { alignment_log2 });
1214        }
1215        if self.sections.len() >= MAX_SECTION_COUNT as usize {
1216            return Err(PlanError::TooManySections {
1217                count: self.sections.len() + 1,
1218            });
1219        }
1220        for prior in &self.sections {
1221            if prior.kind == kind {
1222                return Err(PlanError::DuplicateKind { kind });
1223            }
1224        }
1225        self.sections.push(OwnedSection {
1226            kind,
1227            version,
1228            alignment_log2,
1229            payload,
1230        });
1231        Ok(self)
1232    }
1233
1234    /// Appends a section whose alignment is derived from `T`.
1235    ///
1236    /// The payload is copied via [`zerocopy::IntoBytes`].
1237    ///
1238    /// # Errors
1239    ///
1240    /// Returns [`PlanError`] for the same reasons as
1241    /// [`add_section`](Self::add_section), plus
1242    /// [`PlanError::AlignmentTooLarge`] when `align_of::<T>()` exceeds
1243    /// the v1 cap.
1244    ///
1245    /// # Performance
1246    ///
1247    /// This method is `O(s + payload.len() * size_of::<T>())`.
1248    pub fn add_section_typed<T>(
1249        &mut self,
1250        kind: u32,
1251        version: u32,
1252        payload: &[T],
1253    ) -> Result<&mut Self, PlanError>
1254    where
1255        T: zerocopy::IntoBytes + zerocopy::Immutable,
1256    {
1257        let alignment = core::mem::align_of::<T>();
1258        let alignment_log2 = match u8::try_from(alignment.trailing_zeros()) {
1259            Ok(value) => value,
1260            Err(_error) => {
1261                return Err(PlanError::AlignmentTooLarge {
1262                    alignment_log2: u8::MAX,
1263                });
1264            }
1265        };
1266        if alignment_log2 > MAX_ALIGNMENT_LOG2 {
1267            return Err(PlanError::AlignmentTooLarge { alignment_log2 });
1268        }
1269        let bytes = payload.as_bytes().to_vec();
1270        self.add_section(kind, version, alignment_log2, bytes)
1271    }
1272
1273    /// Appends a section containing explicit little-endian typed words.
1274    ///
1275    /// Prefer [`add_section_widths`](Self::add_section_widths), which takes a
1276    /// native index slice and lowers it through `slice_to_le`, enforcing the
1277    /// little-endian guarantee in the type system. This method exists for
1278    /// callers that already hold portable byteorder words such as
1279    /// `zerocopy::byteorder::U32<LE>`; the payload is copied via
1280    /// [`zerocopy::IntoBytes`] and the alignment is derived from `T`.
1281    ///
1282    /// # Errors
1283    ///
1284    /// Returns [`PlanError`] for the same reasons as
1285    /// [`add_section_typed`](Self::add_section_typed).
1286    ///
1287    /// # Performance
1288    ///
1289    /// This method is `O(s + payload.len() * size_of::<T>())`.
1290    pub fn add_section_little_endian<T>(
1291        &mut self,
1292        kind: u32,
1293        version: u32,
1294        payload: &[T],
1295    ) -> Result<&mut Self, PlanError>
1296    where
1297        T: zerocopy::IntoBytes + zerocopy::Immutable,
1298    {
1299        self.add_section_typed(kind, version, payload)
1300    }
1301
1302    /// Appends a section from a native-width index slice, lowering it to its
1303    /// explicit little-endian storage words first.
1304    ///
1305    /// Convenience wrapper that calls `oxgraph_layout_util::slice_to_le` and
1306    /// then [`add_section_little_endian`](Self::add_section_little_endian), so
1307    /// exporters can pass native `&[u32]`-style slices without converting by
1308    /// hand. Requires the `alloc` feature (it allocates the converted words).
1309    ///
1310    /// # Errors
1311    ///
1312    /// Returns [`PlanError`] for the same reasons as
1313    /// [`add_section_typed`](Self::add_section_typed).
1314    ///
1315    /// # Performance
1316    ///
1317    /// This method is `O(s + payload.len())` plus one allocation for the
1318    /// converted words.
1319    pub fn add_section_widths<W>(
1320        &mut self,
1321        kind: u32,
1322        version: u32,
1323        values: &[W],
1324    ) -> Result<&mut Self, PlanError>
1325    where
1326        W: SnapshotWidth,
1327    {
1328        let words = oxgraph_layout_util::slice_to_le(values);
1329        self.add_section_little_endian(kind, version, &words)
1330    }
1331
1332    /// Returns the number of pending sections.
1333    ///
1334    /// # Performance
1335    ///
1336    /// This method is `O(1)`.
1337    #[must_use]
1338    pub const fn section_count(&self) -> usize {
1339        self.sections.len()
1340    }
1341
1342    /// Encodes the pending sections into an owned snapshot byte vector.
1343    ///
1344    /// The builder enforces per-insert invariants
1345    /// ([`PlanError::AlignmentTooLarge`], [`PlanError::TooManySections`],
1346    /// [`PlanError::DuplicateKind`]) on `add_section*`, so this method
1347    /// only fails when the cumulative payload arithmetic overflows
1348    /// (`u64`/`usize`), surfaced as [`PlanError::PayloadOverflow`].
1349    ///
1350    /// # Errors
1351    ///
1352    /// Returns [`PlanError::PayloadOverflow`] when the total encoded length
1353    /// would overflow `u64` or `usize`. All other [`PlanError`] variants
1354    /// are caught at `add_section*` time and cannot reach this method.
1355    ///
1356    /// # Performance
1357    ///
1358    /// This method is `O(s + total payload bytes)`.
1359    pub fn finish(self) -> Result<Vec<u8>, PlanError> {
1360        let pending: Vec<PendingSection<'_>> = self
1361            .sections
1362            .iter()
1363            .map(|section| PendingSection {
1364                kind: section.kind,
1365                version: section.version,
1366                alignment_log2: section.alignment_log2,
1367                payload: section.payload.as_slice(),
1368            })
1369            .collect();
1370
1371        let plan = SnapshotPlan::new(&pending)?;
1372        let needed = plan.encoded_len()?;
1373        let mut out = vec![0u8; needed];
1374        plan.write_into(&mut out)?;
1375        Ok(out)
1376    }
1377}