// fsqlite_types/ecs.rs
1//! ECS (Erasure-Coded Stream) substrate types.
2//!
3//! This module defines foundational identity primitives for Native mode:
4//! - [`ObjectId`] / [`PayloadHash`]: content-addressed identity (§3.5.1)
5//! - [`SymbolRecord`] / [`SymbolRecordFlags`]: physical storage envelope (§3.5.2)
6//!
7//! Spec: COMPREHENSIVE_SPEC_FOR_FRANKENSQLITE_V1.md §3.5.1–§3.5.2.
8
9use std::fmt;
10
11use crate::encoding::{append_u32_le, append_u64_le, read_u32_le, read_u64_le, write_u64_le};
12use crate::glossary::{OTI_WIRE_SIZE, Oti};
13
/// Domain separation prefix for ECS ObjectIds (spec: `"fsqlite:ecs:v1"`).
///
/// Prepended to every ObjectId hash input (see `ObjectId::derive*`) so these
/// digests cannot collide with BLAKE3 hashes computed in other domains.
const ECS_OBJECT_ID_DOMAIN_SEPARATOR: &[u8] = b"fsqlite:ecs:v1";
16
/// Canonical 32-byte hash of an ECS object's payload.
///
/// The spec refers to this as `payload_hash` in:
/// `ObjectId = Trunc128(BLAKE3("fsqlite:ecs:v1" || canonical_object_header || payload_hash))`.
///
/// Newtype over the raw digest bytes; `repr(transparent)` guarantees the
/// same in-memory layout as `[u8; 32]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[repr(transparent)]
pub struct PayloadHash([u8; 32]);
24
25impl PayloadHash {
26    /// Construct from raw bytes.
27    #[must_use]
28    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
29        Self(bytes)
30    }
31
32    /// Return the hash bytes.
33    #[must_use]
34    pub const fn as_bytes(&self) -> &[u8; 32] {
35        &self.0
36    }
37
38    /// Hash a payload using BLAKE3-256.
39    #[must_use]
40    pub fn blake3(payload: &[u8]) -> Self {
41        let hash = blake3::hash(payload);
42        Self(*hash.as_bytes())
43    }
44}
45
/// 16-byte truncated content-addressed identity for an ECS object.
///
/// Spec:
/// `ObjectId = Trunc128(BLAKE3("fsqlite:ecs:v1" || canonical_object_header || payload_hash))`.
///
/// Derives `Ord`/`Hash` so it can serve directly as a map/index key;
/// `repr(transparent)` keeps the layout identical to `[u8; 16]`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
#[repr(transparent)]
pub struct ObjectId([u8; 16]);
55
56impl ObjectId {
57    /// ObjectId length in bytes.
58    pub const LEN: usize = 16;
59
60    /// Domain separation prefix from the spec.
61    pub const DOMAIN_SEPARATOR: &'static [u8] = ECS_OBJECT_ID_DOMAIN_SEPARATOR;
62
63    /// Construct from raw bytes.
64    #[must_use]
65    pub const fn from_bytes(bytes: [u8; 16]) -> Self {
66        Self(bytes)
67    }
68
69    /// Return the raw bytes.
70    #[must_use]
71    pub const fn as_bytes(&self) -> &[u8; 16] {
72        &self.0
73    }
74
75    /// Derive an ObjectId from already-canonicalized bytes.
76    ///
77    /// `canonical_bytes` must be a deterministic, versioned wire-format blob
78    /// (spec: "not serde vibes") representing the object's header plus its
79    /// `payload_hash`.
80    #[must_use]
81    pub fn derive_from_canonical_bytes(canonical_bytes: &[u8]) -> Self {
82        let mut hasher = blake3::Hasher::new();
83        hasher.update(Self::DOMAIN_SEPARATOR);
84        hasher.update(canonical_bytes);
85        let digest = hasher.finalize();
86
87        let mut out = [0u8; Self::LEN];
88        out.copy_from_slice(&digest.as_bytes()[..Self::LEN]);
89        Self(out)
90    }
91
92    /// Derive an ObjectId from canonical header bytes and a payload hash.
93    #[must_use]
94    pub fn derive(canonical_object_header: &[u8], payload_hash: PayloadHash) -> Self {
95        let mut hasher = blake3::Hasher::new();
96        hasher.update(Self::DOMAIN_SEPARATOR);
97        hasher.update(canonical_object_header);
98        hasher.update(payload_hash.as_bytes());
99        let digest = hasher.finalize();
100
101        let mut out = [0u8; Self::LEN];
102        out.copy_from_slice(&digest.as_bytes()[..Self::LEN]);
103        Self(out)
104    }
105}
106
107impl fmt::Display for ObjectId {
108    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109        for b in self.0 {
110            write!(f, "{b:02x}")?;
111        }
112        Ok(())
113    }
114}
115
116impl AsRef<[u8]> for ObjectId {
117    fn as_ref(&self) -> &[u8] {
118        &self.0
119    }
120}
121
122impl From<[u8; 16]> for ObjectId {
123    fn from(value: [u8; 16]) -> Self {
124        Self(value)
125    }
126}
127
128// ---------------------------------------------------------------------------
129// §3.5.2 SymbolRecord Envelope and Auth Tags
130// ---------------------------------------------------------------------------
131
/// Magic bytes identifying a SymbolRecord: `"FSEC"` (0x46 0x53 0x45 0x43).
///
/// Spelled as the byte-string literal so the constant visibly matches the
/// ASCII tag it documents.
pub const SYMBOL_RECORD_MAGIC: [u8; 4] = *b"FSEC";

/// Current envelope version.
pub const SYMBOL_RECORD_VERSION: u8 = 1;

/// Domain separation prefix for symbol auth tags (keyed-MAC input prefix).
const SYMBOL_AUTH_DOMAIN: &[u8] = b"fsqlite:symbol-auth:v1";
140
bitflags::bitflags! {
    /// Flags for a [`SymbolRecord`].
    ///
    /// Additional local flags MAY be defined but MUST be treated as advisory
    /// optimization hints. Correctness never depends on them.
    ///
    /// Stored on the wire as a single byte; unknown bits are dropped on
    /// deserialization (`from_bits_truncate` in `SymbolRecord::from_bytes`).
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct SymbolRecordFlags: u8 {
        /// This record is the first source symbol (esi = 0) and the writer
        /// attempted to place the entire systematic run contiguously.
        const SYSTEMATIC_RUN_START = 0x01;
    }
}
153
/// Validation error when deserializing or checking a [`SymbolRecord`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolRecordError {
    /// Input too short to contain a complete record.
    TooShort { expected_min: usize, actual: usize },
    /// Magic bytes do not match `"FSEC"`.
    BadMagic([u8; 4]),
    /// Envelope version is unsupported.
    UnsupportedVersion(u8),
    /// `symbol_size != OTI.T` — key invariant violated.
    SymbolSizeMismatch { symbol_size: u32, oti_t: u32 },
    /// `symbol_size` is not representable as a `usize` on this platform.
    SymbolSizeTooLarge { symbol_size: u32 },
    /// Size computation overflowed.
    SizeOverflow,
    /// `frame_xxh3` integrity check failed.
    IntegrityFailure { expected: u64, computed: u64 },
    /// Auth tag verification failed.
    ///
    /// NOTE(review): not constructed anywhere in this module —
    /// `verify_auth` returns `bool`. Presumably produced by a higher layer;
    /// confirm before removing.
    AuthTagFailure,
}
174
175impl fmt::Display for SymbolRecordError {
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        match self {
178            Self::TooShort {
179                expected_min,
180                actual,
181            } => {
182                write!(
183                    f,
184                    "symbol record too short: need {expected_min}, got {actual}"
185                )
186            }
187            Self::BadMagic(m) => write!(f, "bad magic: {m:02x?}"),
188            Self::UnsupportedVersion(v) => write!(f, "unsupported version: {v}"),
189            Self::SymbolSizeMismatch { symbol_size, oti_t } => {
190                write!(f, "symbol_size ({symbol_size}) != OTI.T ({oti_t})")
191            }
192            Self::SymbolSizeTooLarge { symbol_size } => {
193                write!(f, "symbol_size too large for platform: {symbol_size}")
194            }
195            Self::SizeOverflow => write!(f, "symbol record size overflow"),
196            Self::IntegrityFailure { expected, computed } => {
197                write!(
198                    f,
199                    "frame_xxh3 mismatch: stored {expected:#018x}, computed {computed:#018x}"
200                )
201            }
202            Self::AuthTagFailure => write!(f, "auth tag verification failed"),
203        }
204    }
205}
206
// Marker impl: `Debug` + `Display` above satisfy the `Error` contract.
impl std::error::Error for SymbolRecordError {}
208
/// Fixed header size before `symbol_data`:
/// magic(4) + version(1) + object_id(16) + OTI(22) + esi(4) + symbol_size(4) = 51.
///
/// Must stay in sync with the field order written by
/// `SymbolRecord::to_bytes` / `SymbolRecord::pre_hash_bytes`.
const HEADER_BEFORE_DATA: usize = 4 + 1 + 16 + OTI_WIRE_SIZE + 4 + 4;

/// Fixed trailer size after `symbol_data`:
/// flags(1) + frame_xxh3(8) + auth_tag(16) = 25.
const TRAILER_AFTER_DATA: usize = 1 + 8 + 16;
216
/// The atomic unit of physical storage for ECS objects (§3.5.2).
///
/// A `SymbolRecord` is self-describing: a decoder collecting K' symbols with
/// the same `ObjectId` can reconstruct the original object without any
/// external metadata.
///
/// Wire layout (all integers little-endian):
/// ```text
/// magic[4] | version[1] | object_id[16] | OTI[22] | esi[4] | symbol_size[4]
/// | symbol_data[T] | flags[1] | frame_xxh3[8] | auth_tag[16]
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolRecord {
    /// Content-addressed identity of the parent ECS object.
    pub object_id: ObjectId,
    /// RaptorQ Object Transmission Information.
    pub oti: Oti,
    /// Encoding Symbol Identifier — which symbol this is.
    pub esi: u32,
    /// The actual RaptorQ encoding symbol payload.
    /// Invariant: `symbol_data.len() == oti.t` (enforced in `new`).
    pub symbol_data: Vec<u8>,
    /// Advisory flags.
    pub flags: SymbolRecordFlags,
    /// xxhash3 of all preceding fields (magic through flags) for fast
    /// integrity checking.
    pub frame_xxh3: u64,
    /// Optional BLAKE3-keyed auth tag for authenticated transport.
    /// All-zero when `symbol_auth = off`.
    pub auth_tag: [u8; 16],
}
246
impl SymbolRecord {
    /// Compute the `frame_xxh3` digest over the header + symbol_data + flags.
    ///
    /// This covers everything from `magic` through `flags`, i.e. all fields
    /// preceding `frame_xxh3` in the wire layout.
    #[must_use]
    fn compute_frame_xxh3(pre_hash_bytes: &[u8]) -> u64 {
        xxhash_rust::xxh3::xxh3_64(pre_hash_bytes)
    }

    /// Build the byte region that `frame_xxh3` covers (magic..flags inclusive).
    ///
    /// Must stay byte-for-byte identical to the prefix produced by
    /// [`Self::to_bytes`]; any drift silently breaks integrity verification.
    fn pre_hash_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(
            HEADER_BEFORE_DATA + self.symbol_data.len() + 1, /* flags */
        );
        buf.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        buf.push(SYMBOL_RECORD_VERSION);
        buf.extend_from_slice(self.object_id.as_bytes());
        buf.extend_from_slice(&self.oti.to_bytes());
        append_u32_le(&mut buf, self.esi);
        // Key invariant: the wire `symbol_size` field is always OTI.t.
        let expected_len = usize::try_from(self.oti.t).expect("OTI.t fits in usize");
        debug_assert_eq!(
            self.symbol_data.len(),
            expected_len,
            "symbol_data length must equal OTI.t"
        );
        append_u32_le(&mut buf, self.oti.t);
        buf.extend_from_slice(&self.symbol_data);
        buf.push(self.flags.bits());
        buf
    }

    /// Create a new `SymbolRecord`, computing `frame_xxh3` automatically.
    ///
    /// `auth_tag` is set to all-zero (symbol_auth off). Use
    /// [`Self::with_auth_tag`] to set an authenticated tag.
    ///
    /// # Panics
    /// Panics if `symbol_data.len() != oti.t` — constructing a record that
    /// violates the size invariant is a caller bug, not a recoverable error.
    #[must_use]
    pub fn new(
        object_id: ObjectId,
        oti: Oti,
        esi: u32,
        symbol_data: Vec<u8>,
        flags: SymbolRecordFlags,
    ) -> Self {
        let expected_len = usize::try_from(oti.t).expect("OTI.t fits in usize");
        assert_eq!(
            symbol_data.len(),
            expected_len,
            "SymbolRecord::new: symbol_data.len ({}) must equal oti.t ({})",
            symbol_data.len(),
            oti.t
        );

        // Build with a placeholder digest first, then hash the assembled
        // prefix — frame_xxh3 is not itself part of the hashed region.
        let mut rec = Self {
            object_id,
            oti,
            esi,
            symbol_data,
            flags,
            frame_xxh3: 0,
            auth_tag: [0u8; 16],
        };
        let pre_hash = rec.pre_hash_bytes();
        rec.frame_xxh3 = Self::compute_frame_xxh3(&pre_hash);
        rec
    }

    /// Set the auth tag using a BLAKE3-keyed MAC.
    ///
    /// `epoch_key` is the 32-byte key derived from `SymbolSegmentHeader.epoch_id`
    /// per §4.18.2.
    ///
    /// ```text
    /// auth_tag = Trunc128(BLAKE3_KEYED(epoch_key,
    ///     "fsqlite:symbol-auth:v1" || bytes(magic..frame_xxh3)))
    /// ```
    #[must_use]
    pub fn with_auth_tag(mut self, epoch_key: &[u8; 32]) -> Self {
        self.auth_tag = Self::compute_auth_tag(epoch_key, &self.pre_hash_bytes(), self.frame_xxh3);
        self
    }

    /// Compute the 16-byte auth tag.
    ///
    /// Covers the domain separator, the pre-hash region (magic..flags), and
    /// the little-endian `frame_xxh3` bytes — i.e. everything on the wire
    /// before the tag itself.
    fn compute_auth_tag(epoch_key: &[u8; 32], pre_hash: &[u8], frame_xxh3: u64) -> [u8; 16] {
        let mut keyed_hasher = blake3::Hasher::new_keyed(epoch_key);
        keyed_hasher.update(SYMBOL_AUTH_DOMAIN);
        keyed_hasher.update(pre_hash);
        let mut frame_hash_bytes = [0u8; 8];
        write_u64_le(&mut frame_hash_bytes, frame_xxh3).expect("fixed u64 field");
        keyed_hasher.update(&frame_hash_bytes);
        let digest = keyed_hasher.finalize();
        // Trunc128: keep the first 16 digest bytes.
        let mut tag = [0u8; 16];
        tag.copy_from_slice(&digest.as_bytes()[..16]);
        tag
    }

    /// Serialize to canonical wire bytes.
    #[must_use]
    pub fn to_bytes(&self) -> Vec<u8> {
        let expected_len = usize::try_from(self.oti.t).expect("OTI.t fits in usize");
        debug_assert_eq!(
            self.symbol_data.len(),
            expected_len,
            "symbol_data length must equal OTI.t"
        );

        let total = HEADER_BEFORE_DATA + self.symbol_data.len() + TRAILER_AFTER_DATA;
        let mut buf = Vec::with_capacity(total);

        // Header (field order must match HEADER_BEFORE_DATA and from_bytes)
        buf.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        buf.push(SYMBOL_RECORD_VERSION);
        buf.extend_from_slice(self.object_id.as_bytes());
        buf.extend_from_slice(&self.oti.to_bytes());
        append_u32_le(&mut buf, self.esi);
        append_u32_le(&mut buf, self.oti.t);

        // Payload
        buf.extend_from_slice(&self.symbol_data);

        // Trailer
        buf.push(self.flags.bits());
        append_u64_le(&mut buf, self.frame_xxh3);
        buf.extend_from_slice(&self.auth_tag);

        debug_assert_eq!(buf.len(), total);
        buf
    }

    /// Deserialize from canonical wire bytes, validating all invariants.
    ///
    /// The auth tag is read but NOT verified here — use [`Self::verify_auth`].
    ///
    /// NOTE(review): trailing bytes beyond the record are accepted (only
    /// `data.len() < total_size` is rejected) — presumably so a record can be
    /// parsed out of a larger buffer; confirm callers rely on this.
    ///
    /// # Errors
    ///
    /// Returns [`SymbolRecordError`] if the data is malformed, the magic is
    /// wrong, the version is unsupported, `symbol_size != OTI.T`, or the
    /// `frame_xxh3` integrity check fails.
    pub fn from_bytes(data: &[u8]) -> Result<Self, SymbolRecordError> {
        // Need at least the fixed header to read symbol_size.
        if data.len() < HEADER_BEFORE_DATA {
            return Err(SymbolRecordError::TooShort {
                expected_min: HEADER_BEFORE_DATA,
                actual: data.len(),
            });
        }

        // Magic
        let magic: [u8; 4] = data[0..4].try_into().expect("4 bytes");
        if magic != SYMBOL_RECORD_MAGIC {
            return Err(SymbolRecordError::BadMagic(magic));
        }

        // Version
        let version = data[4];
        if version != SYMBOL_RECORD_VERSION {
            return Err(SymbolRecordError::UnsupportedVersion(version));
        }

        // ObjectId
        let object_id = ObjectId::from_bytes(data[5..21].try_into().expect("16 bytes"));

        // OTI
        let oti =
            Oti::from_bytes(&data[21..43]).expect("already checked length >= HEADER_BEFORE_DATA");

        // ESI + symbol_size
        let esi = read_u32_le(&data[43..47]).expect("fixed u32 field");
        let symbol_size = read_u32_le(&data[47..51]).expect("fixed u32 field");

        // Key invariant: symbol_size == OTI.T
        if symbol_size != oti.t {
            return Err(SymbolRecordError::SymbolSizeMismatch {
                symbol_size,
                oti_t: oti.t,
            });
        }

        // Compute the total record size with overflow checks before trusting
        // symbol_size to index into `data`.
        let symbol_size_usize = usize::try_from(symbol_size)
            .map_err(|_| SymbolRecordError::SymbolSizeTooLarge { symbol_size })?;
        let total_size = HEADER_BEFORE_DATA
            .checked_add(symbol_size_usize)
            .and_then(|v| v.checked_add(TRAILER_AFTER_DATA))
            .ok_or(SymbolRecordError::SizeOverflow)?;
        if data.len() < total_size {
            return Err(SymbolRecordError::TooShort {
                expected_min: total_size,
                actual: data.len(),
            });
        }

        // Symbol data
        let data_start = HEADER_BEFORE_DATA;
        // This add already succeeded as part of total_size above; the
        // checked form is kept for defense in depth.
        let data_end = data_start
            .checked_add(symbol_size_usize)
            .ok_or(SymbolRecordError::SizeOverflow)?;
        let symbol_data = data[data_start..data_end].to_vec();

        // Trailer (unknown flag bits are silently dropped — flags are advisory)
        let flags = SymbolRecordFlags::from_bits_truncate(data[data_end]);
        let frame_xxh3 = read_u64_le(&data[data_end + 1..data_end + 9]).expect("fixed u64 field");
        let auth_tag: [u8; 16] = data[data_end + 9..data_end + 25]
            .try_into()
            .expect("16 bytes");

        // Verify frame_xxh3 integrity
        let pre_hash_end = data_end + 1; // magic..flags inclusive
        let computed = Self::compute_frame_xxh3(&data[..pre_hash_end]);
        if computed != frame_xxh3 {
            return Err(SymbolRecordError::IntegrityFailure {
                expected: frame_xxh3,
                computed,
            });
        }

        Ok(Self {
            object_id,
            oti,
            esi,
            symbol_data,
            flags,
            frame_xxh3,
            auth_tag,
        })
    }

    /// Verify `frame_xxh3` integrity without full deserialization.
    #[must_use]
    pub fn verify_integrity(&self) -> bool {
        let pre_hash = self.pre_hash_bytes();
        Self::compute_frame_xxh3(&pre_hash) == self.frame_xxh3
    }

    /// Verify the auth tag using the given epoch key.
    ///
    /// Returns `true` if the auth tag matches, or if the tag is all-zero
    /// (symbol_auth off — tag is ignored per spec).
    #[must_use]
    pub fn verify_auth(&self, epoch_key: &[u8; 32]) -> bool {
        if self.auth_tag == [0u8; 16] {
            return true; // auth off: tag ignored
        }
        let expected = Self::compute_auth_tag(epoch_key, &self.pre_hash_bytes(), self.frame_xxh3);
        self.auth_tag == expected
    }

    /// Total serialized size of this record in bytes.
    #[must_use]
    pub fn wire_size(&self) -> usize {
        HEADER_BEFORE_DATA + self.symbol_data.len() + TRAILER_AFTER_DATA
    }
}
497
498// ---------------------------------------------------------------------------
499// §1.5 Systematic symbol layout + fast-path reconstruction helpers
500// ---------------------------------------------------------------------------
501
/// Error when validating or reconstructing systematic symbol runs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SystematicLayoutError {
    /// No symbol records were provided.
    EmptySymbolSet,
    /// OTI uses `t = 0`, which is invalid.
    ZeroSymbolSize,
    /// Source-symbol count cannot be represented on this platform.
    SourceSymbolCountTooLarge { source_symbols: u64 },
    /// Source-symbol count exceeds the `u32` ESI namespace.
    SourceSymbolCountExceedsEsiRange { source_symbols: u64 },
    /// Transfer length cannot be represented as `usize`.
    TransferLengthTooLarge { transfer_length: u64 },
    /// Reconstructed buffer size overflow.
    ReconstructedSizeOverflow {
        source_symbols: usize,
        symbol_size: usize,
    },
    /// Record object id does not match the run's object id.
    InconsistentObjectId {
        expected: ObjectId,
        found: ObjectId,
        esi: u32,
    },
    /// Record OTI does not match the run's OTI.
    InconsistentOti { expected: Oti, found: Oti, esi: u32 },
    /// Record payload length does not match `OTI.t`.
    InvalidSymbolPayloadSize {
        esi: u32,
        expected: usize,
        found: usize,
    },
    /// ESI 0 must carry [`SymbolRecordFlags::SYSTEMATIC_RUN_START`].
    ///
    /// Also returned by `validate_systematic_run` when a symbol other than
    /// ESI 0 spuriously carries the flag.
    MissingSystematicStartFlag,
    /// Missing required source symbol.
    MissingSystematicSymbol { expected_esi: u32 },
    /// Duplicate source symbol with the same ESI.
    DuplicateSystematicSymbol { esi: u32 },
    /// Source symbols are not laid out as `ESI 0..K-1` contiguously.
    NonContiguousSystematicSymbol { expected_esi: u32, found_esi: u32 },
    /// A source symbol appears after the systematic run.
    RepairInterleaved { index: usize, esi: u32 },
    /// Symbol integrity check failed.
    CorruptSymbol { esi: u32 },
}
547
548impl fmt::Display for SystematicLayoutError {
549    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
550        match self {
551            Self::EmptySymbolSet => f.write_str("no symbol records provided"),
552            Self::ZeroSymbolSize => f.write_str("OTI.t is zero"),
553            Self::SourceSymbolCountTooLarge { source_symbols } => {
554                write!(
555                    f,
556                    "source symbol count too large for platform: {source_symbols}"
557                )
558            }
559            Self::SourceSymbolCountExceedsEsiRange { source_symbols } => {
560                write!(
561                    f,
562                    "source symbol count exceeds u32 ESI range: {source_symbols}"
563                )
564            }
565            Self::TransferLengthTooLarge { transfer_length } => {
566                write!(
567                    f,
568                    "transfer length too large for platform: {transfer_length}"
569                )
570            }
571            Self::ReconstructedSizeOverflow {
572                source_symbols,
573                symbol_size,
574            } => {
575                write!(
576                    f,
577                    "reconstructed size overflow: {source_symbols} * {symbol_size}"
578                )
579            }
580            Self::InconsistentObjectId {
581                expected,
582                found,
583                esi,
584            } => {
585                write!(
586                    f,
587                    "object_id mismatch at esi={esi}: expected {expected}, found {found}"
588                )
589            }
590            Self::InconsistentOti {
591                expected,
592                found,
593                esi,
594            } => {
595                write!(
596                    f,
597                    "OTI mismatch at esi={esi}: expected {expected:?}, found {found:?}"
598                )
599            }
600            Self::InvalidSymbolPayloadSize {
601                esi,
602                expected,
603                found,
604            } => {
605                write!(
606                    f,
607                    "symbol payload size mismatch at esi={esi}: expected {expected}, found {found}"
608                )
609            }
610            Self::MissingSystematicStartFlag => {
611                f.write_str("missing SYSTEMATIC_RUN_START flag on ESI 0")
612            }
613            Self::MissingSystematicSymbol { expected_esi } => {
614                write!(f, "missing systematic symbol esi={expected_esi}")
615            }
616            Self::DuplicateSystematicSymbol { esi } => {
617                write!(f, "duplicate systematic symbol esi={esi}")
618            }
619            Self::NonContiguousSystematicSymbol {
620                expected_esi,
621                found_esi,
622            } => {
623                write!(
624                    f,
625                    "non-contiguous systematic run: expected esi={expected_esi}, found esi={found_esi}"
626                )
627            }
628            Self::RepairInterleaved { index, esi } => {
629                write!(
630                    f,
631                    "repair/source interleave at index={index}: encountered source esi={esi} after systematic run"
632                )
633            }
634            Self::CorruptSymbol { esi } => write!(f, "integrity check failed for esi={esi}"),
635        }
636    }
637}
638
// Marker impl: `Debug` + `Display` above satisfy the `Error` contract.
impl std::error::Error for SystematicLayoutError {}
640
641fn source_symbol_count_u64(oti: Oti) -> Result<u64, SystematicLayoutError> {
642    if oti.t == 0 {
643        return Err(SystematicLayoutError::ZeroSymbolSize);
644    }
645    if oti.f == 0 {
646        return Ok(0);
647    }
648    Ok(oti.f.div_ceil(u64::from(oti.t)))
649}
650
651fn validate_record_shape(
652    record: &SymbolRecord,
653    object_id: ObjectId,
654    oti: Oti,
655    symbol_size: usize,
656) -> Result<(), SystematicLayoutError> {
657    if record.object_id != object_id {
658        return Err(SystematicLayoutError::InconsistentObjectId {
659            expected: object_id,
660            found: record.object_id,
661            esi: record.esi,
662        });
663    }
664    if record.oti != oti {
665        return Err(SystematicLayoutError::InconsistentOti {
666            expected: oti,
667            found: record.oti,
668            esi: record.esi,
669        });
670    }
671    if record.symbol_data.len() != symbol_size {
672        return Err(SystematicLayoutError::InvalidSymbolPayloadSize {
673            esi: record.esi,
674            expected: symbol_size,
675            found: record.symbol_data.len(),
676        });
677    }
678    if !record.verify_integrity() {
679        return Err(SystematicLayoutError::CorruptSymbol { esi: record.esi });
680    }
681    Ok(())
682}
683
684/// Compute source-symbol count `K = ceil(F / T)` for the given [`Oti`].
685pub fn source_symbol_count(oti: Oti) -> Result<usize, SystematicLayoutError> {
686    let source_symbols = source_symbol_count_u64(oti)?;
687    usize::try_from(source_symbols)
688        .map_err(|_| SystematicLayoutError::SourceSymbolCountTooLarge { source_symbols })
689}
690
/// Writer helper: normalize records into `ESI 0..K-1` contiguous layout.
///
/// Guarantees:
/// - Source symbols are first, in ascending ESI order (`0..K-1`).
/// - Repair symbols (`ESI >= K`) follow the systematic run.
/// - Only ESI 0 has [`SymbolRecordFlags::SYSTEMATIC_RUN_START`].
pub fn layout_systematic_run(
    records: Vec<SymbolRecord>,
) -> Result<Vec<SymbolRecord>, SystematicLayoutError> {
    // The first record supplies the run's expected identity/OTI; every other
    // record must match it exactly (checked in validate_record_shape).
    let first = records
        .first()
        .ok_or(SystematicLayoutError::EmptySymbolSet)?
        .clone();
    let source_symbols = source_symbol_count(first.oti)?;
    let source_symbols_u64 = source_symbol_count_u64(first.oti)?;
    let source_symbols_u32 = u32::try_from(source_symbols_u64).map_err(|_| {
        SystematicLayoutError::SourceSymbolCountExceedsEsiRange {
            source_symbols: source_symbols_u64,
        }
    })?;
    // NOTE(review): this u32->usize conversion only fails where usize is
    // narrower than 32 bits; mapping that to `ZeroSymbolSize` is misleading —
    // consider a dedicated variant.
    let symbol_size =
        usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;

    // Bucket records: source symbols into their ESI slot, repairs aside.
    let mut systematic = vec![None; source_symbols];
    let mut repairs = Vec::new();

    for mut record in records {
        validate_record_shape(&record, first.object_id, first.oti, symbol_size)?;
        // Clear the start flag everywhere; it is re-inserted on ESI 0 below.
        record.flags.remove(SymbolRecordFlags::SYSTEMATIC_RUN_START);
        if record.esi < source_symbols_u32 {
            let idx = usize::try_from(record.esi).expect("ESI < K fits usize");
            if systematic[idx].is_some() {
                return Err(SystematicLayoutError::DuplicateSystematicSymbol { esi: record.esi });
            }
            systematic[idx] = Some(record);
        } else {
            repairs.push(record);
        }
    }

    // Emit the systematic run 0..K-1, failing on any gap.
    let mut ordered = Vec::with_capacity(systematic.len().saturating_add(repairs.len()));
    for (idx, maybe_record) in systematic.into_iter().enumerate() {
        let mut record =
            maybe_record.ok_or_else(|| SystematicLayoutError::MissingSystematicSymbol {
                expected_esi: u32::try_from(idx).expect("idx fits u32"),
            })?;
        if idx == 0 {
            record.flags.insert(SymbolRecordFlags::SYSTEMATIC_RUN_START);
        }
        ordered.push(record);
    }

    // Repairs follow the run, sorted by ascending ESI.
    repairs.sort_by_key(|record| record.esi);
    ordered.extend(repairs);

    Ok(ordered)
}
748
/// Validate whether records already satisfy systematic contiguous run layout.
///
/// Returns the required source-symbol count `K` on success.
pub fn validate_systematic_run(records: &[SymbolRecord]) -> Result<usize, SystematicLayoutError> {
    let first = records
        .first()
        .ok_or(SystematicLayoutError::EmptySymbolSet)?;
    let source_symbols = source_symbol_count(first.oti)?;
    let source_symbols_u64 = source_symbol_count_u64(first.oti)?;
    let source_symbols_u32 = u32::try_from(source_symbols_u64).map_err(|_| {
        SystematicLayoutError::SourceSymbolCountExceedsEsiRange {
            source_symbols: source_symbols_u64,
        }
    })?;
    // NOTE(review): as in `layout_systematic_run`, a failed u32->usize
    // conversion is reported as `ZeroSymbolSize`, which is misleading on
    // sub-32-bit targets.
    let symbol_size =
        usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;

    // Empty object: nothing further to check.
    if source_symbols == 0 {
        return Ok(0);
    }

    // Phase 1: the first K records must be source symbols ESI 0..K-1, in order.
    for expected_idx in 0..source_symbols {
        let record = records.get(expected_idx).ok_or_else(|| {
            SystematicLayoutError::MissingSystematicSymbol {
                expected_esi: u32::try_from(expected_idx).expect("idx fits u32"),
            }
        })?;
        validate_record_shape(record, first.object_id, first.oti, symbol_size)?;

        let expected_esi = u32::try_from(expected_idx).expect("idx fits u32");
        if record.esi != expected_esi {
            return Err(SystematicLayoutError::NonContiguousSystematicSymbol {
                expected_esi,
                found_esi: record.esi,
            });
        }

        // Flag discipline: exactly ESI 0 carries SYSTEMATIC_RUN_START.
        // Both a missing flag on ESI 0 and a spurious flag elsewhere are
        // reported with the same error variant.
        if expected_idx == 0 {
            if !record
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
            {
                return Err(SystematicLayoutError::MissingSystematicStartFlag);
            }
        } else if record
            .flags
            .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        {
            return Err(SystematicLayoutError::MissingSystematicStartFlag);
        }
    }

    // Phase 2: everything after the run must be repair symbols (ESI >= K).
    for (index, record) in records.iter().enumerate().skip(source_symbols) {
        validate_record_shape(record, first.object_id, first.oti, symbol_size)?;
        if record.esi < source_symbols_u32 {
            return Err(SystematicLayoutError::RepairInterleaved {
                index,
                esi: record.esi,
            });
        }
    }

    Ok(source_symbols)
}
813
814/// Reconstruct payload bytes directly from contiguous systematic symbols.
815///
816/// This is the GF(256)-free happy path.
817pub fn reconstruct_systematic_happy_path(
818    records: &[SymbolRecord],
819) -> Result<Vec<u8>, SystematicLayoutError> {
820    let source_symbols = validate_systematic_run(records)?;
821    if source_symbols == 0 {
822        return Ok(Vec::new());
823    }
824
825    let first = &records[0];
826    let symbol_size =
827        usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;
828    let transfer_length = usize::try_from(first.oti.f).map_err(|_| {
829        SystematicLayoutError::TransferLengthTooLarge {
830            transfer_length: first.oti.f,
831        }
832    })?;
833    let total_len = source_symbols.checked_mul(symbol_size).ok_or(
834        SystematicLayoutError::ReconstructedSizeOverflow {
835            source_symbols,
836            symbol_size,
837        },
838    )?;
839
840    let mut out = Vec::with_capacity(total_len);
841    for record in records.iter().take(source_symbols) {
842        out.extend_from_slice(&record.symbol_data);
843    }
844    out.truncate(transfer_length);
845    Ok(out)
846}
847
/// Read-path classification for ECS object recovery.
///
/// Returned by [`recover_object_with_fallback`] so callers can observe
/// which recovery strategy produced the bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolReadPath {
    /// Recovered by concatenating contiguous systematic symbols.
    SystematicFastPath,
    /// Happy path was unavailable; decoder fallback was invoked.
    ///
    /// `reason` is the layout error that disqualified the fast path.
    FullDecodeFallback { reason: SystematicLayoutError },
}
856
857/// Recover object bytes using happy-path first, with decoder fallback.
858pub fn recover_object_with_fallback<F>(
859    records: &[SymbolRecord],
860    mut fallback_decode: F,
861) -> Result<(Vec<u8>, SymbolReadPath), SystematicLayoutError>
862where
863    F: FnMut(&[SymbolRecord]) -> Result<Vec<u8>, SystematicLayoutError>,
864{
865    match reconstruct_systematic_happy_path(records) {
866        Ok(bytes) => Ok((bytes, SymbolReadPath::SystematicFastPath)),
867        Err(reason) => {
868            let decoded = fallback_decode(records)?;
869            Ok((decoded, SymbolReadPath::FullDecodeFallback { reason }))
870        }
871    }
872}
873
874// ---------------------------------------------------------------------------
875// §3.6.1-3.6.3 Native Index Types
876// ---------------------------------------------------------------------------
877
/// How a page version is stored in an ECS patch object.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum PatchKind {
    /// Full page image — the patch object contains the entire page.
    FullImage = 0,
    /// Intent log — a sequence of semantic operations to replay.
    IntentLog = 1,
    /// Sparse XOR — byte-range XOR delta against a base image.
    SparseXor = 2,
}

impl PatchKind {
    /// Deserialize from wire byte.
    ///
    /// Returns `None` for any byte that is not a known discriminant.
    #[must_use]
    pub fn from_byte(b: u8) -> Option<Self> {
        // Mirrors the explicit discriminants declared on the enum.
        Some(match b {
            0 => Self::FullImage,
            1 => Self::IntentLog,
            2 => Self::SparseXor,
            _ => return None,
        })
    }
}
902
/// Stable, content-addressed pointer from a page index to a patch object (§3.6.2).
///
/// The atom of lookup in Native mode. References content-addressed ECS
/// objects, not physical offsets, so the pointer is replicable across nodes.
///
/// Serialized via [`VersionPointer::to_bytes`] as 26 bytes, or 42 bytes when
/// a base hint is present.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VersionPointer {
    /// Commit sequence at which this version was created.
    pub commit_seq: u64,
    /// ECS object containing the patch/intent.
    pub patch_object: ObjectId,
    /// How the page bytes are represented.
    pub patch_kind: PatchKind,
    /// Optional base image hint for fast materialization of deltas.
    ///
    /// `None` when no base image applies to this version.
    pub base_hint: Option<ObjectId>,
}
918
919/// Wire size of [`VersionPointer`]: commit_seq(8) + object_id(16) + patch_kind(1)
920/// + has_base(1) + optional base_hint(16) = 26 or 42 bytes.
921const VERSION_POINTER_MIN_WIRE: usize = 8 + 16 + 1 + 1;
922
923impl VersionPointer {
924    /// Serialize to canonical little-endian bytes.
925    #[must_use]
926    pub fn to_bytes(&self) -> Vec<u8> {
927        let has_base: u8 = u8::from(self.base_hint.is_some());
928        let cap = VERSION_POINTER_MIN_WIRE + if has_base == 1 { 16 } else { 0 };
929        let mut buf = Vec::with_capacity(cap);
930        append_u64_le(&mut buf, self.commit_seq);
931        buf.extend_from_slice(self.patch_object.as_bytes());
932        buf.push(self.patch_kind as u8);
933        buf.push(has_base);
934        if let Some(base) = self.base_hint {
935            buf.extend_from_slice(base.as_bytes());
936        }
937        buf
938    }
939
940    /// Deserialize from canonical little-endian bytes.
941    #[must_use]
942    pub fn from_bytes(data: &[u8]) -> Option<Self> {
943        if data.len() < VERSION_POINTER_MIN_WIRE {
944            return None;
945        }
946        let commit_seq = read_u64_le(&data[0..8])?;
947        let patch_object = ObjectId::from_bytes(data[8..24].try_into().ok()?);
948        let patch_kind = PatchKind::from_byte(data[24])?;
949        let has_base = data[25];
950        let base_hint = if has_base != 0 {
951            if data.len() < VERSION_POINTER_MIN_WIRE + 16 {
952                return None;
953            }
954            Some(ObjectId::from_bytes(data[26..42].try_into().ok()?))
955        } else {
956            None
957        };
958        Some(Self {
959            commit_seq,
960            patch_object,
961            patch_kind,
962            base_hint,
963        })
964    }
965}
966
/// Offset into a symbol log file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolLogOffset(pub u64);

impl SymbolLogOffset {
    /// Wrap a raw offset value.
    #[must_use]
    pub const fn new(offset: u64) -> Self {
        Self(offset)
    }

    /// Return the underlying offset value.
    #[must_use]
    pub const fn get(self) -> u64 {
        self.0
    }
}
982
/// Minimal bloom filter for fast "not present" checks in index segments.
///
/// Uses double hashing (xxh3 + BLAKE3 truncated) to probe `k` bit positions
/// in a bitvec of `m` bits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BloomFilter {
    /// Bit storage, packed 64 bits per word.
    bits: Vec<u64>,
    /// Logical filter size `m` in bits; probe positions are taken mod this.
    num_bits: u32,
    /// Number of probe positions `k` per key.
    num_hashes: u8,
}
993
994impl BloomFilter {
995    /// Create a new bloom filter sized for `expected_items` with the given
996    /// false positive rate.
997    ///
998    /// Uses the classic formula: m = -n*ln(p) / (ln2)^2, k = (m/n)*ln2.
999    #[must_use]
1000    pub fn new(expected_items: u32, false_positive_rate: f64) -> Self {
1001        let n = f64::from(expected_items).max(1.0);
1002        let p = false_positive_rate.clamp(1e-10, 0.5);
1003
1004        #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
1005        let m = ((-n * p.ln()) / (core::f64::consts::LN_2.powi(2))).ceil() as u32;
1006        let m = m.max(64); // minimum 64 bits
1007
1008        #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
1009        let k = ((f64::from(m) / n) * core::f64::consts::LN_2).ceil() as u8;
1010        let k = k.clamp(1, 16);
1011
1012        let words = usize::try_from(m.div_ceil(64)).expect("BloomFilter word count fits usize");
1013        Self {
1014            bits: vec![0u64; words],
1015            num_bits: m,
1016            num_hashes: k,
1017        }
1018    }
1019
1020    /// Insert a page number into the filter.
1021    pub fn insert(&mut self, page: crate::PageNumber) {
1022        let raw = page.get();
1023        let (h1, h2) = Self::double_hash(raw);
1024        for i in 0..u32::from(self.num_hashes) {
1025            let pos = (h1.wrapping_add(i.wrapping_mul(h2))) % self.num_bits;
1026            let word = (pos / 64) as usize;
1027            let bit = pos % 64;
1028            self.bits[word] |= 1u64 << bit;
1029        }
1030    }
1031
1032    /// Check if a page number might be present.
1033    ///
1034    /// Returns `false` if definitely not present (zero false negatives).
1035    /// Returns `true` if possibly present (may be false positive).
1036    #[must_use]
1037    pub fn maybe_contains(&self, page: crate::PageNumber) -> bool {
1038        let raw = page.get();
1039        let (h1, h2) = Self::double_hash(raw);
1040        for i in 0..u32::from(self.num_hashes) {
1041            let pos = (h1.wrapping_add(i.wrapping_mul(h2))) % self.num_bits;
1042            let word = (pos / 64) as usize;
1043            let bit = pos % 64;
1044            if self.bits[word] & (1u64 << bit) == 0 {
1045                return false;
1046            }
1047        }
1048        true
1049    }
1050
1051    fn double_hash(page_raw: u32) -> (u32, u32) {
1052        let mut bytes = [0u8; 4];
1053        crate::encoding::write_u32_le(&mut bytes, page_raw).expect("fixed u32 field");
1054        let h1 = xxhash_rust::xxh3::xxh3_64(&bytes);
1055        let mut h2 = {
1056            let digest = blake3::hash(&bytes);
1057            let b = digest.as_bytes();
1058            read_u32_le(&b[..4]).expect("blake3 digest prefix is 4 bytes")
1059        };
1060        // A zero step size degenerates double hashing into a single probe.
1061        if h2 == 0 {
1062            h2 = 1;
1063        }
1064        #[allow(clippy::cast_possible_truncation)]
1065        let h1_trunc = h1 as u32;
1066        (h1_trunc, h2)
1067    }
1068}
1069
/// Maps `PageNumber -> VersionPointer` for a specific commit range (§3.6.3).
///
/// Includes a bloom filter for fast "not present" checks. All index segments
/// are ECS objects (content-addressed, repairable via RaptorQ).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageVersionIndexSegment {
    /// Inclusive start of the commit range covered.
    pub start_seq: u64,
    /// Inclusive end of the commit range covered.
    pub end_seq: u64,
    /// Sorted entries mapping page numbers to version pointers.
    ///
    /// Sorted by `(page_number, commit_seq)` ascending. Multiple entries per
    /// page are allowed (a page may be updated multiple times within the
    /// segment's commit range). `lookup` relies on this ordering for its
    /// binary searches.
    pub entries: Vec<(crate::PageNumber, VersionPointer)>,
    /// Bloom filter for fast "not present" checks.
    ///
    /// Keyed on page numbers only (not commit sequences).
    pub bloom: BloomFilter,
}
1089
1090impl PageVersionIndexSegment {
1091    /// Create a new segment from entries. Sorts entries by `(page, commit_seq)` and
1092    /// builds the bloom filter automatically.
1093    #[must_use]
1094    pub fn new(
1095        start_seq: u64,
1096        end_seq: u64,
1097        mut entries: Vec<(crate::PageNumber, VersionPointer)>,
1098    ) -> Self {
1099        entries.sort_by_key(|(pgno, vp)| (pgno.get(), vp.commit_seq));
1100
1101        #[allow(clippy::cast_possible_truncation)]
1102        let count = entries.len() as u32;
1103        let mut bloom = BloomFilter::new(count.max(1), 0.01);
1104        for &(pgno, _) in &entries {
1105            bloom.insert(pgno);
1106        }
1107
1108        Self {
1109            start_seq,
1110            end_seq,
1111            entries,
1112            bloom,
1113        }
1114    }
1115
1116    /// Look up the newest version pointer for `page` with
1117    /// `commit_seq <= snapshot_high`.
1118    ///
1119    /// Returns `None` if the page has no entry in this segment or if
1120    /// no version is visible under the given snapshot.
1121    #[must_use]
1122    pub fn lookup(&self, page: crate::PageNumber, snapshot_high: u64) -> Option<&VersionPointer> {
1123        if !self.bloom.maybe_contains(page) {
1124            return None;
1125        }
1126
1127        let page_raw = page.get();
1128        let start = self
1129            .entries
1130            .partition_point(|(pgno, _)| pgno.get() < page_raw);
1131        let end = self
1132            .entries
1133            .partition_point(|(pgno, _)| pgno.get() <= page_raw);
1134        let slice = self.entries.get(start..end)?;
1135        if slice.is_empty() {
1136            return None;
1137        }
1138
1139        // Find the newest commit_seq <= snapshot_high.
1140        let idx = slice.partition_point(|(_, vp)| vp.commit_seq <= snapshot_high);
1141        if idx == 0 {
1142            None
1143        } else {
1144            Some(&slice[idx - 1].1)
1145        }
1146    }
1147}
1148
/// Maps `ObjectId -> Vec<SymbolLogOffset>` — accelerator for finding
/// symbols on disk (§3.6.3).
///
/// Rebuildable by scanning symbol logs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectLocatorSegment {
    /// Sorted entries mapping object IDs to their symbol log offsets.
    ///
    /// Ascending by raw `ObjectId` bytes; `lookup` relies on this ordering
    /// for its binary search.
    pub entries: Vec<(ObjectId, Vec<SymbolLogOffset>)>,
}
1158
1159impl ObjectLocatorSegment {
1160    /// Create from unsorted entries. Sorts by `ObjectId` bytes for
1161    /// deterministic encoding.
1162    #[must_use]
1163    pub fn new(mut entries: Vec<(ObjectId, Vec<SymbolLogOffset>)>) -> Self {
1164        entries.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
1165        Self { entries }
1166    }
1167
1168    /// Look up symbol log offsets for a given `ObjectId`.
1169    #[must_use]
1170    pub fn lookup(&self, id: &ObjectId) -> Option<&[SymbolLogOffset]> {
1171        self.entries
1172            .binary_search_by(|(oid, _)| oid.as_bytes().cmp(id.as_bytes()))
1173            .ok()
1174            .map(|idx| self.entries[idx].1.as_slice())
1175    }
1176
1177    /// Rebuild from a set of `(ObjectId, SymbolLogOffset)` pairs, typically
1178    /// obtained by scanning symbol log files.
1179    #[must_use]
1180    pub fn rebuild_from_scan(pairs: impl IntoIterator<Item = (ObjectId, SymbolLogOffset)>) -> Self {
1181        let mut map: std::collections::BTreeMap<[u8; 16], Vec<SymbolLogOffset>> =
1182            std::collections::BTreeMap::new();
1183        for (oid, offset) in pairs {
1184            map.entry(*oid.as_bytes()).or_default().push(offset);
1185        }
1186        let entries: Vec<_> = map
1187            .into_iter()
1188            .map(|(bytes, mut offsets)| {
1189                offsets.sort();
1190                (ObjectId::from_bytes(bytes), offsets)
1191            })
1192            .collect();
1193        Self { entries }
1194    }
1195}
1196
/// Maps commit_seq ranges to `IndexSegment` object IDs (§3.6.3).
///
/// Used for bootstrapping: given a commit_seq, find which index segment
/// covers it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ManifestSegment {
    /// Sorted, non-overlapping entries: (start_seq, end_seq, segment ObjectId).
    ///
    /// NOTE(review): `new` sorts by `start_seq` but does not validate
    /// non-overlap; `lookup` assumes it — confirm callers uphold this.
    pub entries: Vec<(u64, u64, ObjectId)>,
}
1206
1207impl ManifestSegment {
1208    /// Create from entries. Sorts by `start_seq` for binary search.
1209    #[must_use]
1210    pub fn new(mut entries: Vec<(u64, u64, ObjectId)>) -> Self {
1211        entries.sort_by_key(|&(start, _, _)| start);
1212        Self { entries }
1213    }
1214
1215    /// Find the index segment covering the given `commit_seq`.
1216    #[must_use]
1217    pub fn lookup(&self, commit_seq: u64) -> Option<&ObjectId> {
1218        // Binary search for the last entry with start_seq <= commit_seq
1219        let idx = self
1220            .entries
1221            .partition_point(|&(start, _, _)| start <= commit_seq);
1222        if idx == 0 {
1223            return None;
1224        }
1225        let (start, end, ref oid) = self.entries[idx - 1];
1226        if commit_seq >= start && commit_seq <= end {
1227            Some(oid)
1228        } else {
1229            None
1230        }
1231    }
1232}
1233
1234#[cfg(test)]
1235mod tests {
1236    use super::*;
1237    use crate::encoding::{read_u32_le, read_u64_le};
1238
    /// ObjectId derivation must equal
    /// `Trunc128(BLAKE3(DOMAIN_SEPARATOR || header || payload_hash))`, and
    /// both derivation entry points must agree.
    #[test]
    fn test_object_id_blake3_derivation() {
        let header = b"hdr:v1\x00";
        let payload = b"hello world";
        let payload_hash = PayloadHash::blake3(payload);

        let derived = ObjectId::derive(header, payload_hash);

        // Re-derive from the manually concatenated canonical preimage.
        let mut canonical = Vec::new();
        canonical.extend_from_slice(header);
        canonical.extend_from_slice(payload_hash.as_bytes());
        let derived2 = ObjectId::derive_from_canonical_bytes(&canonical);

        assert_eq!(derived, derived2);

        // Independently recompute the digest and truncate to 16 bytes.
        let mut hasher = blake3::Hasher::new();
        hasher.update(ObjectId::DOMAIN_SEPARATOR);
        hasher.update(&canonical);
        let digest = hasher.finalize();
        let mut expected = [0u8; 16];
        expected.copy_from_slice(&digest.as_bytes()[..16]);

        assert_eq!(derived.as_bytes(), &expected);
    }
1263
1264    #[test]
1265    fn test_object_id_collision_resistance() {
1266        let header = b"hdr:v1\x00";
1267        let payload_a = b"payload-a";
1268        let payload_b = b"payload-b";
1269        let id_a = ObjectId::derive(header, PayloadHash::blake3(payload_a));
1270        let id_b = ObjectId::derive(header, PayloadHash::blake3(payload_b));
1271        assert_ne!(id_a, id_b);
1272    }
1273
1274    #[test]
1275    fn test_object_id_deterministic() {
1276        let header = b"hdr:v1\x00";
1277        let payload = b"payload";
1278        let hash = PayloadHash::blake3(payload);
1279        let id1 = ObjectId::derive(header, hash);
1280        let id2 = ObjectId::derive(header, hash);
1281        assert_eq!(id1, id2);
1282    }
1283
1284    #[test]
1285    fn test_object_id_display_hex() {
1286        let id = ObjectId::from_bytes([0u8; 16]);
1287        let s = id.to_string();
1288        assert_eq!(s.len(), 32);
1289        assert!(s.chars().all(|ch| matches!(ch, '0'..='9' | 'a'..='f')));
1290
1291        // A stable known-value check (16 zero bytes => 32 zero hex chars).
1292        assert_eq!(s, "00000000000000000000000000000000");
1293    }
1294
1295    // -----------------------------------------------------------------------
1296    // Helpers
1297    // -----------------------------------------------------------------------
1298
    /// Build a minimal single-block OTI with the given symbol size `t` and a
    /// fixed 16 KiB transfer length.
    fn test_oti(symbol_size: u32) -> Oti {
        Oti {
            f: 16384,
            al: 4,
            t: symbol_size,
            z: 1,
            n: 1,
        }
    }
1308
    /// Build a flag-less SymbolRecord at ESI 0 whose payload is
    /// `symbol_size` bytes of 0xAB.
    fn test_record(symbol_size: u32) -> SymbolRecord {
        let data = vec![0xAB; symbol_size as usize];
        let oid = ObjectId::from_bytes([1u8; 16]);
        SymbolRecord::new(
            oid,
            test_oti(symbol_size),
            0,
            data,
            SymbolRecordFlags::empty(),
        )
    }
1320
    /// Build a complete symbol run: `source_symbols` systematic records
    /// (run-start flag on ESI 0 only) followed by `repair_symbols` repair
    /// records with ESIs continuing after the source range.
    ///
    /// Payload bytes follow a deterministic per-ESI pattern so reconstruction
    /// results can be compared exactly. Returns the records, the expected
    /// reconstructed payload, and the OTI used.
    fn make_symbol_run(
        object_id: ObjectId,
        source_symbols: u32,
        symbol_size: u32,
        repair_symbols: u32,
    ) -> (Vec<SymbolRecord>, Vec<u8>, Oti) {
        let symbol_size_usize = usize::try_from(symbol_size).expect("symbol_size fits usize");
        // Transfer length exactly fills the source symbols (no padding).
        let transfer_length = u64::from(source_symbols).saturating_mul(u64::from(symbol_size));
        let oti = Oti {
            f: transfer_length,
            al: 4,
            t: symbol_size,
            z: 1,
            n: 1,
        };
        let mut records = Vec::new();
        let mut expected = Vec::new();

        for esi in 0..source_symbols {
            let mut payload = Vec::with_capacity(symbol_size_usize);
            for idx in 0..symbol_size_usize {
                let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
                payload.push(esi_low ^ idx_low.wrapping_mul(3));
            }
            expected.extend_from_slice(&payload);
            let flags = if esi == 0 {
                SymbolRecordFlags::SYSTEMATIC_RUN_START
            } else {
                SymbolRecordFlags::empty()
            };
            records.push(SymbolRecord::new(object_id, oti, esi, payload, flags));
        }

        // Repair symbols use a different byte pattern so they can never be
        // mistaken for source data.
        for repair in 0..repair_symbols {
            let esi = source_symbols.saturating_add(repair);
            let mut payload = vec![0u8; symbol_size_usize];
            let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
            for (idx, byte) in payload.iter_mut().enumerate() {
                let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                *byte = esi_low.wrapping_mul(13) ^ idx_low;
            }
            records.push(SymbolRecord::new(
                object_id,
                oti,
                esi,
                payload,
                SymbolRecordFlags::empty(),
            ));
        }

        (records, expected, oti)
    }
1374
1375    // -----------------------------------------------------------------------
1376    // §3.5.2 SymbolRecord tests
1377    // -----------------------------------------------------------------------
1378
1379    #[test]
1380    fn test_symbol_record_serialize_deserialize() {
1381        let rec = test_record(4096);
1382        let bytes = rec.to_bytes();
1383        let rec2 = SymbolRecord::from_bytes(&bytes).expect("roundtrip");
1384        assert_eq!(rec, rec2);
1385    }
1386
1387    #[test]
1388    fn test_symbol_record_magic_validation() {
1389        let rec = test_record(64);
1390        let mut bytes = rec.to_bytes();
1391        bytes[0] = 0xFF;
1392        let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
1393        assert!(matches!(err, SymbolRecordError::BadMagic(_)));
1394    }
1395
    /// Flipping a single payload bit must trip the frame xxh3 check.
    #[test]
    fn test_symbol_record_frame_xxh3_integrity() {
        let rec = test_record(128);
        let mut bytes = rec.to_bytes();
        // Flip one bit in symbol_data
        bytes[HEADER_BEFORE_DATA] ^= 0x01;
        let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
        assert!(matches!(err, SymbolRecordError::IntegrityFailure { .. }));
    }
1405
    /// A frame whose symbol_size field disagrees with OTI.T must be rejected
    /// even when the frame checksum is valid.
    #[test]
    fn test_symbol_record_invariant_symbol_size_eq_oti_t() {
        let oid = ObjectId::from_bytes([2u8; 16]);
        let oti = test_oti(100);
        // Manually build wire bytes with symbol_size=200 but OTI.T=100
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        bytes.push(SYMBOL_RECORD_VERSION);
        bytes.extend_from_slice(oid.as_bytes());
        bytes.extend_from_slice(&oti.to_bytes());
        bytes.extend_from_slice(&0u32.to_le_bytes()); // esi
        bytes.extend_from_slice(&200u32.to_le_bytes()); // symbol_size != oti.t
        bytes.extend_from_slice(&[0u8; 200]);
        bytes.push(0); // flags
        // Append a *valid* checksum so the failure is attributable to the
        // size invariant rather than frame integrity.
        let hash = xxhash_rust::xxh3::xxh3_64(&bytes);
        bytes.extend_from_slice(&hash.to_le_bytes());
        bytes.extend_from_slice(&[0u8; 16]);

        let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
        assert!(matches!(
            err,
            SymbolRecordError::SymbolSizeMismatch {
                symbol_size: 200,
                oti_t: 100
            }
        ));
    }
1433
    /// An auth tag bound to the original bytes must fail verification after
    /// the payload is tampered with, even when the frame checksum is
    /// recomputed to match the tampered bytes.
    #[test]
    fn test_symbol_record_auth_tag_verification() {
        let epoch_key = [0x42u8; 32];
        let rec = test_record(64).with_auth_tag(&epoch_key);
        assert_ne!(rec.auth_tag, [0u8; 16]);
        assert!(rec.verify_auth(&epoch_key));

        // Tamper: change one data byte, recompute frame_xxh3 but NOT auth_tag
        let mut tampered = rec;
        tampered.symbol_data[0] ^= 0x01;
        let pre_hash = tampered.pre_hash_bytes();
        tampered.frame_xxh3 = xxhash_rust::xxh3::xxh3_64(&pre_hash);
        assert!(!tampered.verify_auth(&epoch_key));
    }
1448
1449    #[test]
1450    fn test_symbol_record_auth_tag_ignored_when_off() {
1451        let rec = test_record(64);
1452        assert_eq!(rec.auth_tag, [0u8; 16]);
1453        let any_key = [0xFFu8; 32];
1454        assert!(rec.verify_auth(&any_key));
1455    }
1456
1457    #[test]
1458    fn test_symbol_record_systematic_flag() {
1459        let oid = ObjectId::from_bytes([3u8; 16]);
1460        let rec = SymbolRecord::new(
1461            oid,
1462            test_oti(64),
1463            0,
1464            vec![0u8; 64],
1465            SymbolRecordFlags::SYSTEMATIC_RUN_START,
1466        );
1467        assert!(rec.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
1468        assert_eq!(rec.esi, 0);
1469
1470        let bytes = rec.to_bytes();
1471        let rec2 = SymbolRecord::from_bytes(&bytes).unwrap();
1472        assert!(rec2.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
1473    }
1474
1475    #[test]
1476    fn test_oti_field_widths() {
1477        let oti = Oti {
1478            f: 1_000_000,
1479            al: 4,
1480            t: 65536,
1481            z: 10,
1482            n: 1,
1483        };
1484        let bytes = oti.to_bytes();
1485        let oti2 = Oti::from_bytes(&bytes).unwrap();
1486        assert_eq!(oti, oti2);
1487        assert_eq!(oti2.t, 65536);
1488    }
1489
    /// Four correctly flagged source symbols concatenate and truncate to the
    /// original payload without invoking any decoder.
    #[test]
    fn test_systematic_fast_path_happy() {
        let oid = ObjectId::from_bytes([4u8; 16]);
        let oti = Oti {
            f: 256,
            al: 4,
            t: 64,
            z: 1,
            n: 1,
        };

        // ESI i carries 64 bytes of fill value i; only ESI 0 is flagged.
        let records: Vec<_> = (0u32..4)
            .map(|i| {
                let flags = if i == 0 {
                    SymbolRecordFlags::SYSTEMATIC_RUN_START
                } else {
                    SymbolRecordFlags::empty()
                };
                let fill = u8::try_from(i).expect("i < 4");
                SymbolRecord::new(oid, oti, i, vec![fill; 64], flags)
            })
            .collect();

        assert!(
            records[0]
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        );
        for rec in &records[1..] {
            assert!(!rec.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
        }

        // Reconstruct via systematic fast path
        let mut reconstructed = Vec::new();
        for rec in &records {
            assert!(rec.verify_integrity());
            reconstructed.extend_from_slice(&rec.symbol_data);
        }
        let f = usize::try_from(oti.f).expect("OTI transfer length fits in usize");
        reconstructed.truncate(f);
        assert_eq!(reconstructed.len(), 256);

        // Each 64-byte chunk must carry its symbol's fill value.
        for (i, chunk) in reconstructed.chunks(64).enumerate() {
            let expected = u8::try_from(i).expect("i < 4");
            assert!(chunk.iter().all(|&b| b == expected));
        }
    }
1537
    /// Corrupted symbol bytes must be rejected at deserialization time.
    ///
    /// NOTE(review): despite the name, this exercises corruption detection in
    /// `from_bytes`; the fallback decode path itself is covered by
    /// `test_fallback_on_corruption`.
    #[test]
    fn test_systematic_fast_path_fallback() {
        let oid = ObjectId::from_bytes([5u8; 16]);
        let oti = Oti {
            f: 256,
            al: 4,
            t: 64,
            z: 1,
            n: 1,
        };

        let rec2 = SymbolRecord::new(oid, oti, 2, vec![2u8; 64], SymbolRecordFlags::empty());
        let mut bytes = rec2.to_bytes();
        bytes[HEADER_BEFORE_DATA] ^= 0xFF; // corrupt data

        let result = SymbolRecord::from_bytes(&bytes);
        assert!(matches!(
            result.unwrap_err(),
            SymbolRecordError::IntegrityFailure { .. }
        ));
    }
1559
    /// `layout_systematic_run` must normalize a shuffled run (repair symbol
    /// interleaved, source symbols swapped) back to contiguous ESI order so
    /// the happy-path reconstruction succeeds.
    #[test]
    fn test_systematic_symbols_contiguous() {
        let oid = ObjectId::from_bytes([0x44; 16]);
        let (mut records, expected, oti) = make_symbol_run(oid, 100, 64, 8);
        // Shuffle: push a repair symbol into the source region and swap two
        // source symbols out of order.
        let repair = records
            .pop()
            .expect("repair symbol exists for interleaving simulation");
        records.insert(9, repair);
        records.swap(3, 21);

        let ordered = layout_systematic_run(records).expect("layout must normalize");
        let source_symbols = validate_systematic_run(&ordered).expect("must be contiguous");
        assert_eq!(source_symbols, 100);

        // Source prefix is ESI 0..100 in order.
        for (idx, record) in ordered.iter().take(100).enumerate() {
            let expected_esi = u32::try_from(idx).expect("idx fits u32");
            assert_eq!(record.esi, expected_esi);
        }
        assert!(
            ordered.iter().skip(100).all(|record| record.esi >= 100_u32),
            "repair symbols must follow source run"
        );
        // Run-start flag only on the first record.
        assert!(
            ordered[0]
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        );
        assert!(ordered[1..].iter().all(|record| {
            !record
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        }));

        let recovered =
            reconstruct_systematic_happy_path(&ordered).expect("happy-path reconstruction");
        assert_eq!(
            recovered.len(),
            usize::try_from(oti.f).expect("transfer length fits usize")
        );
        assert_eq!(recovered, expected);
    }
1601
    /// A clean systematic run must be read entirely on the fast path: the
    /// fallback decoder closure is never invoked.
    #[test]
    fn test_happy_path_read_no_gf256() {
        let oid = ObjectId::from_bytes([0x55; 16]);
        let (records, expected, _) = make_symbol_run(oid, 50, 64, 5);
        // Counts closure invocations; must stay zero.
        let decode_invocations = std::cell::Cell::new(0_u32);

        let (decoded, path) = recover_object_with_fallback(&records, |_| {
            decode_invocations.set(decode_invocations.get().saturating_add(1));
            Err(SystematicLayoutError::EmptySymbolSet)
        })
        .expect("happy-path should succeed");

        assert!(matches!(path, SymbolReadPath::SystematicFastPath));
        assert_eq!(
            decode_invocations.get(),
            0,
            "fallback decode must not run on systematic happy path"
        );
        assert_eq!(decoded, expected);
    }
1622
    /// Removing one source symbol must force exactly one fallback decode,
    /// with the layout error naming the missing ESI.
    #[test]
    fn test_fallback_on_missing_symbol() {
        let oid = ObjectId::from_bytes([0x66; 16]);
        let (mut records, expected, _) = make_symbol_run(oid, 50, 64, 5);
        // Drop the source symbol at ESI 5, breaking contiguity.
        records.retain(|record| record.esi != 5);

        let decode_invocations = std::cell::Cell::new(0_u32);
        let fallback_expected = expected.clone();
        let (decoded, path) = recover_object_with_fallback(&records, |_| {
            decode_invocations.set(decode_invocations.get().saturating_add(1));
            Ok(fallback_expected.clone())
        })
        .expect("fallback decode should recover object");

        assert_eq!(decode_invocations.get(), 1);
        assert_eq!(decoded, expected);
        assert!(matches!(path, SymbolReadPath::FullDecodeFallback { .. }));
        if let SymbolReadPath::FullDecodeFallback { reason } = path {
            assert!(matches!(
                reason,
                SystematicLayoutError::NonContiguousSystematicSymbol {
                    expected_esi: 5,
                    ..
                } | SystematicLayoutError::MissingSystematicSymbol { expected_esi: 5 }
            ));
        }
    }
1650
    /// Corrupting one symbol's payload must force exactly one fallback
    /// decode, with the layout error naming the corrupted ESI.
    #[test]
    fn test_fallback_on_corruption() {
        let oid = ObjectId::from_bytes([0x77; 16]);
        let (mut records, expected, _) = make_symbol_run(oid, 50, 64, 5);
        // Flip bits in the payload of the symbol at ESI 3.
        let corrupt_idx = records
            .iter()
            .position(|record| record.esi == 3)
            .expect("ESI 3 present");
        records[corrupt_idx].symbol_data[0] ^= 0xAA;

        let decode_invocations = std::cell::Cell::new(0_u32);
        let fallback_expected = expected.clone();
        let (decoded, path) = recover_object_with_fallback(&records, |_| {
            decode_invocations.set(decode_invocations.get().saturating_add(1));
            Ok(fallback_expected.clone())
        })
        .expect("fallback decode should recover corrupted symbol run");

        assert_eq!(decode_invocations.get(), 1);
        assert_eq!(decoded, expected);
        assert!(matches!(path, SymbolReadPath::FullDecodeFallback { .. }));
        if let SymbolReadPath::FullDecodeFallback { reason } = path {
            assert!(matches!(
                reason,
                SystematicLayoutError::CorruptSymbol { esi: 3 }
            ));
        }
    }
1679
1680    #[test]
1681    fn test_benchmark_happy_vs_full() {
1682        fn emulate_full_decode(records: &[SymbolRecord]) -> Vec<u8> {
1683            let first = records.first().expect("records non-empty");
1684            let source_symbols = source_symbol_count(first.oti).expect("valid K");
1685            let source_symbols_u32 = u32::try_from(source_symbols).expect("K fits u32");
1686            let symbol_size = usize::try_from(first.oti.t).expect("symbol size fits usize");
1687            let mut scratch = vec![0_u8; symbol_size];
1688            let mut out = Vec::with_capacity(source_symbols.saturating_mul(symbol_size));
1689
1690            for record in records {
1691                if record.esi < source_symbols_u32 {
1692                    out.extend_from_slice(&record.symbol_data);
1693                }
1694                let coeff = u8::try_from((record.esi % 251) + 1).expect("coeff in 1..=251");
1695                for _ in 0..24 {
1696                    for (dst, src) in scratch.iter_mut().zip(record.symbol_data.iter()) {
1697                        *dst ^= crate::gf256_mul_byte(coeff, *src);
1698                    }
1699                }
1700            }
1701
1702            let transfer_len = usize::try_from(first.oti.f).expect("transfer length fits usize");
1703            out.truncate(transfer_len);
1704            out
1705        }
1706
1707        let oid = ObjectId::from_bytes([0x88; 16]);
1708        let (records, expected, _) = make_symbol_run(oid, 100, 4096, 6);
1709        let rounds = 6_u32;
1710
1711        let fast_start = std::time::Instant::now();
1712        let mut fast_guard = 0_u8;
1713        for _ in 0..rounds {
1714            let decoded = reconstruct_systematic_happy_path(&records).expect("happy-path decode");
1715            fast_guard ^= decoded[0];
1716            assert_eq!(decoded, expected);
1717        }
1718        let fast_elapsed = fast_start.elapsed();
1719
1720        let full_start = std::time::Instant::now();
1721        let mut full_guard = 0_u8;
1722        for _ in 0..rounds {
1723            let decoded = emulate_full_decode(&records);
1724            full_guard ^= decoded[0];
1725            assert_eq!(decoded, expected);
1726        }
1727        let full_elapsed = full_start.elapsed();
1728
1729        assert_ne!(
1730            fast_guard,
1731            full_guard.wrapping_add(1),
1732            "keep optimizer honest"
1733        );
1734
1735        let fast_ns = fast_elapsed.as_nanos().max(1);
1736        let full_ns = full_elapsed.as_nanos().max(1);
1737        let speedup = full_ns as f64 / fast_ns as f64;
1738        assert!(
1739            speedup >= 10.0,
1740            "expected happy-path to be >=10x faster, got {speedup:.2}x (happy={fast_elapsed:?}, full={full_elapsed:?})"
1741        );
1742    }
1743
1744    #[test]
1745    fn test_symbol_record_version_validation() {
1746        let rec = test_record(64);
1747        let mut bytes = rec.to_bytes();
1748        bytes[4] = 99;
1749        let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
1750        assert!(matches!(err, SymbolRecordError::UnsupportedVersion(99)));
1751    }
1752
1753    #[test]
1754    fn test_symbol_record_too_short() {
1755        let err = SymbolRecord::from_bytes(&[0u8; 10]).unwrap_err();
1756        assert!(matches!(err, SymbolRecordError::TooShort { .. }));
1757    }
1758
1759    #[test]
1760    fn test_symbol_record_wire_size() {
1761        let rec = test_record(4096);
1762        assert_eq!(
1763            rec.wire_size(),
1764            HEADER_BEFORE_DATA + 4096 + TRAILER_AFTER_DATA
1765        );
1766        assert_eq!(rec.wire_size(), rec.to_bytes().len());
1767    }
1768
1769    #[test]
1770    fn test_symbol_record_verify_integrity() {
1771        let rec = test_record(128);
1772        assert!(rec.verify_integrity());
1773
1774        let mut bad = rec;
1775        bad.symbol_data[0] ^= 0x01;
1776        assert!(!bad.verify_integrity());
1777    }
1778
1779    #[test]
1780    fn test_oti_roundtrip() {
1781        let oti = Oti {
1782            f: u64::MAX,
1783            al: u16::MAX,
1784            t: u32::MAX,
1785            z: u32::MAX,
1786            n: u32::MAX,
1787        };
1788        let bytes = oti.to_bytes();
1789        assert_eq!(bytes.len(), OTI_WIRE_SIZE);
1790        let oti2 = Oti::from_bytes(&bytes).unwrap();
1791        assert_eq!(oti, oti2);
1792    }
1793
1794    #[test]
1795    fn test_oti_from_bytes_too_short() {
1796        assert!(Oti::from_bytes(&[0u8; 10]).is_none());
1797    }
1798
1799    // -----------------------------------------------------------------------
1800    // §3.6 Native Index Types tests
1801    // -----------------------------------------------------------------------
1802
1803    fn make_oid(seed: u8) -> ObjectId {
1804        ObjectId::from_bytes([seed; 16])
1805    }
1806
1807    fn make_page(n: u32) -> crate::PageNumber {
1808        crate::PageNumber::new(n).expect("non-zero")
1809    }
1810
1811    fn make_vp(seq: u64, seed: u8, kind: PatchKind) -> VersionPointer {
1812        VersionPointer {
1813            commit_seq: seq,
1814            patch_object: make_oid(seed),
1815            patch_kind: kind,
1816            base_hint: None,
1817        }
1818    }
1819
1820    #[test]
1821    fn test_version_pointer_serialization_roundtrip() {
1822        for kind in [
1823            PatchKind::FullImage,
1824            PatchKind::IntentLog,
1825            PatchKind::SparseXor,
1826        ] {
1827            let vp = VersionPointer {
1828                commit_seq: 42,
1829                patch_object: make_oid(0xAA),
1830                patch_kind: kind,
1831                base_hint: None,
1832            };
1833            let bytes = vp.to_bytes();
1834            let vp2 = VersionPointer::from_bytes(&bytes).unwrap();
1835            assert_eq!(vp, vp2);
1836
1837            let vp_with_base = VersionPointer {
1838                base_hint: Some(make_oid(0xBB)),
1839                ..vp
1840            };
1841            let bytes2 = vp_with_base.to_bytes();
1842            let vp3 = VersionPointer::from_bytes(&bytes2).unwrap();
1843            assert_eq!(vp_with_base, vp3);
1844        }
1845    }
1846
1847    #[test]
1848    fn test_page_version_index_segment_lookup() {
1849        let entries: Vec<_> = (1..=50u32)
1850            .map(|i| {
1851                let pgno = make_page(i);
1852                let seed = u8::try_from(i).expect("i <= 50");
1853                let vp = make_vp(u64::from(i) + 10, seed, PatchKind::FullImage);
1854                (pgno, vp)
1855            })
1856            .collect();
1857
1858        let seg = PageVersionIndexSegment::new(10, 60, entries);
1859
1860        let result = seg.lookup(make_page(25), 60);
1861        assert!(result.is_some());
1862        assert_eq!(result.unwrap().commit_seq, 35);
1863
1864        assert!(seg.lookup(make_page(25), 30).is_none());
1865        assert!(seg.lookup(make_page(99), 60).is_none());
1866    }
1867
1868    #[test]
1869    fn test_page_version_index_segment_lookup_picks_latest_leq_snapshot() {
1870        let page = make_page(7);
1871        let vp10 = make_vp(10, 0x10, PatchKind::FullImage);
1872        let vp15 = make_vp(15, 0x20, PatchKind::IntentLog);
1873        let vp20 = make_vp(20, 0x30, PatchKind::SparseXor);
1874        let seg =
1875            PageVersionIndexSegment::new(10, 20, vec![(page, vp10), (page, vp15), (page, vp20)]);
1876
1877        assert!(seg.lookup(page, 9).is_none());
1878        assert_eq!(seg.lookup(page, 10), Some(&vp10));
1879        assert_eq!(seg.lookup(page, 14), Some(&vp10));
1880        assert_eq!(seg.lookup(page, 15), Some(&vp15));
1881        assert_eq!(seg.lookup(page, 19), Some(&vp15));
1882        assert_eq!(seg.lookup(page, 20), Some(&vp20));
1883    }
1884
1885    #[test]
1886    fn test_page_version_index_segment_bloom_filter() {
1887        let entries: Vec<_> = (1..=100u32)
1888            .map(|i| {
1889                let seed = u8::try_from(i).expect("i <= 100");
1890                (
1891                    make_page(i),
1892                    make_vp(u64::from(i), seed, PatchKind::FullImage),
1893                )
1894            })
1895            .collect();
1896        let seg = PageVersionIndexSegment::new(1, 100, entries);
1897
1898        // Zero false negatives
1899        for i in 1..=100u32 {
1900            assert!(
1901                seg.bloom.maybe_contains(make_page(i)),
1902                "bloom must not have false negatives for page {i}"
1903            );
1904        }
1905
1906        // False positive rate check
1907        let mut false_positives = 0u32;
1908        for i in 101..=1100u32 {
1909            if seg.bloom.maybe_contains(make_page(i)) {
1910                false_positives += 1;
1911            }
1912        }
1913        let fp_rate = f64::from(false_positives) / 1000.0;
1914        assert!(fp_rate < 0.05, "bloom FP rate {fp_rate:.3} exceeds 5%");
1915    }
1916
1917    #[test]
1918    fn test_object_locator_segment_rebuild() {
1919        let pairs = vec![
1920            (make_oid(1), vec![SymbolLogOffset(0), SymbolLogOffset(100)]),
1921            (make_oid(2), vec![SymbolLogOffset(200)]),
1922            (
1923                make_oid(3),
1924                vec![SymbolLogOffset(300), SymbolLogOffset(400)],
1925            ),
1926        ];
1927        let seg = ObjectLocatorSegment::new(pairs);
1928
1929        let scan_pairs = vec![
1930            (make_oid(1), SymbolLogOffset(100)),
1931            (make_oid(3), SymbolLogOffset(300)),
1932            (make_oid(1), SymbolLogOffset(0)),
1933            (make_oid(2), SymbolLogOffset(200)),
1934            (make_oid(3), SymbolLogOffset(400)),
1935        ];
1936        let rebuilt = ObjectLocatorSegment::rebuild_from_scan(scan_pairs);
1937
1938        assert_eq!(seg.lookup(&make_oid(1)), rebuilt.lookup(&make_oid(1)));
1939        assert_eq!(seg.lookup(&make_oid(2)), rebuilt.lookup(&make_oid(2)));
1940        assert_eq!(seg.lookup(&make_oid(3)), rebuilt.lookup(&make_oid(3)));
1941        assert!(seg.lookup(&make_oid(99)).is_none());
1942    }
1943
1944    #[test]
1945    fn test_manifest_segment_bootstrap() {
1946        let seg = ManifestSegment::new(vec![
1947            (1, 100, make_oid(0x10)),
1948            (101, 200, make_oid(0x20)),
1949            (201, 300, make_oid(0x30)),
1950        ]);
1951
1952        assert_eq!(seg.lookup(50), Some(&make_oid(0x10)));
1953        assert_eq!(seg.lookup(100), Some(&make_oid(0x10)));
1954        assert_eq!(seg.lookup(101), Some(&make_oid(0x20)));
1955        assert_eq!(seg.lookup(250), Some(&make_oid(0x30)));
1956        assert_eq!(seg.lookup(300), Some(&make_oid(0x30)));
1957        assert!(seg.lookup(0).is_none());
1958        assert!(seg.lookup(301).is_none());
1959    }
1960
1961    #[test]
1962    fn test_version_pointer_references_content_addressed() {
1963        let vp = make_vp(42, 0xCC, PatchKind::FullImage);
1964        assert_eq!(vp.patch_object.as_bytes().len(), ObjectId::LEN);
1965    }
1966
1967    #[test]
1968    fn test_patch_kind_from_byte() {
1969        assert_eq!(PatchKind::from_byte(0), Some(PatchKind::FullImage));
1970        assert_eq!(PatchKind::from_byte(1), Some(PatchKind::IntentLog));
1971        assert_eq!(PatchKind::from_byte(2), Some(PatchKind::SparseXor));
1972        assert!(PatchKind::from_byte(3).is_none());
1973        assert!(PatchKind::from_byte(255).is_none());
1974    }
1975
1976    #[test]
1977    fn test_version_pointer_too_short() {
1978        assert!(VersionPointer::from_bytes(&[0u8; 10]).is_none());
1979        let vp = make_vp(1, 1, PatchKind::FullImage);
1980        let bytes = vp.to_bytes();
1981        assert_eq!(bytes.len(), VERSION_POINTER_MIN_WIRE);
1982        assert!(VersionPointer::from_bytes(&bytes).is_some());
1983    }
1984
1985    #[test]
1986    fn test_native_ecs_structures_little_endian() {
1987        let rec = test_record(64);
1988        let bytes = rec.to_bytes();
1989        assert_eq!(read_u32_le(&bytes[43..47]), Some(rec.esi));
1990        assert_eq!(read_u32_le(&bytes[47..51]), Some(rec.oti.t));
1991        let frame_offset = HEADER_BEFORE_DATA + rec.symbol_data.len() + 1;
1992        assert_eq!(
1993            read_u64_le(&bytes[frame_offset..frame_offset + 8]),
1994            Some(rec.frame_xxh3)
1995        );
1996
1997        let vp = make_vp(0x0102_0304_0506_0708, 0xAA, PatchKind::SparseXor);
1998        let vp_bytes = vp.to_bytes();
1999        assert_eq!(
2000            read_u64_le(&vp_bytes[0..8]),
2001            Some(0x0102_0304_0506_0708),
2002            "version pointer commit_seq must remain little-endian"
2003        );
2004    }
2005
2006    #[test]
2007    fn test_canonical_encoding_unique() {
2008        let rec = test_record(48);
2009        let encoded_a = rec.to_bytes();
2010        let encoded_b = rec.to_bytes();
2011        assert_eq!(
2012            encoded_a, encoded_b,
2013            "same symbol record must encode identically"
2014        );
2015
2016        let different = make_vp(2, 0x11, PatchKind::FullImage);
2017        let different_encoded = different.to_bytes();
2018        assert_ne!(
2019            encoded_a, different_encoded,
2020            "different structures must not share canonical byte encodings"
2021        );
2022    }
2023
2024    #[test]
2025    fn test_roundtrip_encode_decode() {
2026        let oti = test_oti(512);
2027        let oti_bytes = oti.to_bytes();
2028        let oti_decoded = Oti::from_bytes(&oti_bytes).expect("OTI roundtrip must succeed");
2029        assert_eq!(oti, oti_decoded);
2030
2031        let rec = test_record(128);
2032        let rec_bytes = rec.to_bytes();
2033        let rec_decoded =
2034            SymbolRecord::from_bytes(&rec_bytes).expect("symbol record roundtrip must succeed");
2035        assert_eq!(rec, rec_decoded);
2036
2037        let vp = make_vp(99, 0x55, PatchKind::IntentLog);
2038        let vp_bytes = vp.to_bytes();
2039        let vp_decoded =
2040            VersionPointer::from_bytes(&vp_bytes).expect("version pointer roundtrip must succeed");
2041        assert_eq!(vp, vp_decoded);
2042    }
2043
2044    #[test]
2045    fn test_no_adhoc_byte_shuffling() {
2046        let source = include_str!("ecs.rs");
2047        let production = source.split("\n#[cfg(test)]").next().unwrap_or(source);
2048        assert!(
2049            !production.contains("to_le_bytes("),
2050            "production ECS serialization should use canonical helpers"
2051        );
2052        assert!(
2053            !production.contains("from_le_bytes("),
2054            "production ECS decoding should use canonical helpers"
2055        );
2056        assert!(
2057            production.contains("append_u32_le")
2058                && production.contains("append_u64_le")
2059                && production.contains("read_u32_le")
2060                && production.contains("read_u64_le"),
2061            "expected canonical helper usage markers missing"
2062        );
2063    }
2064
2065    #[test]
2066    fn test_symbol_log_offset_ordering() {
2067        let a = SymbolLogOffset::new(10);
2068        let b = SymbolLogOffset::new(20);
2069        assert!(a < b);
2070        assert_eq!(a.get(), 10);
2071    }
2072}
2073
/// Property-based tests for ECS wire encodings and writer layout invariants.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    // Strategy producing Oti values: any transfer length (f) and alignment
    // (al), a bounded symbol size (t in 1..=65536), and small z/n values so
    // generated cases stay cheap.
    fn arb_oti() -> impl Strategy<Value = Oti> {
        (
            any::<u64>(),
            any::<u16>(),
            1..=65536u32,
            1..=100u32,
            1..=100u32,
        )
            .prop_map(|(f, al, t, z, n)| Oti { f, al, t, z, n })
    }

    proptest! {
        // Any symbol record whose payload length matches oti.t must survive
        // an encode/decode roundtrip exactly.
        #[test]
        fn prop_symbol_record_roundtrip(
            oti in arb_oti(),
            esi in any::<u32>(),
            data_byte in any::<u8>(),
        ) {
            let oid = ObjectId::from_bytes([7u8; 16]);
            let data = vec![data_byte; oti.t as usize];
            let rec = SymbolRecord::new(oid, oti, esi, data, SymbolRecordFlags::empty());
            let bytes = rec.to_bytes();
            let rec2 = SymbolRecord::from_bytes(&bytes).unwrap();
            prop_assert_eq!(rec, rec2);
        }

        // Regardless of input ordering, the writer's layout normalization
        // must yield a contiguous systematic prefix (ESI 0..K-1 in order)
        // with every repair symbol (ESI >= K) after it.
        #[test]
        fn test_write_produces_contiguous_layout(
            source_symbols in 1u16..=500u16,
            symbol_size in prop::sample::select(vec![64u32, 128u32, 256u32, 512u32]),
            seed in any::<u8>(),
        ) {
            let source_symbols_u32 = u32::from(source_symbols);
            let symbol_size_usize = usize::try_from(symbol_size).expect("symbol size fits usize");
            // Single source block (z=1, n=1) whose transfer length is exactly
            // K * T, so every source symbol is full-sized.
            let transfer_length = u64::from(source_symbols_u32).saturating_mul(u64::from(symbol_size));
            let oti = Oti {
                f: transfer_length,
                al: 4,
                t: symbol_size,
                z: 1,
                n: 1,
            };
            let oid = ObjectId::from_bytes([seed; 16]);

            // Build K source symbols with a deterministic per-byte pattern;
            // only ESI 0 carries the run-start flag.
            let mut records = Vec::new();
            for esi in 0..source_symbols_u32 {
                let mut payload = vec![0u8; symbol_size_usize];
                let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
                for (idx, byte) in payload.iter_mut().enumerate() {
                    let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                    *byte = idx_low ^ esi_low.wrapping_mul(5);
                }
                let flags = if esi == 0 {
                    SymbolRecordFlags::SYSTEMATIC_RUN_START
                } else {
                    SymbolRecordFlags::empty()
                };
                records.push(SymbolRecord::new(oid, oti, esi, payload, flags));
            }

            // Append a few repair symbols (ESI >= K).
            for extra in 0..3_u32 {
                let esi = source_symbols_u32.saturating_add(extra);
                records.push(SymbolRecord::new(
                    oid,
                    oti,
                    esi,
                    vec![0xEE; symbol_size_usize],
                    SymbolRecordFlags::empty(),
                ));
            }

            // Scramble the ordering with a seed-dependent rotation to prove
            // normalization does not rely on the input order.
            if records.len() > 1 {
                let rotate_by = usize::from(seed) % records.len();
                records.rotate_left(rotate_by);
            }

            let contiguous = layout_systematic_run(records).expect("writer layout normalization");
            let k = validate_systematic_run(&contiguous).expect("must validate after layout");
            prop_assert_eq!(k, usize::from(source_symbols));
            // The first K entries are exactly ESI 0..K-1, in order.
            for (idx, record) in contiguous.iter().take(usize::from(source_symbols)).enumerate() {
                let expected_esi = u32::try_from(idx).expect("idx fits u32");
                prop_assert_eq!(record.esi, expected_esi);
            }
            // Everything after the systematic prefix is a repair symbol.
            prop_assert!(
                contiguous
                    .iter()
                    .skip(usize::from(source_symbols))
                    .all(|record| record.esi >= source_symbols_u32)
            );
        }
    }
}