1use std::fmt;
10
11use crate::encoding::{append_u32_le, append_u64_le, read_u32_le, read_u64_le, write_u64_le};
12use crate::glossary::{OTI_WIRE_SIZE, Oti};
13
/// Domain separator prepended to every `ObjectId` derivation hash, so these
/// digests can never collide with other BLAKE3 uses in the system.
const ECS_OBJECT_ID_DOMAIN_SEPARATOR: &[u8] = b"fsqlite:ecs:v1";
16
/// 32-byte BLAKE3 hash of an object's payload bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[repr(transparent)]
pub struct PayloadHash([u8; 32]);
24
25impl PayloadHash {
26 #[must_use]
28 pub const fn from_bytes(bytes: [u8; 32]) -> Self {
29 Self(bytes)
30 }
31
32 #[must_use]
34 pub const fn as_bytes(&self) -> &[u8; 32] {
35 &self.0
36 }
37
38 #[must_use]
40 pub fn blake3(payload: &[u8]) -> Self {
41 let hash = blake3::hash(payload);
42 Self(*hash.as_bytes())
43 }
44}
45
/// 128-bit content-derived object identifier (truncated domain-separated
/// BLAKE3 digest; see [`ObjectId::derive`]).
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
#[repr(transparent)]
pub struct ObjectId([u8; 16]);
55
56impl ObjectId {
57 pub const LEN: usize = 16;
59
60 pub const DOMAIN_SEPARATOR: &'static [u8] = ECS_OBJECT_ID_DOMAIN_SEPARATOR;
62
63 #[must_use]
65 pub const fn from_bytes(bytes: [u8; 16]) -> Self {
66 Self(bytes)
67 }
68
69 #[must_use]
71 pub const fn as_bytes(&self) -> &[u8; 16] {
72 &self.0
73 }
74
75 #[must_use]
81 pub fn derive_from_canonical_bytes(canonical_bytes: &[u8]) -> Self {
82 let mut hasher = blake3::Hasher::new();
83 hasher.update(Self::DOMAIN_SEPARATOR);
84 hasher.update(canonical_bytes);
85 let digest = hasher.finalize();
86
87 let mut out = [0u8; Self::LEN];
88 out.copy_from_slice(&digest.as_bytes()[..Self::LEN]);
89 Self(out)
90 }
91
92 #[must_use]
94 pub fn derive(canonical_object_header: &[u8], payload_hash: PayloadHash) -> Self {
95 let mut hasher = blake3::Hasher::new();
96 hasher.update(Self::DOMAIN_SEPARATOR);
97 hasher.update(canonical_object_header);
98 hasher.update(payload_hash.as_bytes());
99 let digest = hasher.finalize();
100
101 let mut out = [0u8; Self::LEN];
102 out.copy_from_slice(&digest.as_bytes()[..Self::LEN]);
103 Self(out)
104 }
105}
106
107impl fmt::Display for ObjectId {
108 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109 for b in self.0 {
110 write!(f, "{b:02x}")?;
111 }
112 Ok(())
113 }
114}
115
116impl AsRef<[u8]> for ObjectId {
117 fn as_ref(&self) -> &[u8] {
118 &self.0
119 }
120}
121
122impl From<[u8; 16]> for ObjectId {
123 fn from(value: [u8; 16]) -> Self {
124 Self(value)
125 }
126}
127
/// Wire magic for a serialized [`SymbolRecord`]: ASCII "FSEC".
pub const SYMBOL_RECORD_MAGIC: [u8; 4] = [0x46, 0x53, 0x45, 0x43];

/// Current on-wire version of the symbol record format.
pub const SYMBOL_RECORD_VERSION: u8 = 1;

/// Domain separator mixed into the keyed-BLAKE3 auth tag computation.
const SYMBOL_AUTH_DOMAIN: &[u8] = b"fsqlite:symbol-auth:v1";
140
bitflags::bitflags! {
    /// Per-record flags carried in the symbol record trailer.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct SymbolRecordFlags: u8 {
        /// Marks the first symbol (ESI 0) of a contiguous systematic run.
        const SYSTEMATIC_RUN_START = 0x01;
    }
}
153
/// Failure modes of [`SymbolRecord::from_bytes`] and record verification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolRecordError {
    /// Input shorter than the minimum (or declared) record size.
    TooShort { expected_min: usize, actual: usize },
    /// First four bytes were not `SYMBOL_RECORD_MAGIC`.
    BadMagic([u8; 4]),
    /// Version byte other than `SYMBOL_RECORD_VERSION`.
    UnsupportedVersion(u8),
    /// Wire `symbol_size` disagrees with the OTI's symbol size `T`.
    SymbolSizeMismatch { symbol_size: u32, oti_t: u32 },
    /// `symbol_size` does not fit in `usize` on this platform.
    SymbolSizeTooLarge { symbol_size: u32 },
    /// Computed total record size overflowed `usize`.
    SizeOverflow,
    /// Stored `frame_xxh3` does not match the recomputed checksum.
    IntegrityFailure { expected: u64, computed: u64 },
    /// Auth tag did not verify (not produced by `from_bytes` itself).
    AuthTagFailure,
}
174
175impl fmt::Display for SymbolRecordError {
176 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177 match self {
178 Self::TooShort {
179 expected_min,
180 actual,
181 } => {
182 write!(
183 f,
184 "symbol record too short: need {expected_min}, got {actual}"
185 )
186 }
187 Self::BadMagic(m) => write!(f, "bad magic: {m:02x?}"),
188 Self::UnsupportedVersion(v) => write!(f, "unsupported version: {v}"),
189 Self::SymbolSizeMismatch { symbol_size, oti_t } => {
190 write!(f, "symbol_size ({symbol_size}) != OTI.T ({oti_t})")
191 }
192 Self::SymbolSizeTooLarge { symbol_size } => {
193 write!(f, "symbol_size too large for platform: {symbol_size}")
194 }
195 Self::SizeOverflow => write!(f, "symbol record size overflow"),
196 Self::IntegrityFailure { expected, computed } => {
197 write!(
198 f,
199 "frame_xxh3 mismatch: stored {expected:#018x}, computed {computed:#018x}"
200 )
201 }
202 Self::AuthTagFailure => write!(f, "auth tag verification failed"),
203 }
204 }
205}
206
207impl std::error::Error for SymbolRecordError {}
208
/// Bytes preceding `symbol_data` on the wire:
/// magic (4) + version (1) + object_id (16) + OTI (OTI_WIRE_SIZE) + esi (4) + symbol_size (4).
const HEADER_BEFORE_DATA: usize = 4 + 1 + 16 + OTI_WIRE_SIZE + 4 + 4;

/// Bytes following `symbol_data`: flags (1) + frame_xxh3 (8) + auth_tag (16).
const TRAILER_AFTER_DATA: usize = 1 + 8 + 16;
216
/// One erasure-coded symbol as stored on the wire.
///
/// Layout: header (magic, version, object_id, OTI, esi, symbol_size),
/// then `symbol_data`, then trailer (flags, frame_xxh3, auth_tag).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolRecord {
    /// Object this symbol belongs to.
    pub object_id: ObjectId,
    /// FEC object transmission information shared by all symbols of the object.
    pub oti: Oti,
    /// Encoding symbol identifier (source symbols occupy 0..K).
    pub esi: u32,
    /// Symbol payload; invariant: `symbol_data.len() == oti.t`.
    pub symbol_data: Vec<u8>,
    /// Per-record flags (see [`SymbolRecordFlags`]).
    pub flags: SymbolRecordFlags,
    /// XXH3-64 over the pre-hash region (header + data + flags).
    pub frame_xxh3: u64,
    /// Keyed-BLAKE3 tag over the same region; all-zero means "unsealed".
    pub auth_tag: [u8; 16],
}
246
impl SymbolRecord {
    /// XXH3-64 over the pre-hash bytes (everything before the `frame_xxh3`
    /// field itself).
    #[must_use]
    fn compute_frame_xxh3(pre_hash_bytes: &[u8]) -> u64 {
        xxhash_rust::xxh3::xxh3_64(pre_hash_bytes)
    }

    /// Serializes the integrity-covered prefix: magic | version | object_id |
    /// OTI | esi | symbol_size | symbol_data | flags. Both `frame_xxh3` and
    /// the auth tag are computed over exactly these bytes.
    fn pre_hash_bytes(&self) -> Vec<u8> {
        // `+ 1` accounts for the trailing flags byte.
        let mut buf = Vec::with_capacity(
            HEADER_BEFORE_DATA + self.symbol_data.len() + 1,
        );
        buf.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        buf.push(SYMBOL_RECORD_VERSION);
        buf.extend_from_slice(self.object_id.as_bytes());
        buf.extend_from_slice(&self.oti.to_bytes());
        append_u32_le(&mut buf, self.esi);
        let expected_len = usize::try_from(self.oti.t).expect("OTI.t fits in usize");
        debug_assert_eq!(
            self.symbol_data.len(),
            expected_len,
            "symbol_data length must equal OTI.t"
        );
        // The wire `symbol_size` field is always OTI.t.
        append_u32_le(&mut buf, self.oti.t);
        buf.extend_from_slice(&self.symbol_data);
        buf.push(self.flags.bits());
        buf
    }

    /// Builds a record and seals it with a freshly computed `frame_xxh3`.
    /// The auth tag is left all-zero; chain [`Self::with_auth_tag`] to seal it.
    ///
    /// # Panics
    /// Panics if `symbol_data.len() != oti.t`.
    #[must_use]
    pub fn new(
        object_id: ObjectId,
        oti: Oti,
        esi: u32,
        symbol_data: Vec<u8>,
        flags: SymbolRecordFlags,
    ) -> Self {
        let expected_len = usize::try_from(oti.t).expect("OTI.t fits in usize");
        assert_eq!(
            symbol_data.len(),
            expected_len,
            "SymbolRecord::new: symbol_data.len ({}) must equal oti.t ({})",
            symbol_data.len(),
            oti.t
        );

        // Build with placeholder checksum, then fill it from the real bytes.
        let mut rec = Self {
            object_id,
            oti,
            esi,
            symbol_data,
            flags,
            frame_xxh3: 0,
            auth_tag: [0u8; 16],
        };
        let pre_hash = rec.pre_hash_bytes();
        rec.frame_xxh3 = Self::compute_frame_xxh3(&pre_hash);
        rec
    }

    /// Returns the record with its auth tag computed from `epoch_key`.
    #[must_use]
    pub fn with_auth_tag(mut self, epoch_key: &[u8; 32]) -> Self {
        self.auth_tag = Self::compute_auth_tag(epoch_key, &self.pre_hash_bytes(), self.frame_xxh3);
        self
    }

    /// Keyed BLAKE3 over (domain | pre_hash | frame_xxh3 LE), truncated to 16 bytes.
    fn compute_auth_tag(epoch_key: &[u8; 32], pre_hash: &[u8], frame_xxh3: u64) -> [u8; 16] {
        let mut keyed_hasher = blake3::Hasher::new_keyed(epoch_key);
        keyed_hasher.update(SYMBOL_AUTH_DOMAIN);
        keyed_hasher.update(pre_hash);
        let mut frame_hash_bytes = [0u8; 8];
        write_u64_le(&mut frame_hash_bytes, frame_xxh3).expect("fixed u64 field");
        keyed_hasher.update(&frame_hash_bytes);
        let digest = keyed_hasher.finalize();
        let mut tag = [0u8; 16];
        tag.copy_from_slice(&digest.as_bytes()[..16]);
        tag
    }

    /// Serializes to the full wire form: pre-hash bytes followed by
    /// `frame_xxh3` (8 bytes LE) and the 16-byte auth tag.
    #[must_use]
    pub fn to_bytes(&self) -> Vec<u8> {
        let expected_len = usize::try_from(self.oti.t).expect("OTI.t fits in usize");
        debug_assert_eq!(
            self.symbol_data.len(),
            expected_len,
            "symbol_data length must equal OTI.t"
        );

        let total = HEADER_BEFORE_DATA + self.symbol_data.len() + TRAILER_AFTER_DATA;
        let mut buf = Vec::with_capacity(total);

        // Header.
        buf.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        buf.push(SYMBOL_RECORD_VERSION);
        buf.extend_from_slice(self.object_id.as_bytes());
        buf.extend_from_slice(&self.oti.to_bytes());
        append_u32_le(&mut buf, self.esi);
        append_u32_le(&mut buf, self.oti.t);

        // Payload.
        buf.extend_from_slice(&self.symbol_data);

        // Trailer.
        buf.push(self.flags.bits());
        append_u64_le(&mut buf, self.frame_xxh3);
        buf.extend_from_slice(&self.auth_tag);

        debug_assert_eq!(buf.len(), total);
        buf
    }

    /// Parses and integrity-checks a record from `data`.
    ///
    /// The stored `frame_xxh3` is verified against the raw input bytes; the
    /// auth tag is read but NOT verified here — callers use
    /// [`Self::verify_auth`] separately. Trailing bytes beyond the record
    /// are ignored.
    ///
    /// # Errors
    /// Any [`SymbolRecordError`] except `AuthTagFailure`.
    pub fn from_bytes(data: &[u8]) -> Result<Self, SymbolRecordError> {
        if data.len() < HEADER_BEFORE_DATA {
            return Err(SymbolRecordError::TooShort {
                expected_min: HEADER_BEFORE_DATA,
                actual: data.len(),
            });
        }

        let magic: [u8; 4] = data[0..4].try_into().expect("4 bytes");
        if magic != SYMBOL_RECORD_MAGIC {
            return Err(SymbolRecordError::BadMagic(magic));
        }

        let version = data[4];
        if version != SYMBOL_RECORD_VERSION {
            return Err(SymbolRecordError::UnsupportedVersion(version));
        }

        let object_id = ObjectId::from_bytes(data[5..21].try_into().expect("16 bytes"));

        // Fixed offsets below assume OTI_WIRE_SIZE == 22 (bytes 21..43).
        let oti =
            Oti::from_bytes(&data[21..43]).expect("already checked length >= HEADER_BEFORE_DATA");

        let esi = read_u32_le(&data[43..47]).expect("fixed u32 field");
        let symbol_size = read_u32_le(&data[47..51]).expect("fixed u32 field");

        if symbol_size != oti.t {
            return Err(SymbolRecordError::SymbolSizeMismatch {
                symbol_size,
                oti_t: oti.t,
            });
        }

        // Checked arithmetic: symbol_size comes from the (untrusted) wire.
        let symbol_size_usize = usize::try_from(symbol_size)
            .map_err(|_| SymbolRecordError::SymbolSizeTooLarge { symbol_size })?;
        let total_size = HEADER_BEFORE_DATA
            .checked_add(symbol_size_usize)
            .and_then(|v| v.checked_add(TRAILER_AFTER_DATA))
            .ok_or(SymbolRecordError::SizeOverflow)?;
        if data.len() < total_size {
            return Err(SymbolRecordError::TooShort {
                expected_min: total_size,
                actual: data.len(),
            });
        }

        let data_start = HEADER_BEFORE_DATA;
        let data_end = data_start
            .checked_add(symbol_size_usize)
            .ok_or(SymbolRecordError::SizeOverflow)?;
        let symbol_data = data[data_start..data_end].to_vec();

        // NOTE(review): unknown flag bits are silently dropped here, so a
        // re-serialized record may not be byte-identical to its input.
        let flags = SymbolRecordFlags::from_bits_truncate(data[data_end]);
        let frame_xxh3 = read_u64_le(&data[data_end + 1..data_end + 9]).expect("fixed u64 field");
        let auth_tag: [u8; 16] = data[data_end + 9..data_end + 25]
            .try_into()
            .expect("16 bytes");

        // Recompute the checksum over the raw pre-hash region (through the
        // flags byte) and compare with the stored value.
        let pre_hash_end = data_end + 1;
        let computed = Self::compute_frame_xxh3(&data[..pre_hash_end]);
        if computed != frame_xxh3 {
            return Err(SymbolRecordError::IntegrityFailure {
                expected: frame_xxh3,
                computed,
            });
        }

        Ok(Self {
            object_id,
            oti,
            esi,
            symbol_data,
            flags,
            frame_xxh3,
            auth_tag,
        })
    }

    /// Recomputes `frame_xxh3` from the current fields and compares it with
    /// the stored value.
    #[must_use]
    pub fn verify_integrity(&self) -> bool {
        let pre_hash = self.pre_hash_bytes();
        Self::compute_frame_xxh3(&pre_hash) == self.frame_xxh3
    }

    /// Verifies the keyed-BLAKE3 auth tag against `epoch_key`.
    ///
    /// NOTE(review): an all-zero tag is treated as "unsealed" and passes
    /// unconditionally — confirm callers never accept unsealed records from
    /// untrusted sources, since zeroing the tag bypasses this check. Also
    /// note the comparison is not constant-time.
    #[must_use]
    pub fn verify_auth(&self, epoch_key: &[u8; 32]) -> bool {
        if self.auth_tag == [0u8; 16] {
            return true;
        }
        let expected = Self::compute_auth_tag(epoch_key, &self.pre_hash_bytes(), self.frame_xxh3);
        self.auth_tag == expected
    }

    /// Total serialized size of this record in bytes.
    #[must_use]
    pub fn wire_size(&self) -> usize {
        HEADER_BEFORE_DATA + self.symbol_data.len() + TRAILER_AFTER_DATA
    }
}
497
/// Failure modes of systematic-run layout, validation, and reconstruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SystematicLayoutError {
    /// No records were supplied.
    EmptySymbolSet,
    /// `OTI.t == 0`, so no symbol count can be derived.
    ZeroSymbolSize,
    /// Source symbol count K does not fit in `usize`.
    SourceSymbolCountTooLarge { source_symbols: u64 },
    /// Source symbol count K does not fit in the `u32` ESI space.
    SourceSymbolCountExceedsEsiRange { source_symbols: u64 },
    /// `OTI.f` does not fit in `usize` on this platform.
    TransferLengthTooLarge { transfer_length: u64 },
    /// `K * T` overflowed `usize`.
    ReconstructedSizeOverflow {
        source_symbols: usize,
        symbol_size: usize,
    },
    /// A record's object id differs from the run's first record.
    InconsistentObjectId {
        expected: ObjectId,
        found: ObjectId,
        esi: u32,
    },
    /// A record's OTI differs from the run's first record.
    InconsistentOti { expected: Oti, found: Oti, esi: u32 },
    /// A record's payload length differs from `OTI.t`.
    InvalidSymbolPayloadSize {
        esi: u32,
        expected: usize,
        found: usize,
    },
    /// `SYSTEMATIC_RUN_START` missing on ESI 0 (or present on a later symbol).
    MissingSystematicStartFlag,
    /// A source-range ESI is absent from the run.
    MissingSystematicSymbol { expected_esi: u32 },
    /// The same source ESI appeared twice.
    DuplicateSystematicSymbol { esi: u32 },
    /// Source symbols are not in contiguous 0..K order.
    NonContiguousSystematicSymbol { expected_esi: u32, found_esi: u32 },
    /// A source-range ESI appeared after the systematic prefix.
    RepairInterleaved { index: usize, esi: u32 },
    /// `frame_xxh3` verification failed for a record.
    CorruptSymbol { esi: u32 },
}
547
// Human-readable rendering of layout/validation failures.
impl fmt::Display for SystematicLayoutError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::EmptySymbolSet => f.write_str("no symbol records provided"),
            Self::ZeroSymbolSize => f.write_str("OTI.t is zero"),
            Self::SourceSymbolCountTooLarge { source_symbols } => {
                write!(
                    f,
                    "source symbol count too large for platform: {source_symbols}"
                )
            }
            Self::SourceSymbolCountExceedsEsiRange { source_symbols } => {
                write!(
                    f,
                    "source symbol count exceeds u32 ESI range: {source_symbols}"
                )
            }
            Self::TransferLengthTooLarge { transfer_length } => {
                write!(
                    f,
                    "transfer length too large for platform: {transfer_length}"
                )
            }
            Self::ReconstructedSizeOverflow {
                source_symbols,
                symbol_size,
            } => {
                write!(
                    f,
                    "reconstructed size overflow: {source_symbols} * {symbol_size}"
                )
            }
            Self::InconsistentObjectId {
                expected,
                found,
                esi,
            } => {
                write!(
                    f,
                    "object_id mismatch at esi={esi}: expected {expected}, found {found}"
                )
            }
            Self::InconsistentOti {
                expected,
                found,
                esi,
            } => {
                write!(
                    f,
                    "OTI mismatch at esi={esi}: expected {expected:?}, found {found:?}"
                )
            }
            Self::InvalidSymbolPayloadSize {
                esi,
                expected,
                found,
            } => {
                write!(
                    f,
                    "symbol payload size mismatch at esi={esi}: expected {expected}, found {found}"
                )
            }
            Self::MissingSystematicStartFlag => {
                f.write_str("missing SYSTEMATIC_RUN_START flag on ESI 0")
            }
            Self::MissingSystematicSymbol { expected_esi } => {
                write!(f, "missing systematic symbol esi={expected_esi}")
            }
            Self::DuplicateSystematicSymbol { esi } => {
                write!(f, "duplicate systematic symbol esi={esi}")
            }
            Self::NonContiguousSystematicSymbol {
                expected_esi,
                found_esi,
            } => {
                write!(
                    f,
                    "non-contiguous systematic run: expected esi={expected_esi}, found esi={found_esi}"
                )
            }
            Self::RepairInterleaved { index, esi } => {
                write!(
                    f,
                    "repair/source interleave at index={index}: encountered source esi={esi} after systematic run"
                )
            }
            Self::CorruptSymbol { esi } => write!(f, "integrity check failed for esi={esi}"),
        }
    }
}

impl std::error::Error for SystematicLayoutError {}
640
641fn source_symbol_count_u64(oti: Oti) -> Result<u64, SystematicLayoutError> {
642 if oti.t == 0 {
643 return Err(SystematicLayoutError::ZeroSymbolSize);
644 }
645 if oti.f == 0 {
646 return Ok(0);
647 }
648 Ok(oti.f.div_ceil(u64::from(oti.t)))
649}
650
651fn validate_record_shape(
652 record: &SymbolRecord,
653 object_id: ObjectId,
654 oti: Oti,
655 symbol_size: usize,
656) -> Result<(), SystematicLayoutError> {
657 if record.object_id != object_id {
658 return Err(SystematicLayoutError::InconsistentObjectId {
659 expected: object_id,
660 found: record.object_id,
661 esi: record.esi,
662 });
663 }
664 if record.oti != oti {
665 return Err(SystematicLayoutError::InconsistentOti {
666 expected: oti,
667 found: record.oti,
668 esi: record.esi,
669 });
670 }
671 if record.symbol_data.len() != symbol_size {
672 return Err(SystematicLayoutError::InvalidSymbolPayloadSize {
673 esi: record.esi,
674 expected: symbol_size,
675 found: record.symbol_data.len(),
676 });
677 }
678 if !record.verify_integrity() {
679 return Err(SystematicLayoutError::CorruptSymbol { esi: record.esi });
680 }
681 Ok(())
682}
683
684pub fn source_symbol_count(oti: Oti) -> Result<usize, SystematicLayoutError> {
686 let source_symbols = source_symbol_count_u64(oti)?;
687 usize::try_from(source_symbols)
688 .map_err(|_| SystematicLayoutError::SourceSymbolCountTooLarge { source_symbols })
689}
690
691pub fn layout_systematic_run(
698 records: Vec<SymbolRecord>,
699) -> Result<Vec<SymbolRecord>, SystematicLayoutError> {
700 let first = records
701 .first()
702 .ok_or(SystematicLayoutError::EmptySymbolSet)?
703 .clone();
704 let source_symbols = source_symbol_count(first.oti)?;
705 let source_symbols_u64 = source_symbol_count_u64(first.oti)?;
706 let source_symbols_u32 = u32::try_from(source_symbols_u64).map_err(|_| {
707 SystematicLayoutError::SourceSymbolCountExceedsEsiRange {
708 source_symbols: source_symbols_u64,
709 }
710 })?;
711 let symbol_size =
712 usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;
713
714 let mut systematic = vec![None; source_symbols];
715 let mut repairs = Vec::new();
716
717 for mut record in records {
718 validate_record_shape(&record, first.object_id, first.oti, symbol_size)?;
719 record.flags.remove(SymbolRecordFlags::SYSTEMATIC_RUN_START);
720 if record.esi < source_symbols_u32 {
721 let idx = usize::try_from(record.esi).expect("ESI < K fits usize");
722 if systematic[idx].is_some() {
723 return Err(SystematicLayoutError::DuplicateSystematicSymbol { esi: record.esi });
724 }
725 systematic[idx] = Some(record);
726 } else {
727 repairs.push(record);
728 }
729 }
730
731 let mut ordered = Vec::with_capacity(systematic.len().saturating_add(repairs.len()));
732 for (idx, maybe_record) in systematic.into_iter().enumerate() {
733 let mut record =
734 maybe_record.ok_or_else(|| SystematicLayoutError::MissingSystematicSymbol {
735 expected_esi: u32::try_from(idx).expect("idx fits u32"),
736 })?;
737 if idx == 0 {
738 record.flags.insert(SymbolRecordFlags::SYSTEMATIC_RUN_START);
739 }
740 ordered.push(record);
741 }
742
743 repairs.sort_by_key(|record| record.esi);
744 ordered.extend(repairs);
745
746 Ok(ordered)
747}
748
749pub fn validate_systematic_run(records: &[SymbolRecord]) -> Result<usize, SystematicLayoutError> {
753 let first = records
754 .first()
755 .ok_or(SystematicLayoutError::EmptySymbolSet)?;
756 let source_symbols = source_symbol_count(first.oti)?;
757 let source_symbols_u64 = source_symbol_count_u64(first.oti)?;
758 let source_symbols_u32 = u32::try_from(source_symbols_u64).map_err(|_| {
759 SystematicLayoutError::SourceSymbolCountExceedsEsiRange {
760 source_symbols: source_symbols_u64,
761 }
762 })?;
763 let symbol_size =
764 usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;
765
766 if source_symbols == 0 {
767 return Ok(0);
768 }
769
770 for expected_idx in 0..source_symbols {
771 let record = records.get(expected_idx).ok_or_else(|| {
772 SystematicLayoutError::MissingSystematicSymbol {
773 expected_esi: u32::try_from(expected_idx).expect("idx fits u32"),
774 }
775 })?;
776 validate_record_shape(record, first.object_id, first.oti, symbol_size)?;
777
778 let expected_esi = u32::try_from(expected_idx).expect("idx fits u32");
779 if record.esi != expected_esi {
780 return Err(SystematicLayoutError::NonContiguousSystematicSymbol {
781 expected_esi,
782 found_esi: record.esi,
783 });
784 }
785
786 if expected_idx == 0 {
787 if !record
788 .flags
789 .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
790 {
791 return Err(SystematicLayoutError::MissingSystematicStartFlag);
792 }
793 } else if record
794 .flags
795 .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
796 {
797 return Err(SystematicLayoutError::MissingSystematicStartFlag);
798 }
799 }
800
801 for (index, record) in records.iter().enumerate().skip(source_symbols) {
802 validate_record_shape(record, first.object_id, first.oti, symbol_size)?;
803 if record.esi < source_symbols_u32 {
804 return Err(SystematicLayoutError::RepairInterleaved {
805 index,
806 esi: record.esi,
807 });
808 }
809 }
810
811 Ok(source_symbols)
812}
813
814pub fn reconstruct_systematic_happy_path(
818 records: &[SymbolRecord],
819) -> Result<Vec<u8>, SystematicLayoutError> {
820 let source_symbols = validate_systematic_run(records)?;
821 if source_symbols == 0 {
822 return Ok(Vec::new());
823 }
824
825 let first = &records[0];
826 let symbol_size =
827 usize::try_from(first.oti.t).map_err(|_| SystematicLayoutError::ZeroSymbolSize)?;
828 let transfer_length = usize::try_from(first.oti.f).map_err(|_| {
829 SystematicLayoutError::TransferLengthTooLarge {
830 transfer_length: first.oti.f,
831 }
832 })?;
833 let total_len = source_symbols.checked_mul(symbol_size).ok_or(
834 SystematicLayoutError::ReconstructedSizeOverflow {
835 source_symbols,
836 symbol_size,
837 },
838 )?;
839
840 let mut out = Vec::with_capacity(total_len);
841 for record in records.iter().take(source_symbols) {
842 out.extend_from_slice(&record.symbol_data);
843 }
844 out.truncate(transfer_length);
845 Ok(out)
846}
847
/// Which decode path produced a recovered object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolReadPath {
    /// Direct concatenation of an intact systematic run.
    SystematicFastPath,
    /// Full FEC decode was required; `reason` is why the fast path failed.
    FullDecodeFallback { reason: SystematicLayoutError },
}
856
857pub fn recover_object_with_fallback<F>(
859 records: &[SymbolRecord],
860 mut fallback_decode: F,
861) -> Result<(Vec<u8>, SymbolReadPath), SystematicLayoutError>
862where
863 F: FnMut(&[SymbolRecord]) -> Result<Vec<u8>, SystematicLayoutError>,
864{
865 match reconstruct_systematic_happy_path(records) {
866 Ok(bytes) => Ok((bytes, SymbolReadPath::SystematicFastPath)),
867 Err(reason) => {
868 let decoded = fallback_decode(records)?;
869 Ok((decoded, SymbolReadPath::FullDecodeFallback { reason }))
870 }
871 }
872}
873
/// How a version's patch object encodes the page change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum PatchKind {
    /// Complete page image.
    FullImage = 0,
    /// Logical intent-log entry.
    IntentLog = 1,
    /// Sparse XOR delta against a base image.
    SparseXor = 2,
}

impl PatchKind {
    /// Decodes a wire byte; `None` for unknown values.
    #[must_use]
    pub fn from_byte(b: u8) -> Option<Self> {
        const KINDS: [PatchKind; 3] = [
            PatchKind::FullImage,
            PatchKind::IntentLog,
            PatchKind::SparseXor,
        ];
        KINDS.get(usize::from(b)).copied()
    }
}
902
/// Points from a (page, commit) to the patch object holding its data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VersionPointer {
    /// Commit sequence number that produced this version.
    pub commit_seq: u64,
    /// Object containing the patch payload.
    pub patch_object: ObjectId,
    /// Encoding of the patch payload.
    pub patch_kind: PatchKind,
    /// Optional base object the patch applies against.
    pub base_hint: Option<ObjectId>,
}

/// Minimum serialized size: commit_seq (8) + patch_object (16) + kind (1)
/// + has_base flag (1). A present base hint adds 16 more bytes.
const VERSION_POINTER_MIN_WIRE: usize = 8 + 16 + 1 + 1;
922
923impl VersionPointer {
924 #[must_use]
926 pub fn to_bytes(&self) -> Vec<u8> {
927 let has_base: u8 = u8::from(self.base_hint.is_some());
928 let cap = VERSION_POINTER_MIN_WIRE + if has_base == 1 { 16 } else { 0 };
929 let mut buf = Vec::with_capacity(cap);
930 append_u64_le(&mut buf, self.commit_seq);
931 buf.extend_from_slice(self.patch_object.as_bytes());
932 buf.push(self.patch_kind as u8);
933 buf.push(has_base);
934 if let Some(base) = self.base_hint {
935 buf.extend_from_slice(base.as_bytes());
936 }
937 buf
938 }
939
940 #[must_use]
942 pub fn from_bytes(data: &[u8]) -> Option<Self> {
943 if data.len() < VERSION_POINTER_MIN_WIRE {
944 return None;
945 }
946 let commit_seq = read_u64_le(&data[0..8])?;
947 let patch_object = ObjectId::from_bytes(data[8..24].try_into().ok()?);
948 let patch_kind = PatchKind::from_byte(data[24])?;
949 let has_base = data[25];
950 let base_hint = if has_base != 0 {
951 if data.len() < VERSION_POINTER_MIN_WIRE + 16 {
952 return None;
953 }
954 Some(ObjectId::from_bytes(data[26..42].try_into().ok()?))
955 } else {
956 None
957 };
958 Some(Self {
959 commit_seq,
960 patch_object,
961 patch_kind,
962 base_hint,
963 })
964 }
965}
966
/// Byte offset of a symbol record within the symbol log.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolLogOffset(pub u64);

impl SymbolLogOffset {
    /// Wraps a raw byte offset.
    #[must_use]
    pub const fn new(raw: u64) -> Self {
        Self(raw)
    }

    /// Unwraps to the raw `u64` offset.
    #[must_use]
    pub const fn get(self) -> u64 {
        self.0
    }
}
982
/// Fixed-size bloom filter over page numbers (double hashing, no deletion).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BloomFilter {
    // Bit array packed into 64-bit words.
    bits: Vec<u64>,
    // Total number of addressable bits (m).
    num_bits: u32,
    // Number of probes per element (k).
    num_hashes: u8,
}
993
994impl BloomFilter {
995 #[must_use]
1000 pub fn new(expected_items: u32, false_positive_rate: f64) -> Self {
1001 let n = f64::from(expected_items).max(1.0);
1002 let p = false_positive_rate.clamp(1e-10, 0.5);
1003
1004 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
1005 let m = ((-n * p.ln()) / (core::f64::consts::LN_2.powi(2))).ceil() as u32;
1006 let m = m.max(64); #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
1009 let k = ((f64::from(m) / n) * core::f64::consts::LN_2).ceil() as u8;
1010 let k = k.clamp(1, 16);
1011
1012 let words = usize::try_from(m.div_ceil(64)).expect("BloomFilter word count fits usize");
1013 Self {
1014 bits: vec![0u64; words],
1015 num_bits: m,
1016 num_hashes: k,
1017 }
1018 }
1019
1020 pub fn insert(&mut self, page: crate::PageNumber) {
1022 let raw = page.get();
1023 let (h1, h2) = Self::double_hash(raw);
1024 for i in 0..u32::from(self.num_hashes) {
1025 let pos = (h1.wrapping_add(i.wrapping_mul(h2))) % self.num_bits;
1026 let word = (pos / 64) as usize;
1027 let bit = pos % 64;
1028 self.bits[word] |= 1u64 << bit;
1029 }
1030 }
1031
1032 #[must_use]
1037 pub fn maybe_contains(&self, page: crate::PageNumber) -> bool {
1038 let raw = page.get();
1039 let (h1, h2) = Self::double_hash(raw);
1040 for i in 0..u32::from(self.num_hashes) {
1041 let pos = (h1.wrapping_add(i.wrapping_mul(h2))) % self.num_bits;
1042 let word = (pos / 64) as usize;
1043 let bit = pos % 64;
1044 if self.bits[word] & (1u64 << bit) == 0 {
1045 return false;
1046 }
1047 }
1048 true
1049 }
1050
1051 fn double_hash(page_raw: u32) -> (u32, u32) {
1052 let mut bytes = [0u8; 4];
1053 crate::encoding::write_u32_le(&mut bytes, page_raw).expect("fixed u32 field");
1054 let h1 = xxhash_rust::xxh3::xxh3_64(&bytes);
1055 let mut h2 = {
1056 let digest = blake3::hash(&bytes);
1057 let b = digest.as_bytes();
1058 read_u32_le(&b[..4]).expect("blake3 digest prefix is 4 bytes")
1059 };
1060 if h2 == 0 {
1062 h2 = 1;
1063 }
1064 #[allow(clippy::cast_possible_truncation)]
1065 let h1_trunc = h1 as u32;
1066 (h1_trunc, h2)
1067 }
1068}
1069
/// Immutable index segment mapping pages to version pointers for the commit
/// range `[start_seq, end_seq]`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageVersionIndexSegment {
    /// First commit sequence covered by this segment.
    pub start_seq: u64,
    /// Last commit sequence covered by this segment.
    pub end_seq: u64,
    /// Entries sorted by (page number, commit_seq); see `new`.
    pub entries: Vec<(crate::PageNumber, VersionPointer)>,
    /// Bloom filter over the page numbers present in `entries`.
    pub bloom: BloomFilter,
}
1089
impl PageVersionIndexSegment {
    /// Builds a segment covering commits `[start_seq, end_seq]`.
    ///
    /// Entries are sorted by (page number, commit_seq) so `lookup` can
    /// binary-search, and every page is registered in the bloom filter
    /// (sized for the entry count at a 1% false-positive target).
    #[must_use]
    pub fn new(
        start_seq: u64,
        end_seq: u64,
        mut entries: Vec<(crate::PageNumber, VersionPointer)>,
    ) -> Self {
        entries.sort_by_key(|(pgno, vp)| (pgno.get(), vp.commit_seq));

        #[allow(clippy::cast_possible_truncation)]
        let count = entries.len() as u32;
        let mut bloom = BloomFilter::new(count.max(1), 0.01);
        for &(pgno, _) in &entries {
            bloom.insert(pgno);
        }

        Self {
            start_seq,
            end_seq,
            entries,
            bloom,
        }
    }

    /// Returns the newest version of `page` with `commit_seq <= snapshot_high`,
    /// or `None` if no version of the page is visible in this segment.
    ///
    /// NOTE(review): `start_seq`/`end_seq` are not consulted here — the caller
    /// presumably selects the right segment first; confirm against call sites.
    #[must_use]
    pub fn lookup(&self, page: crate::PageNumber, snapshot_high: u64) -> Option<&VersionPointer> {
        // Bloom filter gives a cheap definite-miss fast path.
        if !self.bloom.maybe_contains(page) {
            return None;
        }

        let page_raw = page.get();
        // [start, end) is the contiguous run of entries for exactly this page.
        let start = self
            .entries
            .partition_point(|(pgno, _)| pgno.get() < page_raw);
        let end = self
            .entries
            .partition_point(|(pgno, _)| pgno.get() <= page_raw);
        let slice = self.entries.get(start..end)?;
        if slice.is_empty() {
            return None;
        }

        // Within the run, entries are ordered by commit_seq; take the last
        // one at or below the snapshot horizon.
        let idx = slice.partition_point(|(_, vp)| vp.commit_seq <= snapshot_high);
        if idx == 0 {
            None
        } else {
            Some(&slice[idx - 1].1)
        }
    }
}
1148
/// Immutable mapping from object id to the symbol-log offsets of its symbols.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectLocatorSegment {
    /// Entries sorted by object id bytes; one entry per distinct id.
    pub entries: Vec<(ObjectId, Vec<SymbolLogOffset>)>,
}
1158
1159impl ObjectLocatorSegment {
1160 #[must_use]
1163 pub fn new(mut entries: Vec<(ObjectId, Vec<SymbolLogOffset>)>) -> Self {
1164 entries.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
1165 Self { entries }
1166 }
1167
1168 #[must_use]
1170 pub fn lookup(&self, id: &ObjectId) -> Option<&[SymbolLogOffset]> {
1171 self.entries
1172 .binary_search_by(|(oid, _)| oid.as_bytes().cmp(id.as_bytes()))
1173 .ok()
1174 .map(|idx| self.entries[idx].1.as_slice())
1175 }
1176
1177 #[must_use]
1180 pub fn rebuild_from_scan(pairs: impl IntoIterator<Item = (ObjectId, SymbolLogOffset)>) -> Self {
1181 let mut map: std::collections::BTreeMap<[u8; 16], Vec<SymbolLogOffset>> =
1182 std::collections::BTreeMap::new();
1183 for (oid, offset) in pairs {
1184 map.entry(*oid.as_bytes()).or_default().push(offset);
1185 }
1186 let entries: Vec<_> = map
1187 .into_iter()
1188 .map(|(bytes, mut offsets)| {
1189 offsets.sort();
1190 (ObjectId::from_bytes(bytes), offsets)
1191 })
1192 .collect();
1193 Self { entries }
1194 }
1195}
1196
/// Maps inclusive commit-sequence ranges to the object describing them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ManifestSegment {
    /// `(start_seq, end_seq, object)` triples, sorted by `start_seq`.
    pub entries: Vec<(u64, u64, ObjectId)>,
}
1206
1207impl ManifestSegment {
1208 #[must_use]
1210 pub fn new(mut entries: Vec<(u64, u64, ObjectId)>) -> Self {
1211 entries.sort_by_key(|&(start, _, _)| start);
1212 Self { entries }
1213 }
1214
1215 #[must_use]
1217 pub fn lookup(&self, commit_seq: u64) -> Option<&ObjectId> {
1218 let idx = self
1220 .entries
1221 .partition_point(|&(start, _, _)| start <= commit_seq);
1222 if idx == 0 {
1223 return None;
1224 }
1225 let (start, end, ref oid) = self.entries[idx - 1];
1226 if commit_seq >= start && commit_seq <= end {
1227 Some(oid)
1228 } else {
1229 None
1230 }
1231 }
1232}
1233
1234#[cfg(test)]
1235mod tests {
1236 use super::*;
1237 use crate::encoding::{read_u32_le, read_u64_le};
1238
1239 #[test]
1240 fn test_object_id_blake3_derivation() {
1241 let header = b"hdr:v1\x00";
1242 let payload = b"hello world";
1243 let payload_hash = PayloadHash::blake3(payload);
1244
1245 let derived = ObjectId::derive(header, payload_hash);
1246
1247 let mut canonical = Vec::new();
1248 canonical.extend_from_slice(header);
1249 canonical.extend_from_slice(payload_hash.as_bytes());
1250 let derived2 = ObjectId::derive_from_canonical_bytes(&canonical);
1251
1252 assert_eq!(derived, derived2);
1253
1254 let mut hasher = blake3::Hasher::new();
1255 hasher.update(ObjectId::DOMAIN_SEPARATOR);
1256 hasher.update(&canonical);
1257 let digest = hasher.finalize();
1258 let mut expected = [0u8; 16];
1259 expected.copy_from_slice(&digest.as_bytes()[..16]);
1260
1261 assert_eq!(derived.as_bytes(), &expected);
1262 }
1263
1264 #[test]
1265 fn test_object_id_collision_resistance() {
1266 let header = b"hdr:v1\x00";
1267 let payload_a = b"payload-a";
1268 let payload_b = b"payload-b";
1269 let id_a = ObjectId::derive(header, PayloadHash::blake3(payload_a));
1270 let id_b = ObjectId::derive(header, PayloadHash::blake3(payload_b));
1271 assert_ne!(id_a, id_b);
1272 }
1273
1274 #[test]
1275 fn test_object_id_deterministic() {
1276 let header = b"hdr:v1\x00";
1277 let payload = b"payload";
1278 let hash = PayloadHash::blake3(payload);
1279 let id1 = ObjectId::derive(header, hash);
1280 let id2 = ObjectId::derive(header, hash);
1281 assert_eq!(id1, id2);
1282 }
1283
1284 #[test]
1285 fn test_object_id_display_hex() {
1286 let id = ObjectId::from_bytes([0u8; 16]);
1287 let s = id.to_string();
1288 assert_eq!(s.len(), 32);
1289 assert!(s.chars().all(|ch| matches!(ch, '0'..='9' | 'a'..='f')));
1290
1291 assert_eq!(s, "00000000000000000000000000000000");
1293 }
1294
1295 fn test_oti(symbol_size: u32) -> Oti {
1300 Oti {
1301 f: 16384,
1302 al: 4,
1303 t: symbol_size,
1304 z: 1,
1305 n: 1,
1306 }
1307 }
1308
1309 fn test_record(symbol_size: u32) -> SymbolRecord {
1310 let data = vec![0xAB; symbol_size as usize];
1311 let oid = ObjectId::from_bytes([1u8; 16]);
1312 SymbolRecord::new(
1313 oid,
1314 test_oti(symbol_size),
1315 0,
1316 data,
1317 SymbolRecordFlags::empty(),
1318 )
1319 }
1320
    /// Builds a full systematic run for tests: `source_symbols` deterministic
    /// source payloads (ESI 0..K) followed by `repair_symbols` synthetic
    /// repair payloads (ESI K..K+R). Returns the records, the expected
    /// reconstructed object bytes, and the shared OTI.
    fn make_symbol_run(
        object_id: ObjectId,
        source_symbols: u32,
        symbol_size: u32,
        repair_symbols: u32,
    ) -> (Vec<SymbolRecord>, Vec<u8>, Oti) {
        let symbol_size_usize = usize::try_from(symbol_size).expect("symbol_size fits usize");
        // Exact transfer length K * T: no partial final symbol in this fixture.
        let transfer_length = u64::from(source_symbols).saturating_mul(u64::from(symbol_size));
        let oti = Oti {
            f: transfer_length,
            al: 4,
            t: symbol_size,
            z: 1,
            n: 1,
        };
        let mut records = Vec::new();
        let mut expected = Vec::new();

        for esi in 0..source_symbols {
            // Payload bytes depend on both esi and position so symbols differ.
            let mut payload = Vec::with_capacity(symbol_size_usize);
            for idx in 0..symbol_size_usize {
                let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
                payload.push(esi_low ^ idx_low.wrapping_mul(3));
            }
            expected.extend_from_slice(&payload);
            // Only ESI 0 carries the run-start flag.
            let flags = if esi == 0 {
                SymbolRecordFlags::SYSTEMATIC_RUN_START
            } else {
                SymbolRecordFlags::empty()
            };
            records.push(SymbolRecord::new(object_id, oti, esi, payload, flags));
        }

        for repair in 0..repair_symbols {
            let esi = source_symbols.saturating_add(repair);
            // Synthetic repair payload (not a real FEC encoding).
            let mut payload = vec![0u8; symbol_size_usize];
            let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
            for (idx, byte) in payload.iter_mut().enumerate() {
                let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                *byte = esi_low.wrapping_mul(13) ^ idx_low;
            }
            records.push(SymbolRecord::new(
                object_id,
                oti,
                esi,
                payload,
                SymbolRecordFlags::empty(),
            ));
        }

        (records, expected, oti)
    }
1374
1375 #[test]
1380 fn test_symbol_record_serialize_deserialize() {
1381 let rec = test_record(4096);
1382 let bytes = rec.to_bytes();
1383 let rec2 = SymbolRecord::from_bytes(&bytes).expect("roundtrip");
1384 assert_eq!(rec, rec2);
1385 }
1386
1387 #[test]
1388 fn test_symbol_record_magic_validation() {
1389 let rec = test_record(64);
1390 let mut bytes = rec.to_bytes();
1391 bytes[0] = 0xFF;
1392 let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
1393 assert!(matches!(err, SymbolRecordError::BadMagic(_)));
1394 }
1395
1396 #[test]
1397 fn test_symbol_record_frame_xxh3_integrity() {
1398 let rec = test_record(128);
1399 let mut bytes = rec.to_bytes();
1400 bytes[HEADER_BEFORE_DATA] ^= 0x01;
1402 let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
1403 assert!(matches!(err, SymbolRecordError::IntegrityFailure { .. }));
1404 }
1405
    #[test]
    fn test_symbol_record_invariant_symbol_size_eq_oti_t() {
        // Hand-assembles a frame whose declared symbol_size (200) disagrees
        // with the OTI's `t` field (100); decoding must report the mismatch
        // rather than trust either value.
        let oid = ObjectId::from_bytes([2u8; 16]);
        let oti = test_oti(100);
        let mut bytes = Vec::new();
        // Header: magic, version, object id, OTI.
        bytes.extend_from_slice(&SYMBOL_RECORD_MAGIC);
        bytes.push(SYMBOL_RECORD_VERSION);
        bytes.extend_from_slice(oid.as_bytes());
        bytes.extend_from_slice(&oti.to_bytes());
        // ESI 0, symbol_size 200 (deliberately != oti.t), then 200 data bytes.
        bytes.extend_from_slice(&0u32.to_le_bytes()); bytes.extend_from_slice(&200u32.to_le_bytes()); bytes.extend_from_slice(&[0u8; 200]);
        // Flags byte, then the frame hash over everything written so far.
        bytes.push(0); let hash = xxhash_rust::xxh3::xxh3_64(&bytes);
        bytes.extend_from_slice(&hash.to_le_bytes());
        // Zeroed auth-tag trailer (auth disabled).
        bytes.extend_from_slice(&[0u8; 16]);

        let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
        assert!(matches!(
            err,
            SymbolRecordError::SymbolSizeMismatch {
                symbol_size: 200,
                oti_t: 100
            }
        ));
    }
1433
1434 #[test]
1435 fn test_symbol_record_auth_tag_verification() {
1436 let epoch_key = [0x42u8; 32];
1437 let rec = test_record(64).with_auth_tag(&epoch_key);
1438 assert_ne!(rec.auth_tag, [0u8; 16]);
1439 assert!(rec.verify_auth(&epoch_key));
1440
1441 let mut tampered = rec;
1443 tampered.symbol_data[0] ^= 0x01;
1444 let pre_hash = tampered.pre_hash_bytes();
1445 tampered.frame_xxh3 = xxhash_rust::xxh3::xxh3_64(&pre_hash);
1446 assert!(!tampered.verify_auth(&epoch_key));
1447 }
1448
1449 #[test]
1450 fn test_symbol_record_auth_tag_ignored_when_off() {
1451 let rec = test_record(64);
1452 assert_eq!(rec.auth_tag, [0u8; 16]);
1453 let any_key = [0xFFu8; 32];
1454 assert!(rec.verify_auth(&any_key));
1455 }
1456
1457 #[test]
1458 fn test_symbol_record_systematic_flag() {
1459 let oid = ObjectId::from_bytes([3u8; 16]);
1460 let rec = SymbolRecord::new(
1461 oid,
1462 test_oti(64),
1463 0,
1464 vec![0u8; 64],
1465 SymbolRecordFlags::SYSTEMATIC_RUN_START,
1466 );
1467 assert!(rec.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
1468 assert_eq!(rec.esi, 0);
1469
1470 let bytes = rec.to_bytes();
1471 let rec2 = SymbolRecord::from_bytes(&bytes).unwrap();
1472 assert!(rec2.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
1473 }
1474
1475 #[test]
1476 fn test_oti_field_widths() {
1477 let oti = Oti {
1478 f: 1_000_000,
1479 al: 4,
1480 t: 65536,
1481 z: 10,
1482 n: 1,
1483 };
1484 let bytes = oti.to_bytes();
1485 let oti2 = Oti::from_bytes(&bytes).unwrap();
1486 assert_eq!(oti, oti2);
1487 assert_eq!(oti2.t, 65536);
1488 }
1489
1490 #[test]
1491 fn test_systematic_fast_path_happy() {
1492 let oid = ObjectId::from_bytes([4u8; 16]);
1493 let oti = Oti {
1494 f: 256,
1495 al: 4,
1496 t: 64,
1497 z: 1,
1498 n: 1,
1499 };
1500
1501 let records: Vec<_> = (0u32..4)
1502 .map(|i| {
1503 let flags = if i == 0 {
1504 SymbolRecordFlags::SYSTEMATIC_RUN_START
1505 } else {
1506 SymbolRecordFlags::empty()
1507 };
1508 let fill = u8::try_from(i).expect("i < 4");
1509 SymbolRecord::new(oid, oti, i, vec![fill; 64], flags)
1510 })
1511 .collect();
1512
1513 assert!(
1514 records[0]
1515 .flags
1516 .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
1517 );
1518 for rec in &records[1..] {
1519 assert!(!rec.flags.contains(SymbolRecordFlags::SYSTEMATIC_RUN_START));
1520 }
1521
1522 let mut reconstructed = Vec::new();
1524 for rec in &records {
1525 assert!(rec.verify_integrity());
1526 reconstructed.extend_from_slice(&rec.symbol_data);
1527 }
1528 let f = usize::try_from(oti.f).expect("OTI transfer length fits in usize");
1529 reconstructed.truncate(f);
1530 assert_eq!(reconstructed.len(), 256);
1531
1532 for (i, chunk) in reconstructed.chunks(64).enumerate() {
1533 let expected = u8::try_from(i).expect("i < 4");
1534 assert!(chunk.iter().all(|&b| b == expected));
1535 }
1536 }
1537
1538 #[test]
1539 fn test_systematic_fast_path_fallback() {
1540 let oid = ObjectId::from_bytes([5u8; 16]);
1541 let oti = Oti {
1542 f: 256,
1543 al: 4,
1544 t: 64,
1545 z: 1,
1546 n: 1,
1547 };
1548
1549 let rec2 = SymbolRecord::new(oid, oti, 2, vec![2u8; 64], SymbolRecordFlags::empty());
1550 let mut bytes = rec2.to_bytes();
1551 bytes[HEADER_BEFORE_DATA] ^= 0xFF; let result = SymbolRecord::from_bytes(&bytes);
1554 assert!(matches!(
1555 result.unwrap_err(),
1556 SymbolRecordError::IntegrityFailure { .. }
1557 ));
1558 }
1559
    #[test]
    fn test_systematic_symbols_contiguous() {
        let oid = ObjectId::from_bytes([0x44; 16]);
        // 100 source + 8 repair symbols; then simulate out-of-order arrival.
        let (mut records, expected, oti) = make_symbol_run(oid, 100, 64, 8);
        let repair = records
            .pop()
            .expect("repair symbol exists for interleaving simulation");
        // Interleave a repair symbol into the source run and swap two sources.
        records.insert(9, repair);
        records.swap(3, 21);

        // Layout must restore ESI order; validation confirms contiguity.
        let ordered = layout_systematic_run(records).expect("layout must normalize");
        let source_symbols = validate_systematic_run(&ordered).expect("must be contiguous");
        assert_eq!(source_symbols, 100);

        // Source symbols occupy positions 0..100 with ESI == position.
        for (idx, record) in ordered.iter().take(100).enumerate() {
            let expected_esi = u32::try_from(idx).expect("idx fits u32");
            assert_eq!(record.esi, expected_esi);
        }
        assert!(
            ordered.iter().skip(100).all(|record| record.esi >= 100_u32),
            "repair symbols must follow source run"
        );
        // The run-start flag appears exactly once, on the first record.
        assert!(
            ordered[0]
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        );
        assert!(ordered[1..].iter().all(|record| {
            !record
                .flags
                .contains(SymbolRecordFlags::SYSTEMATIC_RUN_START)
        }));

        // The normalized run reconstructs the full object on the fast path.
        let recovered =
            reconstruct_systematic_happy_path(&ordered).expect("happy-path reconstruction");
        assert_eq!(
            recovered.len(),
            usize::try_from(oti.f).expect("transfer length fits usize")
        );
        assert_eq!(recovered, expected);
    }
1601
1602 #[test]
1603 fn test_happy_path_read_no_gf256() {
1604 let oid = ObjectId::from_bytes([0x55; 16]);
1605 let (records, expected, _) = make_symbol_run(oid, 50, 64, 5);
1606 let decode_invocations = std::cell::Cell::new(0_u32);
1607
1608 let (decoded, path) = recover_object_with_fallback(&records, |_| {
1609 decode_invocations.set(decode_invocations.get().saturating_add(1));
1610 Err(SystematicLayoutError::EmptySymbolSet)
1611 })
1612 .expect("happy-path should succeed");
1613
1614 assert!(matches!(path, SymbolReadPath::SystematicFastPath));
1615 assert_eq!(
1616 decode_invocations.get(),
1617 0,
1618 "fallback decode must not run on systematic happy path"
1619 );
1620 assert_eq!(decoded, expected);
1621 }
1622
1623 #[test]
1624 fn test_fallback_on_missing_symbol() {
1625 let oid = ObjectId::from_bytes([0x66; 16]);
1626 let (mut records, expected, _) = make_symbol_run(oid, 50, 64, 5);
1627 records.retain(|record| record.esi != 5);
1628
1629 let decode_invocations = std::cell::Cell::new(0_u32);
1630 let fallback_expected = expected.clone();
1631 let (decoded, path) = recover_object_with_fallback(&records, |_| {
1632 decode_invocations.set(decode_invocations.get().saturating_add(1));
1633 Ok(fallback_expected.clone())
1634 })
1635 .expect("fallback decode should recover object");
1636
1637 assert_eq!(decode_invocations.get(), 1);
1638 assert_eq!(decoded, expected);
1639 assert!(matches!(path, SymbolReadPath::FullDecodeFallback { .. }));
1640 if let SymbolReadPath::FullDecodeFallback { reason } = path {
1641 assert!(matches!(
1642 reason,
1643 SystematicLayoutError::NonContiguousSystematicSymbol {
1644 expected_esi: 5,
1645 ..
1646 } | SystematicLayoutError::MissingSystematicSymbol { expected_esi: 5 }
1647 ));
1648 }
1649 }
1650
1651 #[test]
1652 fn test_fallback_on_corruption() {
1653 let oid = ObjectId::from_bytes([0x77; 16]);
1654 let (mut records, expected, _) = make_symbol_run(oid, 50, 64, 5);
1655 let corrupt_idx = records
1656 .iter()
1657 .position(|record| record.esi == 3)
1658 .expect("ESI 3 present");
1659 records[corrupt_idx].symbol_data[0] ^= 0xAA;
1660
1661 let decode_invocations = std::cell::Cell::new(0_u32);
1662 let fallback_expected = expected.clone();
1663 let (decoded, path) = recover_object_with_fallback(&records, |_| {
1664 decode_invocations.set(decode_invocations.get().saturating_add(1));
1665 Ok(fallback_expected.clone())
1666 })
1667 .expect("fallback decode should recover corrupted symbol run");
1668
1669 assert_eq!(decode_invocations.get(), 1);
1670 assert_eq!(decoded, expected);
1671 assert!(matches!(path, SymbolReadPath::FullDecodeFallback { .. }));
1672 if let SymbolReadPath::FullDecodeFallback { reason } = path {
1673 assert!(matches!(
1674 reason,
1675 SystematicLayoutError::CorruptSymbol { esi: 3 }
1676 ));
1677 }
1678 }
1679
    #[test]
    fn test_benchmark_happy_vs_full() {
        // Emulates a full GF(256) decode: copies out the source payloads but
        // also performs repeated multiply-accumulate passes over every symbol
        // to model the arithmetic cost the systematic fast path avoids.
        fn emulate_full_decode(records: &[SymbolRecord]) -> Vec<u8> {
            let first = records.first().expect("records non-empty");
            let source_symbols = source_symbol_count(first.oti).expect("valid K");
            let source_symbols_u32 = u32::try_from(source_symbols).expect("K fits u32");
            let symbol_size = usize::try_from(first.oti.t).expect("symbol size fits usize");
            let mut scratch = vec![0_u8; symbol_size];
            let mut out = Vec::with_capacity(source_symbols.saturating_mul(symbol_size));

            for record in records {
                if record.esi < source_symbols_u32 {
                    out.extend_from_slice(&record.symbol_data);
                }
                // Coefficient in 1..=251 keeps the GF multiply non-trivial.
                let coeff = u8::try_from((record.esi % 251) + 1).expect("coeff in 1..=251");
                for _ in 0..24 {
                    for (dst, src) in scratch.iter_mut().zip(record.symbol_data.iter()) {
                        *dst ^= crate::gf256_mul_byte(coeff, *src);
                    }
                }
            }

            let transfer_len = usize::try_from(first.oti.f).expect("transfer length fits usize");
            out.truncate(transfer_len);
            out
        }

        let oid = ObjectId::from_bytes([0x88; 16]);
        let (records, expected, _) = make_symbol_run(oid, 100, 4096, 6);
        let rounds = 6_u32;

        // Time the systematic happy path.
        let fast_start = std::time::Instant::now();
        let mut fast_guard = 0_u8;
        for _ in 0..rounds {
            let decoded = reconstruct_systematic_happy_path(&records).expect("happy-path decode");
            fast_guard ^= decoded[0];
            assert_eq!(decoded, expected);
        }
        let fast_elapsed = fast_start.elapsed();

        // Time the emulated full decode over the same input.
        let full_start = std::time::Instant::now();
        let mut full_guard = 0_u8;
        for _ in 0..rounds {
            let decoded = emulate_full_decode(&records);
            full_guard ^= decoded[0];
            assert_eq!(decoded, expected);
        }
        let full_elapsed = full_start.elapsed();

        // Consume both guards so the optimizer cannot discard the loops.
        assert_ne!(
            fast_guard,
            full_guard.wrapping_add(1),
            "keep optimizer honest"
        );

        // NOTE(review): a wall-clock ratio assertion is inherently sensitive
        // to build profile and machine load; if this flakes in CI, consider
        // relaxing the threshold or moving it to a criterion benchmark.
        let fast_ns = fast_elapsed.as_nanos().max(1);
        let full_ns = full_elapsed.as_nanos().max(1);
        let speedup = full_ns as f64 / fast_ns as f64;
        assert!(
            speedup >= 10.0,
            "expected happy-path to be >=10x faster, got {speedup:.2}x (happy={fast_elapsed:?}, full={full_elapsed:?})"
        );
    }
1743
1744 #[test]
1745 fn test_symbol_record_version_validation() {
1746 let rec = test_record(64);
1747 let mut bytes = rec.to_bytes();
1748 bytes[4] = 99;
1749 let err = SymbolRecord::from_bytes(&bytes).unwrap_err();
1750 assert!(matches!(err, SymbolRecordError::UnsupportedVersion(99)));
1751 }
1752
1753 #[test]
1754 fn test_symbol_record_too_short() {
1755 let err = SymbolRecord::from_bytes(&[0u8; 10]).unwrap_err();
1756 assert!(matches!(err, SymbolRecordError::TooShort { .. }));
1757 }
1758
1759 #[test]
1760 fn test_symbol_record_wire_size() {
1761 let rec = test_record(4096);
1762 assert_eq!(
1763 rec.wire_size(),
1764 HEADER_BEFORE_DATA + 4096 + TRAILER_AFTER_DATA
1765 );
1766 assert_eq!(rec.wire_size(), rec.to_bytes().len());
1767 }
1768
1769 #[test]
1770 fn test_symbol_record_verify_integrity() {
1771 let rec = test_record(128);
1772 assert!(rec.verify_integrity());
1773
1774 let mut bad = rec;
1775 bad.symbol_data[0] ^= 0x01;
1776 assert!(!bad.verify_integrity());
1777 }
1778
1779 #[test]
1780 fn test_oti_roundtrip() {
1781 let oti = Oti {
1782 f: u64::MAX,
1783 al: u16::MAX,
1784 t: u32::MAX,
1785 z: u32::MAX,
1786 n: u32::MAX,
1787 };
1788 let bytes = oti.to_bytes();
1789 assert_eq!(bytes.len(), OTI_WIRE_SIZE);
1790 let oti2 = Oti::from_bytes(&bytes).unwrap();
1791 assert_eq!(oti, oti2);
1792 }
1793
1794 #[test]
1795 fn test_oti_from_bytes_too_short() {
1796 assert!(Oti::from_bytes(&[0u8; 10]).is_none());
1797 }
1798
1799 fn make_oid(seed: u8) -> ObjectId {
1804 ObjectId::from_bytes([seed; 16])
1805 }
1806
1807 fn make_page(n: u32) -> crate::PageNumber {
1808 crate::PageNumber::new(n).expect("non-zero")
1809 }
1810
1811 fn make_vp(seq: u64, seed: u8, kind: PatchKind) -> VersionPointer {
1812 VersionPointer {
1813 commit_seq: seq,
1814 patch_object: make_oid(seed),
1815 patch_kind: kind,
1816 base_hint: None,
1817 }
1818 }
1819
1820 #[test]
1821 fn test_version_pointer_serialization_roundtrip() {
1822 for kind in [
1823 PatchKind::FullImage,
1824 PatchKind::IntentLog,
1825 PatchKind::SparseXor,
1826 ] {
1827 let vp = VersionPointer {
1828 commit_seq: 42,
1829 patch_object: make_oid(0xAA),
1830 patch_kind: kind,
1831 base_hint: None,
1832 };
1833 let bytes = vp.to_bytes();
1834 let vp2 = VersionPointer::from_bytes(&bytes).unwrap();
1835 assert_eq!(vp, vp2);
1836
1837 let vp_with_base = VersionPointer {
1838 base_hint: Some(make_oid(0xBB)),
1839 ..vp
1840 };
1841 let bytes2 = vp_with_base.to_bytes();
1842 let vp3 = VersionPointer::from_bytes(&bytes2).unwrap();
1843 assert_eq!(vp_with_base, vp3);
1844 }
1845 }
1846
1847 #[test]
1848 fn test_page_version_index_segment_lookup() {
1849 let entries: Vec<_> = (1..=50u32)
1850 .map(|i| {
1851 let pgno = make_page(i);
1852 let seed = u8::try_from(i).expect("i <= 50");
1853 let vp = make_vp(u64::from(i) + 10, seed, PatchKind::FullImage);
1854 (pgno, vp)
1855 })
1856 .collect();
1857
1858 let seg = PageVersionIndexSegment::new(10, 60, entries);
1859
1860 let result = seg.lookup(make_page(25), 60);
1861 assert!(result.is_some());
1862 assert_eq!(result.unwrap().commit_seq, 35);
1863
1864 assert!(seg.lookup(make_page(25), 30).is_none());
1865 assert!(seg.lookup(make_page(99), 60).is_none());
1866 }
1867
1868 #[test]
1869 fn test_page_version_index_segment_lookup_picks_latest_leq_snapshot() {
1870 let page = make_page(7);
1871 let vp10 = make_vp(10, 0x10, PatchKind::FullImage);
1872 let vp15 = make_vp(15, 0x20, PatchKind::IntentLog);
1873 let vp20 = make_vp(20, 0x30, PatchKind::SparseXor);
1874 let seg =
1875 PageVersionIndexSegment::new(10, 20, vec![(page, vp10), (page, vp15), (page, vp20)]);
1876
1877 assert!(seg.lookup(page, 9).is_none());
1878 assert_eq!(seg.lookup(page, 10), Some(&vp10));
1879 assert_eq!(seg.lookup(page, 14), Some(&vp10));
1880 assert_eq!(seg.lookup(page, 15), Some(&vp15));
1881 assert_eq!(seg.lookup(page, 19), Some(&vp15));
1882 assert_eq!(seg.lookup(page, 20), Some(&vp20));
1883 }
1884
1885 #[test]
1886 fn test_page_version_index_segment_bloom_filter() {
1887 let entries: Vec<_> = (1..=100u32)
1888 .map(|i| {
1889 let seed = u8::try_from(i).expect("i <= 100");
1890 (
1891 make_page(i),
1892 make_vp(u64::from(i), seed, PatchKind::FullImage),
1893 )
1894 })
1895 .collect();
1896 let seg = PageVersionIndexSegment::new(1, 100, entries);
1897
1898 for i in 1..=100u32 {
1900 assert!(
1901 seg.bloom.maybe_contains(make_page(i)),
1902 "bloom must not have false negatives for page {i}"
1903 );
1904 }
1905
1906 let mut false_positives = 0u32;
1908 for i in 101..=1100u32 {
1909 if seg.bloom.maybe_contains(make_page(i)) {
1910 false_positives += 1;
1911 }
1912 }
1913 let fp_rate = f64::from(false_positives) / 1000.0;
1914 assert!(fp_rate < 0.05, "bloom FP rate {fp_rate:.3} exceeds 5%");
1915 }
1916
1917 #[test]
1918 fn test_object_locator_segment_rebuild() {
1919 let pairs = vec![
1920 (make_oid(1), vec![SymbolLogOffset(0), SymbolLogOffset(100)]),
1921 (make_oid(2), vec![SymbolLogOffset(200)]),
1922 (
1923 make_oid(3),
1924 vec![SymbolLogOffset(300), SymbolLogOffset(400)],
1925 ),
1926 ];
1927 let seg = ObjectLocatorSegment::new(pairs);
1928
1929 let scan_pairs = vec![
1930 (make_oid(1), SymbolLogOffset(100)),
1931 (make_oid(3), SymbolLogOffset(300)),
1932 (make_oid(1), SymbolLogOffset(0)),
1933 (make_oid(2), SymbolLogOffset(200)),
1934 (make_oid(3), SymbolLogOffset(400)),
1935 ];
1936 let rebuilt = ObjectLocatorSegment::rebuild_from_scan(scan_pairs);
1937
1938 assert_eq!(seg.lookup(&make_oid(1)), rebuilt.lookup(&make_oid(1)));
1939 assert_eq!(seg.lookup(&make_oid(2)), rebuilt.lookup(&make_oid(2)));
1940 assert_eq!(seg.lookup(&make_oid(3)), rebuilt.lookup(&make_oid(3)));
1941 assert!(seg.lookup(&make_oid(99)).is_none());
1942 }
1943
1944 #[test]
1945 fn test_manifest_segment_bootstrap() {
1946 let seg = ManifestSegment::new(vec![
1947 (1, 100, make_oid(0x10)),
1948 (101, 200, make_oid(0x20)),
1949 (201, 300, make_oid(0x30)),
1950 ]);
1951
1952 assert_eq!(seg.lookup(50), Some(&make_oid(0x10)));
1953 assert_eq!(seg.lookup(100), Some(&make_oid(0x10)));
1954 assert_eq!(seg.lookup(101), Some(&make_oid(0x20)));
1955 assert_eq!(seg.lookup(250), Some(&make_oid(0x30)));
1956 assert_eq!(seg.lookup(300), Some(&make_oid(0x30)));
1957 assert!(seg.lookup(0).is_none());
1958 assert!(seg.lookup(301).is_none());
1959 }
1960
1961 #[test]
1962 fn test_version_pointer_references_content_addressed() {
1963 let vp = make_vp(42, 0xCC, PatchKind::FullImage);
1964 assert_eq!(vp.patch_object.as_bytes().len(), ObjectId::LEN);
1965 }
1966
1967 #[test]
1968 fn test_patch_kind_from_byte() {
1969 assert_eq!(PatchKind::from_byte(0), Some(PatchKind::FullImage));
1970 assert_eq!(PatchKind::from_byte(1), Some(PatchKind::IntentLog));
1971 assert_eq!(PatchKind::from_byte(2), Some(PatchKind::SparseXor));
1972 assert!(PatchKind::from_byte(3).is_none());
1973 assert!(PatchKind::from_byte(255).is_none());
1974 }
1975
1976 #[test]
1977 fn test_version_pointer_too_short() {
1978 assert!(VersionPointer::from_bytes(&[0u8; 10]).is_none());
1979 let vp = make_vp(1, 1, PatchKind::FullImage);
1980 let bytes = vp.to_bytes();
1981 assert_eq!(bytes.len(), VERSION_POINTER_MIN_WIRE);
1982 assert!(VersionPointer::from_bytes(&bytes).is_some());
1983 }
1984
    #[test]
    fn test_native_ecs_structures_little_endian() {
        let rec = test_record(64);
        let bytes = rec.to_bytes();
        // Fixed header offsets: ESI at 43..47, symbol_size (== oti.t) at
        // 47..51 — both little-endian u32s.
        assert_eq!(read_u32_le(&bytes[43..47]), Some(rec.esi));
        assert_eq!(read_u32_le(&bytes[47..51]), Some(rec.oti.t));
        // Frame hash sits right after the payload plus the one-byte flags
        // field, as a little-endian u64.
        let frame_offset = HEADER_BEFORE_DATA + rec.symbol_data.len() + 1;
        assert_eq!(
            read_u64_le(&bytes[frame_offset..frame_offset + 8]),
            Some(rec.frame_xxh3)
        );

        // Version pointers lead with a little-endian commit_seq.
        let vp = make_vp(0x0102_0304_0506_0708, 0xAA, PatchKind::SparseXor);
        let vp_bytes = vp.to_bytes();
        assert_eq!(
            read_u64_le(&vp_bytes[0..8]),
            Some(0x0102_0304_0506_0708),
            "version pointer commit_seq must remain little-endian"
        );
    }
2005
2006 #[test]
2007 fn test_canonical_encoding_unique() {
2008 let rec = test_record(48);
2009 let encoded_a = rec.to_bytes();
2010 let encoded_b = rec.to_bytes();
2011 assert_eq!(
2012 encoded_a, encoded_b,
2013 "same symbol record must encode identically"
2014 );
2015
2016 let different = make_vp(2, 0x11, PatchKind::FullImage);
2017 let different_encoded = different.to_bytes();
2018 assert_ne!(
2019 encoded_a, different_encoded,
2020 "different structures must not share canonical byte encodings"
2021 );
2022 }
2023
2024 #[test]
2025 fn test_roundtrip_encode_decode() {
2026 let oti = test_oti(512);
2027 let oti_bytes = oti.to_bytes();
2028 let oti_decoded = Oti::from_bytes(&oti_bytes).expect("OTI roundtrip must succeed");
2029 assert_eq!(oti, oti_decoded);
2030
2031 let rec = test_record(128);
2032 let rec_bytes = rec.to_bytes();
2033 let rec_decoded =
2034 SymbolRecord::from_bytes(&rec_bytes).expect("symbol record roundtrip must succeed");
2035 assert_eq!(rec, rec_decoded);
2036
2037 let vp = make_vp(99, 0x55, PatchKind::IntentLog);
2038 let vp_bytes = vp.to_bytes();
2039 let vp_decoded =
2040 VersionPointer::from_bytes(&vp_bytes).expect("version pointer roundtrip must succeed");
2041 assert_eq!(vp, vp_decoded);
2042 }
2043
2044 #[test]
2045 fn test_no_adhoc_byte_shuffling() {
2046 let source = include_str!("ecs.rs");
2047 let production = source.split("\n#[cfg(test)]").next().unwrap_or(source);
2048 assert!(
2049 !production.contains("to_le_bytes("),
2050 "production ECS serialization should use canonical helpers"
2051 );
2052 assert!(
2053 !production.contains("from_le_bytes("),
2054 "production ECS decoding should use canonical helpers"
2055 );
2056 assert!(
2057 production.contains("append_u32_le")
2058 && production.contains("append_u64_le")
2059 && production.contains("read_u32_le")
2060 && production.contains("read_u64_le"),
2061 "expected canonical helper usage markers missing"
2062 );
2063 }
2064
2065 #[test]
2066 fn test_symbol_log_offset_ordering() {
2067 let a = SymbolLogOffset::new(10);
2068 let b = SymbolLogOffset::new(20);
2069 assert!(a < b);
2070 assert_eq!(a.get(), 10);
2071 }
2072}
2073
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing arbitrary OTIs. `t`, `z`, and `n` are bounded so
    /// generated payload sizes stay small enough for fast property runs;
    /// `f` and `al` range over their full types.
    fn arb_oti() -> impl Strategy<Value = Oti> {
        (
            any::<u64>(),
            any::<u16>(),
            1..=65536u32,
            1..=100u32,
            1..=100u32,
        )
            .prop_map(|(f, al, t, z, n)| Oti { f, al, t, z, n })
    }

    proptest! {
        // Any record built from an arbitrary OTI must encode/decode
        // losslessly.
        #[test]
        fn prop_symbol_record_roundtrip(
            oti in arb_oti(),
            esi in any::<u32>(),
            data_byte in any::<u8>(),
        ) {
            let oid = ObjectId::from_bytes([7u8; 16]);
            let data = vec![data_byte; oti.t as usize];
            let rec = SymbolRecord::new(oid, oti, esi, data, SymbolRecordFlags::empty());
            let bytes = rec.to_bytes();
            let rec2 = SymbolRecord::from_bytes(&bytes).unwrap();
            prop_assert_eq!(rec, rec2);
        }

        // Whatever order symbols arrive in (simulated by a seed-dependent
        // rotation), the writer layout must restore a contiguous 0..K
        // source run followed by repair symbols.
        #[test]
        fn test_write_produces_contiguous_layout(
            source_symbols in 1u16..=500u16,
            symbol_size in prop::sample::select(vec![64u32, 128u32, 256u32, 512u32]),
            seed in any::<u8>(),
        ) {
            let source_symbols_u32 = u32::from(source_symbols);
            let symbol_size_usize = usize::try_from(symbol_size).expect("symbol size fits usize");
            // Transfer length F = K * T.
            let transfer_length = u64::from(source_symbols_u32).saturating_mul(u64::from(symbol_size));
            let oti = Oti {
                f: transfer_length,
                al: 4,
                t: symbol_size,
                z: 1,
                n: 1,
            };
            let oid = ObjectId::from_bytes([seed; 16]);

            // Source symbols with a deterministic per-byte pattern; only
            // ESI 0 carries the run-start flag.
            let mut records = Vec::new();
            for esi in 0..source_symbols_u32 {
                let mut payload = vec![0u8; symbol_size_usize];
                let esi_low = u8::try_from(esi & 0xFF).expect("masked to u8");
                for (idx, byte) in payload.iter_mut().enumerate() {
                    let idx_low = u8::try_from(idx & 0xFF).expect("masked to u8");
                    *byte = idx_low ^ esi_low.wrapping_mul(5);
                }
                let flags = if esi == 0 {
                    SymbolRecordFlags::SYSTEMATIC_RUN_START
                } else {
                    SymbolRecordFlags::empty()
                };
                records.push(SymbolRecord::new(oid, oti, esi, payload, flags));
            }

            // Three repair symbols with ESIs after the source run.
            for extra in 0..3_u32 {
                let esi = source_symbols_u32.saturating_add(extra);
                records.push(SymbolRecord::new(
                    oid,
                    oti,
                    esi,
                    vec![0xEE; symbol_size_usize],
                    SymbolRecordFlags::empty(),
                ));
            }

            // Simulate out-of-order arrival by rotating the record list.
            if records.len() > 1 {
                let rotate_by = usize::from(seed) % records.len();
                records.rotate_left(rotate_by);
            }

            let contiguous = layout_systematic_run(records).expect("writer layout normalization");
            let k = validate_systematic_run(&contiguous).expect("must validate after layout");
            prop_assert_eq!(k, usize::from(source_symbols));
            // Source run occupies positions 0..K with ESI == position.
            for (idx, record) in contiguous.iter().take(usize::from(source_symbols)).enumerate() {
                let expected_esi = u32::try_from(idx).expect("idx fits u32");
                prop_assert_eq!(record.esi, expected_esi);
            }
            // Everything after the source run must be repair symbols.
            prop_assert!(
                contiguous
                    .iter()
                    .skip(usize::from(source_symbols))
                    .all(|record| record.esi >= source_symbols_u32)
            );
        }
    }
}