mkit_core/pack_shard.rs
1//! Erasure-coded pack delivery via Reed-Solomon shards.
2//!
3//! This module is the **Phase 1** scaffolding for issue #159: it wraps
4//! `commonware_coding::ReedSolomon<Sha256>` so a producer can split a
5//! pack into `N + K` shards and a consumer can reconstruct the pack
6//! from any `N` of those shards.
7//!
8//! The wire format and motivation are normatively documented in
9//! `docs/SPEC-PACK-SHARDS.md`. The implementation here matches the v0
10//! spec; transport-level shard fetch (HTTP, S3) is **out of scope** and
11//! lives in a later phase under `mkit-transport-*`.
12//!
13//! # Threat model
14//!
15//! * Each [`Shard`] is a self-describing envelope carrying the
16//! commonware `Chunk` (shard payload + index + Merkle proof).
17//! * Before passing a shard to the decoder, the receiver compares
18//! `BLAKE3(shard.bytes)` against the manifest entry in
19//! [`ShardSet::shard_hashes`]. A mismatch means the shard was
20//! tampered with in transit; the shard is rejected without ever
21//! reaching the Reed-Solomon decoder.
22//! * After reconstruction, the recovered pack bytes are hashed with
23//! BLAKE3 and compared against [`ShardSet::pack_hash`]. This catches
24//! the (cryptographically unlikely) case where a coordinated attacker
25//! crafted shards that pass the Merkle check but reconstruct a
26//! different pack.
27//!
28//! # Feature gate
29//!
30//! This module is compiled only when `--features pack-shards` is set.
31//! The default `mkit-core` build does **not** pull in the
32//! `commonware-*` dep stack.
33//!
34//! # Defaults
35//!
36//! `Config { minimum_shards: 16, extra_shards: 4 }` — 20 total shards,
37//! 25% redundancy. Any 16 of 20 shards reconstruct the pack. Tuning
38//! lives in `docs/SPEC-PACK-SHARDS.md` §6.
39
40use std::num::NonZeroU16;
41
42use commonware_codec::{Decode, Encode};
43use commonware_coding::{CodecConfig, Scheme as _};
44use commonware_cryptography::Sha256;
45use commonware_parallel::Sequential;
46
47use crate::hash::{self, HASH_LEN, Hash};
48
49// Re-exports so callers don't need to depend on `commonware-coding` directly.
50pub use commonware_coding::Config;
51
52type RsScheme = commonware_coding::ReedSolomon<Sha256>;
53type Commitment = <RsScheme as commonware_coding::Scheme>::Commitment;
54type RsChunk = <RsScheme as commonware_coding::Scheme>::Shard;
55
56/// Strategy used for the Reed-Solomon encode/decode internals. We use
57/// `Sequential` here so the scaffolding has no rayon thread-pool
58/// surprises; benches can swap in a parallel strategy in Phase 2.
59const STRATEGY: Sequential = Sequential;
60
/// Cap on the per-shard codec payload size accepted at decode time.
///
/// 4 GiB matches the existing packfile size cap (see
/// `crate::pack::MAX_TOTAL_PAYLOAD`); anything bigger could not have
/// originated from a valid mkit pack.
///
/// The cap is computed in `u64` and clamped to `usize::MAX`. Writing
/// `4 * 1024 * 1024 * 1024` directly as a `usize` overflows during constant
/// evaluation on 32-bit targets, which is a hard compile error there. On
/// 64-bit targets the value is exactly 4 GiB.
const MAX_SHARD_BYTES: usize = {
    const FOUR_GIB: u64 = 4 * 1024 * 1024 * 1024;
    if FOUR_GIB > usize::MAX as u64 {
        usize::MAX
    } else {
        // Lossless: this branch is only taken when the value fits in `usize`.
        FOUR_GIB as usize
    }
};
66
/// Pack size (in bytes) below which a producer SHOULD NOT shard.
///
/// SPEC-PACK-SHARDS §6: for small packs the per-shard Merkle-proof overhead
/// dominates, so they are served monolithically. The v0 cutoff is 1 MiB and
/// is exported so transports and CLI tooling share one number.
pub const SHARD_SIZE_THRESHOLD: u64 = 1 << 20;

/// Magic bytes that open a serialised [`ShardSet`]: ASCII "MKSH"
/// ("mkit-shards"). Lets a parser refuse to treat random bytes as a
/// manifest.
pub const MANIFEST_MAGIC: [u8; 4] = [b'M', b'K', b'S', b'H'];

/// Wire-format version of a serialised [`ShardSet`]. Bumped whenever the
/// on-the-wire layout changes in a way that is not backwards-compatible.
pub const MANIFEST_VERSION: u8 = 1;

/// Length of the manifest prologue: the magic followed by one version byte.
const MANIFEST_PROLOGUE_LEN: usize = MANIFEST_MAGIC.len() + 1;

/// Upper bound on the size of a serialised manifest.
///
/// Per SPEC-PACK-SHARDS §6 a manifest is roughly `32 * (T + 2)` bytes plus
/// the prologue and config. Capping at 1 MiB prevents a hostile peer from
/// streaming gigabytes through the deserialiser.
pub const MANIFEST_MAX_BYTES: usize = 1 << 20;
92
93/// Default config: `(minimum_shards = 16, extra_shards = 4)`.
94///
95/// 20 total shards, any 16 of which reconstruct. See SPEC-PACK-SHARDS §6
96/// for the rationale and when callers may want to tune these.
97///
98/// # Panics
99///
100/// Infallible — both `16` and `4` are nonzero. The `expect` calls
101/// document intent; they cannot fire.
102#[must_use]
103pub fn default_config() -> Config {
104 Config {
105 minimum_shards: NonZeroU16::new(16).expect("16 != 0"),
106 extra_shards: NonZeroU16::new(4).expect("4 != 0"),
107 }
108}
109
/// One shard of an erasure-coded pack.
///
/// The receiver hashes `bytes` with BLAKE3 and compares the result with the
/// entry for `index` in [`ShardSet::shard_hashes`] before the shard is
/// decoded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Shard {
    /// Position of this shard within the set, in
    /// `[0, minimum_shards + extra_shards)`.
    pub index: u16,
    /// The commonware `Chunk` (shard payload + index + Merkle proof) in its
    /// codec-serialised form. Treated as opaque here: it is only hashed and
    /// handed to the commonware codec for decoding.
    pub bytes: Vec<u8>,
}
124
125/// Manifest describing a set of shards encoding one pack.
126///
127/// In the wire protocol this is published alongside the shards under
128/// `/packs/<pack_hash>/shards.manifest` (see SPEC-PACK-SHARDS §2). A
129/// consumer fetches the manifest first, then fetches up to
130/// `config.total_shards()` shards in parallel, rejecting any whose
131/// BLAKE3 hash does not match.
132#[derive(Debug, Clone, PartialEq, Eq)]
133pub struct ShardSet {
134 /// BLAKE3 of the original pack bytes. Verified after reconstruction
135 /// as the final defence against shard-set forgery.
136 pub pack_hash: Hash,
137 /// Reed-Solomon `(minimum_shards, extra_shards)` configuration used
138 /// to produce this shard set. The decoder MUST use the same
139 /// configuration.
140 pub config: Config,
141 /// BLAKE3 of each shard's `bytes`, indexed by shard index.
142 /// `shard_hashes.len()` MUST equal `config.total_shards()`.
143 pub shard_hashes: Vec<Hash>,
144 /// Commonware BMT root committing to all shards. Required by the
145 /// commonware decoder for per-shard Merkle-proof checks. Stored
146 /// here so the manifest is self-contained — a receiver does not
147 /// need a second round-trip to fetch the commitment.
148 pub commitment: Hash,
149}
150
151/// Errors produced by [`encode_pack_to_shards`] / [`decode_pack_from_shards`].
152#[derive(Debug, thiserror::Error)]
153pub enum ShardError {
154 /// The Reed-Solomon encoder rejected the input. Typically means
155 /// the pack is larger than `u32::MAX` bytes (commonware's limit).
156 #[error("reed-solomon encode failed: {0}")]
157 EncodeFailed(String),
158 /// The Reed-Solomon decoder rejected the supplied shards. Usually
159 /// triggered by too few shards, duplicate indices, or a Merkle
160 /// proof that no longer matches the commitment.
161 #[error("reed-solomon decode failed: {0}")]
162 DecodeFailed(String),
163 /// The codec layer could not parse a shard's `bytes`. Means the
164 /// shard envelope is malformed — distinct from a BLAKE3 mismatch.
165 #[error("shard codec decode failed at index {index}: {source}")]
166 ShardCodecFailed {
167 index: u16,
168 #[source]
169 source: commonware_codec::Error,
170 },
171 /// A shard's BLAKE3 hash does not match the manifest entry for its
172 /// index. The shard is corrupt or maliciously substituted.
173 #[error("shard {index} BLAKE3 mismatch (manifest tampered or shard corrupted)")]
174 ShardHashMismatch { index: u16 },
175 /// Manifest claims an index outside `0..total_shards`.
176 #[error("shard index {index} is out of range for config (total = {total})")]
177 IndexOutOfRange { index: u16, total: u32 },
178 /// Duplicate shard index supplied to the decoder.
179 #[error("duplicate shard index {index}")]
180 DuplicateIndex { index: u16 },
181 /// Manifest carries the wrong number of `shard_hashes` for the
182 /// declared config.
183 #[error(
184 "manifest has {actual} shard_hashes, expected {expected} \
185 (config.total_shards())"
186 )]
187 ManifestShardCountMismatch { actual: usize, expected: usize },
188 /// Reconstruction produced bytes whose BLAKE3 does not match
189 /// `manifest.pack_hash`. Cryptographically the manifest was forged.
190 #[error("reconstructed pack hash does not match manifest.pack_hash")]
191 PackHashMismatch,
192 /// Caller passed fewer than `config.minimum_shards` shards.
193 #[error("insufficient shards: {provided} < {minimum}")]
194 InsufficientShards { provided: usize, minimum: u16 },
195 /// The manifest wire bytes are shorter than the v0 prologue, do not
196 /// begin with [`MANIFEST_MAGIC`], or carry an unrecognised
197 /// [`MANIFEST_VERSION`].
198 #[error("invalid manifest prologue: {0}")]
199 InvalidManifestPrologue(&'static str),
200 /// The manifest wire bytes are truncated — a length-prefixed field
201 /// claims more bytes than remain in the buffer.
202 #[error("unexpected eof while decoding manifest")]
203 ManifestUnexpectedEof,
204 /// The manifest carries trailing bytes after the last expected
205 /// field. Most likely a producer / consumer version mismatch.
206 #[error("trailing bytes after manifest body")]
207 ManifestTrailingBytes,
208 /// The manifest declares a `(minimum_shards, extra_shards)` pair
209 /// whose components are zero — illegal at the SPEC level.
210 #[error("manifest declares zero shard count (min={minimum}, extra={extra})")]
211 ManifestZeroShardCount { minimum: u16, extra: u16 },
212 /// The manifest exceeds [`MANIFEST_MAX_BYTES`].
213 #[error("manifest is too large: {actual} > {max}")]
214 ManifestTooLarge { actual: usize, max: usize },
215}
216
217/// Encode a pack into shards.
218///
219/// Produces `config.minimum_shards + config.extra_shards` shards and a
220/// manifest committing to them. The pack itself is not modified.
221///
222/// # Errors
223///
224/// Returns [`ShardError::EncodeFailed`] if the underlying Reed-Solomon
225/// encoder rejects the input (e.g. the pack exceeds `u32::MAX` bytes,
226/// or `total_shards()` exceeds `u16::MAX`).
227///
228/// # Panics
229///
230/// Infallible — the only `expect` in the body asserts that commonware
231/// never emits more than `u16::MAX` shards, which it enforces in
232/// `ReedSolomon::encode` (`Error::TooManyTotalShards`).
233pub fn encode_pack_to_shards(
234 pack: &[u8],
235 config: Config,
236) -> Result<(Vec<Shard>, ShardSet), ShardError> {
237 let (commitment, chunks) = RsScheme::encode(&config, pack, &STRATEGY)
238 .map_err(|e| ShardError::EncodeFailed(format!("{e:?}")))?;
239
240 let total = config.total_shards() as usize;
241 debug_assert_eq!(chunks.len(), total);
242
243 let mut shards = Vec::with_capacity(total);
244 let mut shard_hashes = Vec::with_capacity(total);
245 for (i, chunk) in chunks.into_iter().enumerate() {
246 // `i < total <= u16::MAX` by commonware's own bound
247 // (`Chunk::index: u16`), so the conversion is infallible.
248 let index = u16::try_from(i).expect("commonware emits <= u16::MAX shards");
249 let bytes = chunk.encode().to_vec();
250 let h = hash::hash(&bytes);
251 shards.push(Shard { index, bytes });
252 shard_hashes.push(h);
253 }
254
255 let manifest = ShardSet {
256 pack_hash: hash::hash(pack),
257 config,
258 shard_hashes,
259 commitment: digest_to_bytes(&commitment),
260 };
261
262 Ok((shards, manifest))
263}
264
265/// Decode a pack from a (possibly partial) set of shards.
266///
267/// The decoder:
268///
269/// 1. Verifies each shard's BLAKE3 against the manifest entry for its
270/// index. Mismatched shards are dropped before they reach the
271/// Reed-Solomon decoder.
272/// 2. Deserialises each surviving shard as a commonware `Chunk`.
273/// 3. Calls `ReedSolomon::check` on each chunk (Merkle-proof check
274/// against `manifest.commitment`).
275/// 4. Calls `ReedSolomon::decode` on the checked set.
276/// 5. Verifies the reconstructed pack's BLAKE3 against
277/// `manifest.pack_hash`.
278///
279/// # Errors
280///
281/// See [`ShardError`] for the full taxonomy. Any step's failure
282/// short-circuits.
283pub fn decode_pack_from_shards(
284 shards: &[Shard],
285 manifest: &ShardSet,
286) -> Result<Vec<u8>, ShardError> {
287 let total = manifest.config.total_shards();
288 if manifest.shard_hashes.len() != total as usize {
289 return Err(ShardError::ManifestShardCountMismatch {
290 actual: manifest.shard_hashes.len(),
291 expected: total as usize,
292 });
293 }
294
295 let minimum = manifest.config.minimum_shards.get();
296 let commitment = bytes_to_digest(&manifest.commitment);
297 let codec_cfg = CodecConfig {
298 maximum_shard_size: MAX_SHARD_BYTES,
299 };
300
301 let mut seen = vec![false; total as usize];
302 let mut checked = Vec::with_capacity(shards.len());
303
304 for shard in shards {
305 // (1) Range + duplicate index check.
306 if u32::from(shard.index) >= total {
307 return Err(ShardError::IndexOutOfRange {
308 index: shard.index,
309 total,
310 });
311 }
312 let slot = &mut seen[shard.index as usize];
313 if *slot {
314 return Err(ShardError::DuplicateIndex { index: shard.index });
315 }
316 *slot = true;
317
318 // (2) BLAKE3 tamper check against the manifest.
319 let expected = &manifest.shard_hashes[shard.index as usize];
320 if &hash::hash(&shard.bytes) != expected {
321 return Err(ShardError::ShardHashMismatch { index: shard.index });
322 }
323
324 // (3) Codec decode → commonware `Chunk`.
325 let chunk = RsChunk::decode_cfg(shard.bytes.as_slice(), &codec_cfg).map_err(|e| {
326 ShardError::ShardCodecFailed {
327 index: shard.index,
328 source: e,
329 }
330 })?;
331
332 // (4) Merkle-proof check against the commitment.
333 let checked_shard = RsScheme::check(&manifest.config, &commitment, shard.index, &chunk)
334 .map_err(|e| ShardError::DecodeFailed(format!("check({}): {e:?}", shard.index)))?;
335 checked.push(checked_shard);
336 }
337
338 if checked.len() < usize::from(minimum) {
339 return Err(ShardError::InsufficientShards {
340 provided: checked.len(),
341 minimum,
342 });
343 }
344
345 // (5) Reed-Solomon decode.
346 let pack = RsScheme::decode(&manifest.config, &commitment, checked.iter(), &STRATEGY)
347 .map_err(|e| ShardError::DecodeFailed(format!("{e:?}")))?;
348
349 // (6) Final BLAKE3 check.
350 if hash::hash(&pack) != manifest.pack_hash {
351 return Err(ShardError::PackHashMismatch);
352 }
353
354 Ok(pack)
355}
356
357/// Extract the raw 32 bytes from a commonware `Sha256` digest.
358fn digest_to_bytes(d: &Commitment) -> [u8; HASH_LEN] {
359 // `Sha256::Digest` derefs to `[u8; 32]`. We avoid relying on a
360 // specific accessor name by going through `AsRef<[u8]>` which the
361 // digest type implements.
362 let slice: &[u8] = d.as_ref();
363 let mut out = [0u8; HASH_LEN];
364 out.copy_from_slice(slice);
365 out
366}
367
368/// Inverse of [`digest_to_bytes`]: reconstruct a commonware digest
369/// from the 32 bytes stored in the manifest.
370fn bytes_to_digest(b: &[u8; HASH_LEN]) -> Commitment {
371 // `Sha256::Digest` is a 32-byte `Array` and only exposes
372 // `From<[u8; 32]>`, not `TryFrom<&[u8]>`. Copy through a fixed
373 // array to keep the bound surface narrow.
374 use commonware_codec::FixedSize;
375 debug_assert_eq!(<Commitment as FixedSize>::SIZE, HASH_LEN);
376 Commitment::from(*b)
377}
378
379// ---------------------------------------------------------------------
380// Manifest wire format (v0)
381// ---------------------------------------------------------------------
382//
383// Layout (all multi-byte integers are little-endian):
384//
385// offset size field
386// ------ ---- -----------------------------------------
387// 0 4 magic = b"MKSH"
388// 4 1 version = 0x01
389// 5 32 pack_hash
390// 37 2 config.minimum_shards
391// 39 2 config.extra_shards
392// 41 32 commitment
393// 73 4 shard_hashes_len (== minimum + extra)
394// 77 32*T shard_hashes
395//
396// Total size for the v0 default `(16, 4)` config:
397// 5 + 32 + 2 + 2 + 32 + 4 + 32*20 = 717 bytes.
398//
399// Rationale for adding a new format here rather than reusing
400// `mkit_core::serialize`:
401// * `serialize.rs` is hard-coded to the [`Object`] enum and its
402// `MAGIC = "MKT1"` / `SCHEMA_VERSION` prologue. Shoehorning a
403// non-`Object` payload into that path would require widening its
404// public API and re-encoding every golden vector.
405// * The shard manifest is a transport artifact, not an object on
406// disk. Keeping its wire format colocated with the rest of the
407// pack-shard module keeps Phase 2 changes scoped to one file.
408
409/// Serialise a [`ShardSet`] into its v0 wire bytes.
410///
411/// The format is documented above and in SPEC-PACK-SHARDS §2. The
412/// caller takes ownership of the returned `Vec`.
413///
414/// # Errors
415///
416/// Returns [`ShardError::ManifestShardCountMismatch`] if
417/// `manifest.shard_hashes.len()` does not equal
418/// `manifest.config.total_shards()` — we refuse to encode a manifest
419/// whose vectors disagree with its config.
420///
421/// # Panics
422///
423/// Infallible: `config.total_shards()` is `u32` by commonware's own
424/// bound and the `expect` documents intent. It cannot fire.
425pub fn encode_manifest(manifest: &ShardSet) -> Result<Vec<u8>, ShardError> {
426 let total = manifest.config.total_shards() as usize;
427 if manifest.shard_hashes.len() != total {
428 return Err(ShardError::ManifestShardCountMismatch {
429 actual: manifest.shard_hashes.len(),
430 expected: total,
431 });
432 }
433
434 let body_len = MANIFEST_PROLOGUE_LEN + HASH_LEN + 2 + 2 + HASH_LEN + 4 + total * HASH_LEN;
435 let mut out = Vec::with_capacity(body_len);
436 out.extend_from_slice(&MANIFEST_MAGIC);
437 out.push(MANIFEST_VERSION);
438 out.extend_from_slice(&manifest.pack_hash);
439 out.extend_from_slice(&manifest.config.minimum_shards.get().to_le_bytes());
440 out.extend_from_slice(&manifest.config.extra_shards.get().to_le_bytes());
441 out.extend_from_slice(&manifest.commitment);
442 // Length-prefix the shard_hashes vector as u32 so the parser can
443 // bail before allocating attacker-controlled capacity.
444 out.extend_from_slice(
445 &u32::try_from(total)
446 .expect("total_shards fits in u32")
447 .to_le_bytes(),
448 );
449 for h in &manifest.shard_hashes {
450 out.extend_from_slice(h);
451 }
452 debug_assert_eq!(out.len(), body_len);
453 Ok(out)
454}
455
456/// Deserialise a [`ShardSet`] from its v0 wire bytes.
457///
458/// Validates the prologue, the length-prefixed shard-hashes vector,
459/// the per-config bounds, and rejects trailing bytes.
460///
461/// # Errors
462///
463/// * [`ShardError::ManifestTooLarge`] — input exceeds
464/// [`MANIFEST_MAX_BYTES`].
465/// * [`ShardError::InvalidManifestPrologue`] — magic / version
466/// mismatch or input shorter than the prologue.
467/// * [`ShardError::ManifestUnexpectedEof`] — any field claims more
468/// bytes than remain in the buffer.
469/// * [`ShardError::ManifestZeroShardCount`] — manifest declares
470/// `(0, _)` or `(_, 0)`.
471/// * [`ShardError::ManifestShardCountMismatch`] — declared
472/// `shard_hashes_len` does not equal `minimum + extra`.
473/// * [`ShardError::ManifestTrailingBytes`] — input has bytes after
474/// the last hash.
475pub fn decode_manifest(bytes: &[u8]) -> Result<ShardSet, ShardError> {
476 if bytes.len() > MANIFEST_MAX_BYTES {
477 return Err(ShardError::ManifestTooLarge {
478 actual: bytes.len(),
479 max: MANIFEST_MAX_BYTES,
480 });
481 }
482 if bytes.len() < MANIFEST_PROLOGUE_LEN {
483 return Err(ShardError::InvalidManifestPrologue(
484 "input shorter than prologue",
485 ));
486 }
487 if bytes[..4] != MANIFEST_MAGIC {
488 return Err(ShardError::InvalidManifestPrologue("bad magic"));
489 }
490 if bytes[4] != MANIFEST_VERSION {
491 return Err(ShardError::InvalidManifestPrologue("unsupported version"));
492 }
493 let mut pos = MANIFEST_PROLOGUE_LEN;
494
495 // pack_hash
496 if bytes.len() - pos < HASH_LEN {
497 return Err(ShardError::ManifestUnexpectedEof);
498 }
499 let mut pack_hash = [0u8; HASH_LEN];
500 pack_hash.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
501 pos += HASH_LEN;
502
503 // config
504 if bytes.len() - pos < 4 {
505 return Err(ShardError::ManifestUnexpectedEof);
506 }
507 let minimum = u16::from_le_bytes([bytes[pos], bytes[pos + 1]]);
508 let extra = u16::from_le_bytes([bytes[pos + 2], bytes[pos + 3]]);
509 pos += 4;
510 let minimum_nz =
511 NonZeroU16::new(minimum).ok_or(ShardError::ManifestZeroShardCount { minimum, extra })?;
512 let extra_nz =
513 NonZeroU16::new(extra).ok_or(ShardError::ManifestZeroShardCount { minimum, extra })?;
514 let config = Config {
515 minimum_shards: minimum_nz,
516 extra_shards: extra_nz,
517 };
518 let total = config.total_shards();
519
520 // commitment
521 if bytes.len() - pos < HASH_LEN {
522 return Err(ShardError::ManifestUnexpectedEof);
523 }
524 let mut commitment = [0u8; HASH_LEN];
525 commitment.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
526 pos += HASH_LEN;
527
528 // shard_hashes_len
529 if bytes.len() - pos < 4 {
530 return Err(ShardError::ManifestUnexpectedEof);
531 }
532 let declared_len =
533 u32::from_le_bytes([bytes[pos], bytes[pos + 1], bytes[pos + 2], bytes[pos + 3]]);
534 pos += 4;
535 if declared_len != total {
536 return Err(ShardError::ManifestShardCountMismatch {
537 actual: declared_len as usize,
538 expected: total as usize,
539 });
540 }
541 // Cheap upper bound — reject impossible counts before allocating.
542 if (declared_len as usize).saturating_mul(HASH_LEN) > bytes.len() - pos {
543 return Err(ShardError::ManifestUnexpectedEof);
544 }
545 let mut shard_hashes = Vec::with_capacity(declared_len as usize);
546 for _ in 0..declared_len {
547 let mut h = [0u8; HASH_LEN];
548 h.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
549 pos += HASH_LEN;
550 shard_hashes.push(h);
551 }
552
553 if pos != bytes.len() {
554 return Err(ShardError::ManifestTrailingBytes);
555 }
556
557 Ok(ShardSet {
558 pack_hash,
559 config,
560 shard_hashes,
561 commitment,
562 })
563}
564
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte offset of the `shard_hashes_len` field in a v0 manifest:
    /// prologue + pack_hash + config (2 + 2) + commitment.
    const SHARD_HASHES_LEN_OFFSET: usize = MANIFEST_PROLOGUE_LEN + HASH_LEN + 4 + HASH_LEN;

    /// A deterministic pack-like payload of exactly `bytes` bytes. Not a
    /// real packfile — the shard layer treats its input as opaque bytes, so
    /// any byte stream with enough entropy exercises the encoder.
    fn synthetic_pack(bytes: usize) -> Vec<u8> {
        // Xorshift-style PRNG seeded with a fixed constant so the tests are
        // reproducible.
        let mut x: u64 = 0x9E37_79B9_7F4A_7C15;
        let mut out = Vec::with_capacity(bytes);
        while out.len() < bytes {
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            out.extend_from_slice(&x.to_le_bytes());
        }
        out.truncate(bytes);
        out
    }

    /// Encode a synthetic pack of `pack_len` bytes with the default config.
    fn encode_default(pack_len: usize) -> (Vec<u8>, Vec<Shard>, ShardSet) {
        let pack = synthetic_pack(pack_len);
        let (shards, manifest) = encode_pack_to_shards(&pack, default_config()).unwrap();
        (pack, shards, manifest)
    }

    /// Wire bytes of a valid default-config manifest for a 32 KiB pack.
    fn default_manifest_bytes() -> Vec<u8> {
        let (_, _, manifest) = encode_default(32 * 1024);
        encode_manifest(&manifest).unwrap()
    }

    /// Hand-crafted manifest bytes with the given raw config values, zeroed
    /// hashes and an empty shard-hash list.
    fn manifest_bytes_with_config(minimum: u16, extra: u16) -> Vec<u8> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&MANIFEST_MAGIC);
        bytes.push(MANIFEST_VERSION);
        bytes.extend_from_slice(&[0u8; HASH_LEN]); // pack_hash
        bytes.extend_from_slice(&minimum.to_le_bytes());
        bytes.extend_from_slice(&extra.to_le_bytes());
        bytes.extend_from_slice(&[0u8; HASH_LEN]); // commitment
        bytes.extend_from_slice(&0u32.to_le_bytes()); // shard_hashes_len
        bytes
    }

    #[test]
    fn round_trip_default_config_1_mib_first_n_shards() {
        let (pack, shards, manifest) = encode_default(1024 * 1024);

        assert_eq!(shards.len(), 20);
        assert_eq!(manifest.shard_hashes.len(), 20);
        assert_eq!(manifest.pack_hash, hash::hash(&pack));

        // Decode using shards 0..16 (the first `minimum_shards`).
        let subset: Vec<Shard> = shards.into_iter().take(16).collect();
        let recovered = decode_pack_from_shards(&subset, &manifest).unwrap();
        assert_eq!(recovered, pack);
    }

    #[test]
    fn lossy_round_trip_drops_shards_0_5_10_17() {
        let (pack, shards, manifest) = encode_default(1024 * 1024);

        let dropped = [0u16, 5, 10, 17];
        let subset: Vec<Shard> = shards
            .into_iter()
            .filter(|s| !dropped.contains(&s.index))
            .collect();

        // Should be exactly 16 = minimum_shards remaining.
        assert_eq!(subset.len(), 16);

        let recovered = decode_pack_from_shards(&subset, &manifest).unwrap();
        assert_eq!(recovered, pack);
    }

    #[test]
    fn tampered_shard_is_rejected_before_decode() {
        let (_, mut shards, manifest) = encode_default(256 * 1024);

        // Flip a bit deep inside shard 0's bytes. The manifest entry for
        // shard 0 still reflects the *original* BLAKE3 (we did not update
        // it), so the tamper detection MUST fire.
        let last = shards[0].bytes.len() - 1;
        shards[0].bytes[last] ^= 0x01;

        let subset: Vec<Shard> = shards.into_iter().take(16).collect();
        let err = decode_pack_from_shards(&subset, &manifest).unwrap_err();
        assert!(
            matches!(err, ShardError::ShardHashMismatch { index: 0 }),
            "expected ShardHashMismatch{{index: 0}}, got {err:?}"
        );
    }

    #[test]
    fn insufficient_shards_returns_error() {
        let (_, shards, manifest) = encode_default(64 * 1024);

        // Only 15 of the 16 required shards.
        let subset: Vec<Shard> = shards.into_iter().take(15).collect();
        let err = decode_pack_from_shards(&subset, &manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::InsufficientShards {
                    provided: 15,
                    minimum: 16,
                }
            ),
            "expected InsufficientShards{{15, 16}}, got {err:?}"
        );
    }

    #[test]
    fn decode_rejects_out_of_range_index() {
        let (_, shards, manifest) = encode_default(64 * 1024);

        // Index 20 is one past the last legal index for the (16, 4) config;
        // the range check runs before any hashing.
        let mut rogue = shards[0].clone();
        rogue.index = 20;
        let err = decode_pack_from_shards(&[rogue], &manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::IndexOutOfRange {
                    index: 20,
                    total: 20,
                }
            ),
            "expected IndexOutOfRange{{20, 20}}, got {err:?}"
        );
    }

    #[test]
    fn decode_rejects_duplicate_index() {
        let (_, shards, manifest) = encode_default(64 * 1024);

        let twice = vec![shards[0].clone(), shards[0].clone()];
        let err = decode_pack_from_shards(&twice, &manifest).unwrap_err();
        assert!(
            matches!(err, ShardError::DuplicateIndex { index: 0 }),
            "expected DuplicateIndex{{index: 0}}, got {err:?}"
        );
    }

    #[test]
    fn decode_rejects_manifest_with_wrong_hash_count() {
        let (_, shards, mut manifest) = encode_default(64 * 1024);
        manifest.shard_hashes.pop();

        let err = decode_pack_from_shards(&shards, &manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestShardCountMismatch {
                    actual: 19,
                    expected: 20,
                }
            ),
            "expected ManifestShardCountMismatch{{19, 20}}, got {err:?}"
        );
    }

    // ---- Manifest wire-format tests --------------------------------

    #[test]
    fn manifest_wire_format_round_trip_default_config() {
        let (_, _, manifest) = encode_default(64 * 1024);

        let bytes = encode_manifest(&manifest).unwrap();
        // Pin the v0 size for the default (16, 4) config.
        // 5 (prologue) + 32 (pack_hash) + 4 (config) + 32 (commitment)
        // + 4 (len) + 32 * 20 (hashes) = 717.
        assert_eq!(bytes.len(), 717);
        assert_eq!(&bytes[..4], &MANIFEST_MAGIC);
        assert_eq!(bytes[4], MANIFEST_VERSION);

        let decoded = decode_manifest(&bytes).unwrap();
        assert_eq!(decoded, manifest);
    }

    #[test]
    fn manifest_encode_rejects_wrong_hash_count() {
        let (_, _, mut manifest) = encode_default(32 * 1024);
        manifest.shard_hashes.pop();

        let err = encode_manifest(&manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestShardCountMismatch {
                    actual: 19,
                    expected: 20,
                }
            ),
            "expected ManifestShardCountMismatch{{19, 20}}, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_short_input() {
        let err = decode_manifest(b"MKS").unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::InvalidManifestPrologue("input shorter than prologue")
            ),
            "expected InvalidManifestPrologue(input shorter than prologue), got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_bad_magic() {
        let mut bytes = default_manifest_bytes();
        bytes[0] = b'X';
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::InvalidManifestPrologue("bad magic")),
            "expected InvalidManifestPrologue(bad magic), got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_unsupported_version() {
        let mut bytes = default_manifest_bytes();
        bytes[4] = 0xFF;
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::InvalidManifestPrologue("unsupported version")
            ),
            "expected InvalidManifestPrologue(unsupported version), got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_trailing_bytes() {
        let mut bytes = default_manifest_bytes();
        bytes.push(0xAB);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestTrailingBytes),
            "expected ManifestTrailingBytes, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_truncated_body() {
        let mut bytes = default_manifest_bytes();
        bytes.truncate(bytes.len() - 1);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestUnexpectedEof),
            "expected ManifestUnexpectedEof, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_declared_len_mismatch() {
        let mut bytes = default_manifest_bytes();
        // Claim 21 hashes while the config still says 16 + 4 = 20.
        bytes[SHARD_HASHES_LEN_OFFSET..SHARD_HASHES_LEN_OFFSET + 4]
            .copy_from_slice(&21u32.to_le_bytes());
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestShardCountMismatch {
                    actual: 21,
                    expected: 20,
                }
            ),
            "expected ManifestShardCountMismatch{{21, 20}}, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_oversize_input() {
        // Construct a buffer that exceeds the cap. We don't need a real
        // manifest; the size check fires before prologue parsing.
        let bytes = vec![0u8; MANIFEST_MAX_BYTES + 1];
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestTooLarge { .. }),
            "expected ManifestTooLarge, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_zero_config() {
        // Hand-crafted manifest with minimum_shards = 0.
        let bytes = manifest_bytes_with_config(0, 4);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestZeroShardCount { .. }),
            "expected ManifestZeroShardCount, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_zero_extra_shards() {
        // Hand-crafted manifest with extra_shards = 0.
        let bytes = manifest_bytes_with_config(16, 0);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestZeroShardCount {
                    minimum: 16,
                    extra: 0,
                }
            ),
            "expected ManifestZeroShardCount{{16, 0}}, got {err:?}"
        );
    }
}