Skip to main content

mkit_core/
pack_shard.rs

1//! Erasure-coded pack delivery via Reed-Solomon shards.
2//!
3//! This module is the **Phase 1** scaffolding for issue #159: it wraps
4//! `commonware_coding::ReedSolomon<Sha256>` so a producer can split a
5//! pack into `N + K` shards and a consumer can reconstruct the pack
6//! from any `N` of those shards.
7//!
8//! The wire format and motivation are normatively documented in
9//! `docs/SPEC-PACK-SHARDS.md`. The implementation here matches the v0
10//! spec; transport-level shard fetch (HTTP, S3) is **out of scope** and
11//! lives in a later phase under `mkit-transport-*`.
12//!
13//! # Threat model
14//!
15//! * Each [`Shard`] is a self-describing envelope carrying the
16//!   commonware `Chunk` (shard payload + index + Merkle proof).
17//! * Before passing a shard to the decoder, the receiver compares
18//!   `BLAKE3(shard.bytes)` against the manifest entry in
19//!   [`ShardSet::shard_hashes`]. A mismatch means the shard was
20//!   tampered with in transit; the shard is rejected without ever
21//!   reaching the Reed-Solomon decoder.
22//! * After reconstruction, the recovered pack bytes are hashed with
23//!   BLAKE3 and compared against [`ShardSet::pack_hash`]. This catches
24//!   the (cryptographically unlikely) case where a coordinated attacker
25//!   crafted shards that pass the Merkle check but reconstruct a
26//!   different pack.
27//!
28//! # Feature gate
29//!
30//! This module is compiled only when `--features pack-shards` is set.
31//! The default `mkit-core` build does **not** pull in the
32//! `commonware-*` dep stack.
33//!
34//! # Defaults
35//!
36//! `Config { minimum_shards: 16, extra_shards: 4 }` — 20 total shards,
37//! 25% redundancy. Any 16 of 20 shards reconstruct the pack. Tuning
38//! lives in `docs/SPEC-PACK-SHARDS.md` §6.
39
40use std::num::NonZeroU16;
41
42use commonware_codec::{Decode, Encode};
43use commonware_coding::{CodecConfig, Scheme as _};
44use commonware_cryptography::Sha256;
45use commonware_parallel::Sequential;
46
47use crate::hash::{self, HASH_LEN, Hash};
48
49// Re-exports so callers don't need to depend on `commonware-coding` directly.
50pub use commonware_coding::Config;
51
52type RsScheme = commonware_coding::ReedSolomon<Sha256>;
53type Commitment = <RsScheme as commonware_coding::Scheme>::Commitment;
54type RsChunk = <RsScheme as commonware_coding::Scheme>::Shard;
55
56/// Strategy used for the Reed-Solomon encode/decode internals. We use
57/// `Sequential` here so the scaffolding has no rayon thread-pool
58/// surprises; benches can swap in a parallel strategy in Phase 2.
59const STRATEGY: Sequential = Sequential;
60
/// Cap on the per-shard codec payload size accepted at decode time.
///
/// 4 GiB matches the existing packfile size cap (see
/// `crate::pack::MAX_TOTAL_PAYLOAD`); anything bigger could not have
/// originated from a valid mkit pack.
///
/// The limit is computed in `u64` and clamped to `usize::MAX`. Writing
/// it as a plain `usize` product overflows during constant evaluation
/// on targets whose `usize` is 32 bits wide, which fails the build
/// there. On 64-bit targets the value is exactly 4 GiB.
const MAX_SHARD_BYTES: usize = {
    const LIMIT: u64 = 4 * 1024 * 1024 * 1024;
    if LIMIT > usize::MAX as u64 {
        usize::MAX
    } else {
        // Guarded by the branch above: `LIMIT` fits in `usize` here.
        LIMIT as usize
    }
};
66
/// Pack size (in bytes) below which a producer SHOULD NOT shard.
///
/// SPEC-PACK-SHARDS §6: for small packs the per-shard Merkle-proof
/// overhead dominates, so they are served monolithically. The v0
/// cutoff is 1 MiB, exported so transports and CLI tooling share one
/// number.
pub const SHARD_SIZE_THRESHOLD: u64 = 1 << 20;

/// Magic bytes that open a serialised [`ShardSet`]: ASCII "MKSH"
/// ("mkit-shards"). Lets a parser refuse to treat random bytes as a
/// manifest.
pub const MANIFEST_MAGIC: [u8; 4] = [b'M', b'K', b'S', b'H'];

/// Wire-format version byte of a serialised [`ShardSet`]. Bumped
/// whenever the on-the-wire layout changes in a
/// non-backwards-compatible way.
pub const MANIFEST_VERSION: u8 = 1;

/// Prologue size in bytes: the magic followed by the one-byte version.
const MANIFEST_PROLOGUE_LEN: usize = MANIFEST_MAGIC.len() + 1;

/// Upper bound on the size of a serialised manifest accepted by the
/// deserialiser.
///
/// Per SPEC-PACK-SHARDS §6 a manifest for the v0 default config is
/// `~ 32 * (T + 2)` bytes plus the prologue and config. Capping at
/// 1 MiB stops a hostile peer from streaming gigabytes through the
/// deserialiser.
pub const MANIFEST_MAX_BYTES: usize = 1 << 20;
92
93/// Default config: `(minimum_shards = 16, extra_shards = 4)`.
94///
95/// 20 total shards, any 16 of which reconstruct. See SPEC-PACK-SHARDS §6
96/// for the rationale and when callers may want to tune these.
97///
98/// # Panics
99///
100/// Infallible — both `16` and `4` are nonzero. The `expect` calls
101/// document intent; they cannot fire.
102#[must_use]
103pub fn default_config() -> Config {
104    Config {
105        minimum_shards: NonZeroU16::new(16).expect("16 != 0"),
106        extra_shards: NonZeroU16::new(4).expect("4 != 0"),
107    }
108}
109
/// One shard of an erasure-coded pack.
///
/// `bytes` holds the codec-serialised commonware `Chunk` (shard
/// payload + index + Merkle proof). Before decoding, the receiver
/// hashes `bytes` with BLAKE3 and compares the result with the entry
/// for `index` in [`ShardSet::shard_hashes`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Shard {
    /// Position of this shard, in `[0, minimum_shards + extra_shards)`.
    pub index: u16,
    /// Codec-serialised commonware `Chunk`. Treated as opaque here:
    /// it is only ever hashed or handed to the commonware codec.
    pub bytes: Vec<u8>,
}
124
125/// Manifest describing a set of shards encoding one pack.
126///
127/// In the wire protocol this is published alongside the shards under
128/// `/packs/<pack_hash>/shards.manifest` (see SPEC-PACK-SHARDS §2). A
129/// consumer fetches the manifest first, then fetches up to
130/// `config.total_shards()` shards in parallel, rejecting any whose
131/// BLAKE3 hash does not match.
132#[derive(Debug, Clone, PartialEq, Eq)]
133pub struct ShardSet {
134    /// BLAKE3 of the original pack bytes. Verified after reconstruction
135    /// as the final defence against shard-set forgery.
136    pub pack_hash: Hash,
137    /// Reed-Solomon `(minimum_shards, extra_shards)` configuration used
138    /// to produce this shard set. The decoder MUST use the same
139    /// configuration.
140    pub config: Config,
141    /// BLAKE3 of each shard's `bytes`, indexed by shard index.
142    /// `shard_hashes.len()` MUST equal `config.total_shards()`.
143    pub shard_hashes: Vec<Hash>,
144    /// Commonware BMT root committing to all shards. Required by the
145    /// commonware decoder for per-shard Merkle-proof checks. Stored
146    /// here so the manifest is self-contained — a receiver does not
147    /// need a second round-trip to fetch the commitment.
148    pub commitment: Hash,
149}
150
151/// Errors produced by [`encode_pack_to_shards`] / [`decode_pack_from_shards`].
152#[derive(Debug, thiserror::Error)]
153pub enum ShardError {
154    /// The Reed-Solomon encoder rejected the input. Typically means
155    /// the pack is larger than `u32::MAX` bytes (commonware's limit).
156    #[error("reed-solomon encode failed: {0}")]
157    EncodeFailed(String),
158    /// The Reed-Solomon decoder rejected the supplied shards. Usually
159    /// triggered by too few shards, duplicate indices, or a Merkle
160    /// proof that no longer matches the commitment.
161    #[error("reed-solomon decode failed: {0}")]
162    DecodeFailed(String),
163    /// The codec layer could not parse a shard's `bytes`. Means the
164    /// shard envelope is malformed — distinct from a BLAKE3 mismatch.
165    #[error("shard codec decode failed at index {index}: {source}")]
166    ShardCodecFailed {
167        index: u16,
168        #[source]
169        source: commonware_codec::Error,
170    },
171    /// A shard's BLAKE3 hash does not match the manifest entry for its
172    /// index. The shard is corrupt or maliciously substituted.
173    #[error("shard {index} BLAKE3 mismatch (manifest tampered or shard corrupted)")]
174    ShardHashMismatch { index: u16 },
175    /// Manifest claims an index outside `0..total_shards`.
176    #[error("shard index {index} is out of range for config (total = {total})")]
177    IndexOutOfRange { index: u16, total: u32 },
178    /// Duplicate shard index supplied to the decoder.
179    #[error("duplicate shard index {index}")]
180    DuplicateIndex { index: u16 },
181    /// Manifest carries the wrong number of `shard_hashes` for the
182    /// declared config.
183    #[error(
184        "manifest has {actual} shard_hashes, expected {expected} \
185         (config.total_shards())"
186    )]
187    ManifestShardCountMismatch { actual: usize, expected: usize },
188    /// Reconstruction produced bytes whose BLAKE3 does not match
189    /// `manifest.pack_hash`. Cryptographically the manifest was forged.
190    #[error("reconstructed pack hash does not match manifest.pack_hash")]
191    PackHashMismatch,
192    /// Caller passed fewer than `config.minimum_shards` shards.
193    #[error("insufficient shards: {provided} < {minimum}")]
194    InsufficientShards { provided: usize, minimum: u16 },
195    /// The manifest wire bytes are shorter than the v0 prologue, do not
196    /// begin with [`MANIFEST_MAGIC`], or carry an unrecognised
197    /// [`MANIFEST_VERSION`].
198    #[error("invalid manifest prologue: {0}")]
199    InvalidManifestPrologue(&'static str),
200    /// The manifest wire bytes are truncated — a length-prefixed field
201    /// claims more bytes than remain in the buffer.
202    #[error("unexpected eof while decoding manifest")]
203    ManifestUnexpectedEof,
204    /// The manifest carries trailing bytes after the last expected
205    /// field. Most likely a producer / consumer version mismatch.
206    #[error("trailing bytes after manifest body")]
207    ManifestTrailingBytes,
208    /// The manifest declares a `(minimum_shards, extra_shards)` pair
209    /// whose components are zero — illegal at the SPEC level.
210    #[error("manifest declares zero shard count (min={minimum}, extra={extra})")]
211    ManifestZeroShardCount { minimum: u16, extra: u16 },
212    /// The manifest exceeds [`MANIFEST_MAX_BYTES`].
213    #[error("manifest is too large: {actual} > {max}")]
214    ManifestTooLarge { actual: usize, max: usize },
215}
216
217/// Encode a pack into shards.
218///
219/// Produces `config.minimum_shards + config.extra_shards` shards and a
220/// manifest committing to them. The pack itself is not modified.
221///
222/// # Errors
223///
224/// Returns [`ShardError::EncodeFailed`] if the underlying Reed-Solomon
225/// encoder rejects the input (e.g. the pack exceeds `u32::MAX` bytes,
226/// or `total_shards()` exceeds `u16::MAX`).
227///
228/// # Panics
229///
230/// Infallible — the only `expect` in the body asserts that commonware
231/// never emits more than `u16::MAX` shards, which it enforces in
232/// `ReedSolomon::encode` (`Error::TooManyTotalShards`).
233pub fn encode_pack_to_shards(
234    pack: &[u8],
235    config: Config,
236) -> Result<(Vec<Shard>, ShardSet), ShardError> {
237    let (commitment, chunks) = RsScheme::encode(&config, pack, &STRATEGY)
238        .map_err(|e| ShardError::EncodeFailed(format!("{e:?}")))?;
239
240    let total = config.total_shards() as usize;
241    debug_assert_eq!(chunks.len(), total);
242
243    let mut shards = Vec::with_capacity(total);
244    let mut shard_hashes = Vec::with_capacity(total);
245    for (i, chunk) in chunks.into_iter().enumerate() {
246        // `i < total <= u16::MAX` by commonware's own bound
247        // (`Chunk::index: u16`), so the conversion is infallible.
248        let index = u16::try_from(i).expect("commonware emits <= u16::MAX shards");
249        let bytes = chunk.encode().to_vec();
250        let h = hash::hash(&bytes);
251        shards.push(Shard { index, bytes });
252        shard_hashes.push(h);
253    }
254
255    let manifest = ShardSet {
256        pack_hash: hash::hash(pack),
257        config,
258        shard_hashes,
259        commitment: digest_to_bytes(&commitment),
260    };
261
262    Ok((shards, manifest))
263}
264
265/// Decode a pack from a (possibly partial) set of shards.
266///
267/// The decoder:
268///
269/// 1. Verifies each shard's BLAKE3 against the manifest entry for its
270///    index. Mismatched shards are dropped before they reach the
271///    Reed-Solomon decoder.
272/// 2. Deserialises each surviving shard as a commonware `Chunk`.
273/// 3. Calls `ReedSolomon::check` on each chunk (Merkle-proof check
274///    against `manifest.commitment`).
275/// 4. Calls `ReedSolomon::decode` on the checked set.
276/// 5. Verifies the reconstructed pack's BLAKE3 against
277///    `manifest.pack_hash`.
278///
279/// # Errors
280///
281/// See [`ShardError`] for the full taxonomy. Any step's failure
282/// short-circuits.
283pub fn decode_pack_from_shards(
284    shards: &[Shard],
285    manifest: &ShardSet,
286) -> Result<Vec<u8>, ShardError> {
287    let total = manifest.config.total_shards();
288    if manifest.shard_hashes.len() != total as usize {
289        return Err(ShardError::ManifestShardCountMismatch {
290            actual: manifest.shard_hashes.len(),
291            expected: total as usize,
292        });
293    }
294
295    let minimum = manifest.config.minimum_shards.get();
296    let commitment = bytes_to_digest(&manifest.commitment);
297    let codec_cfg = CodecConfig {
298        maximum_shard_size: MAX_SHARD_BYTES,
299    };
300
301    let mut seen = vec![false; total as usize];
302    let mut checked = Vec::with_capacity(shards.len());
303
304    for shard in shards {
305        // (1) Range + duplicate index check.
306        if u32::from(shard.index) >= total {
307            return Err(ShardError::IndexOutOfRange {
308                index: shard.index,
309                total,
310            });
311        }
312        let slot = &mut seen[shard.index as usize];
313        if *slot {
314            return Err(ShardError::DuplicateIndex { index: shard.index });
315        }
316        *slot = true;
317
318        // (2) BLAKE3 tamper check against the manifest.
319        let expected = &manifest.shard_hashes[shard.index as usize];
320        if &hash::hash(&shard.bytes) != expected {
321            return Err(ShardError::ShardHashMismatch { index: shard.index });
322        }
323
324        // (3) Codec decode → commonware `Chunk`.
325        let chunk = RsChunk::decode_cfg(shard.bytes.as_slice(), &codec_cfg).map_err(|e| {
326            ShardError::ShardCodecFailed {
327                index: shard.index,
328                source: e,
329            }
330        })?;
331
332        // (4) Merkle-proof check against the commitment.
333        let checked_shard = RsScheme::check(&manifest.config, &commitment, shard.index, &chunk)
334            .map_err(|e| ShardError::DecodeFailed(format!("check({}): {e:?}", shard.index)))?;
335        checked.push(checked_shard);
336    }
337
338    if checked.len() < usize::from(minimum) {
339        return Err(ShardError::InsufficientShards {
340            provided: checked.len(),
341            minimum,
342        });
343    }
344
345    // (5) Reed-Solomon decode.
346    let pack = RsScheme::decode(&manifest.config, &commitment, checked.iter(), &STRATEGY)
347        .map_err(|e| ShardError::DecodeFailed(format!("{e:?}")))?;
348
349    // (6) Final BLAKE3 check.
350    if hash::hash(&pack) != manifest.pack_hash {
351        return Err(ShardError::PackHashMismatch);
352    }
353
354    Ok(pack)
355}
356
357/// Extract the raw 32 bytes from a commonware `Sha256` digest.
358fn digest_to_bytes(d: &Commitment) -> [u8; HASH_LEN] {
359    // `Sha256::Digest` derefs to `[u8; 32]`. We avoid relying on a
360    // specific accessor name by going through `AsRef<[u8]>` which the
361    // digest type implements.
362    let slice: &[u8] = d.as_ref();
363    let mut out = [0u8; HASH_LEN];
364    out.copy_from_slice(slice);
365    out
366}
367
368/// Inverse of [`digest_to_bytes`]: reconstruct a commonware digest
369/// from the 32 bytes stored in the manifest.
370fn bytes_to_digest(b: &[u8; HASH_LEN]) -> Commitment {
371    // `Sha256::Digest` is a 32-byte `Array` and only exposes
372    // `From<[u8; 32]>`, not `TryFrom<&[u8]>`. Copy through a fixed
373    // array to keep the bound surface narrow.
374    use commonware_codec::FixedSize;
375    debug_assert_eq!(<Commitment as FixedSize>::SIZE, HASH_LEN);
376    Commitment::from(*b)
377}
378
379// ---------------------------------------------------------------------
380// Manifest wire format (v0)
381// ---------------------------------------------------------------------
382//
383// Layout (all multi-byte integers are little-endian):
384//
385//     offset  size  field
386//     ------  ----  -----------------------------------------
387//     0       4     magic = b"MKSH"
388//     4       1     version = 0x01
389//     5       32    pack_hash
390//     37      2     config.minimum_shards
391//     39      2     config.extra_shards
392//     41      32    commitment
393//     73      4     shard_hashes_len (== minimum + extra)
394//     77      32*T  shard_hashes
395//
396// Total size for the v0 default `(16, 4)` config:
397//     5 + 32 + 2 + 2 + 32 + 4 + 32*20 = 717 bytes.
398//
399// Rationale for adding a new format here rather than reusing
400// `mkit_core::serialize`:
401//   * `serialize.rs` is hard-coded to the [`Object`] enum and its
402//     `MAGIC = "MKT1"` / `SCHEMA_VERSION` prologue. Shoehorning a
403//     non-`Object` payload into that path would require widening its
404//     public API and re-encoding every golden vector.
405//   * The shard manifest is a transport artifact, not an object on
406//     disk. Keeping its wire format colocated with the rest of the
407//     pack-shard module keeps Phase 2 changes scoped to one file.
408
409/// Serialise a [`ShardSet`] into its v0 wire bytes.
410///
411/// The format is documented above and in SPEC-PACK-SHARDS §2. The
412/// caller takes ownership of the returned `Vec`.
413///
414/// # Errors
415///
416/// Returns [`ShardError::ManifestShardCountMismatch`] if
417/// `manifest.shard_hashes.len()` does not equal
418/// `manifest.config.total_shards()` — we refuse to encode a manifest
419/// whose vectors disagree with its config.
420///
421/// # Panics
422///
423/// Infallible: `config.total_shards()` is `u32` by commonware's own
424/// bound and the `expect` documents intent. It cannot fire.
425pub fn encode_manifest(manifest: &ShardSet) -> Result<Vec<u8>, ShardError> {
426    let total = manifest.config.total_shards() as usize;
427    if manifest.shard_hashes.len() != total {
428        return Err(ShardError::ManifestShardCountMismatch {
429            actual: manifest.shard_hashes.len(),
430            expected: total,
431        });
432    }
433
434    let body_len = MANIFEST_PROLOGUE_LEN + HASH_LEN + 2 + 2 + HASH_LEN + 4 + total * HASH_LEN;
435    let mut out = Vec::with_capacity(body_len);
436    out.extend_from_slice(&MANIFEST_MAGIC);
437    out.push(MANIFEST_VERSION);
438    out.extend_from_slice(&manifest.pack_hash);
439    out.extend_from_slice(&manifest.config.minimum_shards.get().to_le_bytes());
440    out.extend_from_slice(&manifest.config.extra_shards.get().to_le_bytes());
441    out.extend_from_slice(&manifest.commitment);
442    // Length-prefix the shard_hashes vector as u32 so the parser can
443    // bail before allocating attacker-controlled capacity.
444    out.extend_from_slice(
445        &u32::try_from(total)
446            .expect("total_shards fits in u32")
447            .to_le_bytes(),
448    );
449    for h in &manifest.shard_hashes {
450        out.extend_from_slice(h);
451    }
452    debug_assert_eq!(out.len(), body_len);
453    Ok(out)
454}
455
456/// Deserialise a [`ShardSet`] from its v0 wire bytes.
457///
458/// Validates the prologue, the length-prefixed shard-hashes vector,
459/// the per-config bounds, and rejects trailing bytes.
460///
461/// # Errors
462///
463/// * [`ShardError::ManifestTooLarge`] — input exceeds
464///   [`MANIFEST_MAX_BYTES`].
465/// * [`ShardError::InvalidManifestPrologue`] — magic / version
466///   mismatch or input shorter than the prologue.
467/// * [`ShardError::ManifestUnexpectedEof`] — any field claims more
468///   bytes than remain in the buffer.
469/// * [`ShardError::ManifestZeroShardCount`] — manifest declares
470///   `(0, _)` or `(_, 0)`.
471/// * [`ShardError::ManifestShardCountMismatch`] — declared
472///   `shard_hashes_len` does not equal `minimum + extra`.
473/// * [`ShardError::ManifestTrailingBytes`] — input has bytes after
474///   the last hash.
475pub fn decode_manifest(bytes: &[u8]) -> Result<ShardSet, ShardError> {
476    if bytes.len() > MANIFEST_MAX_BYTES {
477        return Err(ShardError::ManifestTooLarge {
478            actual: bytes.len(),
479            max: MANIFEST_MAX_BYTES,
480        });
481    }
482    if bytes.len() < MANIFEST_PROLOGUE_LEN {
483        return Err(ShardError::InvalidManifestPrologue(
484            "input shorter than prologue",
485        ));
486    }
487    if bytes[..4] != MANIFEST_MAGIC {
488        return Err(ShardError::InvalidManifestPrologue("bad magic"));
489    }
490    if bytes[4] != MANIFEST_VERSION {
491        return Err(ShardError::InvalidManifestPrologue("unsupported version"));
492    }
493    let mut pos = MANIFEST_PROLOGUE_LEN;
494
495    // pack_hash
496    if bytes.len() - pos < HASH_LEN {
497        return Err(ShardError::ManifestUnexpectedEof);
498    }
499    let mut pack_hash = [0u8; HASH_LEN];
500    pack_hash.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
501    pos += HASH_LEN;
502
503    // config
504    if bytes.len() - pos < 4 {
505        return Err(ShardError::ManifestUnexpectedEof);
506    }
507    let minimum = u16::from_le_bytes([bytes[pos], bytes[pos + 1]]);
508    let extra = u16::from_le_bytes([bytes[pos + 2], bytes[pos + 3]]);
509    pos += 4;
510    let minimum_nz =
511        NonZeroU16::new(minimum).ok_or(ShardError::ManifestZeroShardCount { minimum, extra })?;
512    let extra_nz =
513        NonZeroU16::new(extra).ok_or(ShardError::ManifestZeroShardCount { minimum, extra })?;
514    let config = Config {
515        minimum_shards: minimum_nz,
516        extra_shards: extra_nz,
517    };
518    let total = config.total_shards();
519
520    // commitment
521    if bytes.len() - pos < HASH_LEN {
522        return Err(ShardError::ManifestUnexpectedEof);
523    }
524    let mut commitment = [0u8; HASH_LEN];
525    commitment.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
526    pos += HASH_LEN;
527
528    // shard_hashes_len
529    if bytes.len() - pos < 4 {
530        return Err(ShardError::ManifestUnexpectedEof);
531    }
532    let declared_len =
533        u32::from_le_bytes([bytes[pos], bytes[pos + 1], bytes[pos + 2], bytes[pos + 3]]);
534    pos += 4;
535    if declared_len != total {
536        return Err(ShardError::ManifestShardCountMismatch {
537            actual: declared_len as usize,
538            expected: total as usize,
539        });
540    }
541    // Cheap upper bound — reject impossible counts before allocating.
542    if (declared_len as usize).saturating_mul(HASH_LEN) > bytes.len() - pos {
543        return Err(ShardError::ManifestUnexpectedEof);
544    }
545    let mut shard_hashes = Vec::with_capacity(declared_len as usize);
546    for _ in 0..declared_len {
547        let mut h = [0u8; HASH_LEN];
548        h.copy_from_slice(&bytes[pos..pos + HASH_LEN]);
549        pos += HASH_LEN;
550        shard_hashes.push(h);
551    }
552
553    if pos != bytes.len() {
554        return Err(ShardError::ManifestTrailingBytes);
555    }
556
557    Ok(ShardSet {
558        pack_hash,
559        config,
560        shard_hashes,
561        commitment,
562    })
563}
564
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic pseudo-random payload of exactly `bytes` bytes.
    ///
    /// Not a real packfile — the shard layer treats its input as
    /// opaque bytes, so any byte stream with enough entropy exercises
    /// the encoder. A xorshift-style generator with a fixed seed keeps
    /// the tests reproducible.
    fn synthetic_pack(bytes: usize) -> Vec<u8> {
        let mut x: u64 = 0x9E37_79B9_7F4A_7C15;
        let mut out = Vec::with_capacity(bytes);
        while out.len() < bytes {
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            out.extend_from_slice(&x.to_le_bytes());
        }
        out.truncate(bytes);
        out
    }

    /// Serialised manifest for a synthetic pack of `pack_len` bytes
    /// encoded with the default config. Shared fixture for the
    /// wire-format rejection tests.
    fn default_manifest_bytes(pack_len: usize) -> Vec<u8> {
        let pack = synthetic_pack(pack_len);
        let (_, manifest) = encode_pack_to_shards(&pack, default_config()).unwrap();
        encode_manifest(&manifest).unwrap()
    }

    /// Hand-crafted manifest bytes with the given raw shard counts,
    /// all-zero hashes, and an empty hash list.
    fn manifest_bytes_with_counts(minimum: u16, extra: u16) -> Vec<u8> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&MANIFEST_MAGIC);
        bytes.push(MANIFEST_VERSION);
        bytes.extend_from_slice(&[0u8; HASH_LEN]); // pack_hash
        bytes.extend_from_slice(&minimum.to_le_bytes());
        bytes.extend_from_slice(&extra.to_le_bytes());
        bytes.extend_from_slice(&[0u8; HASH_LEN]); // commitment
        bytes.extend_from_slice(&0u32.to_le_bytes()); // shard_hashes_len
        bytes
    }

    #[test]
    fn round_trip_default_config_1_mib_first_n_shards() {
        let pack = synthetic_pack(1024 * 1024);
        let config = default_config();
        let (shards, manifest) = encode_pack_to_shards(&pack, config).unwrap();

        assert_eq!(shards.len(), 20);
        assert_eq!(manifest.shard_hashes.len(), 20);
        assert_eq!(manifest.pack_hash, hash::hash(&pack));

        // Decode using shards 0..16 (the first `minimum_shards`).
        let subset: Vec<Shard> = shards.into_iter().take(16).collect();
        let recovered = decode_pack_from_shards(&subset, &manifest).unwrap();
        assert_eq!(recovered, pack);
    }

    #[test]
    fn lossy_round_trip_drops_shards_0_5_10_17() {
        let pack = synthetic_pack(1024 * 1024);
        let config = default_config();
        let (shards, manifest) = encode_pack_to_shards(&pack, config).unwrap();

        let dropped = [0u16, 5, 10, 17];
        let subset: Vec<Shard> = shards
            .into_iter()
            .filter(|s| !dropped.contains(&s.index))
            .collect();

        // Should be exactly 16 = minimum_shards remaining.
        assert_eq!(subset.len(), 16);

        let recovered = decode_pack_from_shards(&subset, &manifest).unwrap();
        assert_eq!(recovered, pack);
    }

    #[test]
    fn tampered_shard_is_rejected_before_decode() {
        let pack = synthetic_pack(256 * 1024);
        let config = default_config();
        let (mut shards, manifest) = encode_pack_to_shards(&pack, config).unwrap();

        // Flip a bit in the last byte of shard 0. The manifest entry
        // for shard 0 still reflects the *original* BLAKE3 (we did
        // not update it), so the tamper detection MUST fire.
        let last = shards[0].bytes.len() - 1;
        shards[0].bytes[last] ^= 0x01;

        let subset: Vec<Shard> = shards.into_iter().take(16).collect();
        let err = decode_pack_from_shards(&subset, &manifest).unwrap_err();
        assert!(
            matches!(err, ShardError::ShardHashMismatch { index: 0 }),
            "expected ShardHashMismatch{{index: 0}}, got {err:?}"
        );
    }

    #[test]
    fn insufficient_shards_returns_error() {
        let pack = synthetic_pack(64 * 1024);
        let config = default_config();
        let (shards, manifest) = encode_pack_to_shards(&pack, config).unwrap();

        // Only 15 of the 16 required shards.
        let subset: Vec<Shard> = shards.into_iter().take(15).collect();
        let err = decode_pack_from_shards(&subset, &manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::InsufficientShards {
                    provided: 15,
                    minimum: 16,
                }
            ),
            "expected InsufficientShards{{15, 16}}, got {err:?}"
        );
    }

    #[test]
    fn duplicate_shard_index_is_rejected() {
        let pack = synthetic_pack(64 * 1024);
        let (shards, manifest) = encode_pack_to_shards(&pack, default_config()).unwrap();

        // Shards 0..16 are all valid; appending a second copy of
        // shard 0 must trip the duplicate check when it is reached.
        let mut subset: Vec<Shard> = shards.into_iter().take(16).collect();
        let repeated = subset[0].clone();
        subset.push(repeated);

        let err = decode_pack_from_shards(&subset, &manifest).unwrap_err();
        assert!(
            matches!(err, ShardError::DuplicateIndex { index: 0 }),
            "expected DuplicateIndex{{index: 0}}, got {err:?}"
        );
    }

    #[test]
    fn out_of_range_shard_index_is_rejected() {
        let pack = synthetic_pack(64 * 1024);
        let (_, manifest) = encode_pack_to_shards(&pack, default_config()).unwrap();

        // The default config has 20 shards, so valid indices are 0..=19.
        let bogus = Shard {
            index: 20,
            bytes: Vec::new(),
        };
        let err = decode_pack_from_shards(&[bogus], &manifest).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::IndexOutOfRange {
                    index: 20,
                    total: 20,
                }
            ),
            "expected IndexOutOfRange{{20, 20}}, got {err:?}"
        );
    }

    // ---- Manifest wire-format tests --------------------------------

    #[test]
    fn manifest_wire_format_round_trip_default_config() {
        let pack = synthetic_pack(64 * 1024);
        let (_, manifest) = encode_pack_to_shards(&pack, default_config()).unwrap();

        let bytes = encode_manifest(&manifest).unwrap();
        // Pin the v0 size for the default (16, 4) config.
        // 5 (prologue) + 32 (pack_hash) + 4 (config) + 32 (commitment)
        // + 4 (len) + 32 * 20 (hashes) = 717.
        assert_eq!(bytes.len(), 717);
        assert_eq!(&bytes[..4], &MANIFEST_MAGIC);
        assert_eq!(bytes[4], MANIFEST_VERSION);

        let decoded = decode_manifest(&bytes).unwrap();
        assert_eq!(decoded, manifest);
    }

    #[test]
    fn manifest_decode_rejects_bad_magic() {
        let mut bytes = default_manifest_bytes(32 * 1024);
        bytes[0] = b'X';
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::InvalidManifestPrologue("bad magic")),
            "expected InvalidManifestPrologue(bad magic), got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_unsupported_version() {
        let mut bytes = default_manifest_bytes(32 * 1024);
        bytes[4] = 0xFF;
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::InvalidManifestPrologue("unsupported version")
            ),
            "expected InvalidManifestPrologue(unsupported version), got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_trailing_bytes() {
        let mut bytes = default_manifest_bytes(32 * 1024);
        bytes.push(0xAB);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestTrailingBytes),
            "expected ManifestTrailingBytes, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_truncated_body() {
        let mut bytes = default_manifest_bytes(32 * 1024);
        bytes.truncate(bytes.len() - 1);
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestUnexpectedEof),
            "expected ManifestUnexpectedEof, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_oversize_input() {
        // The buffer does not need to be a real manifest: the size
        // check fires before prologue parsing.
        let bytes = vec![0u8; MANIFEST_MAX_BYTES + 1];
        let err = decode_manifest(&bytes).unwrap_err();
        assert!(
            matches!(err, ShardError::ManifestTooLarge { .. }),
            "expected ManifestTooLarge, got {err:?}"
        );
    }

    #[test]
    fn manifest_decode_rejects_zero_config() {
        // minimum_shards = 0.
        let err = decode_manifest(&manifest_bytes_with_counts(0, 4)).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestZeroShardCount {
                    minimum: 0,
                    extra: 4,
                }
            ),
            "expected ManifestZeroShardCount{{0, 4}}, got {err:?}"
        );

        // extra_shards = 0 is rejected the same way.
        let err = decode_manifest(&manifest_bytes_with_counts(16, 0)).unwrap_err();
        assert!(
            matches!(
                err,
                ShardError::ManifestZeroShardCount {
                    minimum: 16,
                    extra: 0,
                }
            ),
            "expected ManifestZeroShardCount{{16, 0}}, got {err:?}"
        );
    }
}
763            ),
764            "expected InsufficientShards{{15, 16}}, got {err:?}"
765        );
766    }
767}