Skip to main content

mkit_core/
sparse.rs

1//! Verifiable sparse-checkout (Phase 1 scaffold).
2//!
3//! Spec reference: `docs/SPEC-SPARSE-CHECKOUT.md`. Issue #158.
4//!
5//! # What this is
6//!
7//! Today's `mkit sparse-checkout` filters paths *on the client* after
8//! the server has handed over the full tree. That's fine for the file
9//! transport but wasteful on HTTP / S3 transports where the server
10//! could ship a partial subtree if the client could *verify* the
11//! server didn't lie about which entries were omitted by request
12//! versus silently dropped.
13//!
14//! This module is the Phase 1 core scaffolding: build a manifest from
15//! a `Tree` + filter, and verify a delivered set of `TreeEntry`s
16//! against it. The actual transport-level integration (HTTP/S3 query
17//! params, on-disk bitmap cache) is Phase 2 and is intentionally out
18//! of scope.
19//!
20//! # Authenticated bitmap
21//!
22//! Authentication uses
23//! [`commonware_storage::AuthenticatedBitMap`][bitmap], which provides
24//! a Merkleized bitmap with bit-level inclusion proofs. The bitmap is
25//! `ALPHA`-tier upstream and `std`-only, so this entire module sits
26//! behind the `sparse-checkout` Cargo feature (default off).
27//!
28//! Each entry in the underlying `Tree` is assigned a leaf index equal
29//! to its position in the tree's strict lexicographic byte ordering
30//! (the same ordering enforced by [`Tree::is_sorted`]). A bit set at
31//! index `i` means "the server is shipping entry `i`"; an unset bit
32//! means "this entry is omitted by client request". Tampering — the
33//! server flipping a bit or omitting/inserting an entry — produces a
34//! different bitmap root, which fails verification against the
35//! root committed in the [`SparseManifest`].
36//!
37//! # Wire format
38//!
39//! Strictly defined by `docs/SPEC-SPARSE-CHECKOUT.md`. Phase 1 does
40//! not yet wire `SparseProof` into any transport — the type is the
41//! in-memory carrier between [`build_sparse`] and [`verify_sparse`].
42//!
43//! [bitmap]: https://docs.rs/commonware-storage
44
45use crate::hash::{Hash, Hasher, ZERO, hash as blake3_hash};
46use crate::object::{EntryMode, Object, Tree, TreeEntry};
47use crate::serialize::serialize;
48use std::path::PathBuf;
49
50use commonware_cryptography::{Sha256, sha256};
51use commonware_parallel::Sequential;
52use commonware_runtime::{Runner as _, Supervisor as _, deterministic};
53use commonware_storage::{MerkleizedBitMap, merkle::Bagging, merkle::mmr};
54
55/// Bitmap chunk size in bytes (32 bytes = 256 bits = one SHA-256 digest).
56///
57/// Chosen to match the upstream hasher digest size, which is the
58/// upstream recommendation for minimising proof size.
59const CHUNK_BYTES: usize = 32;
60
61/// Peak-bagging policy for the sparse-checkout authenticated bitmap.
62///
63/// Producer ([`build_sparse`]) and verifier ([`verify_sparse`]) both reach
64/// the bitmap root through the single [`merkleize_bits`] helper, so they
65/// cannot drift — but the policy is pinned here as the explicit contract.
66/// `ForwardFold` reproduces the pre-2026.5 commonware default root
67/// byte-for-byte, so the on-wire `bitmap_root` is unchanged across the
68/// commonware bump and [`SPARSE_WIRE_VERSION`] does not need to move.
69/// Changing this is a wire-format break — bump [`SPARSE_WIRE_VERSION`].
70const SPARSE_BAGGING: Bagging = Bagging::ForwardFold;
71
72/// Hard cap on the number of leaves in a tree we are willing to build
73/// a sparse manifest for. Matches the per-tree `entry_count` bound in
74/// SPEC-OBJECTS §4. Verifier MUST enforce the same cap so a malicious
75/// `manifest.leaf_count` can't allocate unbounded memory.
76pub const MAX_LEAVES: u64 = 1_000_000;
77
78/// Hard cap on the number of filter paths. Prevents a hostile client
79/// from sending a billion-entry filter to a server. Mirrors the
80/// transport-side bound documented in SPEC-SPARSE-CHECKOUT §4.
81pub const MAX_FILTER_PATHS: usize = 100_000;
82
83/// Manifest committing to which tree entries the server is including
84/// in a sparse delivery. See SPEC-SPARSE-CHECKOUT §2.
85///
86/// All three fields are 32-byte BLAKE3 / SHA-256 digests. They are
87/// length-prefixed and content-addressed independently so a
88/// downstream codec can serialise them in any order.
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
90pub struct SparseManifest {
91    /// BLAKE3 hash of the full tree object the manifest is derived
92    /// from. Lets the client correlate this manifest with the
93    /// `tree_hash` it asked the server for.
94    pub tree_hash: Hash,
95    /// Root of the [`MerkleizedBitMap`] over the include / exclude
96    /// bitmap. SHA-256 (32 bytes) under the upstream `mmr::Family`
97    /// hasher. Verifier MUST recompute this from the delivered
98    /// bitmap chunks and reject on mismatch.
99    pub bitmap_root: Hash,
100    /// BLAKE3 hash of the canonicalised filter — see
101    /// [`hash_filter`]. Binds the manifest to a specific filter so
102    /// the server can't substitute a different one mid-transfer.
103    pub filter_hash: Hash,
104    /// Total number of leaves in the source tree (= bitmap length in
105    /// bits). Bounded by [`MAX_LEAVES`].
106    pub leaf_count: u64,
107}
108
109/// Verifiable proof bundle accompanying a [`SparseManifest`].
110///
111/// Phase 1 carries the full bitmap chunks; for any realistic tree
112/// size the bitmap fits comfortably in a few hundred bytes and the
113/// verifier walks every delivered entry anyway. Phase 2's transport
114/// wire-format may add per-bit inclusion proofs if bandwidth ever
115/// becomes a concern — those will land as a new field, not as a swap.
116#[derive(Debug, Clone)]
117pub struct SparseProof {
118    /// The raw bitmap bytes, exactly `ceil(leaf_count / 8)` bytes
119    /// padded to a chunk boundary (multiple of `CHUNK_BYTES`).
120    /// Verifier MUST recompute the bitmap root from these bytes and
121    /// compare to `manifest.bitmap_root`.
122    pub bitmap_bytes: Vec<u8>,
123}
124
125/// Stable BLAKE3 hash of a path-prefix filter. Canonical form:
126///
127/// 1. Sort the filter lexicographically by raw bytes.
128/// 2. Deduplicate.
129/// 3. For each path, append `len: u32 LE` then UTF-8 bytes.
130/// 4. Hash the resulting buffer with BLAKE3.
131///
132/// The empty filter hashes to `BLAKE3([])`, not `ZERO`. An empty
133/// filter is a valid manifest committing to "no entries delivered".
134#[must_use]
135pub fn hash_filter(filter: &[PathBuf]) -> Hash {
136    let mut canonical: Vec<&[u8]> = filter
137        .iter()
138        .filter_map(|p| p.to_str().map(str::as_bytes))
139        .collect();
140    canonical.sort_unstable();
141    canonical.dedup();
142
143    let mut h = Hasher::new();
144    for bytes in &canonical {
145        let len = u32::try_from(bytes.len()).unwrap_or(u32::MAX);
146        h.update(&len.to_le_bytes());
147        h.update(bytes);
148    }
149    h.finalize()
150}
151
152/// Returns `true` if `entry.name` is selected by *any* prefix in
153/// `filter`. The filter is interpreted as a list of path-prefixes;
154/// an empty filter selects nothing (it commits to "no entries").
155///
156/// Semantics:
157///
158/// * A filter path exactly equal to the entry name matches.
159/// * A filter path that is a strict prefix of the entry name matches
160///   only when followed by a `/` byte; this prevents `foo` from
161///   matching `foobar`.
162fn entry_matches_filter(entry: &TreeEntry, filter: &[PathBuf]) -> bool {
163    let name = entry.name.as_slice();
164    for path in filter {
165        let Some(bytes) = path.to_str().map(str::as_bytes) else {
166            continue;
167        };
168        if bytes.is_empty() {
169            continue;
170        }
171        if name == bytes {
172            return true;
173        }
174        if name.len() > bytes.len() && name.starts_with(bytes) && name[bytes.len()] == b'/' {
175            return true;
176        }
177    }
178    false
179}
180
181/// Errors raised by [`build_sparse`] and [`verify_sparse`]. Phase 1
182/// keeps this small — the transport layer will wrap these in its own
183/// error type in Phase 2.
184#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
185pub enum SparseError {
186    /// Source tree has more entries than [`MAX_LEAVES`]. The bitmap
187    /// would still build, but we refuse out of caution: an attacker
188    /// shouldn't be able to force a multi-GB allocation on the
189    /// verifier by claiming a huge `leaf_count`.
190    #[error("tree has {actual} entries, exceeds MAX_LEAVES = {}", MAX_LEAVES)]
191    TooManyLeaves { actual: u64 },
192    /// Filter has more entries than [`MAX_FILTER_PATHS`].
193    #[error(
194        "filter has {actual} paths, exceeds MAX_FILTER_PATHS = {}",
195        MAX_FILTER_PATHS
196    )]
197    TooManyFilterPaths { actual: usize },
198    /// Source tree's entries are not in strict lex order. Our leaf
199    /// indices are defined by that order, so we refuse to build a
200    /// manifest from a tree that violates the invariant.
201    #[error("source tree entries are not lex-sorted; refusing to build manifest")]
202    UnsortedTree,
203}
204
205/// Build a sparse manifest from a tree and a filter.
206///
207/// Walks `tree.entries` in canonical order (which, per
208/// SPEC-OBJECTS §4, is byte-wise lex order on `name`). For each
209/// entry, sets bit `i` in the underlying [`MerkleizedBitMap`] iff
210/// any prefix in `filter` selects that entry's name.
211///
212/// Returns
213///
214/// * the subset of `tree.entries` selected by the filter (the ones
215///   the server would actually ship under a server-side sparse
216///   delivery), in the same canonical order;
217/// * the [`SparseManifest`] committing to that subset;
218/// * the [`SparseProof`] the verifier needs to check the manifest.
219///
220/// # Errors
221///
222/// * [`SparseError::UnsortedTree`] — `tree.entries` violates the
223///   spec-mandated lex ordering.
224/// * [`SparseError::TooManyLeaves`] — `tree.entries.len() > MAX_LEAVES`.
225/// * [`SparseError::TooManyFilterPaths`] — `filter.len() > MAX_FILTER_PATHS`.
226///
227/// # Panics
228///
229/// Never panics on caller input. May abort the in-process commonware
230/// async runtime on an upstream bug; we treat that as a programmer
231/// error because the bitmap is in-memory only and has no real I/O
232/// paths to fail on.
233pub fn build_sparse(
234    tree: &Tree,
235    filter: &[PathBuf],
236) -> Result<(Vec<TreeEntry>, SparseManifest, SparseProof), SparseError> {
237    let leaf_count = u64::try_from(tree.entries.len()).unwrap_or(u64::MAX);
238    if leaf_count > MAX_LEAVES {
239        return Err(SparseError::TooManyLeaves { actual: leaf_count });
240    }
241    if filter.len() > MAX_FILTER_PATHS {
242        return Err(SparseError::TooManyFilterPaths {
243            actual: filter.len(),
244        });
245    }
246    if !tree.is_sorted() {
247        return Err(SparseError::UnsortedTree);
248    }
249
250    // Compute the include-bit vector and pull out the entries we'd
251    // actually ship. The bitmap is just a Vec<bool> at this point;
252    // we hand it to the merkleized bitmap below.
253    let mut bits: Vec<bool> = Vec::with_capacity(tree.entries.len());
254    let mut delivered: Vec<TreeEntry> = Vec::new();
255    for entry in &tree.entries {
256        let include = entry_matches_filter(entry, filter);
257        bits.push(include);
258        if include {
259            delivered.push(entry.clone());
260        }
261    }
262
263    // Build the bitmap inside the upstream's async runtime. The
264    // deterministic runner is an in-memory test executor — no real
265    // I/O, no network — which is exactly what we want here: the
266    // bitmap is purely a verifiable commitment object, not a
267    // persistent store.
268    let (bitmap_root, bitmap_bytes) = merkleize_bits(&bits);
269
270    let manifest = SparseManifest {
271        tree_hash: tree_hash(tree),
272        bitmap_root,
273        filter_hash: hash_filter(filter),
274        leaf_count,
275    };
276    let proof = SparseProof { bitmap_bytes };
277    Ok((delivered, manifest, proof))
278}
279
280/// Build the upstream `MerkleizedBitMap` over `bits` and return
281/// `(root, bitmap_bytes)`. Shared between [`build_sparse`] and
282/// [`verify_sparse`] so the two cannot drift.
283///
284/// Phase 1 spins a fresh `deterministic::Runner` per call. Phase 2
285/// (sparse over a real transport) will reuse a long-lived executor via
286/// the future `mkit_core::protocol::Executor` shim; the dependency is
287/// captured at this seam to keep the migration mechanical.
288fn merkleize_bits(bits: &[bool]) -> (Hash, Vec<u8>) {
289    let runner = deterministic::Runner::default();
290    let bits_owned = bits.to_vec();
291    runner.start(move |ctx| async move {
292        let hasher = mmr::StandardHasher::<Sha256>::new(SPARSE_BAGGING);
293        let bitmap: MerkleizedBitMap<_, sha256::Digest, CHUNK_BYTES, Sequential> =
294            MerkleizedBitMap::init(ctx.child("sparse"), "sparse", Sequential, &hasher)
295                .await
296                .expect("in-memory bitmap init cannot fail");
297        let mut dirty = bitmap.into_dirty();
298        for b in &bits_owned {
299            dirty.push(*b);
300        }
301        let merkleized = dirty.merkleize(&hasher).expect("merkleize is infallible");
302        let root = merkleized.root();
303
304        let mut bytes = Vec::with_capacity(bits_owned.len().div_ceil(8));
305        for (i, bit) in bits_owned.iter().enumerate() {
306            if i % 8 == 0 {
307                bytes.push(0u8);
308            }
309            if *bit {
310                let last = bytes.last_mut().expect("just pushed a byte");
311                *last |= 1 << (i % 8);
312            }
313        }
314
315        let mut root_bytes: Hash = [0u8; 32];
316        root_bytes.copy_from_slice(root.as_ref());
317        (root_bytes, bytes)
318    })
319}
320
321/// Verify a sparse delivery against a manifest.
322///
323/// Returns `true` iff *all* of the following hold:
324///
325/// 1. `manifest.leaf_count <= MAX_LEAVES`.
326/// 2. `manifest.filter_hash == hash_filter(filter)` — the manifest
327///    was issued against the same filter the client supplied.
328/// 3. The set of leaf-indices implied by `bitmap_bytes` matches
329///    the canonical leaf-indices the filter would select.
330/// 4. The bitmap reconstructed from `bitmap_bytes` hashes to
331///    `manifest.bitmap_root` under the upstream bitmap commitment.
332/// 5. `delivered_entries`, in order, are *exactly* the entries
333///    whose leaf-index has its bit set.
334///
335/// Phase 1 cannot independently check `tree_hash` because the
336/// verifier doesn't have the full tree (that's the whole point of
337/// sparse delivery). The Phase 2 transport layer will recompute the
338/// tree hash once it has assembled enough of the structure.
339///
340/// # Panics
341///
342/// Never. All failure modes return `false`.
343#[must_use]
344pub fn verify_sparse(
345    manifest: &SparseManifest,
346    delivered_entries: &[TreeEntry],
347    filter: &[PathBuf],
348    proof: &SparseProof,
349) -> bool {
350    // (1) Sanity caps. A hostile manifest could claim 2^63 leaves;
351    // refuse before we allocate anything proportional to it.
352    if manifest.leaf_count > MAX_LEAVES {
353        return false;
354    }
355    if filter.len() > MAX_FILTER_PATHS {
356        return false;
357    }
358
359    // (2) Filter binding. Cheap and catches the "server swapped the
360    // filter" attack early.
361    if manifest.filter_hash != hash_filter(filter) {
362        return false;
363    }
364
365    // The bitmap must be exactly enough bytes to hold `leaf_count`
366    // bits, with no extra trailing bytes (otherwise an attacker
367    // could pad with extra "set" bits the manifest never committed
368    // to). `usize` is safe because we just bounded leaf_count by
369    // MAX_LEAVES = 1M; refuse on impossibly large 32-bit casts.
370    let Ok(leaf_count) = usize::try_from(manifest.leaf_count) else {
371        return false;
372    };
373    let expected_bitmap_bytes = leaf_count.div_ceil(8);
374    if proof.bitmap_bytes.len() != expected_bitmap_bytes {
375        return false;
376    }
377
378    // (3) + (5) Walk bits and delivered entries together.
379    //
380    // We can't fully verify (5) without seeing the source tree, but
381    // we can verify the *count* of set bits matches the count of
382    // delivered entries, and that delivered entries are themselves
383    // selected by the filter. The Phase 2 transport will cross-check
384    // delivered_entries[i] against tree position once it has the
385    // canonical leaf-index → name mapping.
386    let mut set_bits = 0usize;
387    for i in 0..leaf_count {
388        let byte = proof.bitmap_bytes[i / 8];
389        if (byte >> (i % 8)) & 1 == 1 {
390            set_bits += 1;
391        }
392    }
393    if set_bits != delivered_entries.len() {
394        return false;
395    }
396    for entry in delivered_entries {
397        if !entry_matches_filter(entry, filter) {
398            return false;
399        }
400    }
401
402    // (4) Reconstruct the bitmap root and compare. Rebuilds the
403    // bitmap inside an in-memory commonware runtime, identical to
404    // the one [`build_sparse`] used. Any tampering with
405    // `bitmap_bytes` produces a different root.
406    let bits: Vec<bool> = (0..leaf_count)
407        .map(|i| (proof.bitmap_bytes[i / 8] >> (i % 8)) & 1 == 1)
408        .collect();
409    let (computed_root, _bytes) = merkleize_bits(&bits);
410    if computed_root != manifest.bitmap_root {
411        return false;
412    }
413
414    true
415}
416
417/// Compute the canonical SPEC-OBJECTS tree hash.
418///
419/// Phase 2 swaps the Phase-1 placeholder for the canonical hash:
420/// `BLAKE3(serialize(Object::Tree(t)))`. This is the *same* digest the
421/// rest of the codebase uses to address a tree object — commits, remix
422/// roots, and the object store all key trees by this value.
423///
424/// Binding the manifest to the canonical tree hash means the verifier
425/// can cross-check the manifest against any independently-known
426/// commitment to the source tree (a parent commit's `tree_hash`, a
427/// merge base's tree, the local object store) without re-deriving a
428/// sparse-private digest.
429///
430/// The empty tree still serializes to a valid 10-byte object
431/// (`6-byte prologue || u32 LE 0` entries) and hashes to a non-zero
432/// digest — there is no longer a `ZERO` short-circuit.
433#[must_use]
434pub fn tree_hash(tree: &Tree) -> Hash {
435    // `serialize` only fails when an individual length-prefixed field
436    // overflows `u32` — `Tree::entries` is bounded above by
437    // `MAX_LEAVES` (1 M) here so the serialise call cannot fail on any
438    // tree that already passed our caller guards. We still return
439    // `ZERO` as a defensive fallback to keep the function infallible
440    // for downstream users — they have no recourse if the upstream
441    // tree is malformed.
442    let Ok(bytes) = serialize(&Object::Tree(tree.clone())) else {
443        return ZERO;
444    };
445    blake3_hash(&bytes)
446}
447
448/// Wire envelope magic — `b"MSP1"` (mkit-sparse-v1). Helps the
449/// transport sanity-check the body before any deserialisation. Sits in
450/// the same family as the v1 object prologue, but the codes are
451/// distinct so a misrouted blob can't be parsed as a sparse response.
452pub const SPARSE_WIRE_MAGIC: [u8; 4] = *b"MSP1";
453
454/// Wire-format version of [`SparseResponse`]. Bumped on any
455/// non-backward-compatible change.
456pub const SPARSE_WIRE_VERSION: u8 = 0x01;
457
458/// Maximum encoded sparse-response wire size. Caller-side cap; ~16 MiB
459/// is comfortably more than a maximum-sized bitmap (~125 KB) plus the
460/// largest possible entry stream.
461pub const SPARSE_WIRE_MAX_BYTES: usize = 16 * 1024 * 1024;
462
463/// Complete server-to-client sparse delivery: manifest + entries +
464/// proof, in the order they appear on the wire. The encoder and
465/// decoder are content-stable across calls so the byte layout can be
466/// pinned in golden vectors if and when needed.
467#[derive(Debug, Clone)]
468pub struct SparseResponse {
469    /// Manifest committing to the delivery (104 bytes on the wire).
470    pub manifest: SparseManifest,
471    /// The subset of tree entries the filter selects, in canonical
472    /// lex-sorted order. The verifier walks these alongside the bitmap.
473    pub entries: Vec<TreeEntry>,
474    /// Proof bundle. Phase 2 carries only the raw bitmap bytes; the
475    /// future MMR proof slot is reserved for streaming-only transports.
476    pub proof: SparseProof,
477}
478
479/// Errors raised when encoding or decoding a [`SparseResponse`] on the
480/// wire. Kept tight — the transport layer wraps these in its own
481/// transport-error type at the call site.
482#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
483pub enum SparseWireError {
484    /// Buffer was shorter than the fixed-size manifest header (or
485    /// truncated partway through a length-prefixed section). The
486    /// decoder refuses to allocate from a truncated header.
487    #[error("sparse wire: truncated buffer")]
488    Truncated,
489    /// First 4 bytes are not `b"MSP1"`. Either the body is for a
490    /// different endpoint or the server is on a fork.
491    #[error("sparse wire: bad magic")]
492    BadMagic,
493    /// Version byte is something other than [`SPARSE_WIRE_VERSION`].
494    /// A future server speaking v2 must not be silently downgraded.
495    #[error("sparse wire: unsupported version {0}")]
496    UnsupportedVersion(u8),
497    /// One of the length-prefixed sections claims more bytes than
498    /// remain in the input. Likely a malicious server trying to make
499    /// us allocate.
500    #[error("sparse wire: length out of bounds")]
501    LengthOutOfBounds,
502    /// `leaf_count` exceeds [`MAX_LEAVES`]. The verifier MUST refuse
503    /// before allocating anything proportional to the claimed count.
504    #[error("sparse wire: leaf_count exceeds MAX_LEAVES")]
505    TooManyLeaves,
506    /// Encoded buffer would exceed [`SPARSE_WIRE_MAX_BYTES`].
507    #[error("sparse wire: response exceeds maximum size")]
508    TooLarge,
509    /// A tree entry's mode byte is not a valid [`EntryMode`].
510    #[error("sparse wire: invalid entry mode {0}")]
511    InvalidEntryMode(u8),
512    /// Entry name length declared as zero or > 255 bytes — outside the
513    /// SPEC-OBJECTS §4 range. Matches the per-tree-entry name cap.
514    #[error("sparse wire: invalid entry name length")]
515    InvalidEntryName,
516}
517
518/// Encode a [`SparseResponse`] to the canonical wire bytes.
519///
520/// Layout:
521/// ```text
522/// offset  size  field
523/// 0       4     magic        = SPARSE_WIRE_MAGIC ("MSP1")
524/// 4       1     version      = SPARSE_WIRE_VERSION
525/// 5       32    tree_hash
526/// 37      32    bitmap_root
527/// 69      32    filter_hash
528/// 101     8     leaf_count   (u64 LE)
529/// 109     4     entries_len  (u32 LE) — count of TreeEntry items
530/// 113     ...   TreeEntry stream, each entry:
531///                 u16 LE name_len   (1..=255)
532///                 name_len bytes    (name; arbitrary bytes)
533///                 u8          mode  (EntryMode)
534///                 [u8; 32]    object_hash
535/// ...     4     bitmap_len   (u32 LE)
536/// ...     N     bitmap_bytes
537/// ```
538///
539/// Names are u16-LE length-prefixed rather than the u32 used by
540/// SPEC-OBJECTS — names are bounded at 255 bytes, so a u16 prefix is
541/// already overkill, and the four-byte saving per entry adds up over a
542/// large tree. The decoder rejects `name_len == 0` and `name_len > 255`
543/// to keep the bound enforced.
544///
545/// # Errors
546///
547/// * [`SparseWireError::TooManyLeaves`] — `manifest.leaf_count > MAX_LEAVES`.
548/// * [`SparseWireError::InvalidEntryName`] — an entry name is empty or
549///   > 255 bytes. Defensive: callers should reject before encoding.
550/// * [`SparseWireError::TooLarge`] — the encoded buffer would exceed
551///   [`SPARSE_WIRE_MAX_BYTES`].
552pub fn encode_sparse_response(resp: &SparseResponse) -> Result<Vec<u8>, SparseWireError> {
553    if resp.manifest.leaf_count > MAX_LEAVES {
554        return Err(SparseWireError::TooManyLeaves);
555    }
556    // Pre-size: 113 byte header + per-entry (2 + name_len + 1 + 32) +
557    // 4-byte bitmap length prefix + bitmap bytes. Overshoot is fine.
558    let mut entries_size: usize = 0;
559    for e in &resp.entries {
560        if e.name.is_empty() || e.name.len() > 255 {
561            return Err(SparseWireError::InvalidEntryName);
562        }
563        entries_size = entries_size
564            .checked_add(2 + e.name.len() + 1 + 32)
565            .ok_or(SparseWireError::TooLarge)?;
566    }
567    let total = 113usize
568        .checked_add(entries_size)
569        .and_then(|n| n.checked_add(4))
570        .and_then(|n| n.checked_add(resp.proof.bitmap_bytes.len()))
571        .ok_or(SparseWireError::TooLarge)?;
572    if total > SPARSE_WIRE_MAX_BYTES {
573        return Err(SparseWireError::TooLarge);
574    }
575
576    let mut out = Vec::with_capacity(total);
577    out.extend_from_slice(&SPARSE_WIRE_MAGIC);
578    out.push(SPARSE_WIRE_VERSION);
579    out.extend_from_slice(&resp.manifest.tree_hash);
580    out.extend_from_slice(&resp.manifest.bitmap_root);
581    out.extend_from_slice(&resp.manifest.filter_hash);
582    out.extend_from_slice(&resp.manifest.leaf_count.to_le_bytes());
583
584    let entries_len = u32::try_from(resp.entries.len()).map_err(|_| SparseWireError::TooLarge)?;
585    out.extend_from_slice(&entries_len.to_le_bytes());
586    for e in &resp.entries {
587        // Name length already bounded above.
588        #[allow(clippy::cast_possible_truncation)]
589        let name_len = e.name.len() as u16;
590        out.extend_from_slice(&name_len.to_le_bytes());
591        out.extend_from_slice(&e.name);
592        out.push(e.mode as u8);
593        out.extend_from_slice(&e.object_hash);
594    }
595
596    let bitmap_len =
597        u32::try_from(resp.proof.bitmap_bytes.len()).map_err(|_| SparseWireError::TooLarge)?;
598    out.extend_from_slice(&bitmap_len.to_le_bytes());
599    out.extend_from_slice(&resp.proof.bitmap_bytes);
600
601    Ok(out)
602}
603
604/// Decode a wire-format sparse response. Refuses any input larger than
605/// [`SPARSE_WIRE_MAX_BYTES`] before parsing.
606///
607/// # Errors
608///
609/// * [`SparseWireError::Truncated`] — buffer ended mid-field.
610/// * [`SparseWireError::BadMagic`] — magic prefix mismatch.
611/// * [`SparseWireError::UnsupportedVersion`] — version byte mismatch.
612/// * [`SparseWireError::TooManyLeaves`] — claimed `leaf_count >
613///   MAX_LEAVES`. Refused before any allocation.
614/// * [`SparseWireError::LengthOutOfBounds`] — a length prefix exceeds
615///   the remaining buffer.
616/// * [`SparseWireError::InvalidEntryMode`] — an entry mode byte is
617///   not a recognised [`EntryMode`].
618/// * [`SparseWireError::InvalidEntryName`] — an entry name length is
619///   outside the 1..=255 SPEC-OBJECTS bound.
620pub fn decode_sparse_response(buf: &[u8]) -> Result<SparseResponse, SparseWireError> {
621    if buf.len() > SPARSE_WIRE_MAX_BYTES {
622        return Err(SparseWireError::TooLarge);
623    }
624    if buf.len() < 113 {
625        return Err(SparseWireError::Truncated);
626    }
627    if buf[0..4] != SPARSE_WIRE_MAGIC {
628        return Err(SparseWireError::BadMagic);
629    }
630    if buf[4] != SPARSE_WIRE_VERSION {
631        return Err(SparseWireError::UnsupportedVersion(buf[4]));
632    }
633    let mut tree_hash = [0u8; 32];
634    tree_hash.copy_from_slice(&buf[5..37]);
635    let mut bitmap_root = [0u8; 32];
636    bitmap_root.copy_from_slice(&buf[37..69]);
637    let mut filter_hash = [0u8; 32];
638    filter_hash.copy_from_slice(&buf[69..101]);
639    let mut leaf_count_bytes = [0u8; 8];
640    leaf_count_bytes.copy_from_slice(&buf[101..109]);
641    let leaf_count = u64::from_le_bytes(leaf_count_bytes);
642    if leaf_count > MAX_LEAVES {
643        return Err(SparseWireError::TooManyLeaves);
644    }
645
646    let mut entries_len_bytes = [0u8; 4];
647    entries_len_bytes.copy_from_slice(&buf[109..113]);
648    let entries_len = u32::from_le_bytes(entries_len_bytes) as usize;
649    // Independently bound by the per-tree cap.
650    if entries_len as u64 > MAX_LEAVES {
651        return Err(SparseWireError::TooManyLeaves);
652    }
653
654    let mut cursor: usize = 113;
655    let mut entries: Vec<TreeEntry> = Vec::with_capacity(entries_len.min(64));
656    for _ in 0..entries_len {
657        if cursor.checked_add(2).ok_or(SparseWireError::Truncated)? > buf.len() {
658            return Err(SparseWireError::Truncated);
659        }
660        let mut nlb = [0u8; 2];
661        nlb.copy_from_slice(&buf[cursor..cursor + 2]);
662        let name_len = u16::from_le_bytes(nlb) as usize;
663        cursor += 2;
664        if name_len == 0 || name_len > 255 {
665            return Err(SparseWireError::InvalidEntryName);
666        }
667        let needed = name_len
668            .checked_add(1)
669            .and_then(|n| n.checked_add(32))
670            .ok_or(SparseWireError::LengthOutOfBounds)?;
671        if cursor
672            .checked_add(needed)
673            .ok_or(SparseWireError::Truncated)?
674            > buf.len()
675        {
676            return Err(SparseWireError::Truncated);
677        }
678        let name = buf[cursor..cursor + name_len].to_vec();
679        cursor += name_len;
680        let mode_byte = buf[cursor];
681        cursor += 1;
682        let mode = EntryMode::from_u8(mode_byte)
683            .map_err(|_| SparseWireError::InvalidEntryMode(mode_byte))?;
684        let mut object_hash = [0u8; 32];
685        object_hash.copy_from_slice(&buf[cursor..cursor + 32]);
686        cursor += 32;
687        entries.push(TreeEntry {
688            name,
689            mode,
690            object_hash,
691        });
692    }
693
694    // Bitmap length prefix.
695    if cursor.checked_add(4).ok_or(SparseWireError::Truncated)? > buf.len() {
696        return Err(SparseWireError::Truncated);
697    }
698    let mut blb = [0u8; 4];
699    blb.copy_from_slice(&buf[cursor..cursor + 4]);
700    let bitmap_len = u32::from_le_bytes(blb) as usize;
701    cursor += 4;
702    if cursor
703        .checked_add(bitmap_len)
704        .ok_or(SparseWireError::LengthOutOfBounds)?
705        > buf.len()
706    {
707        return Err(SparseWireError::LengthOutOfBounds);
708    }
709    let bitmap_bytes = buf[cursor..cursor + bitmap_len].to_vec();
710    cursor += bitmap_len;
711    if cursor != buf.len() {
712        // Trailing bytes — refuse so a hostile server can't pad with
713        // extra data hoping a future client parses it.
714        return Err(SparseWireError::LengthOutOfBounds);
715    }
716
717    Ok(SparseResponse {
718        manifest: SparseManifest {
719            tree_hash,
720            bitmap_root,
721            filter_hash,
722            leaf_count,
723        },
724        entries,
725        proof: SparseProof { bitmap_bytes },
726    })
727}
728
729// ---------------------------------------------------------------------------
730// On-disk bitmap cache
731// ---------------------------------------------------------------------------
732
733/// Cache file header magic — `b"MSPC"` (mkit-sparse-cache). Distinct
734/// from the wire magic so a misnamed file can't be parsed as either.
735pub const SPARSE_CACHE_MAGIC: [u8; 4] = *b"MSPC";
736
737/// Cache file format version. Bumped on any breaking change.
738pub const SPARSE_CACHE_VERSION: u8 = 0x01;
739
740/// Subdirectory under `.mkit/` for the sparse bitmap cache.
741pub const SPARSE_CACHE_DIR: &str = "sparse";
742
743/// Encode the on-disk cache payload for a verified sparse delivery.
744///
745/// Layout:
746/// ```text
747/// 0   4   magic        = SPARSE_CACHE_MAGIC ("MSPC")
748/// 4   1   version      = SPARSE_CACHE_VERSION
749/// 5   32  bitmap_root
750/// 37  32  filter_hash
751/// 69  8   leaf_count   (u64 LE)
752/// 77  4   bitmap_len   (u32 LE)
753/// 81  N   bitmap_bytes
754/// ```
755///
756/// The `tree_hash` is *not* stored in the file body — it is the
757/// filename. Re-verifying a cached bitmap means recomputing the
758/// bitmap root from the bytes and comparing to `bitmap_root` here.
759#[must_use]
760pub fn encode_sparse_cache(manifest: &SparseManifest, proof: &SparseProof) -> Vec<u8> {
761    let mut out = Vec::with_capacity(81 + proof.bitmap_bytes.len());
762    out.extend_from_slice(&SPARSE_CACHE_MAGIC);
763    out.push(SPARSE_CACHE_VERSION);
764    out.extend_from_slice(&manifest.bitmap_root);
765    out.extend_from_slice(&manifest.filter_hash);
766    out.extend_from_slice(&manifest.leaf_count.to_le_bytes());
767    let len = u32::try_from(proof.bitmap_bytes.len()).unwrap_or(u32::MAX);
768    out.extend_from_slice(&len.to_le_bytes());
769    out.extend_from_slice(&proof.bitmap_bytes);
770    out
771}
772
773/// Decode the on-disk cache payload. Returns the bitmap root, filter
774/// hash, leaf count, and bitmap bytes — the caller reconstructs the
775/// `SparseManifest` if needed (the `tree_hash` field comes from the
776/// filename, not from the file body).
777///
778/// # Errors
779///
780/// Same shape as [`decode_sparse_response`].
781#[allow(clippy::type_complexity)]
782pub fn decode_sparse_cache(buf: &[u8]) -> Result<(Hash, Hash, u64, Vec<u8>), SparseWireError> {
783    if buf.len() < 81 {
784        return Err(SparseWireError::Truncated);
785    }
786    if buf[0..4] != SPARSE_CACHE_MAGIC {
787        return Err(SparseWireError::BadMagic);
788    }
789    if buf[4] != SPARSE_CACHE_VERSION {
790        return Err(SparseWireError::UnsupportedVersion(buf[4]));
791    }
792    let mut bitmap_root = [0u8; 32];
793    bitmap_root.copy_from_slice(&buf[5..37]);
794    let mut filter_hash = [0u8; 32];
795    filter_hash.copy_from_slice(&buf[37..69]);
796    let mut lcb = [0u8; 8];
797    lcb.copy_from_slice(&buf[69..77]);
798    let leaf_count = u64::from_le_bytes(lcb);
799    if leaf_count > MAX_LEAVES {
800        return Err(SparseWireError::TooManyLeaves);
801    }
802    let mut blb = [0u8; 4];
803    blb.copy_from_slice(&buf[77..81]);
804    let bitmap_len = u32::from_le_bytes(blb) as usize;
805    let end = 81usize
806        .checked_add(bitmap_len)
807        .ok_or(SparseWireError::LengthOutOfBounds)?;
808    if end > buf.len() {
809        return Err(SparseWireError::LengthOutOfBounds);
810    }
811    if end != buf.len() {
812        return Err(SparseWireError::LengthOutOfBounds);
813    }
814    let bitmap_bytes = buf[81..end].to_vec();
815    Ok((bitmap_root, filter_hash, leaf_count, bitmap_bytes))
816}
817
818// ---------------------------------------------------------------------------
819// Tests
820// ---------------------------------------------------------------------------
821
822#[cfg(test)]
823mod tests {
824    use super::*;
825    use crate::hash::ZERO;
826    use crate::object::{EntryMode, TreeEntry};
827
828    fn entry(name: &[u8]) -> TreeEntry {
829        TreeEntry {
830            name: name.to_vec(),
831            mode: EntryMode::Blob,
832            object_hash: ZERO,
833        }
834    }
835
836    /// Tree with `n` lex-sorted entries named `b"aa"`, `b"ab"`,
837    /// `b"ac"`, etc. — enough variety that a prefix filter on
838    /// `"aa"` selects exactly one entry.
839    fn make_tree(n: usize) -> Tree {
840        assert!(n <= 26 * 26, "test helper only supports n <= 676");
841        let mut entries = Vec::with_capacity(n);
842        for i in 0..n {
843            // Two-letter ASCII names, lex-sorted by construction.
844            let a = b'a' + u8::try_from(i / 26).unwrap();
845            let b = b'a' + u8::try_from(i % 26).unwrap();
846            entries.push(entry(&[a, b]));
847        }
848        Tree { entries }
849    }
850
851    #[test]
852    fn build_and_verify_round_trip_simple() {
853        let tree = make_tree(10);
854        // Select "aa", "ab", "ac" — three distinct prefixes that
855        // each match exactly one entry (the entry names are 2 bytes
856        // and the filter paths are 2 bytes, so the exact-match arm
857        // of `entry_matches_filter` fires).
858        let filter = vec![
859            PathBuf::from("aa"),
860            PathBuf::from("ab"),
861            PathBuf::from("ac"),
862        ];
863
864        let (delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
865        assert_eq!(delivered.len(), 3);
866        assert_eq!(delivered[0].name, b"aa");
867        assert_eq!(delivered[1].name, b"ab");
868        assert_eq!(delivered[2].name, b"ac");
869        assert_eq!(manifest.leaf_count, 10);
870
871        assert!(verify_sparse(&manifest, &delivered, &filter, &proof));
872    }
873
874    #[test]
875    fn verify_rejects_extra_entry() {
876        // Server tries to ship a 4th entry the filter didn't ask
877        // for. Even though the bitmap commits to the original 3,
878        // the count mismatch fires immediately.
879        let tree = make_tree(10);
880        let filter = vec![
881            PathBuf::from("aa"),
882            PathBuf::from("ab"),
883            PathBuf::from("ac"),
884        ];
885
886        let (mut delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
887        // Sneak in an entry that the filter does NOT select.
888        delivered.push(entry(b"ad"));
889
890        assert!(
891            !verify_sparse(&manifest, &delivered, &filter, &proof),
892            "verifier must reject delivered entries beyond the bitmap's set-bit count"
893        );
894    }
895
896    #[test]
897    fn verify_rejects_entry_outside_filter() {
898        // Server ships the right *number* of entries, but one of
899        // them isn't selected by the filter. The per-entry filter
900        // check fires.
901        let tree = make_tree(10);
902        let filter = vec![PathBuf::from("aa"), PathBuf::from("ab")];
903
904        let (mut delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
905        assert_eq!(delivered.len(), 2);
906
907        // Replace "ab" with "az" — same count, but "az" isn't in
908        // the filter.
909        delivered[1] = entry(b"az");
910        assert!(
911            !verify_sparse(&manifest, &delivered, &filter, &proof),
912            "verifier must reject any delivered entry not selected by the filter"
913        );
914    }
915
916    #[test]
917    fn verify_rejects_tampered_bitmap_bytes() {
918        // Server flips a bit in `bitmap_bytes` (claims an extra
919        // entry was included) but doesn't update the manifest's
920        // bitmap_root. Root reconstruction catches it.
921        let tree = make_tree(10);
922        let filter = vec![PathBuf::from("aa"), PathBuf::from("ab")];
923
924        let (delivered, manifest, mut proof) = build_sparse(&tree, &filter).unwrap();
925        // Flip a high bit nobody set.
926        proof.bitmap_bytes[0] ^= 0b1000_0000;
927
928        assert!(
929            !verify_sparse(&manifest, &delivered, &filter, &proof),
930            "verifier must reject when bitmap_bytes diverges from manifest.bitmap_root"
931        );
932    }
933
934    #[test]
935    fn verify_rejects_tampered_manifest_root() {
936        // Symmetric to the above: server preserves the bitmap but
937        // claims a different root. Catches an attacker substituting
938        // the manifest while leaving the bytes intact.
939        let tree = make_tree(10);
940        let filter = vec![PathBuf::from("aa"), PathBuf::from("ab")];
941
942        let (delivered, mut manifest, proof) = build_sparse(&tree, &filter).unwrap();
943        manifest.bitmap_root[0] ^= 1;
944
945        assert!(!verify_sparse(&manifest, &delivered, &filter, &proof));
946    }
947
948    #[test]
949    fn verify_rejects_wrong_filter() {
950        // Manifest was built against filter A but verifier supplies
951        // filter B. `filter_hash` mismatch.
952        let tree = make_tree(10);
953        let filter_a = vec![PathBuf::from("aa")];
954        let filter_b = vec![PathBuf::from("ab")];
955
956        let (delivered, manifest, proof) = build_sparse(&tree, &filter_a).unwrap();
957        assert!(!verify_sparse(&manifest, &delivered, &filter_b, &proof));
958    }
959
960    #[test]
961    fn empty_filter_yields_empty_delivery() {
962        // Empty filter selects no entries. The manifest is
963        // well-defined (commits to "every bit unset") and verify
964        // accepts an empty delivered list.
965        let tree = make_tree(10);
966        let filter: Vec<PathBuf> = vec![];
967
968        let (delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
969        assert!(delivered.is_empty());
970        assert_eq!(manifest.leaf_count, 10);
971        assert!(verify_sparse(&manifest, &delivered, &filter, &proof));
972    }
973
974    #[test]
975    fn empty_tree_is_well_defined() {
976        // No entries, no filter. Verifier accepts the trivial
977        // manifest.
978        let tree = Tree {
979            entries: Vec::new(),
980        };
981        let filter: Vec<PathBuf> = vec![];
982        let (delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
983        assert!(delivered.is_empty());
984        assert_eq!(manifest.leaf_count, 0);
985        assert!(verify_sparse(&manifest, &delivered, &filter, &proof));
986    }
987
988    #[test]
989    fn prefix_filter_matches_subtree() {
990        // "src" should select "src/foo" and "src/bar" but NOT "srx"
991        // or "srcabc" (no `/` boundary).
992        let entries = vec![
993            entry(b"a"),
994            entry(b"src/bar"),
995            entry(b"src/foo"),
996            entry(b"srx"),
997        ];
998        // Lex-sort: 'a' < 'src/bar' < 'src/foo' < 'srx' — already sorted.
999        let tree = Tree { entries };
1000        let filter = vec![PathBuf::from("src")];
1001
1002        let (delivered, manifest, proof) = build_sparse(&tree, &filter).unwrap();
1003        assert_eq!(delivered.len(), 2);
1004        assert_eq!(delivered[0].name, b"src/bar");
1005        assert_eq!(delivered[1].name, b"src/foo");
1006        assert!(verify_sparse(&manifest, &delivered, &filter, &proof));
1007    }
1008
1009    #[test]
1010    fn unsorted_tree_is_rejected() {
1011        let tree = Tree {
1012            entries: vec![entry(b"b"), entry(b"a")],
1013        };
1014        let err = build_sparse(&tree, &[]).unwrap_err();
1015        assert_eq!(err, SparseError::UnsortedTree);
1016    }
1017
1018    #[test]
1019    fn filter_hash_is_order_independent() {
1020        // Canonical form sorts and dedups, so these must collide.
1021        let a = hash_filter(&[PathBuf::from("y"), PathBuf::from("x")]);
1022        let b = hash_filter(&[
1023            PathBuf::from("x"),
1024            PathBuf::from("y"),
1025            PathBuf::from("x"), // duplicate
1026        ]);
1027        assert_eq!(a, b);
1028        // ...but a different content set must produce a different hash.
1029        let c = hash_filter(&[PathBuf::from("x"), PathBuf::from("z")]);
1030        assert_ne!(a, c);
1031    }
1032
1033    // ----- Phase 2: canonical tree-hash + wire format ----------------------
1034
1035    #[test]
1036    fn tree_hash_matches_canonical_serialize_then_hash() {
1037        // The Phase-2 tree_hash is BLAKE3(serialize(Object::Tree(t))).
1038        // Cross-check against the codebase's canonical "address a tree
1039        // object" recipe — anything else would defeat the point of the
1040        // Phase-1 → Phase-2 swap.
1041        let tree = make_tree(5);
1042        let canonical = crate::hash::hash(
1043            &crate::serialize::serialize(&crate::object::Object::Tree(tree.clone())).unwrap(),
1044        );
1045        assert_eq!(tree_hash(&tree), canonical);
1046    }
1047
1048    #[test]
1049    fn tree_hash_differs_from_phase1_placeholder() {
1050        // Sanity: the Phase-1 sparse-internal hash and the new
1051        // canonical hash MUST differ for any non-empty tree, so a
1052        // mistakenly-pinned Phase-1 hash anywhere upstream surfaces
1053        // immediately as a verifier mismatch.
1054        let tree = make_tree(3);
1055        // Phase-1 placeholder recipe — kept verbatim for the diff.
1056        let mut body = Hasher::new();
1057        let count = u32::try_from(tree.entries.len()).unwrap();
1058        body.update(&count.to_le_bytes());
1059        for entry in &tree.entries {
1060            let name_len = u32::try_from(entry.name.len()).unwrap();
1061            body.update(&name_len.to_le_bytes());
1062            body.update(&entry.name);
1063            body.update(&[entry.mode as u8]);
1064            body.update(&entry.object_hash);
1065        }
1066        let body_digest = body.finalize();
1067        let phase1 = crate::hash::domain_digest(b"mkit-sparse-tree-v1", &body_digest);
1068        assert_ne!(tree_hash(&tree), phase1);
1069    }
1070
1071    #[test]
1072    fn wire_round_trip_simple() {
1073        let tree = make_tree(8);
1074        let filter = vec![PathBuf::from("aa"), PathBuf::from("ab")];
1075        let (entries, manifest, proof) = build_sparse(&tree, &filter).unwrap();
1076        let resp = SparseResponse {
1077            manifest,
1078            entries,
1079            proof,
1080        };
1081        let bytes = encode_sparse_response(&resp).unwrap();
1082        let parsed = decode_sparse_response(&bytes).unwrap();
1083        assert_eq!(parsed.manifest, resp.manifest);
1084        assert_eq!(parsed.entries.len(), resp.entries.len());
1085        for (a, b) in parsed.entries.iter().zip(resp.entries.iter()) {
1086            assert_eq!(a.name, b.name);
1087            assert_eq!(a.mode as u8, b.mode as u8);
1088            assert_eq!(a.object_hash, b.object_hash);
1089        }
1090        assert_eq!(parsed.proof.bitmap_bytes, resp.proof.bitmap_bytes);
1091        // Decoded response still verifies.
1092        assert!(verify_sparse(
1093            &parsed.manifest,
1094            &parsed.entries,
1095            &filter,
1096            &parsed.proof
1097        ));
1098    }
1099
1100    #[test]
1101    fn wire_rejects_bad_magic() {
1102        let tree = make_tree(2);
1103        let (entries, manifest, proof) = build_sparse(&tree, &[PathBuf::from("aa")]).unwrap();
1104        let mut bytes = encode_sparse_response(&SparseResponse {
1105            manifest,
1106            entries,
1107            proof,
1108        })
1109        .unwrap();
1110        bytes[0] = 0xFF;
1111        assert_eq!(
1112            decode_sparse_response(&bytes).unwrap_err(),
1113            SparseWireError::BadMagic
1114        );
1115    }
1116
1117    #[test]
1118    fn wire_rejects_unsupported_version() {
1119        let tree = make_tree(2);
1120        let (entries, manifest, proof) = build_sparse(&tree, &[PathBuf::from("aa")]).unwrap();
1121        let mut bytes = encode_sparse_response(&SparseResponse {
1122            manifest,
1123            entries,
1124            proof,
1125        })
1126        .unwrap();
1127        bytes[4] = 0x99;
1128        assert!(matches!(
1129            decode_sparse_response(&bytes).unwrap_err(),
1130            SparseWireError::UnsupportedVersion(0x99)
1131        ));
1132    }
1133
1134    #[test]
1135    fn wire_rejects_trailing_garbage() {
1136        let tree = make_tree(2);
1137        let (entries, manifest, proof) = build_sparse(&tree, &[PathBuf::from("aa")]).unwrap();
1138        let mut bytes = encode_sparse_response(&SparseResponse {
1139            manifest,
1140            entries,
1141            proof,
1142        })
1143        .unwrap();
1144        bytes.push(0xAA);
1145        assert_eq!(
1146            decode_sparse_response(&bytes).unwrap_err(),
1147            SparseWireError::LengthOutOfBounds
1148        );
1149    }
1150
1151    #[test]
1152    fn wire_rejects_truncated_header() {
1153        let tiny = vec![0u8; 50];
1154        assert_eq!(
1155            decode_sparse_response(&tiny).unwrap_err(),
1156            SparseWireError::Truncated
1157        );
1158    }
1159
1160    #[test]
1161    fn wire_rejects_overlong_leaf_count() {
1162        let mut buf = Vec::with_capacity(113);
1163        buf.extend_from_slice(&SPARSE_WIRE_MAGIC);
1164        buf.push(SPARSE_WIRE_VERSION);
1165        buf.extend_from_slice(&[0u8; 32]); // tree_hash
1166        buf.extend_from_slice(&[0u8; 32]); // bitmap_root
1167        buf.extend_from_slice(&[0u8; 32]); // filter_hash
1168        buf.extend_from_slice(&(MAX_LEAVES + 1).to_le_bytes());
1169        buf.extend_from_slice(&0u32.to_le_bytes()); // entries_len = 0
1170        // need bitmap length prefix for completeness
1171        buf.extend_from_slice(&0u32.to_le_bytes());
1172        assert_eq!(
1173            decode_sparse_response(&buf).unwrap_err(),
1174            SparseWireError::TooManyLeaves
1175        );
1176    }
1177
1178    #[test]
1179    fn cache_round_trip_recovers_bitmap_root() {
1180        let tree = make_tree(10);
1181        let filter = vec![PathBuf::from("ab")];
1182        let (_entries, manifest, proof) = build_sparse(&tree, &filter).unwrap();
1183        let bytes = encode_sparse_cache(&manifest, &proof);
1184        let (bitmap_root, filter_hash, leaf_count, bitmap_bytes) =
1185            decode_sparse_cache(&bytes).unwrap();
1186        assert_eq!(bitmap_root, manifest.bitmap_root);
1187        assert_eq!(filter_hash, manifest.filter_hash);
1188        assert_eq!(leaf_count, manifest.leaf_count);
1189        assert_eq!(bitmap_bytes, proof.bitmap_bytes);
1190    }
1191
1192    #[test]
1193    fn cache_rejects_bad_magic() {
1194        let tree = make_tree(1);
1195        let (_entries, manifest, proof) = build_sparse(&tree, &[PathBuf::from("aa")]).unwrap();
1196        let mut bytes = encode_sparse_cache(&manifest, &proof);
1197        bytes[0] = 0x00;
1198        assert_eq!(
1199            decode_sparse_cache(&bytes).unwrap_err(),
1200            SparseWireError::BadMagic
1201        );
1202    }
1203}