Skip to main content

net/adapter/net/dataforts/blob/
blob_ref.rs

//! `BlobRef` — typed event-payload that points at content stored
//! out-of-band in a [`super::BlobAdapter`] backend.
//!
//! # Wire encoding (v0.15 Small + v0.2 Manifest + v0.3 Tree)
//!
//! Every encoded form starts with the four-byte magic
//! `[0xB0, 0xB1, 0xB2, 0xB3]` followed by a one-byte version
//! discriminant:
//!
//! | Version | Variant | Body layout |
//! |---|---|---|
//! | `0x01` | [`BlobRef::Small`] | `[hash 32][size 8][uri …]` — hand-rolled byte layout, v0.15-compatible. |
//! | `0x02` | [`BlobRef::Manifest`] | `[postcard manifest body …]` — chunked content. See [`BLOB_MANIFEST_BODY_VERSION`]. |
//! | `0x03` | [`BlobRef::Tree`] | `[postcard tree body …]` — hierarchical manifest; chunk references live at the tree leaves. See [`BLOB_TREE_BODY_VERSION`]. |
//!
//! No length prefix on the Small URI — the encoded form lives inside
//! an event payload whose length is already framed by the substrate.
//! The Manifest body is postcard-encoded with its own 1-byte version
//! prefix (`BLOB_MANIFEST_BODY_VERSION`) so the manifest schema can
//! evolve independently of the outer wire discriminant.
//!
//! Inline event payloads carry no magic (back-compat); the substrate
//! distinguishes by peeking at the first four bytes. The magic is
//! four bytes (rather than one) because a single discriminator byte
//! (`0xB0`) collides with arbitrary binary payloads — protobuf wire
//! bytes, MessagePack, compressed data — and a false match would
//! silently re-interpret an inline payload as a `BlobRef` whose
//! decoded URI gets fetched against the channel's adapter. A
//! four-byte magic with three high-bit bytes is statistically
//! unreachable in valid UTF-8 text and rare enough in binary that
//! decode-then-verify catches the rest.
//!
//! # Chunking
//!
//! Payloads above [`BLOB_CHUNK_SIZE_BYTES`] (4 MiB) split into
//! fixed-size chunks; the resulting [`BlobRef::Manifest`] carries the
//! chunk list. At or below the threshold, payloads ride as a single
//! [`BlobRef::Small`]. Chunk size is fixed across versions for
//! determinism: two callers chunking the same N-byte payload produce
//! identical [`ChunkRef`] lists, which deduplicates at the
//! replication layer for free. See [`chunk_payload`] for the
//! algorithm and [`byte_range_to_chunks`] for the inverse (resolving
//! a byte range to chunk indices for partial fetches).
43
44use serde::{Deserialize, Serialize};
45
46use super::error::BlobError;
47
/// 4-byte magic at offset 0 of an encoded [`BlobRef`].
/// Distinguishes blob-ref payloads from inline event payloads on
/// every `read_range` / `tail` output. Single-byte discriminators
/// collide too readily with arbitrary binary payloads; four
/// high-bit bytes are improbable enough that decode-then-verify
/// handles the residual cases without misinterpreting attacker-
/// controlled bytes as a `BlobRef`.
pub const BLOB_REF_MAGIC: [u8; 4] = [0xB0, 0xB1, 0xB2, 0xB3];

/// Backwards-compatible single-byte discriminator alias for code
/// paths that just need to peek byte 0 (e.g. the bindings'
/// `EventPayload` classification). Equal to `BLOB_REF_MAGIC[0]`.
/// The decoder still requires the full four-byte magic, so this
/// alias is only useful for a cheap "might be a blob" pre-check.
pub const BLOB_REF_DISCRIMINATOR: u8 = BLOB_REF_MAGIC[0];

/// `BlobRef::Small` wire-encoding version. v1 is the only Small
/// version this build encodes; the version byte is reserved so
/// future migrations (e.g. BLAKE3-256 → BLAKE3-512, or a multi-hash
/// format) can land without breaking the decoder.
pub const BLOB_REF_VERSION_V1: u8 = 0x01;

/// `BlobRef::Manifest` wire-encoding version. Lands in v0.2 alongside
/// the mesh-native blob storage track. Manifest body schema evolves
/// independently via [`BLOB_MANIFEST_BODY_VERSION`].
pub const BLOB_REF_VERSION_V2_MANIFEST: u8 = 0x02;

/// `BlobRef::Tree` wire-encoding version. Lands in v0.3 alongside
/// the hierarchical-manifest terabyte-scale track. Tree body
/// schema evolves independently via [`BLOB_TREE_BODY_VERSION`].
pub const BLOB_REF_VERSION_V3_TREE: u8 = 0x03;

/// Inner-version prefix on the postcard-encoded tree body. Bumps
/// independently of the outer wire discriminator
/// ([`BLOB_REF_VERSION_V3_TREE`]) so the tree body schema can
/// evolve without re-cutting the outer version space.
pub const BLOB_TREE_BODY_VERSION: u8 = 0x01;

/// Hard ceiling on the postcard-encoded `BlobRef::Tree` body.
/// Tree bodies are tiny by design (a few hashes + ints), so a
/// 1 KiB cap is generous and bounds the decoder's allocator
/// before per-field validation runs.
pub const BLOB_REF_TREE_BODY_MAX_BYTES: usize = 1024;

/// Hard ceiling on `BlobRef::Tree::total_size`: 128 PiB = 2^57
/// bytes. Bounded so a malicious or buggy publisher can't stamp
/// `total_size = u64::MAX` and propagate it into
/// `Vec::with_capacity` allocations downstream.
///
/// NOTE(review): this comment previously derived the value as
/// "128 × 128 × 128 × 128 × 4 MiB", but 128^4 × 4 MiB is 2^50
/// bytes (1 PiB). 2^57 corresponds to 128^5 × 4 MiB, i.e. five
/// fanout-128 levels above 4 MiB chunks. Confirm against
/// `blob_tree` whether a depth-4 tree really addresses 128^5
/// chunks or whether the cap should be tightened to 1 PiB.
pub const BLOB_TREE_MAX_TOTAL_SIZE: u64 = 128 * (1u64 << 50);

/// Inner-version prefix on the postcard-encoded manifest body. Bumps
/// independently of the outer wire discriminator
/// ([`BLOB_REF_VERSION_V2_MANIFEST`]) so the manifest schema can
/// evolve (extra fields, new encodings, etc.) without re-cutting the
/// outer version space.
pub const BLOB_MANIFEST_BODY_VERSION: u8 = 0x01;

/// Minimum encoded length for a [`BlobRef::Small`]: magic + version
/// byte + 32-byte hash + 8-byte size (45 bytes). URI may be empty.
pub const BLOB_REF_SMALL_HEADER_LEN: usize = BLOB_REF_MAGIC.len() + 1 + 32 + 8;

/// Hard ceiling on any single blob payload — applies to both the
/// `size` field on a [`BlobRef::Small`] and the `total_size` field on
/// a [`BlobRef::Manifest`]. A malicious or buggy publisher could
/// otherwise stamp `size = u64::MAX` which then propagates into
/// `vec![0u8; len as usize]` allocations on the fetch path — OOMs on
/// 64-bit targets, silent truncation to short reads on 32-bit. 16
/// GiB is generous enough for legitimate multi-GB blobs while still
/// bounded; sites that need higher should validate on construction
/// and consider streaming (the BlobAdapter trait's streaming hooks
/// are the right escape valve).
pub const BLOB_REF_MAX_SIZE: u64 = 16 * 1024 * 1024 * 1024;

/// Fixed chunk size for chunked storage. 4 MiB is the locked
/// threshold per [`DATAFORTS_BLOB_STORAGE_PLAN.md`] — fixed across
/// versions for determinism (two callers chunking the same N-byte
/// payload produce identical [`ChunkRef`] lists, which deduplicates
/// at the replication layer for free). Payloads at or below this
/// threshold ride as a single [`BlobRef::Small`]; above it, the
/// chunker emits a [`BlobRef::Manifest`].
///
/// [`DATAFORTS_BLOB_STORAGE_PLAN.md`]: ../../../../../docs/plans/DATAFORTS_BLOB_STORAGE_PLAN.md
pub const BLOB_CHUNK_SIZE_BYTES: u64 = 4 * 1024 * 1024;

/// Hard ceiling on the number of chunks a single
/// [`BlobRef::Manifest`] may carry. 4 GiB / 4 MiB = 1024 chunks at
/// the typical max-blob size; 16 GiB / 4 MiB = 4096 chunks at the
/// `BLOB_REF_MAX_SIZE` cap, so 8192 leaves 2× headroom over the
/// largest manifest of full-size chunks. The cap protects the
/// decoder from a malicious peer stamping `chunks: Vec<…>` with tens
/// of millions of entries (the postcard varint length prefix would
/// otherwise admit up to `u32::MAX` and OOM the decoder).
pub const BLOB_MANIFEST_MAX_CHUNKS: usize = 8192;
141
142/// Replication encoding for a chunked blob. v0.2 only supports
143/// `Replicated`; `ReedSolomon { k, m }` is reserved on the wire so
144/// v0.3 can land erasure coding without a manifest format change.
145///
146/// Wire-encoded via postcard; the unit-variant `Replicated`
147/// occupies 1 byte (varint discriminant 0), `ReedSolomon { k, m }`
148/// occupies 3 bytes (varint 1 + two `u8`).
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
150pub enum Encoding {
151    /// N identical replicas of every chunk; the only encoding
152    /// supported in v0.2. Survives loss of `replication_factor - 1`
153    /// nodes per chunk; correlated failures depend on placement
154    /// tags. See `DATAFORTS_BLOB_STORAGE_PLAN.md` § W-2.
155    Replicated,
156    /// Reed–Solomon `(k, m)` erasure coding. **Reserved for v0.3**;
157    /// constructing this variant is allowed for forward-compat
158    /// testing, but the v0.2 store / fetch paths reject it with a
159    /// `BlobError::UnsupportedEncoding` variant added in PR-2.
160    ReedSolomon {
161        /// Data chunks per group.
162        k: u8,
163        /// Parity chunks per group.
164        m: u8,
165    },
166}
167
168/// Reference to a single chunk within a [`BlobRef::Manifest`].
169/// Each chunk is a content-addressed RedEX file in the mesh-native
170/// storage path (v0.2). The hash is BLAKE3-256 of the chunk's raw
171/// bytes; `size` is the chunk's payload length in bytes (≤
172/// [`BLOB_CHUNK_SIZE_BYTES`]; only the last chunk may be smaller).
173#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
174pub struct ChunkRef {
175    /// BLAKE3-256 of the chunk's canonical bytes.
176    pub hash: [u8; 32],
177    /// Chunk payload length in bytes. Bounded above by
178    /// [`BLOB_CHUNK_SIZE_BYTES`]; only the last chunk in a manifest
179    /// may be strictly smaller.
180    pub size: u32,
181}
182
183/// Postcard-encoded tree body. Lives inside the
184/// [`BlobRef::Tree`] wire form after the four-byte magic +
185/// version discriminator. The body itself is tiny — fixed-size
186/// fields only; no embedded chunk list (the chunks live at the
187/// referenced [`TreeNode`](super::blob_tree::TreeNode) leaves).
188#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
189struct TreeBody {
190    /// Body schema version; bumps independently of the outer
191    /// `BlobRef::Tree` discriminant.
192    body_version: u8,
193    /// Adapter-routed URI. For the mesh-native path this is
194    /// `mesh://<hex-of-root_hash>`; external adapters use their
195    /// own scheme.
196    uri: String,
197    /// Replication / erasure encoding for the chunks. Tree
198    /// inherits the same enum surface as Manifest.
199    encoding: Encoding,
200    /// BLAKE3 hash of the root
201    /// [`TreeNode`](super::blob_tree::TreeNode) body. The
202    /// substrate fetches the root, verifies its bytes hash to
203    /// this value, then walks down.
204    root_hash: [u8; 32],
205    /// Total reconstructed payload size in bytes. The decoder
206    /// trusts this value (same trust model as Manifest's
207    /// `total_size`); the tree walk cross-checks against the
208    /// sum of leaf chunk sizes at the bottom of each descent.
209    total_size: u64,
210    /// Tree depth — `0` is a single-leaf tree (root IS the leaf,
211    /// degenerate), `1` is root + leaves, `2` is root + internals +
212    /// leaves, etc. Capped at [`super::blob_tree::MAX_TREE_DEPTH`]
213    /// (currently 4).
214    depth: u8,
215}
216
217/// Borrow-only sibling of [`TreeBody`]. Same rationale as
218/// [`ManifestBodyRef`] — measure-only via postcard with no URI
219/// clone. (Tree bodies don't have a chunk vector, so the saving
220/// here is just the `String` clone, but the symmetry keeps the
221/// two encoded_len arms consistent.)
222#[derive(Serialize)]
223struct TreeBodyRef<'a> {
224    body_version: u8,
225    uri: &'a str,
226    encoding: Encoding,
227    root_hash: [u8; 32],
228    total_size: u64,
229    depth: u8,
230}
231
232/// Postcard-encoded manifest body. Lives inside the
233/// [`BlobRef::Manifest`] wire form after the four-byte magic +
234/// version discriminator.
235#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
236struct ManifestBody {
237    /// Body schema version; bumps independently of the outer
238    /// `BlobRef::Manifest` discriminant.
239    body_version: u8,
240    /// Adapter-routed URI — e.g. `mesh://<hex>`, `s3://bucket/key`.
241    /// The scheme picks the adapter; the rest is passed through
242    /// opaque.
243    uri: String,
244    /// Replication / erasure encoding for the chunks.
245    encoding: Encoding,
246    /// Ordered chunk list. Position N in the vector corresponds to
247    /// the byte range `[N * BLOB_CHUNK_SIZE_BYTES, …)`.
248    chunks: Vec<ChunkRef>,
249    /// Sum of every chunk's `size`. Cached for cheap `BlobRef::size`
250    /// without iterating the vector; validated on decode to match
251    /// the iterated sum.
252    total_size: u64,
253}
254
255/// Borrow-only sibling of [`ManifestBody`]. Postcard's serializer
256/// walks fields in declaration order and encodes `String` /
257/// `Vec<T>` identically to `&str` / `&[T]` (same length-prefix +
258/// bytes shape), so this type's `Serialize` output is byte-for-
259/// byte identical to [`ManifestBody`]'s with the same field
260/// values. Used by [`BlobRef::encoded_len`] to *measure* without
261/// cloning the URI and chunk vector — cubic-dev-ai code review
262/// flagged the original `chunks.clone()` as a 36-bytes-per-chunk
263/// allocation per sizing call on large manifests.
264///
265/// Encoders that need to produce the wire bytes still use
266/// [`ManifestBody`] via `postcard::to_allocvec`; this type is
267/// measure-only.
268#[derive(Serialize)]
269struct ManifestBodyRef<'a> {
270    body_version: u8,
271    uri: &'a str,
272    encoding: Encoding,
273    chunks: &'a [ChunkRef],
274    total_size: u64,
275}
276
277/// Pointer to content stored out-of-band. Round-trips through every
278/// binding as a typed value via the public fields; the substrate
279/// uses [`Self::encode`] / [`Self::decode`] for the wire form.
280///
281/// Two variants:
282///
283/// - [`BlobRef::Small`] — payload ≤ [`BLOB_CHUNK_SIZE_BYTES`]; a
284///   single content-addressed blob. Wire-compatible with v0.15.
285/// - [`BlobRef::Manifest`] — payload > [`BLOB_CHUNK_SIZE_BYTES`];
286///   carries an ordered [`ChunkRef`] list plus an [`Encoding`]
287///   discriminant. Each chunk is itself a content-addressed Small
288///   blob stored independently via the adapter; the manifest exists
289///   only as the routing structure that ties them together.
290#[derive(Clone, Debug, PartialEq, Eq, Hash)]
291pub enum BlobRef {
292    /// Single-blob path. Wire-compatible with v0.15.
293    Small {
294        /// Encoding version byte. Always [`BLOB_REF_VERSION_V1`] on
295        /// fresh constructions; decode preserves the on-wire value so
296        /// upstream code can detect forward-compat scenarios.
297        version: u8,
298        /// Adapter-routed URI — e.g. `s3://bucket/key`,
299        /// `ipfs://<cid>`, `file:///abs/path`, `mesh://<hex>`. The
300        /// scheme picks the adapter; the rest is passed through
301        /// opaque.
302        uri: String,
303        /// BLAKE3-256 hash of the canonical bytes the URI resolves
304        /// to. The substrate verifies this on every successful
305        /// fetch; an adversarial adapter cannot fake-verify because
306        /// the check runs in the substrate, not the adapter.
307        hash: [u8; 32],
308        /// Size of the resolved content in bytes. Range-fetch
309        /// callers use this to bound their reads; the verification
310        /// path uses it to short-circuit obviously-wrong payloads.
311        size: u64,
312    },
313    /// Chunked-blob path (v0.2). Wire version
314    /// [`BLOB_REF_VERSION_V2_MANIFEST`]; body schema version
315    /// [`BLOB_MANIFEST_BODY_VERSION`].
316    Manifest {
317        /// Outer wire discriminator (always
318        /// [`BLOB_REF_VERSION_V2_MANIFEST`] on fresh constructions).
319        version: u8,
320        /// Adapter-routed URI.
321        uri: String,
322        /// Replication / erasure encoding for the chunks.
323        encoding: Encoding,
324        /// Ordered chunk list. Empty manifests are rejected on
325        /// decode (use [`BlobRef::Small`] for zero-byte payloads).
326        chunks: Vec<ChunkRef>,
327        /// Total payload size = sum of every chunk's `size`. Cached
328        /// for cheap `BlobRef::size`; validated on decode against
329        /// the iterated sum.
330        total_size: u64,
331    },
332    /// Tree-manifest path (v0.3). Wire version
333    /// [`BLOB_REF_VERSION_V3_TREE`]; body schema version
334    /// [`BLOB_TREE_BODY_VERSION`]. Lifts the addressable size
335    /// from the v0.2 16 GiB cap to 128 PiB at fanout 128 + depth
336    /// 4 + 4 MiB chunks. The blob's actual chunk references live
337    /// at the [`TreeNode`](super::blob_tree::TreeNode) leaves,
338    /// reachable via the tree walk starting from `root_hash`.
339    Tree {
340        /// Outer wire discriminator (always
341        /// [`BLOB_REF_VERSION_V3_TREE`] on fresh constructions).
342        version: u8,
343        /// Adapter-routed URI. For the mesh-native path this is
344        /// `mesh://<hex-of-root_hash>`; external adapters use
345        /// their own scheme.
346        uri: String,
347        /// Replication / erasure encoding (inherits the same
348        /// enum surface as `Manifest`).
349        encoding: Encoding,
350        /// BLAKE3 hash of the root
351        /// [`TreeNode`](super::blob_tree::TreeNode) body — the
352        /// substrate fetches this hash to start the tree walk.
353        root_hash: [u8; 32],
354        /// Total payload size in bytes (sum of every leaf
355        /// chunk's `size` across the whole tree). Cached for
356        /// cheap [`Self::size`].
357        total_size: u64,
358        /// Tree depth — `1` for root-as-leaf, up to
359        /// [`super::blob_tree::MAX_TREE_DEPTH`].
360        depth: u8,
361    },
362}
363
364impl BlobRef {
365    // -----------------------------------------------------------
366    // Construction
367    // -----------------------------------------------------------
368
369    /// Construct a v1 [`BlobRef::Small`]. The caller is responsible
370    /// for the `hash` matching the content at `uri` — the substrate
371    /// verifies on fetch, not on construction.
372    pub fn small(uri: impl Into<String>, hash: [u8; 32], size: u64) -> Self {
373        Self::Small {
374            version: BLOB_REF_VERSION_V1,
375            uri: uri.into(),
376            hash,
377            size,
378        }
379    }
380
381    /// Backwards-compatible alias for [`Self::small`]. Pre-v0.2
382    /// callers used `BlobRef::new(uri, hash, size)` which produced
383    /// the single-blob shape; the new enum surface uses
384    /// [`Self::small`] for the same shape.
385    #[deprecated(
386        since = "0.18.0",
387        note = "use `BlobRef::small` for explicit-variant construction"
388    )]
389    pub fn new(uri: impl Into<String>, hash: [u8; 32], size: u64) -> Self {
390        Self::small(uri, hash, size)
391    }
392
393    /// Construct a v2 [`BlobRef::Manifest`] from a chunk list. The
394    /// caller is responsible for each chunk's hash matching the
395    /// stored chunk; the substrate verifies on fetch.
396    pub fn manifest(
397        uri: impl Into<String>,
398        encoding: Encoding,
399        chunks: Vec<ChunkRef>,
400    ) -> Result<Self, BlobError> {
401        if chunks.is_empty() {
402            return Err(BlobError::Decode(
403                "manifest must carry at least one chunk".to_owned(),
404            ));
405        }
406        if chunks.len() > BLOB_MANIFEST_MAX_CHUNKS {
407            return Err(BlobError::Decode(format!(
408                "manifest chunk count {} exceeds cap {}",
409                chunks.len(),
410                BLOB_MANIFEST_MAX_CHUNKS
411            )));
412        }
413        validate_chunk_sizes(&chunks)?;
414        let total_size: u64 = chunks.iter().map(|c| c.size as u64).sum();
415        if total_size > BLOB_REF_MAX_SIZE {
416            return Err(BlobError::Decode(format!(
417                "manifest total_size {} exceeds cap {}",
418                total_size, BLOB_REF_MAX_SIZE
419            )));
420        }
421        Ok(Self::Manifest {
422            version: BLOB_REF_VERSION_V2_MANIFEST,
423            uri: uri.into(),
424            encoding,
425            chunks,
426            total_size,
427        })
428    }
429
430    /// Construct a v3 [`BlobRef::Tree`]. The caller is responsible
431    /// for `root_hash` matching the BLAKE3 of the root
432    /// [`TreeNode`](super::blob_tree::TreeNode)'s encoded bytes,
433    /// and for `total_size` matching the sum of every leaf
434    /// chunk's `size` across the tree — the substrate verifies the
435    /// hash on tree-walk descent and cross-checks total_size at
436    /// the leaves.
437    ///
438    /// Validates:
439    /// - `total_size > 0` (use [`BlobRef::Small`] for zero-byte payloads).
440    /// - `total_size <= BLOB_TREE_MAX_TOTAL_SIZE` (~128 PiB ceiling).
441    /// - `depth` in `1..=MAX_TREE_DEPTH`.
442    pub fn tree(
443        uri: impl Into<String>,
444        encoding: Encoding,
445        root_hash: [u8; 32],
446        total_size: u64,
447        depth: u8,
448    ) -> Result<Self, BlobError> {
449        if total_size == 0 {
450            return Err(BlobError::Decode(
451                "tree total_size must be > 0; use BlobRef::Small for empty payloads".to_owned(),
452            ));
453        }
454        if total_size > BLOB_TREE_MAX_TOTAL_SIZE {
455            return Err(BlobError::Decode(format!(
456                "tree total_size {} exceeds cap {}",
457                total_size, BLOB_TREE_MAX_TOTAL_SIZE
458            )));
459        }
460        if depth == 0 || depth > super::blob_tree::MAX_TREE_DEPTH {
461            return Err(BlobError::Decode(format!(
462                "tree depth {} out of range 1..={}",
463                depth,
464                super::blob_tree::MAX_TREE_DEPTH
465            )));
466        }
467        Ok(Self::Tree {
468            version: BLOB_REF_VERSION_V3_TREE,
469            uri: uri.into(),
470            encoding,
471            root_hash,
472            total_size,
473            depth,
474        })
475    }
476
477    // -----------------------------------------------------------
478    // Accessors (uniform across variants)
479    // -----------------------------------------------------------
480
481    /// Outer wire version discriminator —
482    /// [`BLOB_REF_VERSION_V1`] for Small, [`BLOB_REF_VERSION_V2_MANIFEST`]
483    /// for Manifest, [`BLOB_REF_VERSION_V3_TREE`] for Tree.
484    pub fn version(&self) -> u8 {
485        match self {
486            Self::Small { version, .. }
487            | Self::Manifest { version, .. }
488            | Self::Tree { version, .. } => *version,
489        }
490    }
491
492    /// Adapter-routed URI. The scheme picks the adapter; the rest is
493    /// passed through opaque.
494    pub fn uri(&self) -> &str {
495        match self {
496            Self::Small { uri, .. } | Self::Manifest { uri, .. } | Self::Tree { uri, .. } => {
497                uri.as_str()
498            }
499        }
500    }
501
502    /// Total payload size in bytes — `size` for Small,
503    /// `total_size` for Manifest, `total_size` for Tree.
504    pub fn size(&self) -> u64 {
505        match self {
506            Self::Small { size, .. } => *size,
507            Self::Manifest { total_size, .. } | Self::Tree { total_size, .. } => *total_size,
508        }
509    }
510
511    /// `true` if this is a chunked-blob manifest (flat
512    /// [`Self::Manifest`] or hierarchical [`Self::Tree`]).
513    pub fn is_chunked(&self) -> bool {
514        matches!(self, Self::Manifest { .. } | Self::Tree { .. })
515    }
516
517    /// `true` if this is a hierarchical-manifest tree.
518    pub fn is_tree(&self) -> bool {
519        matches!(self, Self::Tree { .. })
520    }
521
522    /// The single content hash for a Small blob; `None` for a
523    /// Manifest or Tree (manifests reference many chunks, each
524    /// with its own hash — use [`Self::chunks`] for Manifest or
525    /// [`Self::tree_root_hash`] for Tree).
526    pub fn small_hash(&self) -> Option<&[u8; 32]> {
527        match self {
528            Self::Small { hash, .. } => Some(hash),
529            Self::Manifest { .. } | Self::Tree { .. } => None,
530        }
531    }
532
533    /// The root [`TreeNode`](super::blob_tree::TreeNode) hash for
534    /// a [`Self::Tree`]; `None` for [`Self::Small`] or
535    /// [`Self::Manifest`].
536    pub fn tree_root_hash(&self) -> Option<&[u8; 32]> {
537        match self {
538            Self::Tree { root_hash, .. } => Some(root_hash),
539            Self::Small { .. } | Self::Manifest { .. } => None,
540        }
541    }
542
543    /// The tree depth for a [`Self::Tree`]; `None` for
544    /// [`Self::Small`] or [`Self::Manifest`].
545    pub fn tree_depth(&self) -> Option<u8> {
546        match self {
547            Self::Tree { depth, .. } => Some(*depth),
548            Self::Small { .. } | Self::Manifest { .. } => None,
549        }
550    }
551
552    /// The chunk list for a Manifest; empty slice for a Small or
553    /// Tree (Tree chunks live at the leaf [`TreeNode`](super::blob_tree::TreeNode)s,
554    /// reachable via tree walk — not flattened here).
555    pub fn chunks(&self) -> &[ChunkRef] {
556        match self {
557            Self::Small { .. } | Self::Tree { .. } => &[],
558            Self::Manifest { chunks, .. } => chunks,
559        }
560    }
561
562    /// The encoding tag for a Manifest or Tree; `None` for a
563    /// Small (Small has no encoding because the bytes are stored
564    /// directly).
565    pub fn encoding(&self) -> Option<Encoding> {
566        match self {
567            Self::Small { .. } => None,
568            Self::Manifest { encoding, .. } | Self::Tree { encoding, .. } => Some(*encoding),
569        }
570    }
571
572    // -----------------------------------------------------------
573    // Wire format
574    // -----------------------------------------------------------
575
576    /// Encoded length in bytes. The `Small` variant is O(1) —
577    /// header size plus URI length. The `Manifest` / `Tree`
578    /// variants now use [`postcard::experimental::serialized_size`]
579    /// to *measure* without allocating, per dataforts perf #174.
580    ///
581    /// Pre-fix these variants called `self.encode().len()` — a
582    /// full postcard alloc-encode of the entire body just to
583    /// read `.len()` off the temporary and drop it. For a
584    /// 1000-chunk Manifest, that was 64 KB+ allocated and
585    /// thrown away per `encoded_len` call. Workloads that
586    /// pair `encoded_len` + `encode` (typical sizing-then-emit
587    /// pattern) paid 2× the encode cost.
588    ///
589    /// Post-fix `encoded_len` walks the structure measuring
590    /// without allocating the output buffer — same byte count,
591    /// no `Vec` churn.
592    #[expect(
593        clippy::expect_used,
594        reason = "ManifestBodyRef / TreeBodyRef are composed of sized Serialize types — `postcard::experimental::serialized_size` is infallible against them; mirrors the existing `#[expect]` on `encode()`"
595    )]
596    pub fn encoded_len(&self) -> usize {
597        match self {
598            Self::Small { uri, .. } => BLOB_REF_SMALL_HEADER_LEN + uri.len(),
599            Self::Manifest {
600                uri,
601                encoding,
602                chunks,
603                total_size,
604                ..
605            } => {
606                // Per cubic-dev-ai code review: use the borrow-
607                // only [`ManifestBodyRef`] so the sizing walk
608                // doesn't `chunks.clone()` (36 bytes/chunk × N for
609                // a manifest of N chunks — kilobytes of pointless
610                // allocation per `encoded_len` call on large
611                // manifests) or `uri.clone()`. Postcard's
612                // serializer encodes `&str` and `&[T]` identically
613                // to `String` / `Vec<T>` so the walked byte count
614                // matches `encode()`'s output exactly.
615                let body = ManifestBodyRef {
616                    body_version: BLOB_MANIFEST_BODY_VERSION,
617                    uri: uri.as_str(),
618                    encoding: *encoding,
619                    chunks: chunks.as_slice(),
620                    total_size: *total_size,
621                };
622                let body_len = postcard::experimental::serialized_size(&body)
623                    .expect("manifest body postcard-encodes infallibly");
624                BLOB_REF_MAGIC.len() + 1 + body_len
625            }
626            Self::Tree {
627                uri,
628                encoding,
629                root_hash,
630                total_size,
631                depth,
632                ..
633            } => {
634                // Symmetric with the Manifest arm — borrow rather
635                // than clone the URI for the sizing walk.
636                let body = TreeBodyRef {
637                    body_version: BLOB_TREE_BODY_VERSION,
638                    uri: uri.as_str(),
639                    encoding: *encoding,
640                    root_hash: *root_hash,
641                    total_size: *total_size,
642                    depth: *depth,
643                };
644                let body_len = postcard::experimental::serialized_size(&body)
645                    .expect("tree body postcard-encodes infallibly");
646                BLOB_REF_MAGIC.len() + 1 + body_len
647            }
648        }
649    }
650
651    /// Emit the wire form. See the module-level table for the
652    /// byte layout per variant.
653    #[expect(
654        clippy::expect_used,
655        reason = "ManifestBody / TreeBody are composed of sized Serialize types; postcard alloc-encoding is infallible against them"
656    )]
657    pub fn encode(&self) -> Vec<u8> {
658        match self {
659            Self::Small {
660                version,
661                uri,
662                hash,
663                size,
664            } => {
665                let mut buf = Vec::with_capacity(BLOB_REF_SMALL_HEADER_LEN + uri.len());
666                buf.extend_from_slice(&BLOB_REF_MAGIC);
667                buf.push(*version);
668                buf.extend_from_slice(hash);
669                buf.extend_from_slice(&size.to_le_bytes());
670                buf.extend_from_slice(uri.as_bytes());
671                buf
672            }
673            Self::Manifest {
674                version,
675                uri,
676                encoding,
677                chunks,
678                total_size,
679            } => {
680                let body = ManifestBody {
681                    body_version: BLOB_MANIFEST_BODY_VERSION,
682                    uri: uri.clone(),
683                    encoding: *encoding,
684                    chunks: chunks.clone(),
685                    total_size: *total_size,
686                };
687                // Postcard alloc-encode is infallible against
688                // `Serialize` types whose subobjects are all sized;
689                // every field here is sized. The Result-bearing
690                // signature is for fallible writers (e.g. fixed-size
691                // buffers); we use the heap allocator.
692                let body_bytes = postcard::to_allocvec(&body)
693                    .expect("manifest body postcard-encodes infallibly");
694                let mut buf = Vec::with_capacity(5 + body_bytes.len());
695                buf.extend_from_slice(&BLOB_REF_MAGIC);
696                buf.push(*version);
697                buf.extend_from_slice(&body_bytes);
698                buf
699            }
700            Self::Tree {
701                version,
702                uri,
703                encoding,
704                root_hash,
705                total_size,
706                depth,
707            } => {
708                let body = TreeBody {
709                    body_version: BLOB_TREE_BODY_VERSION,
710                    uri: uri.clone(),
711                    encoding: *encoding,
712                    root_hash: *root_hash,
713                    total_size: *total_size,
714                    depth: *depth,
715                };
716                let body_bytes =
717                    postcard::to_allocvec(&body).expect("tree body postcard-encodes infallibly");
718                let mut buf = Vec::with_capacity(5 + body_bytes.len());
719                buf.extend_from_slice(&BLOB_REF_MAGIC);
720                buf.push(*version);
721                buf.extend_from_slice(&body_bytes);
722                buf
723            }
724        }
725    }
726
727    /// Decode a wire form. Returns `Ok(None)` when the first four
728    /// bytes are not [`BLOB_REF_MAGIC`] (caller should treat the
729    /// payload as inline). Returns `Err` only when the magic matches
730    /// but the rest of the frame is malformed.
731    pub fn decode(bytes: &[u8]) -> Result<Option<Self>, BlobError> {
732        if bytes.len() < BLOB_REF_MAGIC.len() || bytes[..BLOB_REF_MAGIC.len()] != BLOB_REF_MAGIC {
733            return Ok(None);
734        }
735        if bytes.len() < 5 {
736            return Err(BlobError::Decode(format!(
737                "frame too short for version byte: {} bytes",
738                bytes.len()
739            )));
740        }
741        let version = bytes[4];
742        match version {
743            BLOB_REF_VERSION_V1 => Self::decode_small(version, &bytes[5..]).map(Some),
744            BLOB_REF_VERSION_V2_MANIFEST => Self::decode_manifest(version, &bytes[5..]).map(Some),
745            BLOB_REF_VERSION_V3_TREE => Self::decode_tree(version, &bytes[5..]).map(Some),
746            other => Err(BlobError::UnsupportedVersion(other)),
747        }
748    }
749
750    fn decode_small(version: u8, rest: &[u8]) -> Result<Self, BlobError> {
751        // rest layout: [hash 32][size 8][uri …]
752        if rest.len() < 40 {
753            return Err(BlobError::Decode(format!(
754                "small frame too short: {} bytes after version, need at least 40",
755                rest.len()
756            )));
757        }
758        let mut hash = [0u8; 32];
759        hash.copy_from_slice(&rest[0..32]);
760        let mut size_bytes = [0u8; 8];
761        size_bytes.copy_from_slice(&rest[32..40]);
762        let size = u64::from_le_bytes(size_bytes);
763        if size > BLOB_REF_MAX_SIZE {
764            return Err(BlobError::Decode(format!(
765                "blob size {} exceeds cap {}",
766                size, BLOB_REF_MAX_SIZE
767            )));
768        }
769        let uri = std::str::from_utf8(&rest[40..])
770            .map_err(|e| BlobError::Decode(format!("URI not UTF-8: {}", e)))?
771            .to_owned();
772        Ok(Self::Small {
773            version,
774            uri,
775            hash,
776            size,
777        })
778    }
779
780    fn decode_manifest(version: u8, rest: &[u8]) -> Result<Self, BlobError> {
781        // Bound the wire size BEFORE postcard allocates the
782        // `Vec<ChunkRef>`. Otherwise a malicious peer can stamp
783        // the chunks-length varint up to ~u32::MAX, forcing a
784        // multi-MB Vec allocation before our post-decode cap
785        // check at line ~25 below fires. The legitimate upper
786        // bound for a well-formed manifest body is:
787        //
788        //   uri (≤ 8 KiB after the substrate's outer length cap)
789        //   + 1 byte encoding discriminant
790        //   + 1 byte body_version
791        //   + ≤ 10 bytes total_size varint
792        //   + ≤ 5 bytes chunks-len varint (covers u32::MAX, far above our cap)
793        //   + BLOB_MANIFEST_MAX_CHUNKS chunks × ≤ 50 bytes max
794        //     each (32 hash + 5 size varint + 10 offset varint +
795        //     framing slack)
796        //
797        // Round up generously to a static upper bound. Anything
798        // past this is by construction malformed; reject without
799        // touching the allocator.
800        const MAX_MANIFEST_WIRE_BYTES: usize = 8192 + 32 + BLOB_MANIFEST_MAX_CHUNKS * 50;
801        if rest.len() > MAX_MANIFEST_WIRE_BYTES {
802            return Err(BlobError::Decode(format!(
803                "manifest body {} bytes exceeds legitimate upper bound {}",
804                rest.len(),
805                MAX_MANIFEST_WIRE_BYTES
806            )));
807        }
808        let body: ManifestBody = postcard::from_bytes(rest)
809            .map_err(|e| BlobError::Decode(format!("manifest body decode failed: {}", e)))?;
810        if body.body_version != BLOB_MANIFEST_BODY_VERSION {
811            return Err(BlobError::UnsupportedVersion(body.body_version));
812        }
813        if body.chunks.is_empty() {
814            return Err(BlobError::Decode(
815                "manifest must carry at least one chunk".to_owned(),
816            ));
817        }
818        if body.chunks.len() > BLOB_MANIFEST_MAX_CHUNKS {
819            return Err(BlobError::Decode(format!(
820                "manifest chunk count {} exceeds cap {}",
821                body.chunks.len(),
822                BLOB_MANIFEST_MAX_CHUNKS
823            )));
824        }
825        validate_chunk_sizes(&body.chunks)?;
826        // Validate the cached total_size matches the iterated sum —
827        // a malicious peer could otherwise lie about total_size to
828        // mislead range math without flipping any chunk's hash.
829        let iterated_sum: u64 = body.chunks.iter().map(|c| c.size as u64).sum();
830        if iterated_sum != body.total_size {
831            return Err(BlobError::Decode(format!(
832                "manifest total_size mismatch: declared {}, iterated {}",
833                body.total_size, iterated_sum
834            )));
835        }
836        if body.total_size > BLOB_REF_MAX_SIZE {
837            return Err(BlobError::Decode(format!(
838                "manifest total_size {} exceeds cap {}",
839                body.total_size, BLOB_REF_MAX_SIZE
840            )));
841        }
842        Ok(Self::Manifest {
843            version,
844            uri: body.uri,
845            encoding: body.encoding,
846            chunks: body.chunks,
847            total_size: body.total_size,
848        })
849    }
850
851    fn decode_tree(version: u8, rest: &[u8]) -> Result<Self, BlobError> {
852        // Bound the wire size BEFORE postcard allocates. The Tree
853        // body carries only fixed-size fields (root_hash, sizes,
854        // depth) plus a URI string — 1 KiB is generous for the
855        // legitimate shape and bounds malicious oversize payloads
856        // before the URI's String allocation runs.
857        if rest.len() > BLOB_REF_TREE_BODY_MAX_BYTES {
858            return Err(BlobError::Decode(format!(
859                "tree body {} bytes exceeds cap {}",
860                rest.len(),
861                BLOB_REF_TREE_BODY_MAX_BYTES
862            )));
863        }
864        let body: TreeBody = postcard::from_bytes(rest)
865            .map_err(|e| BlobError::Decode(format!("tree body decode failed: {}", e)))?;
866        if body.body_version != BLOB_TREE_BODY_VERSION {
867            return Err(BlobError::UnsupportedVersion(body.body_version));
868        }
869        if body.total_size == 0 {
870            return Err(BlobError::Decode(
871                "tree total_size must be > 0; empty payloads use BlobRef::Small".to_owned(),
872            ));
873        }
874        if body.total_size > BLOB_TREE_MAX_TOTAL_SIZE {
875            return Err(BlobError::Decode(format!(
876                "tree total_size {} exceeds cap {}",
877                body.total_size, BLOB_TREE_MAX_TOTAL_SIZE
878            )));
879        }
880        if body.depth == 0 || body.depth > super::blob_tree::MAX_TREE_DEPTH {
881            return Err(BlobError::Decode(format!(
882                "tree depth {} out of range 1..={}",
883                body.depth,
884                super::blob_tree::MAX_TREE_DEPTH
885            )));
886        }
887        // Defensive depth-vs-size lower bound. A well-formed depth=N
888        // tree (N >= 2) requires AT LEAST TREE_FANOUT^(N-1) bytes
889        // to be productive — depth=2 needs > FANOUT (128) bytes
890        // for an Internal root to be useful, depth=3 needs >
891        // FANOUT^2 = 16 384, depth=4 needs > FANOUT^3 ≈ 2 M. A
892        // manifest claiming depth=4 + total_size=1 is structurally
893        // malformed (a single chunk can't justify three internal
894        // levels) — reject before any walk traffic happens. The
895        // walker's depth-shortening check would catch this too,
896        // but at the cost of a round trip to fetch the root.
897        if body.depth >= 2 {
898            let exp = body.depth as u32 - 1;
899            // Compute FANOUT^exp using checked_pow; on overflow
900            // the depth is at the cap and the lower bound is
901            // satisfied by any reasonable total_size, so skip the
902            // check in that direction.
903            if let Some(min_size) = (super::blob_tree::TREE_FANOUT as u64).checked_pow(exp) {
904                if body.total_size < min_size {
905                    return Err(BlobError::Decode(format!(
906                        "tree depth {} requires total_size >= {} (TREE_FANOUT^(depth-1)); got {}",
907                        body.depth, min_size, body.total_size
908                    )));
909                }
910            }
911        }
912        Ok(Self::Tree {
913            version,
914            uri: body.uri,
915            encoding: body.encoding,
916            root_hash: body.root_hash,
917            total_size: body.total_size,
918            depth: body.depth,
919        })
920    }
921
922    /// Verify `bytes` resolves to this `BlobRef`'s hash. Only
923    /// defined for [`BlobRef::Small`] — call sites holding a
924    /// Manifest verify chunk-by-chunk via [`Self::chunks`]; call
925    /// sites holding a Tree verify via tree-walk descent (each
926    /// [`TreeNode`](super::blob_tree::TreeNode)'s bytes hash to
927    /// the parent's stored child-hash entry).
928    /// Returns `Ok(())` on match,
929    /// `Err(BlobError::HashMismatch)` otherwise, `Err(BlobError::Decode)`
930    /// on a Manifest / Tree. Runs inside the substrate, not the
931    /// adapter, so an adversarial adapter cannot fake-verify.
932    pub fn verify(&self, bytes: &[u8]) -> Result<(), BlobError> {
933        match self {
934            Self::Small { hash, .. } => {
935                let actual: [u8; 32] = blake3::hash(bytes).into();
936                if actual == *hash {
937                    Ok(())
938                } else {
939                    Err(BlobError::HashMismatch {
940                        expected: *hash,
941                        actual,
942                    })
943                }
944            }
945            Self::Manifest { .. } => Err(BlobError::Decode(
946                "verify is undefined on a Manifest variant; verify chunks individually".to_owned(),
947            )),
948            Self::Tree { .. } => Err(BlobError::Decode(
949                "verify is undefined on a Tree variant; verify chunks individually via tree walk"
950                    .to_owned(),
951            )),
952        }
953    }
954}
955
956// -------------------------------------------------------------------
957// Chunking + range math (pure logic — no I/O)
958// -------------------------------------------------------------------
959
960/// Reject manifests where any chunk size disagrees with the substrate's
961/// fixed [`BLOB_CHUNK_SIZE_BYTES`] stride. Every non-last chunk MUST
962/// be exactly `BLOB_CHUNK_SIZE_BYTES`; the last chunk MAY be smaller
963/// but must be at least one byte. `byte_range_to_chunks` and the
964/// adapter's range slicer rely on the fixed stride; an attacker-stamped
965/// `{size: u32::MAX}` chunk would otherwise either return wrong-window
966/// bytes silently or trip a panicking slice in the consumer.
967fn validate_chunk_sizes(chunks: &[ChunkRef]) -> Result<(), BlobError> {
968    let last = chunks.len() - 1;
969    for (i, chunk) in chunks.iter().enumerate() {
970        let size = chunk.size as u64;
971        if i < last {
972            if size != BLOB_CHUNK_SIZE_BYTES {
973                return Err(BlobError::Decode(format!(
974                    "manifest non-last chunk {} has size {} (expected {})",
975                    i, size, BLOB_CHUNK_SIZE_BYTES
976                )));
977            }
978        } else {
979            if size == 0 || size > BLOB_CHUNK_SIZE_BYTES {
980                return Err(BlobError::Decode(format!(
981                    "manifest last chunk {} has size {} (expected 1..={})",
982                    i, size, BLOB_CHUNK_SIZE_BYTES
983                )));
984            }
985        }
986    }
987    Ok(())
988}
989
990/// Outcome of [`chunk_payload`] — either the payload fit below the
991/// threshold (single Small blob shape) or it split into N chunks
992/// plus a manifest.
993#[derive(Clone, Debug)]
994pub enum ChunkedPayload<'a> {
995    /// Payload size ≤ [`BLOB_CHUNK_SIZE_BYTES`]; ride as a single
996    /// content-addressed blob. The caller stores `payload` against
997    /// the resulting hash; the [`BlobRef`] returned by
998    /// [`Self::into_blob_ref`] points at that single content.
999    Inline {
1000        /// BLAKE3 of the whole payload.
1001        hash: [u8; 32],
1002        /// Payload bytes (zero-copy slice into the caller's buffer).
1003        payload: &'a [u8],
1004    },
1005    /// Payload size > [`BLOB_CHUNK_SIZE_BYTES`]; split into N
1006    /// 4-MiB chunks (last chunk may be smaller). The caller stores
1007    /// each chunk independently against its hash; the
1008    /// [`BlobRef::Manifest`] returned by [`Self::into_blob_ref`]
1009    /// references all of them.
1010    Chunked {
1011        /// Each chunk's `(hash, byte-slice)`. Slices are zero-copy
1012        /// views into the caller's buffer.
1013        chunks: Vec<(ChunkRef, &'a [u8])>,
1014        /// Total payload length = sum of chunk lengths.
1015        total_size: u64,
1016    },
1017}
1018
1019impl<'a> ChunkedPayload<'a> {
1020    /// Total payload size — `payload.len()` for Inline, sum of chunk
1021    /// sizes for Chunked.
1022    pub fn size(&self) -> u64 {
1023        match self {
1024            Self::Inline { payload, .. } => payload.len() as u64,
1025            Self::Chunked { total_size, .. } => *total_size,
1026        }
1027    }
1028
1029    /// Convert into the corresponding [`BlobRef`] given the
1030    /// adapter-routed URI. Inline produces [`BlobRef::Small`];
1031    /// Chunked produces [`BlobRef::Manifest`] with the supplied
1032    /// encoding. Returns `Err` only when the chunked variant exceeds
1033    /// [`BLOB_MANIFEST_MAX_CHUNKS`] (defense-in-depth — the chunker
1034    /// already enforces the cap).
1035    pub fn into_blob_ref(
1036        self,
1037        uri: impl Into<String>,
1038        encoding: Encoding,
1039    ) -> Result<BlobRef, BlobError> {
1040        match self {
1041            Self::Inline { hash, payload } => Ok(BlobRef::small(uri, hash, payload.len() as u64)),
1042            Self::Chunked { chunks, .. } => {
1043                let chunk_refs: Vec<ChunkRef> = chunks.into_iter().map(|(r, _)| r).collect();
1044                BlobRef::manifest(uri, encoding, chunk_refs)
1045            }
1046        }
1047    }
1048}
1049
1050/// Split a byte payload into either a single Inline blob or N
1051/// fixed-size chunks, content-addressing each part. Locked decisions:
1052///
1053/// - Threshold is a hard `≤` comparison: payload at exactly
1054///   [`BLOB_CHUNK_SIZE_BYTES`] rides as Inline (the chunker
1055///   wouldn't have anything to split into), payloads strictly larger
1056///   split into N = `ceil(len / BLOB_CHUNK_SIZE_BYTES)` chunks.
1057/// - Chunk size is fixed at [`BLOB_CHUNK_SIZE_BYTES`]; the algorithm
1058///   is deterministic — two callers chunking the same `bytes`
1059///   produce identical hash lists.
1060/// - Empty payload produces an Inline result with `payload = &[]`
1061///   and the BLAKE3-of-empty hash.
1062///
1063/// Rejects payloads larger than [`BLOB_REF_MAX_SIZE`] or whose chunk
1064/// count would exceed [`BLOB_MANIFEST_MAX_CHUNKS`].
1065pub fn chunk_payload(bytes: &[u8]) -> Result<ChunkedPayload<'_>, BlobError> {
1066    let len = bytes.len() as u64;
1067    if len > BLOB_REF_MAX_SIZE {
1068        return Err(BlobError::Decode(format!(
1069            "payload size {} exceeds cap {}",
1070            len, BLOB_REF_MAX_SIZE
1071        )));
1072    }
1073    if len <= BLOB_CHUNK_SIZE_BYTES {
1074        let hash: [u8; 32] = blake3::hash(bytes).into();
1075        return Ok(ChunkedPayload::Inline {
1076            hash,
1077            payload: bytes,
1078        });
1079    }
1080    let chunk_size = BLOB_CHUNK_SIZE_BYTES as usize;
1081    let chunk_count = bytes.len().div_ceil(chunk_size);
1082    if chunk_count > BLOB_MANIFEST_MAX_CHUNKS {
1083        return Err(BlobError::Decode(format!(
1084            "payload requires {} chunks, exceeds cap {}",
1085            chunk_count, BLOB_MANIFEST_MAX_CHUNKS
1086        )));
1087    }
1088    let mut chunks = Vec::with_capacity(chunk_count);
1089    for slice in bytes.chunks(chunk_size) {
1090        let hash: [u8; 32] = blake3::hash(slice).into();
1091        chunks.push((
1092            ChunkRef {
1093                hash,
1094                size: slice.len() as u32,
1095            },
1096            slice,
1097        ));
1098    }
1099    Ok(ChunkedPayload::Chunked {
1100        chunks,
1101        total_size: len,
1102    })
1103}
1104
/// One per-chunk slice request produced by [`byte_range_to_chunks`].
///
/// The request covers the half-open window
/// `[start_in_chunk, end_in_chunk)` of the chunk at `chunk_index`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ChunkRangeRequest {
    /// Position of the chunk in the manifest's chunk list.
    pub chunk_index: usize,
    /// Offset *within the chunk* of the first requested byte. Non-zero
    /// only for the first chunk of a range that starts mid-chunk.
    pub start_in_chunk: u32,
    /// Offset *within the chunk* one past the last requested byte
    /// (exclusive). Equals the chunk's size unless the range ends before
    /// the chunk does.
    pub end_in_chunk: u32,
}

impl ChunkRangeRequest {
    /// Number of bytes requested from this chunk.
    ///
    /// An inverted window (`start_in_chunk > end_in_chunk`) has length
    /// `0`, matching [`Self::is_empty`], rather than underflowing.
    pub fn len(&self) -> u32 {
        self.end_in_chunk.saturating_sub(self.start_in_chunk)
    }

    /// `true` when the window contains no bytes, i.e.
    /// `start_in_chunk >= end_in_chunk`.
    pub fn is_empty(&self) -> bool {
        self.start_in_chunk >= self.end_in_chunk
    }
}
1131
1132/// Translate a global byte range `[start, end)` over a chunked blob
1133/// into the per-chunk fetch requests needed to satisfy it. Returns
1134/// the requests in chunk-index order so the caller can concatenate
1135/// the returned slices in iteration order. The math:
1136///
1137/// - `chunk_index` walks `[start / CHUNK, ceil(end / CHUNK))`.
1138/// - The first chunk's `start_in_chunk` is `start % CHUNK`; every
1139///   later chunk's `start_in_chunk` is `0`.
1140/// - The last chunk's `end_in_chunk` is `((end - 1) % CHUNK) + 1`
1141///   capped at the chunk's actual `size`; every earlier chunk's
1142///   `end_in_chunk` is the chunk's full `size`.
1143///
1144/// Returns an empty `Vec` for empty ranges (`start == end`) or when
1145/// `start >= total_size`. Errors when `end > total_size` or
1146/// `start > end` (callers should range-check before invoking, but
1147/// we surface a typed error to ease use as a defensive backstop).
1148///
1149/// Pure-logic; no chunk fetches happen here.
1150pub fn byte_range_to_chunks(
1151    manifest: &BlobRef,
1152    start: u64,
1153    end: u64,
1154) -> Result<Vec<ChunkRangeRequest>, BlobError> {
1155    let (chunks, total_size) = match manifest {
1156        BlobRef::Manifest {
1157            chunks, total_size, ..
1158        } => (chunks.as_slice(), *total_size),
1159        BlobRef::Small { .. } => {
1160            return Err(BlobError::Decode(
1161                "byte_range_to_chunks called on a Small BlobRef".to_owned(),
1162            ));
1163        }
1164        BlobRef::Tree { .. } => {
1165            // Tree blobs resolve ranges via tree walk
1166            // (A4 `TreeWalker`), not via the flat-manifest
1167            // helper. Callers holding a Tree BlobRef route
1168            // through `MeshBlobAdapter::fetch_range`'s tree
1169            // path directly.
1170            return Err(BlobError::Decode(
1171                "byte_range_to_chunks called on a Tree BlobRef — \
1172                 use the tree-walker path instead"
1173                    .to_owned(),
1174            ));
1175        }
1176    };
1177    if start > end {
1178        return Err(BlobError::Decode(format!(
1179            "range start {} > end {}",
1180            start, end
1181        )));
1182    }
1183    if end > total_size {
1184        return Err(BlobError::Decode(format!(
1185            "range end {} exceeds total_size {}",
1186            end, total_size
1187        )));
1188    }
1189    if start == end || start >= total_size {
1190        return Ok(Vec::new());
1191    }
1192    let chunk_size = BLOB_CHUNK_SIZE_BYTES;
1193    let first_chunk = (start / chunk_size) as usize;
1194    let last_chunk_inclusive = ((end - 1) / chunk_size) as usize;
1195    let mut out = Vec::with_capacity(last_chunk_inclusive - first_chunk + 1);
1196    for (chunk_index, chunk) in chunks
1197        .iter()
1198        .enumerate()
1199        .skip(first_chunk)
1200        .take(last_chunk_inclusive - first_chunk + 1)
1201    {
1202        let chunk_start_in_blob = chunk_index as u64 * chunk_size;
1203        // Clamp [start, end) against this chunk's
1204        // [chunk_start_in_blob, chunk_start_in_blob + chunk.size).
1205        let local_start = start.saturating_sub(chunk_start_in_blob);
1206        let local_end = (end - chunk_start_in_blob).min(chunk.size as u64);
1207        out.push(ChunkRangeRequest {
1208            chunk_index,
1209            start_in_chunk: local_start as u32,
1210            end_in_chunk: local_end as u32,
1211        });
1212    }
1213    Ok(out)
1214}
1215
1216#[cfg(test)]
1217mod tests {
1218    use super::*;
1219
1220    // -----------------------------------------------------------
1221    // Small variant — round-trip + decode-edge tests
1222    // (preserved from v0.15 for back-compat coverage)
1223    // -----------------------------------------------------------
1224
1225    fn small_fixture() -> BlobRef {
1226        BlobRef::small("s3://bucket/key", [0xAB; 32], 12345)
1227    }
1228
1229    #[test]
1230    fn small_round_trip_encode_decode() {
1231        let original = small_fixture();
1232        let bytes = original.encode();
1233        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
1234        assert_eq!(decoded, original);
1235    }
1236
1237    #[test]
1238    fn decode_returns_none_when_magic_missing() {
1239        let bytes = vec![0x00, 0x01, 0x02, 0x03, 0x04];
1240        assert!(BlobRef::decode(&bytes).unwrap().is_none());
1241    }
1242
1243    #[test]
1244    fn decode_returns_none_for_payloads_starting_with_old_discriminator_only() {
1245        let bytes = vec![0xB0, 0x00, 0x00, 0x00];
1246        assert!(BlobRef::decode(&bytes).unwrap().is_none());
1247        let bytes = vec![0xB0, 0xB1, 0x00, 0x00];
1248        assert!(BlobRef::decode(&bytes).unwrap().is_none());
1249        let bytes = vec![0xB0, 0xB1, 0xB2, 0x00];
1250        assert!(BlobRef::decode(&bytes).unwrap().is_none());
1251    }
1252
1253    #[test]
1254    fn decode_rejects_short_small_frame() {
1255        let mut bytes = BLOB_REF_MAGIC.to_vec();
1256        bytes.push(BLOB_REF_VERSION_V1);
1257        bytes.push(0x00); // truncated mid-hash
1258        let err = BlobRef::decode(&bytes).unwrap_err();
1259        assert!(matches!(err, BlobError::Decode(_)));
1260    }
1261
1262    #[test]
1263    fn decode_rejects_unknown_outer_version() {
1264        let blob = small_fixture();
1265        let mut bytes = blob.encode();
1266        bytes[4] = 0xFE;
1267        let err = BlobRef::decode(&bytes).unwrap_err();
1268        assert!(matches!(err, BlobError::UnsupportedVersion(0xFE)));
1269    }
1270
1271    #[test]
1272    fn encoded_len_matches_real_encoding_small() {
1273        let blob = small_fixture();
1274        assert_eq!(blob.encoded_len(), blob.encode().len());
1275    }
1276
1277    #[test]
1278    fn small_verify_accepts_matching_bytes() {
1279        let payload = b"the lazy dog";
1280        let hash: [u8; 32] = blake3::hash(payload).into();
1281        let blob = BlobRef::small("file:///x", hash, payload.len() as u64);
1282        blob.verify(payload).unwrap();
1283    }
1284
1285    #[test]
1286    fn small_verify_rejects_mismatching_bytes() {
1287        let blob = BlobRef::small("file:///x", [0xCC; 32], 0);
1288        let err = blob.verify(b"different content").unwrap_err();
1289        match err {
1290            BlobError::HashMismatch { expected, actual } => {
1291                assert_eq!(expected, [0xCC; 32]);
1292                assert_ne!(actual, expected);
1293            }
1294            other => panic!("expected HashMismatch, got {:?}", other),
1295        }
1296    }
1297
1298    #[test]
1299    fn small_decode_rejects_oversize_size_field() {
1300        let mut bytes = BLOB_REF_MAGIC.to_vec();
1301        bytes.push(BLOB_REF_VERSION_V1);
1302        bytes.extend_from_slice(&[0u8; 32]);
1303        bytes.extend_from_slice(&u64::MAX.to_le_bytes());
1304        let err = BlobRef::decode(&bytes).unwrap_err();
1305        assert!(matches!(err, BlobError::Decode(_)));
1306    }
1307
1308    #[test]
1309    fn empty_uri_round_trips_small() {
1310        let blob = BlobRef::small("", [0x00; 32], 0);
1311        let bytes = blob.encode();
1312        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
1313        assert_eq!(decoded.uri(), "");
1314        assert_eq!(decoded.size(), 0);
1315    }
1316
1317    // -----------------------------------------------------------
1318    // Manifest variant — round-trip + decode-edge tests
1319    // -----------------------------------------------------------
1320
1321    fn manifest_fixture(chunk_count: usize) -> BlobRef {
1322        let chunks: Vec<ChunkRef> = (0..chunk_count)
1323            .map(|i| ChunkRef {
1324                hash: [i as u8; 32],
1325                size: BLOB_CHUNK_SIZE_BYTES as u32,
1326            })
1327            .collect();
1328        BlobRef::manifest("mesh://abc", Encoding::Replicated, chunks).unwrap()
1329    }
1330
1331    #[test]
1332    fn manifest_round_trip_encode_decode() {
1333        let original = manifest_fixture(8);
1334        let bytes = original.encode();
1335        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
1336        assert_eq!(decoded, original);
1337    }
1338
1339    /// Pin dataforts perf #174: `encoded_len` measures the same
1340    /// byte count `encode()` produces, byte-for-byte, without
1341    /// allocating the encoded buffer. Pre-fix `encoded_len` for
1342    /// Manifest / Tree allocated a `Vec` via `encode()` and
1343    /// threw it away. The post-fix path uses
1344    /// `postcard::experimental::serialized_size` to walk the
1345    /// structure measuring. A regression that drifts the
1346    /// header-prefix accounting (4-byte magic + 1-byte version)
1347    /// would surface as a length mismatch here.
1348    #[test]
1349    fn encoded_len_matches_encode_len_without_allocating() {
1350        // Small variant: closed-form size — sanity check.
1351        let small = small_fixture();
1352        assert_eq!(small.encoded_len(), small.encode().len(), "Small parity");
1353
1354        // Manifest variants across several chunk-count regimes:
1355        // 1 chunk (smallest), 8 chunks (typical), 128 chunks
1356        // (large). Each exercises a different postcard leb128
1357        // length-prefix size.
1358        for count in [1usize, 8, 128] {
1359            let manifest = manifest_fixture(count);
1360            assert_eq!(
1361                manifest.encoded_len(),
1362                manifest.encode().len(),
1363                "Manifest({count} chunks) parity",
1364            );
1365        }
1366
1367        // Tree variant.
1368        let tree = BlobRef::tree("mesh://tree", Encoding::Replicated, [0xCD; 32], 1024, 3)
1369            .expect("tree ref");
1370        assert_eq!(tree.encoded_len(), tree.encode().len(), "Tree parity");
1371
1372        // ReedSolomon-encoded manifest: different encoding variant
1373        // sometimes serializes to a different byte count.
1374        let rs_manifest = BlobRef::manifest(
1375            "mesh://rs",
1376            Encoding::ReedSolomon { k: 4, m: 2 },
1377            vec![ChunkRef {
1378                hash: [0xAA; 32],
1379                size: 1024,
1380            }],
1381        )
1382        .unwrap();
1383        assert_eq!(
1384            rs_manifest.encoded_len(),
1385            rs_manifest.encode().len(),
1386            "RS Manifest parity",
1387        );
1388    }
1389
1390    /// Pin cubic-dev-ai code review for dataforts perf #174:
1391    /// `ManifestBodyRef` and `TreeBodyRef` must serialize
1392    /// byte-for-byte identically to the owned `ManifestBody` /
1393    /// `TreeBody` they mirror. The `encoded_len` sizing path
1394    /// uses the `Ref` form to avoid the per-chunk `.clone()` of
1395    /// the chunk vector; the byte-for-byte serialization
1396    /// equivalence is what makes the substitution safe.
1397    ///
1398    /// A future refactor that adds/reorders/renames a field in
1399    /// one type but not the other would silently corrupt the
1400    /// sizing path (post-corruption an `encoded_len` call would
1401    /// disagree with `encode().len()`). The existing
1402    /// `encoded_len_matches_encode_len_without_allocating` test
1403    /// would also catch this — this companion narrows the
1404    /// signal to specifically "ref form vs owned form" rather
1405    /// than the broader "encoded_len vs encode round-trip".
1406    #[test]
1407    fn manifest_body_ref_serializes_identically_to_owned_form() {
1408        let chunks: Vec<ChunkRef> = (0..32)
1409            .map(|i| ChunkRef {
1410                hash: [i as u8; 32],
1411                size: 1024 + i as u32,
1412            })
1413            .collect();
1414        let owned = ManifestBody {
1415            body_version: BLOB_MANIFEST_BODY_VERSION,
1416            uri: "mesh://parity-test".to_string(),
1417            encoding: Encoding::ReedSolomon { k: 4, m: 2 },
1418            chunks: chunks.clone(),
1419            total_size: 99_999,
1420        };
1421        let borrowed = ManifestBodyRef {
1422            body_version: BLOB_MANIFEST_BODY_VERSION,
1423            uri: "mesh://parity-test",
1424            encoding: Encoding::ReedSolomon { k: 4, m: 2 },
1425            chunks: chunks.as_slice(),
1426            total_size: 99_999,
1427        };
1428        let owned_bytes = postcard::to_allocvec(&owned).unwrap();
1429        let borrowed_bytes = postcard::to_allocvec(&borrowed).unwrap();
1430        assert_eq!(
1431            owned_bytes, borrowed_bytes,
1432            "ManifestBodyRef must serialize byte-for-byte identically to ManifestBody",
1433        );
1434        // And the measured-only path agrees with the alloc'd path.
1435        let measured_owned = postcard::experimental::serialized_size(&owned).unwrap();
1436        let measured_borrowed = postcard::experimental::serialized_size(&borrowed).unwrap();
1437        assert_eq!(measured_owned, measured_borrowed);
1438        assert_eq!(measured_owned, owned_bytes.len());
1439    }
1440
1441    /// Sibling of the Manifest parity pin: same invariant for
1442    /// `TreeBodyRef` against `TreeBody`.
1443    #[test]
1444    fn tree_body_ref_serializes_identically_to_owned_form() {
1445        let owned = TreeBody {
1446            body_version: BLOB_TREE_BODY_VERSION,
1447            uri: "mesh://tree-parity".to_string(),
1448            encoding: Encoding::Replicated,
1449            root_hash: [0xCD; 32],
1450            total_size: 1_234_567,
1451            depth: 3,
1452        };
1453        let borrowed = TreeBodyRef {
1454            body_version: BLOB_TREE_BODY_VERSION,
1455            uri: "mesh://tree-parity",
1456            encoding: Encoding::Replicated,
1457            root_hash: [0xCD; 32],
1458            total_size: 1_234_567,
1459            depth: 3,
1460        };
1461        let owned_bytes = postcard::to_allocvec(&owned).unwrap();
1462        let borrowed_bytes = postcard::to_allocvec(&borrowed).unwrap();
1463        assert_eq!(
1464            owned_bytes, borrowed_bytes,
1465            "TreeBodyRef must serialize byte-for-byte identically to TreeBody",
1466        );
1467    }
1468
1469    #[test]
1470    fn manifest_round_trip_with_reed_solomon_reserved() {
1471        let chunks = vec![ChunkRef {
1472            hash: [0xAA; 32],
1473            size: 1024,
1474        }];
1475        let blob =
1476            BlobRef::manifest("mesh://rs", Encoding::ReedSolomon { k: 4, m: 2 }, chunks).unwrap();
1477        let bytes = blob.encode();
1478        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
1479        assert_eq!(
1480            decoded.encoding(),
1481            Some(Encoding::ReedSolomon { k: 4, m: 2 })
1482        );
1483    }
1484
1485    #[test]
1486    fn manifest_rejects_empty_chunk_list() {
1487        let err = BlobRef::manifest("mesh://", Encoding::Replicated, Vec::new()).unwrap_err();
1488        assert!(matches!(err, BlobError::Decode(_)));
1489    }
1490
1491    #[test]
1492    fn manifest_rejects_too_many_chunks() {
1493        let chunks: Vec<ChunkRef> = (0..BLOB_MANIFEST_MAX_CHUNKS + 1)
1494            .map(|_| ChunkRef {
1495                hash: [0; 32],
1496                size: 1,
1497            })
1498            .collect();
1499        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1500        assert!(matches!(err, BlobError::Decode(_)));
1501    }
1502
1503    #[test]
1504    fn manifest_rejects_total_size_over_cap() {
1505        let chunks = vec![
1506            ChunkRef {
1507                hash: [0; 32],
1508                size: u32::MAX,
1509            };
1510            5
1511        ];
1512        // 5 × 4 GiB ≈ 20 GiB > 16 GiB cap (also fails chunk-size validator)
1513        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1514        assert!(matches!(err, BlobError::Decode(_)));
1515    }
1516
1517    /// `byte_range_to_chunks` and the adapter's range slicer rely on
1518    /// the substrate's fixed `BLOB_CHUNK_SIZE_BYTES` stride. A
1519    /// peer-crafted manifest with non-stride chunk sizes makes the
1520    /// position math return wrong-window bytes silently, so both
1521    /// `manifest()` and `decode_manifest()` must reject those shapes.
1522    #[test]
1523    fn manifest_rejects_non_last_chunk_smaller_than_stride() {
1524        let chunks = vec![
1525            ChunkRef {
1526                hash: [1; 32],
1527                size: 1, // first chunk must be exactly BLOB_CHUNK_SIZE_BYTES
1528            },
1529            ChunkRef {
1530                hash: [2; 32],
1531                size: BLOB_CHUNK_SIZE_BYTES as u32,
1532            },
1533        ];
1534        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1535        assert!(matches!(err, BlobError::Decode(_)));
1536    }
1537
1538    #[test]
1539    fn manifest_rejects_non_last_chunk_larger_than_stride() {
1540        let chunks = vec![
1541            ChunkRef {
1542                hash: [1; 32],
1543                size: (BLOB_CHUNK_SIZE_BYTES as u32) + 1,
1544            },
1545            ChunkRef {
1546                hash: [2; 32],
1547                size: BLOB_CHUNK_SIZE_BYTES as u32,
1548            },
1549        ];
1550        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1551        assert!(matches!(err, BlobError::Decode(_)));
1552    }
1553
1554    #[test]
1555    fn manifest_rejects_last_chunk_above_stride() {
1556        let chunks = vec![ChunkRef {
1557            hash: [1; 32],
1558            size: (BLOB_CHUNK_SIZE_BYTES as u32) + 1,
1559        }];
1560        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1561        assert!(matches!(err, BlobError::Decode(_)));
1562    }
1563
1564    #[test]
1565    fn manifest_rejects_zero_size_chunk() {
1566        let chunks = vec![ChunkRef {
1567            hash: [1; 32],
1568            size: 0,
1569        }];
1570        let err = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap_err();
1571        assert!(matches!(err, BlobError::Decode(_)));
1572    }
1573
1574    #[test]
1575    fn manifest_accepts_single_short_chunk_as_last() {
1576        // A single chunk smaller than the stride is the valid
1577        // single-chunk last-chunk case (a payload less than 4 MiB
1578        // would normally ride as Small, but Manifest with one short
1579        // chunk is structurally legal).
1580        let chunks = vec![ChunkRef {
1581            hash: [1; 32],
1582            size: 1024,
1583        }];
1584        let blob = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap();
1585        assert_eq!(blob.size(), 1024);
1586    }
1587
1588    #[test]
1589    fn manifest_accepts_multichunk_with_short_last() {
1590        let chunks = vec![
1591            ChunkRef {
1592                hash: [1; 32],
1593                size: BLOB_CHUNK_SIZE_BYTES as u32,
1594            },
1595            ChunkRef {
1596                hash: [2; 32],
1597                size: 1024,
1598            },
1599        ];
1600        let blob = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap();
1601        assert_eq!(blob.size(), BLOB_CHUNK_SIZE_BYTES + 1024);
1602    }
1603
1604    #[test]
1605    fn manifest_decode_detects_total_size_lie() {
1606        // Hand-craft a manifest body whose declared total_size is
1607        // wrong vs. the iterated sum — a malicious peer could
1608        // otherwise mislead range math by under-reporting the
1609        // total. Decode must reject.
1610        use serde::Serialize;
1611        #[derive(Serialize)]
1612        struct LyingBody {
1613            body_version: u8,
1614            uri: String,
1615            encoding: Encoding,
1616            chunks: Vec<ChunkRef>,
1617            total_size: u64,
1618        }
1619        let lying = LyingBody {
1620            body_version: BLOB_MANIFEST_BODY_VERSION,
1621            uri: "mesh://lie".to_owned(),
1622            encoding: Encoding::Replicated,
1623            chunks: vec![ChunkRef {
1624                hash: [0; 32],
1625                size: 100,
1626            }],
1627            total_size: 200, // declared 200 but iterated sum is 100
1628        };
1629        let body = postcard::to_allocvec(&lying).unwrap();
1630        let mut bytes = BLOB_REF_MAGIC.to_vec();
1631        bytes.push(BLOB_REF_VERSION_V2_MANIFEST);
1632        bytes.extend_from_slice(&body);
1633        let err = BlobRef::decode(&bytes).unwrap_err();
1634        assert!(matches!(err, BlobError::Decode(_)));
1635    }
1636
1637    #[test]
1638    fn manifest_decode_rejects_unknown_body_version() {
1639        use serde::Serialize;
1640        #[derive(Serialize)]
1641        struct FutureBody {
1642            body_version: u8,
1643            uri: String,
1644            encoding: Encoding,
1645            chunks: Vec<ChunkRef>,
1646            total_size: u64,
1647        }
1648        let body = FutureBody {
1649            body_version: 0xFE,
1650            uri: "mesh://".to_owned(),
1651            encoding: Encoding::Replicated,
1652            chunks: vec![ChunkRef {
1653                hash: [0; 32],
1654                size: 1,
1655            }],
1656            total_size: 1,
1657        };
1658        let body_bytes = postcard::to_allocvec(&body).unwrap();
1659        let mut bytes = BLOB_REF_MAGIC.to_vec();
1660        bytes.push(BLOB_REF_VERSION_V2_MANIFEST);
1661        bytes.extend_from_slice(&body_bytes);
1662        let err = BlobRef::decode(&bytes).unwrap_err();
1663        assert!(matches!(err, BlobError::UnsupportedVersion(0xFE)));
1664    }
1665
1666    #[test]
1667    fn manifest_size_matches_iterated_chunk_sum() {
1668        let blob = manifest_fixture(10);
1669        let iterated: u64 = blob.chunks().iter().map(|c| c.size as u64).sum();
1670        assert_eq!(blob.size(), iterated);
1671    }
1672
1673    #[test]
1674    fn accessors_uniform_across_variants() {
1675        let small = BlobRef::small("file:///s", [0; 32], 99);
1676        assert_eq!(small.uri(), "file:///s");
1677        assert_eq!(small.size(), 99);
1678        assert!(!small.is_chunked());
1679        assert!(small.small_hash().is_some());
1680        assert!(small.chunks().is_empty());
1681        assert_eq!(small.encoding(), None);
1682
1683        let m = manifest_fixture(3);
1684        assert_eq!(m.uri(), "mesh://abc");
1685        assert!(m.is_chunked());
1686        assert!(m.small_hash().is_none());
1687        assert_eq!(m.chunks().len(), 3);
1688        assert_eq!(m.encoding(), Some(Encoding::Replicated));
1689    }
1690
1691    // -----------------------------------------------------------
1692    // Chunking algorithm — idempotency + edge cases
1693    // -----------------------------------------------------------
1694
1695    #[test]
1696    fn chunk_payload_inline_under_threshold() {
1697        let payload = vec![0x42u8; 1024]; // 1 KiB
1698        match chunk_payload(&payload).unwrap() {
1699            ChunkedPayload::Inline { payload: p, hash } => {
1700                assert_eq!(p.len(), 1024);
1701                let expected_hash: [u8; 32] = blake3::hash(&payload).into();
1702                assert_eq!(hash, expected_hash);
1703            }
1704            ChunkedPayload::Chunked { .. } => panic!("expected Inline for 1 KiB payload"),
1705        }
1706    }
1707
1708    #[test]
1709    fn chunk_payload_inline_at_exact_threshold() {
1710        let payload = vec![0x42u8; BLOB_CHUNK_SIZE_BYTES as usize]; // exactly 4 MiB
1711        assert!(matches!(
1712            chunk_payload(&payload).unwrap(),
1713            ChunkedPayload::Inline { .. }
1714        ));
1715    }
1716
1717    #[test]
1718    fn chunk_payload_chunks_above_threshold() {
1719        let payload = vec![0x42u8; (BLOB_CHUNK_SIZE_BYTES as usize) + 1]; // 4 MiB + 1
1720        match chunk_payload(&payload).unwrap() {
1721            ChunkedPayload::Chunked { chunks, total_size } => {
1722                assert_eq!(chunks.len(), 2);
1723                assert_eq!(chunks[0].0.size, BLOB_CHUNK_SIZE_BYTES as u32);
1724                assert_eq!(chunks[1].0.size, 1);
1725                assert_eq!(total_size, payload.len() as u64);
1726            }
1727            ChunkedPayload::Inline { .. } => panic!("expected Chunked for 4MiB+1 payload"),
1728        }
1729    }
1730
1731    #[test]
1732    fn chunk_payload_idempotent_same_bytes_same_hashes() {
1733        // Two callers chunking the same payload must produce
1734        // identical ChunkRef lists — the dedup property the
1735        // replication layer relies on.
1736        let payload: Vec<u8> = (0..(8 * 1024 * 1024 + 17))
1737            .map(|i| (i % 251) as u8)
1738            .collect();
1739        let first = match chunk_payload(&payload).unwrap() {
1740            ChunkedPayload::Chunked { chunks, .. } => {
1741                chunks.iter().map(|(c, _)| *c).collect::<Vec<_>>()
1742            }
1743            _ => panic!("expected Chunked"),
1744        };
1745        let second = match chunk_payload(&payload).unwrap() {
1746            ChunkedPayload::Chunked { chunks, .. } => {
1747                chunks.iter().map(|(c, _)| *c).collect::<Vec<_>>()
1748            }
1749            _ => panic!("expected Chunked"),
1750        };
1751        assert_eq!(first, second);
1752    }
1753
1754    #[test]
1755    fn chunk_payload_empty_is_inline() {
1756        let payload: Vec<u8> = Vec::new();
1757        match chunk_payload(&payload).unwrap() {
1758            ChunkedPayload::Inline { payload, hash } => {
1759                assert!(payload.is_empty());
1760                let expected: [u8; 32] = blake3::hash(b"").into();
1761                assert_eq!(hash, expected);
1762            }
1763            _ => panic!("empty payload must be Inline"),
1764        }
1765    }
1766
1767    #[test]
1768    fn chunk_payload_rejects_oversize() {
1769        // Construct a fake "len" by lying via slice — but we can't
1770        // actually allocate 16 GiB. Instead, test the cap-check
1771        // arithmetic via a payload sized 4 GiB + 1 against a smaller
1772        // synthetic cap. The production cap is BLOB_REF_MAX_SIZE so
1773        // we test the chunk-count cap path here.
1774        // (chunk-count cap fires at MAX_CHUNKS * 4 MiB = 32 GiB,
1775        // before BLOB_REF_MAX_SIZE — verified below.)
1776        assert!(BLOB_MANIFEST_MAX_CHUNKS as u64 * BLOB_CHUNK_SIZE_BYTES > BLOB_REF_MAX_SIZE);
1777    }
1778
1779    // -----------------------------------------------------------
1780    // byte_range_to_chunks — range math
1781    // -----------------------------------------------------------
1782
1783    fn five_chunk_manifest() -> BlobRef {
1784        // Five 4 MiB chunks (20 MiB total).
1785        let chunks: Vec<ChunkRef> = (0..5)
1786            .map(|i| ChunkRef {
1787                hash: [i as u8; 32],
1788                size: BLOB_CHUNK_SIZE_BYTES as u32,
1789            })
1790            .collect();
1791        BlobRef::manifest("mesh://x", Encoding::Replicated, chunks).unwrap()
1792    }
1793
1794    #[test]
1795    fn range_aligned_single_chunk() {
1796        let m = five_chunk_manifest();
1797        let req = byte_range_to_chunks(&m, 0, BLOB_CHUNK_SIZE_BYTES).unwrap();
1798        assert_eq!(req.len(), 1);
1799        assert_eq!(req[0].chunk_index, 0);
1800        assert_eq!(req[0].start_in_chunk, 0);
1801        assert_eq!(req[0].end_in_chunk, BLOB_CHUNK_SIZE_BYTES as u32);
1802    }
1803
1804    #[test]
1805    fn range_unaligned_within_one_chunk() {
1806        let m = five_chunk_manifest();
1807        let req = byte_range_to_chunks(&m, 100, 200).unwrap();
1808        assert_eq!(req.len(), 1);
1809        assert_eq!(req[0].chunk_index, 0);
1810        assert_eq!(req[0].start_in_chunk, 100);
1811        assert_eq!(req[0].end_in_chunk, 200);
1812        assert_eq!(req[0].len(), 100);
1813    }
1814
1815    #[test]
1816    fn range_spans_two_chunks() {
1817        let m = five_chunk_manifest();
1818        let chunk = BLOB_CHUNK_SIZE_BYTES;
1819        // Last 1 KiB of chunk 0, first 1 KiB of chunk 1.
1820        let req = byte_range_to_chunks(&m, chunk - 1024, chunk + 1024).unwrap();
1821        assert_eq!(req.len(), 2);
1822        assert_eq!(req[0].chunk_index, 0);
1823        assert_eq!(req[0].start_in_chunk, (chunk - 1024) as u32);
1824        assert_eq!(req[0].end_in_chunk, chunk as u32);
1825        assert_eq!(req[1].chunk_index, 1);
1826        assert_eq!(req[1].start_in_chunk, 0);
1827        assert_eq!(req[1].end_in_chunk, 1024);
1828    }
1829
1830    #[test]
1831    fn range_spans_all_chunks() {
1832        let m = five_chunk_manifest();
1833        let req = byte_range_to_chunks(&m, 0, m.size()).unwrap();
1834        assert_eq!(req.len(), 5);
1835        for (i, r) in req.iter().enumerate() {
1836            assert_eq!(r.chunk_index, i);
1837            assert_eq!(r.start_in_chunk, 0);
1838            assert_eq!(r.end_in_chunk, BLOB_CHUNK_SIZE_BYTES as u32);
1839        }
1840    }
1841
1842    #[test]
1843    fn range_with_partial_last_chunk() {
1844        // Manifest where the last chunk is smaller than the chunk
1845        // size — exercises the per-chunk clamp on `end_in_chunk`.
1846        let chunks = vec![
1847            ChunkRef {
1848                hash: [0; 32],
1849                size: BLOB_CHUNK_SIZE_BYTES as u32,
1850            },
1851            ChunkRef {
1852                hash: [1; 32],
1853                size: 1024, // last chunk is 1 KiB
1854            },
1855        ];
1856        let m = BlobRef::manifest("mesh://", Encoding::Replicated, chunks).unwrap();
1857        // Range covers all of chunk 0 + first 100 bytes of chunk 1.
1858        let req = byte_range_to_chunks(&m, 0, BLOB_CHUNK_SIZE_BYTES + 100).unwrap();
1859        assert_eq!(req.len(), 2);
1860        assert_eq!(req[1].chunk_index, 1);
1861        assert_eq!(req[1].start_in_chunk, 0);
1862        assert_eq!(req[1].end_in_chunk, 100);
1863    }
1864
1865    #[test]
1866    fn range_empty_is_empty_request_list() {
1867        let m = five_chunk_manifest();
1868        assert!(byte_range_to_chunks(&m, 100, 100).unwrap().is_empty());
1869        // start past end-of-blob → empty too.
1870        assert!(byte_range_to_chunks(&m, m.size(), m.size())
1871            .unwrap()
1872            .is_empty());
1873    }
1874
1875    #[test]
1876    fn range_rejects_end_past_total_size() {
1877        let m = five_chunk_manifest();
1878        let err = byte_range_to_chunks(&m, 0, m.size() + 1).unwrap_err();
1879        assert!(matches!(err, BlobError::Decode(_)));
1880    }
1881
1882    #[test]
1883    fn range_rejects_start_after_end() {
1884        let m = five_chunk_manifest();
1885        let err = byte_range_to_chunks(&m, 200, 100).unwrap_err();
1886        assert!(matches!(err, BlobError::Decode(_)));
1887    }
1888
1889    #[test]
1890    fn range_rejects_call_against_small() {
1891        let s = BlobRef::small("file:///x", [0; 32], 100);
1892        let err = byte_range_to_chunks(&s, 0, 50).unwrap_err();
1893        assert!(matches!(err, BlobError::Decode(_)));
1894    }
1895
1896    #[test]
1897    fn range_math_reassembles_exact_payload() {
1898        // End-to-end sanity: chunk a payload, then for several
1899        // sub-ranges, reconstruct the byte slice by walking the
1900        // chunk-range requests and assembling.
1901        let payload: Vec<u8> = (0..(BLOB_CHUNK_SIZE_BYTES as usize * 3 + 1000))
1902            .map(|i| (i % 251) as u8)
1903            .collect();
1904        let chunked = chunk_payload(&payload).unwrap();
1905        let (chunks_owned, total_size) = match chunked {
1906            ChunkedPayload::Chunked { chunks, total_size } => (chunks, total_size),
1907            _ => panic!("expected Chunked"),
1908        };
1909        let chunk_refs: Vec<ChunkRef> = chunks_owned.iter().map(|(r, _)| *r).collect();
1910        let chunk_bytes: Vec<&[u8]> = chunks_owned.iter().map(|(_, b)| *b).collect();
1911        let m = BlobRef::manifest("mesh://x", Encoding::Replicated, chunk_refs).unwrap();
1912        assert_eq!(m.size(), total_size);
1913
1914        let cases = [
1915            (0u64, total_size),
1916            (10, 5_000_000),
1917            (BLOB_CHUNK_SIZE_BYTES, BLOB_CHUNK_SIZE_BYTES + 1),
1918            (total_size - 100, total_size),
1919        ];
1920        for (start, end) in cases {
1921            let requests = byte_range_to_chunks(&m, start, end).unwrap();
1922            let mut assembled = Vec::with_capacity((end - start) as usize);
1923            for r in requests {
1924                let chunk = chunk_bytes[r.chunk_index];
1925                assembled
1926                    .extend_from_slice(&chunk[r.start_in_chunk as usize..r.end_in_chunk as usize]);
1927            }
1928            assert_eq!(
1929                assembled,
1930                payload[start as usize..end as usize],
1931                "range [{}, {}) reassembly mismatch",
1932                start,
1933                end
1934            );
1935        }
1936    }
1937
1938    // -----------------------------------------------------------
1939    // BlobRef::Tree (v0.3) constructor + wire round-trip
1940    // -----------------------------------------------------------
1941
1942    fn tree_root() -> [u8; 32] {
1943        [0xAB; 32]
1944    }
1945
1946    #[test]
1947    fn tree_constructor_sets_version_and_fields() {
1948        let r = BlobRef::tree(
1949            "mesh://ab".to_string(),
1950            Encoding::Replicated,
1951            tree_root(),
1952            1024 * 1024 * 1024 * 64, // 64 GiB
1953            2,
1954        )
1955        .unwrap();
1956        assert_eq!(r.version(), BLOB_REF_VERSION_V3_TREE);
1957        assert_eq!(r.uri(), "mesh://ab");
1958        assert_eq!(r.size(), 1024 * 1024 * 1024 * 64);
1959        assert_eq!(r.tree_depth(), Some(2));
1960        assert_eq!(r.tree_root_hash(), Some(&tree_root()));
1961        assert_eq!(r.encoding(), Some(Encoding::Replicated));
1962        assert!(r.is_chunked());
1963        assert!(r.is_tree());
1964        assert!(r.small_hash().is_none());
1965        assert!(r.chunks().is_empty());
1966    }
1967
1968    #[test]
1969    fn tree_constructor_rejects_zero_total_size() {
1970        let err = BlobRef::tree("mesh://aa", Encoding::Replicated, tree_root(), 0, 1).unwrap_err();
1971        let msg = err.to_string();
1972        assert!(msg.contains("must be > 0"), "got: {msg}");
1973    }
1974
1975    #[test]
1976    fn tree_constructor_rejects_total_size_above_cap() {
1977        let err = BlobRef::tree(
1978            "mesh://aa",
1979            Encoding::Replicated,
1980            tree_root(),
1981            BLOB_TREE_MAX_TOTAL_SIZE + 1,
1982            4,
1983        )
1984        .unwrap_err();
1985        let msg = err.to_string();
1986        assert!(msg.contains("exceeds cap"), "got: {msg}");
1987    }
1988
1989    #[test]
1990    fn tree_constructor_rejects_zero_depth() {
1991        let err =
1992            BlobRef::tree("mesh://aa", Encoding::Replicated, tree_root(), 1024, 0).unwrap_err();
1993        let msg = err.to_string();
1994        assert!(msg.contains("depth"), "got: {msg}");
1995    }
1996
1997    #[test]
1998    fn tree_constructor_rejects_depth_above_cap() {
1999        let err = BlobRef::tree(
2000            "mesh://aa",
2001            Encoding::Replicated,
2002            tree_root(),
2003            1024,
2004            super::super::blob_tree::MAX_TREE_DEPTH + 1,
2005        )
2006        .unwrap_err();
2007        let msg = err.to_string();
2008        assert!(msg.contains("depth"), "got: {msg}");
2009    }
2010
2011    #[test]
2012    fn tree_encode_decode_round_trips() {
2013        let original = BlobRef::tree(
2014            "mesh://cafe".to_string(),
2015            Encoding::Replicated,
2016            tree_root(),
2017            1024 * 1024 * 1024, // 1 GiB
2018            1,
2019        )
2020        .unwrap();
2021        let bytes = original.encode();
2022        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
2023        assert_eq!(original, decoded);
2024        match decoded {
2025            BlobRef::Tree {
2026                version,
2027                uri,
2028                encoding,
2029                root_hash,
2030                total_size,
2031                depth,
2032            } => {
2033                assert_eq!(version, BLOB_REF_VERSION_V3_TREE);
2034                assert_eq!(uri, "mesh://cafe");
2035                assert_eq!(encoding, Encoding::Replicated);
2036                assert_eq!(root_hash, tree_root());
2037                assert_eq!(total_size, 1024 * 1024 * 1024);
2038                assert_eq!(depth, 1);
2039            }
2040            other => panic!("expected Tree, got {:?}", other),
2041        }
2042    }
2043
2044    #[test]
2045    fn tree_decode_preserves_reedsolomon_encoding_tag() {
2046        let original = BlobRef::tree(
2047            "mesh://ff",
2048            Encoding::ReedSolomon { k: 10, m: 4 },
2049            tree_root(),
2050            1u64 << 40, // 1 TiB
2051            3,
2052        )
2053        .unwrap();
2054        let bytes = original.encode();
2055        let decoded = BlobRef::decode(&bytes).unwrap().unwrap();
2056        assert_eq!(
2057            decoded.encoding(),
2058            Some(Encoding::ReedSolomon { k: 10, m: 4 })
2059        );
2060    }
2061
2062    #[test]
2063    fn tree_decode_rejects_unknown_outer_version() {
2064        // Hand-craft magic + an unknown version byte + arbitrary
2065        // postcard body bytes. Must surface UnsupportedVersion
2066        // rather than mis-decode as Small or Manifest.
2067        let mut bytes = Vec::new();
2068        bytes.extend_from_slice(&BLOB_REF_MAGIC);
2069        bytes.push(0xFE); // not 0x01/0x02/0x03
2070        bytes.extend_from_slice(&[0u8; 64]);
2071        let err = BlobRef::decode(&bytes).unwrap_err();
2072        assert!(
2073            matches!(err, BlobError::UnsupportedVersion(0xFE)),
2074            "expected UnsupportedVersion(0xFE), got {err:?}"
2075        );
2076    }
2077
2078    #[test]
2079    fn tree_decode_rejects_unknown_body_version() {
2080        // Encode a tree, then hand-mutate the body_version field
2081        // (first byte after magic+outer-version) to an unknown
2082        // value. Decoder must surface UnsupportedVersion for the
2083        // body, not silently accept.
2084        let original =
2085            BlobRef::tree("mesh://aa", Encoding::Replicated, tree_root(), 1024, 1).unwrap();
2086        let mut bytes = original.encode();
2087        // The postcard body starts at offset 5. The body's first
2088        // field is `body_version: u8`, which postcard emits as a
2089        // single byte (no leading length prefix on `u8`). Mutate
2090        // it to an unknown value.
2091        bytes[5] = 0xEF;
2092        let err = BlobRef::decode(&bytes).unwrap_err();
2093        assert!(
2094            matches!(err, BlobError::UnsupportedVersion(0xEF)),
2095            "expected UnsupportedVersion(0xEF), got {err:?}"
2096        );
2097    }
2098
2099    #[test]
2100    fn tree_decode_rejects_oversize_body() {
2101        // Hand-construct magic + outer version + a body whose
2102        // length exceeds BLOB_REF_TREE_BODY_MAX_BYTES. Decoder
2103        // must reject BEFORE postcard allocates so a malicious
2104        // peer can't force a large allocation.
2105        let mut bytes = Vec::new();
2106        bytes.extend_from_slice(&BLOB_REF_MAGIC);
2107        bytes.push(BLOB_REF_VERSION_V3_TREE);
2108        bytes.extend(std::iter::repeat_n(0u8, BLOB_REF_TREE_BODY_MAX_BYTES + 1));
2109        let err = BlobRef::decode(&bytes).unwrap_err();
2110        let msg = err.to_string();
2111        assert!(msg.contains("exceeds cap"), "got: {msg}");
2112    }
2113
2114    #[test]
2115    fn tree_decode_rejects_total_size_above_cap() {
2116        // Hand-encode a TreeBody with a u64 total_size past
2117        // BLOB_TREE_MAX_TOTAL_SIZE. Decoder catches it via the
2118        // post-decode validation, not via the constructor.
2119        let body = TreeBody {
2120            body_version: BLOB_TREE_BODY_VERSION,
2121            uri: "mesh://x".to_string(),
2122            encoding: Encoding::Replicated,
2123            root_hash: tree_root(),
2124            total_size: BLOB_TREE_MAX_TOTAL_SIZE + 1,
2125            depth: 4,
2126        };
2127        let body_bytes = postcard::to_allocvec(&body).unwrap();
2128        let mut bytes = Vec::new();
2129        bytes.extend_from_slice(&BLOB_REF_MAGIC);
2130        bytes.push(BLOB_REF_VERSION_V3_TREE);
2131        bytes.extend_from_slice(&body_bytes);
2132        let err = BlobRef::decode(&bytes).unwrap_err();
2133        let msg = err.to_string();
2134        assert!(msg.contains("exceeds cap"), "got: {msg}");
2135    }
2136
2137    #[test]
2138    fn tree_decode_rejects_depth_inconsistent_with_total_size() {
2139        // depth=4 against a 1-byte total_size is structurally
2140        // malformed — TREE_FANOUT^3 ≈ 2 M bytes is the lower bound
2141        // for a productive depth-4 tree. Pre-fix the walker
2142        // would still catch the mismatch at fetch time, but the
2143        // decode-side check short-circuits before any walk traffic.
2144        let body = TreeBody {
2145            body_version: BLOB_TREE_BODY_VERSION,
2146            uri: "mesh://x".to_string(),
2147            encoding: Encoding::Replicated,
2148            root_hash: tree_root(),
2149            total_size: 1,
2150            depth: 4,
2151        };
2152        let body_bytes = postcard::to_allocvec(&body).unwrap();
2153        let mut bytes = Vec::new();
2154        bytes.extend_from_slice(&BLOB_REF_MAGIC);
2155        bytes.push(BLOB_REF_VERSION_V3_TREE);
2156        bytes.extend_from_slice(&body_bytes);
2157        let err = BlobRef::decode(&bytes).unwrap_err();
2158        let msg = err.to_string();
2159        assert!(
2160            msg.contains("requires total_size >="),
2161            "expected depth-vs-size lower-bound error; got: {msg}",
2162        );
2163    }
2164
2165    #[test]
2166    fn tree_decode_rejects_depth_above_cap() {
2167        let body = TreeBody {
2168            body_version: BLOB_TREE_BODY_VERSION,
2169            uri: "mesh://x".to_string(),
2170            encoding: Encoding::Replicated,
2171            root_hash: tree_root(),
2172            total_size: 1024,
2173            depth: super::super::blob_tree::MAX_TREE_DEPTH + 1,
2174        };
2175        let body_bytes = postcard::to_allocvec(&body).unwrap();
2176        let mut bytes = Vec::new();
2177        bytes.extend_from_slice(&BLOB_REF_MAGIC);
2178        bytes.push(BLOB_REF_VERSION_V3_TREE);
2179        bytes.extend_from_slice(&body_bytes);
2180        let err = BlobRef::decode(&bytes).unwrap_err();
2181        let msg = err.to_string();
2182        assert!(msg.contains("depth"), "got: {msg}");
2183    }
2184
2185    #[test]
2186    fn verify_on_tree_returns_typed_error() {
2187        let r = BlobRef::tree("mesh://aa", Encoding::Replicated, tree_root(), 1024, 1).unwrap();
2188        let err = r.verify(b"any bytes").unwrap_err();
2189        let msg = err.to_string();
2190        assert!(
2191            msg.contains("Tree variant"),
2192            "Tree verify should surface a typed Decode error pointing at tree-walk; got: {msg}",
2193        );
2194    }
2195
2196    #[test]
2197    fn tree_does_not_alias_small_or_manifest_via_decode() {
2198        // Round-trip three variants and assert each decodes back
2199        // to its own shape. Pre-fix the version-byte gate ensures
2200        // a Tree wire form is never mis-decoded as Small/Manifest.
2201        let small = BlobRef::small("mesh://aa", [0xAA; 32], 100);
2202        let manifest = BlobRef::manifest(
2203            "mesh://bb",
2204            Encoding::Replicated,
2205            vec![ChunkRef {
2206                hash: [0xBB; 32],
2207                size: 1024,
2208            }],
2209        )
2210        .unwrap();
2211        let tree = BlobRef::tree(
2212            "mesh://cc",
2213            Encoding::Replicated,
2214            [0xCC; 32],
2215            1024 * 1024 * 1024,
2216            1,
2217        )
2218        .unwrap();
2219
2220        let s_decoded = BlobRef::decode(&small.encode()).unwrap().unwrap();
2221        let m_decoded = BlobRef::decode(&manifest.encode()).unwrap().unwrap();
2222        let t_decoded = BlobRef::decode(&tree.encode()).unwrap().unwrap();
2223
2224        assert!(matches!(s_decoded, BlobRef::Small { .. }));
2225        assert!(matches!(m_decoded, BlobRef::Manifest { .. }));
2226        assert!(matches!(t_decoded, BlobRef::Tree { .. }));
2227        assert_eq!(s_decoded.version(), BLOB_REF_VERSION_V1);
2228        assert_eq!(m_decoded.version(), BLOB_REF_VERSION_V2_MANIFEST);
2229        assert_eq!(t_decoded.version(), BLOB_REF_VERSION_V3_TREE);
2230    }
2231}