Skip to main content

freenet_git_types/
chunked.rs

1//! ChunkedPack manifest format and validation.
2//!
3//! When a packfile exceeds [`DEFAULT_CHUNK_SIZE`], the on-host helper
4//! splits it across multiple [`ObjectBundle::ChunkedPack`] contracts:
5//! one immutable `pack-contract` per chunk plus one immutable
6//! `pack-contract` for the manifest. The repo state's
7//! `ObjectBundle::ChunkedPack { manifest_hash, total_size, chunk_count }`
8//! refers to the manifest, which in turn lists the chunks.
9//!
10//! Anything in this module is "view-only" from the contract WASM's
11//! perspective — the contract just sees opaque bytes whose BLAKE3 hash
12//! must equal the manifest hash that the repo state declares. All the
13//! schema and validation logic here runs in the on-host helper.
14//!
15//! ## Wire format (v1)
16//!
17//! ```text
18//! manifest = bincode<{
19//!   version:      u8 = 1,
20//!   chunk_size:   u32,        // bytes per non-final chunk
21//!   total_size:   u64,        // sum of all chunk lengths
22//!   chunk_count:  u32,        // == chunk_hashes.len()
23//!   chunk_hashes: Vec<[u8; 32]>,  // BLAKE3-32 of each chunk's bytes, in order
24//! }>
25//! ```
26//!
27//! `bincode` with default config is deterministic for this shape (only
28//! fixed-size primitives and a length-prefixed Vec of fixed-size byte
29//! arrays). A worked-example test pins the bytes; any change is a
30//! wire-format break.
31//!
32//! ## Validation rules (enforced at decode time)
33//!
34//! [`ChunkedPackManifestV1::validate`] rejects manifests where any of:
35//!
36//! - `version != 1`
37//! - `chunk_count == 0`
38//! - `chunk_count != chunk_hashes.len()`
39//! - `chunk_size == 0`
40//! - `total_size == 0`
41//! - `total_size > chunk_size as u64 * chunk_count as u64` — would imply
42//!   the final chunk is larger than `chunk_size`.
43//! - `total_size <= chunk_size as u64 * (chunk_count - 1) as u64` — would
44//!   imply the final chunk is empty.
45//!
46//! Per-chunk byte length verification (each non-final chunk equals
47//! `chunk_size`, final equals `total_size - (chunk_count-1)*chunk_size`)
48//! is enforced when the helper actually fetches and concatenates the
49//! chunks.
50
51use serde::{Deserialize, Serialize};
52
53use crate::PackHash;
54
/// Chunk size the on-host helper uses by default when it has to split
/// a large pack: 1 MiB (1,048,576 bytes).
///
/// A pack of exactly 1 MiB stays a `SinglePack`; one byte more and it
/// becomes a `ChunkedPack` made of two chunks.
pub const DEFAULT_CHUNK_SIZE: u32 = 1 << 20;
59
60/// Errors when validating or decoding a manifest.
61#[derive(Debug, thiserror::Error, PartialEq, Eq)]
62pub enum ManifestError {
63    /// `bincode` failed to decode the bytes.
64    #[error("manifest decode: {0}")]
65    Decode(String),
66    /// Unknown wire-format version.
67    #[error("manifest version {0} is not supported")]
68    UnsupportedVersion(u8),
69    /// Internal field disagreement (chunk_count vs chunk_hashes.len, etc).
70    #[error("manifest internal inconsistency: {0}")]
71    Inconsistent(&'static str),
72    /// `total_size` cannot match the declared chunk count and chunk size.
73    #[error("manifest total_size {total} cannot fit {count} chunks of size {chunk}")]
74    SizeOutOfRange {
75        /// Total size (bytes).
76        total: u64,
77        /// Declared chunk count.
78        count: u32,
79        /// Declared per-chunk size (bytes).
80        chunk: u32,
81    },
82}
83
84/// On-the-wire manifest for a chunked pack.
85#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
86pub struct ChunkedPackManifestV1 {
87    /// Wire-format version. Currently always 1.
88    pub version: u8,
89    /// Bytes per non-final chunk. The final chunk is in
90    /// `(0, chunk_size]`.
91    pub chunk_size: u32,
92    /// Sum of all chunk lengths, equal to the original pack size.
93    pub total_size: u64,
94    /// `== chunk_hashes.len()`. Carried explicitly for cheap size
95    /// checks before we allocate the Vec on decode.
96    pub chunk_count: u32,
97    /// BLAKE3-32 of each chunk's bytes, in stream order.
98    #[serde(with = "serde_bytes_array_vec")]
99    pub chunk_hashes: Vec<PackHash>,
100}
101
102impl ChunkedPackManifestV1 {
103    /// Build a fresh manifest from the chunked pack bytes.
104    pub fn from_chunks(chunk_size: u32, chunks: &[Vec<u8>]) -> Self {
105        let chunk_count = u32::try_from(chunks.len())
106            .expect("freenet-git ChunkedPack with >4G chunks is not supported");
107        let total_size: u64 = chunks.iter().map(|c| c.len() as u64).sum();
108        let chunk_hashes: Vec<PackHash> =
109            chunks.iter().map(|c| *blake3::hash(c).as_bytes()).collect();
110        Self {
111            version: 1,
112            chunk_size,
113            total_size,
114            chunk_count,
115            chunk_hashes,
116        }
117    }
118
119    /// Encode to bytes via `bincode` with default config.
120    pub fn to_bytes(&self) -> Vec<u8> {
121        bincode::serialize(self).expect("ChunkedPackManifestV1 serialization is infallible")
122    }
123
124    /// Decode from bytes and run [`Self::validate`].
125    pub fn from_bytes(bytes: &[u8]) -> Result<Self, ManifestError> {
126        let manifest: Self =
127            bincode::deserialize(bytes).map_err(|e| ManifestError::Decode(e.to_string()))?;
128        manifest.validate()?;
129        Ok(manifest)
130    }
131
132    /// Run all internal-consistency checks. Does NOT verify that the
133    /// chunk bytes themselves match `chunk_hashes` — that is the
134    /// fetcher's job once it actually has the bytes.
135    pub fn validate(&self) -> Result<(), ManifestError> {
136        if self.version != 1 {
137            return Err(ManifestError::UnsupportedVersion(self.version));
138        }
139        if self.chunk_count == 0 {
140            return Err(ManifestError::Inconsistent("chunk_count must be > 0"));
141        }
142        if self.chunk_size == 0 {
143            return Err(ManifestError::Inconsistent("chunk_size must be > 0"));
144        }
145        if self.total_size == 0 {
146            return Err(ManifestError::Inconsistent("total_size must be > 0"));
147        }
148        if self.chunk_count as usize != self.chunk_hashes.len() {
149            return Err(ManifestError::Inconsistent(
150                "chunk_count does not match chunk_hashes length",
151            ));
152        }
153        // total_size must be in (chunk_size * (count - 1), chunk_size * count].
154        // Compute as u64 to avoid overflow.
155        let chunk_size_u64 = self.chunk_size as u64;
156        let count_u64 = self.chunk_count as u64;
157        let upper = chunk_size_u64
158            .checked_mul(count_u64)
159            .ok_or(ManifestError::SizeOutOfRange {
160                total: self.total_size,
161                count: self.chunk_count,
162                chunk: self.chunk_size,
163            })?;
164        let lower =
165            chunk_size_u64
166                .checked_mul(count_u64 - 1)
167                .ok_or(ManifestError::SizeOutOfRange {
168                    total: self.total_size,
169                    count: self.chunk_count,
170                    chunk: self.chunk_size,
171                })?;
172        if self.total_size > upper || self.total_size <= lower {
173            return Err(ManifestError::SizeOutOfRange {
174                total: self.total_size,
175                count: self.chunk_count,
176                chunk: self.chunk_size,
177            });
178        }
179        Ok(())
180    }
181
182    /// Length of the i-th chunk in bytes. All non-final chunks are
183    /// `chunk_size`; the final chunk is `total_size - chunk_size *
184    /// (chunk_count - 1)`. Caller must ensure the manifest has been
185    /// validated.
186    pub fn chunk_len(&self, i: u32) -> u64 {
187        debug_assert!(i < self.chunk_count, "chunk index out of range");
188        if i + 1 < self.chunk_count {
189            self.chunk_size as u64
190        } else {
191            self.total_size - (self.chunk_size as u64) * ((self.chunk_count - 1) as u64)
192        }
193    }
194}
195
/// Cut `pack` into consecutive pieces of `chunk_size` bytes each; only
/// the last piece may be shorter. The result always contains at least
/// one piece.
///
/// # Panics
///
/// Panics if `pack` is empty or if `chunk_size` is zero.
pub fn split_pack(pack: &[u8], chunk_size: u32) -> Vec<Vec<u8>> {
    assert!(!pack.is_empty(), "split_pack: empty pack");
    assert!(chunk_size > 0, "split_pack: zero chunk_size");

    let pieces = pack.chunks(chunk_size as usize);
    let mut owned = Vec::with_capacity(pieces.len());
    for piece in pieces {
        owned.push(piece.to_vec());
    }
    owned
}
206
207// `serde_bytes` does not have an array-element variant, so we hand-roll
208// one. We serialize `Vec<[u8; 32]>` as a length-prefixed sequence of
209// 32-byte arrays — exactly what bincode would do by default, with the
210// shape pinned explicitly so future encoder changes do not silently
211// break the wire format.
212mod serde_bytes_array_vec {
213    use serde::de::{SeqAccess, Visitor};
214    use serde::ser::SerializeSeq;
215    use serde::{Deserializer, Serializer};
216
217    pub fn serialize<S: Serializer>(value: &[[u8; 32]], ser: S) -> Result<S::Ok, S::Error> {
218        let mut seq = ser.serialize_seq(Some(value.len()))?;
219        for item in value {
220            seq.serialize_element(serde_bytes::Bytes::new(item))?;
221        }
222        seq.end()
223    }
224
225    pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Vec<[u8; 32]>, D::Error> {
226        struct V;
227        impl<'de> Visitor<'de> for V {
228            type Value = Vec<[u8; 32]>;
229            fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
230                f.write_str("a sequence of 32-byte arrays")
231            }
232            fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
233                let mut out = Vec::with_capacity(seq.size_hint().unwrap_or(0));
234                while let Some(b) = seq.next_element::<serde_bytes::ByteBuf>()? {
235                    let arr: [u8; 32] = b
236                        .as_ref()
237                        .try_into()
238                        .map_err(|_| serde::de::Error::custom("expected 32-byte chunk hash"))?;
239                    out.push(arr);
240                }
241                Ok(out)
242            }
243        }
244        de.deserialize_seq(V)
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `m` as-is (no validation on the way out), push it through
    /// the validating decoder, and return the error it must produce.
    fn decode_rejection(m: &ChunkedPackManifestV1) -> ManifestError {
        ChunkedPackManifestV1::from_bytes(&m.to_bytes()).unwrap_err()
    }

    #[test]
    fn round_trip_small_manifest() {
        let chunks: Vec<Vec<u8>> = vec![vec![0xAA; 100], vec![0xBB; 50]];
        let original = ChunkedPackManifestV1::from_chunks(100, &chunks);

        let decoded =
            ChunkedPackManifestV1::from_bytes(&original.to_bytes()).expect("valid");

        assert_eq!(decoded, original);
        assert_eq!(decoded.total_size, 150);
        assert_eq!(decoded.chunk_count, 2);
        assert_eq!(decoded.chunk_len(0), 100);
        assert_eq!(decoded.chunk_len(1), 50);
    }

    #[test]
    fn rejects_zero_chunk_count() {
        let err = decode_rejection(&ChunkedPackManifestV1 {
            version: 1,
            chunk_size: 1024,
            total_size: 1024,
            chunk_count: 0,
            chunk_hashes: vec![],
        });
        assert!(matches!(err, ManifestError::Inconsistent(_)));
    }

    #[test]
    fn rejects_count_hashes_mismatch() {
        let err = decode_rejection(&ChunkedPackManifestV1 {
            version: 1,
            chunk_size: 100,
            total_size: 200,
            chunk_count: 2,
            chunk_hashes: vec![[0; 32]],
        });
        assert!(matches!(err, ManifestError::Inconsistent(_)));
    }

    #[test]
    fn rejects_total_too_large() {
        let err = decode_rejection(&ChunkedPackManifestV1 {
            version: 1,
            chunk_size: 100,
            total_size: 250, // exceeds 100 * 2
            chunk_count: 2,
            chunk_hashes: vec![[0; 32]; 2],
        });
        assert!(matches!(err, ManifestError::SizeOutOfRange { .. }));
    }

    #[test]
    fn rejects_total_too_small_for_count() {
        // Two chunks of size 100 with a total of 100 would leave the
        // final chunk with 100 - 100 * 1 = 0 bytes, which is invalid.
        let err = decode_rejection(&ChunkedPackManifestV1 {
            version: 1,
            chunk_size: 100,
            total_size: 100,
            chunk_count: 2,
            chunk_hashes: vec![[0; 32]; 2],
        });
        assert!(matches!(err, ManifestError::SizeOutOfRange { .. }));
    }

    #[test]
    fn split_then_manifest_then_validate() {
        let pack: Vec<u8> = (0..2500u32).map(|i| (i & 0xFF) as u8).collect();
        let chunks = split_pack(&pack, 1000);
        let lengths: Vec<usize> = chunks.iter().map(Vec::len).collect();
        assert_eq!(lengths, [1000, 1000, 500]);

        let m = ChunkedPackManifestV1::from_chunks(1000, &chunks);
        assert!(m.validate().is_ok());
        assert_eq!(m.total_size, 2500);
        for (i, want) in [1000u64, 1000, 500].iter().enumerate() {
            assert_eq!(m.chunk_len(i as u32), *want);
        }

        // Gluing the chunks back together must reproduce the pack
        // bit-for-bit.
        assert_eq!(chunks.concat(), pack);
    }

    /// Wire-format pin. If either the encoded bytes or their BLAKE3
    /// digest changes, the wire format has been broken; that requires
    /// bumping `version` to 2 and updating every consumer.
    #[test]
    fn manifest_wire_format_fixture() {
        let m = ChunkedPackManifestV1 {
            version: 1,
            chunk_size: 4,
            total_size: 7,
            chunk_count: 2,
            chunk_hashes: vec![[0xAA; 32], [0xBB; 32]],
        };
        let bytes = m.to_bytes();

        // Layout under bincode's default config:
        //   version u8                  -> 01
        //   chunk_size u32 LE           -> 04 00 00 00
        //   total_size u64 LE           -> 07 00 00 00 00 00 00 00
        //   chunk_count u32 LE          -> 02 00 00 00
        //   Vec<bytes> length u64 LE    -> 02 00 00 00 00 00 00 00
        //   bytes #1: length u64 LE     -> 20 00 00 00 00 00 00 00
        //             32 * 0xAA
        //   bytes #2: length u64 LE     -> 20 00 00 00 00 00 00 00
        //             32 * 0xBB
        let expected_hex = "010400000007000000000000000200000002000000000000002000000000000000\
             aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\
             2000000000000000\
             bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
        let expected_clean: String = expected_hex.split_whitespace().collect();
        let actual_hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
        assert_eq!(
            actual_hex, expected_clean,
            "ChunkedPackManifestV1 wire format drift — bump version and update consumers"
        );

        // Digest of the encoded bytes, pinned.
        assert_eq!(
            blake3::hash(&bytes).to_hex().as_str(),
            "7b792da2fc4b787ff10abdbc480596c118e88ad1209da7d0c4d10d0bc060264e",
            "ChunkedPackManifestV1 BLAKE3 drift",
        );

        let decoded = ChunkedPackManifestV1::from_bytes(&bytes).unwrap();
        assert_eq!(decoded, m);
    }
}