Skip to main content

mk_codec/string_layer/
chunk.rs

1//! Stream chunking + cross-chunk integrity hash for mk1 multi-string cards.
2//!
3//! Per `design/SPEC_mk_v0_1.md` §2.6, the canonical bytecode is suffixed
4//! with a 4-byte `cross_chunk_hash` (= `SHA-256(canonical_bytecode)[0..4]`)
5//! before splitting into chunk fragments; the hash is verified at
6//! reassembly. This catches dropped, reordered, or substituted chunks
7//! that the per-chunk BCH layer alone cannot detect.
8
9use bitcoin::hashes::{Hash, sha256};
10
11use crate::consts::{CHUNKED_FRAGMENT_LONG_BYTES, CROSS_CHUNK_HASH_BYTES, MAX_CHUNKS};
12use crate::error::{Error, Result};
13use crate::string_layer::header::{MAX_CHUNK_SET_ID, StringLayerHeader, VERSION_V0_1};
14
15/// Maximum canonical-bytecode length that can be chunked under v0.1.
16///
17/// Equals `MAX_CHUNKS * CHUNKED_FRAGMENT_LONG_BYTES − CROSS_CHUNK_HASH_BYTES`
18/// (= 32 * 53 − 4 = 1692). Bytecodes longer than this cannot be encoded
19/// as a single mk1 card and the encoder returns
20/// [`Error::CardPayloadTooLarge`].
21pub const MAX_CHUNKABLE_BYTECODE: usize =
22    (MAX_CHUNKS as usize) * CHUNKED_FRAGMENT_LONG_BYTES - CROSS_CHUNK_HASH_BYTES;
23
24/// One chunk's worth of split output: a parsed header + its fragment bytes.
25#[non_exhaustive]
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct ChunkFragment {
28    /// The string-layer header that prefixes this chunk on the wire.
29    pub header: StringLayerHeader,
30    /// The raw fragment payload bytes for this chunk.
31    pub fragment: Vec<u8>,
32}
33
34/// Split canonical bytecode into chunks, appending the cross-chunk integrity hash.
35///
36/// The split target is `CHUNKED_FRAGMENT_LONG_BYTES` (= 53 bytes) per
37/// fragment so each chunk lands in long-code BCH territory under typical
38/// mk1 sizes; the last fragment may be shorter, in which case the
39/// pipeline auto-falls-back to regular code per
40/// [`encode_5bit_to_string`][crate::string_layer::bch::encode_5bit_to_string].
41///
42/// Returns [`Error::CardPayloadTooLarge`] if the bytecode (plus the 4-byte
43/// hash) exceeds `MAX_CHUNKS * CHUNKED_FRAGMENT_LONG_BYTES`.
44///
45/// # Determinism
46///
47/// The output is byte-deterministic in `(canonical_bytecode, chunk_set_id)`:
48/// callers passing the same arguments produce the same chunk sequence,
49/// which is the property Phase 6 relies on for vector regeneration.
50pub fn split_into_chunks(
51    canonical_bytecode: &[u8],
52    chunk_set_id: u32,
53) -> Result<Vec<ChunkFragment>> {
54    if chunk_set_id > MAX_CHUNK_SET_ID {
55        return Err(Error::ChunkedHeaderMalformed(format!(
56            "chunk_set_id {chunk_set_id:#x} exceeds 20-bit field"
57        )));
58    }
59    if canonical_bytecode.len() > MAX_CHUNKABLE_BYTECODE {
60        return Err(Error::CardPayloadTooLarge {
61            bytecode_len: canonical_bytecode.len(),
62            max_supported: MAX_CHUNKABLE_BYTECODE,
63        });
64    }
65
66    // Stream = bytecode || SHA-256(bytecode)[0..4]
67    let hash = sha256::Hash::hash(canonical_bytecode);
68    let mut stream = Vec::with_capacity(canonical_bytecode.len() + CROSS_CHUNK_HASH_BYTES);
69    stream.extend_from_slice(canonical_bytecode);
70    stream.extend_from_slice(&hash.to_byte_array()[..CROSS_CHUNK_HASH_BYTES]);
71
72    let frag_size = CHUNKED_FRAGMENT_LONG_BYTES;
73    let total: usize = stream.len().div_ceil(frag_size).max(1);
74    debug_assert!(
75        total <= MAX_CHUNKS as usize,
76        "capacity check above guarantees this"
77    );
78    let total_chunks_u8: u8 = total as u8;
79
80    let mut chunks = Vec::with_capacity(total);
81    for i in 0..total {
82        let start = i * frag_size;
83        let end = ((i + 1) * frag_size).min(stream.len());
84        let fragment = stream[start..end].to_vec();
85        let header = StringLayerHeader::Chunked {
86            version: VERSION_V0_1,
87            chunk_set_id,
88            total_chunks: total_chunks_u8,
89            chunk_index: i as u8,
90        };
91        chunks.push(ChunkFragment { header, fragment });
92    }
93    Ok(chunks)
94}
95
96/// Reassemble canonical bytecode from a list of parsed chunks.
97///
98/// Validates SPEC §4 rules 11–13 in order:
99///
100/// 1. All chunks must be `Chunked` (mixing with `SingleString` is rejected).
101/// 2. All chunks share `chunk_set_id` and `total_chunks`
102///    ([`Error::ChunkSetIdMismatch`], [`Error::ChunkedHeaderMalformed`]).
103/// 3. `chunk_index` values cover `0..total_chunks` exactly once
104///    ([`Error::ChunkedHeaderMalformed`] on gaps, duplicates, or out-of-range).
105/// 4. The reassembled stream's trailing 4-byte cross-chunk hash matches
106///    `SHA-256(reassembled_bytecode)[0..4]` ([`Error::CrossChunkHashMismatch`]).
107///
108/// Chunks may arrive in any order; this function sorts internally.
109pub fn reassemble_from_chunks(chunks: Vec<ChunkFragment>) -> Result<Vec<u8>> {
110    if chunks.is_empty() {
111        return Err(Error::ChunkedHeaderMalformed(
112            "empty chunk list".to_string(),
113        ));
114    }
115
116    // All chunks must be `Chunked` (no `SingleString` allowed at this entry).
117    let (set_id, total) = match chunks[0].header {
118        StringLayerHeader::Chunked {
119            chunk_set_id,
120            total_chunks,
121            ..
122        } => (chunk_set_id, total_chunks),
123        StringLayerHeader::SingleString { .. } => {
124            return Err(Error::ChunkedHeaderMalformed(
125                "single-string header in multi-chunk reassembly".to_string(),
126            ));
127        }
128    };
129
130    let total_usize = total as usize;
131    if chunks.len() != total_usize {
132        return Err(Error::ChunkedHeaderMalformed(format!(
133            "received {} chunks, header declares total_chunks = {total}",
134            chunks.len()
135        )));
136    }
137
138    // Place each chunk into a slot indexed by chunk_index; reject duplicates
139    // and gaps by tracking which slots are filled.
140    let mut slots: Vec<Option<Vec<u8>>> = (0..total_usize).map(|_| None).collect();
141    for chunk in chunks {
142        match chunk.header {
143            StringLayerHeader::Chunked {
144                version: _,
145                chunk_set_id,
146                total_chunks,
147                chunk_index,
148            } => {
149                if chunk_set_id != set_id {
150                    return Err(Error::ChunkSetIdMismatch);
151                }
152                if total_chunks != total {
153                    return Err(Error::ChunkedHeaderMalformed(format!(
154                        "total_chunks disagrees across chunks: saw {total} and {total_chunks}"
155                    )));
156                }
157                let idx = chunk_index as usize;
158                if idx >= total_usize {
159                    return Err(Error::ChunkedHeaderMalformed(format!(
160                        "chunk_index {idx} >= total_chunks {total}"
161                    )));
162                }
163                if slots[idx].is_some() {
164                    return Err(Error::ChunkedHeaderMalformed(format!(
165                        "duplicate chunk_index {idx}"
166                    )));
167                }
168                slots[idx] = Some(chunk.fragment);
169            }
170            StringLayerHeader::SingleString { .. } => {
171                // A `SingleString` header at any non-leading position in
172                // a chunked set is a header-types-disagree error, not a
173                // chunked-internal malformation. Emitted as
174                // [`Error::MixedHeaderTypes`] for symmetry with the
175                // forward-direction reject in `pipeline::decode`.
176                return Err(Error::MixedHeaderTypes);
177            }
178        }
179    }
180
181    // Concatenate fragments in chunk_index order.
182    let mut stream = Vec::new();
183    for (i, slot) in slots.into_iter().enumerate() {
184        let frag =
185            slot.ok_or_else(|| Error::ChunkedHeaderMalformed(format!("missing chunk_index {i}")))?;
186        stream.extend_from_slice(&frag);
187    }
188
189    // Verify cross-chunk hash. Stream layout: bytecode || hash[0..4].
190    if stream.len() < CROSS_CHUNK_HASH_BYTES {
191        return Err(Error::ChunkedHeaderMalformed(
192            "reassembled stream shorter than 4-byte cross-chunk hash".to_string(),
193        ));
194    }
195    let split = stream.len() - CROSS_CHUNK_HASH_BYTES;
196    let bytecode = &stream[..split];
197    let recovered_hash = &stream[split..];
198    let computed = sha256::Hash::hash(bytecode);
199    if recovered_hash != &computed.to_byte_array()[..CROSS_CHUNK_HASH_BYTES] {
200        return Err(Error::CrossChunkHashMismatch);
201    }
202    Ok(bytecode.to_vec())
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Message used when a test expects `split_into_chunks` output to carry
    /// `Chunked` headers and finds something else.
    const NOT_CHUNKED: &str = "split_into_chunks must emit chunked headers";

    fn fixture_bytecode(len: usize) -> Vec<u8> {
        // Deterministic but not all-zero, so the cross-chunk hash exercises
        // the SHA-256 path rather than the trivial digest.
        (0..len).map(|i| (i & 0xFF) as u8).collect()
    }

    #[test]
    fn split_then_reassemble_round_trip_short() {
        let bc = fixture_bytecode(60);
        let chunks = split_into_chunks(&bc, 0x12345).unwrap();
        // 60 + 4 = 64 stream bytes → ceil(64/53) = 2 chunks.
        assert_eq!(chunks.len(), 2);
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }

    #[test]
    fn split_then_reassemble_round_trip_typical_mk1_card_size() {
        // 84 bytes ≈ typical 1-stub mainnet card with std-table indicator
        // and fingerprint present (per SPEC §3.2 worked example).
        let bc = fixture_bytecode(84);
        let chunks = split_into_chunks(&bc, 0xABCDE).unwrap();
        // 84 + 4 = 88 → ceil(88/53) = 2 chunks.
        assert_eq!(chunks.len(), 2);
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }

    #[test]
    fn split_at_capacity_uses_max_chunks() {
        let bc = fixture_bytecode(MAX_CHUNKABLE_BYTECODE);
        let chunks = split_into_chunks(&bc, 0x55555).unwrap();
        assert_eq!(chunks.len(), MAX_CHUNKS as usize);
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }

    #[test]
    fn split_rejects_oversized_bytecode() {
        let bc = vec![0u8; MAX_CHUNKABLE_BYTECODE + 1];
        let r = split_into_chunks(&bc, 0);
        assert!(matches!(r, Err(Error::CardPayloadTooLarge { .. })));
    }

    #[test]
    fn split_rejects_chunk_set_id_above_20_bits() {
        let bc = fixture_bytecode(60);
        let r = split_into_chunks(&bc, 0x10_0000);
        assert!(matches!(r, Err(Error::ChunkedHeaderMalformed(_))));
    }

    #[test]
    fn split_is_deterministic_for_identical_arguments() {
        // The documented determinism property: same inputs, same chunks.
        let bc = fixture_bytecode(150);
        let first = split_into_chunks(&bc, 0x00777).unwrap();
        let second = split_into_chunks(&bc, 0x00777).unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn reassemble_accepts_out_of_order_chunks() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        chunks.reverse();
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }

    #[test]
    fn reassemble_rejects_chunk_set_id_mismatch() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0x12345).unwrap();
        // Tamper the second chunk's chunk_set_id. Fail loudly if the header
        // is not `Chunked`, so the test can never pass without tampering.
        let StringLayerHeader::Chunked { chunk_set_id, .. } = &mut chunks[1].header else {
            panic!("{NOT_CHUNKED}");
        };
        *chunk_set_id = 0x00001;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::ChunkSetIdMismatch)
        ));
    }

    #[test]
    fn reassemble_rejects_total_chunks_disagreement() {
        // 150 + 4 = 154 stream bytes → 3 chunks, each declaring total = 3.
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        assert_eq!(chunks.len(), 3);
        // Make a non-leading chunk claim a different total; the chunk count
        // still matches the leading header, so the per-chunk check fires.
        let StringLayerHeader::Chunked { total_chunks, .. } = &mut chunks[1].header else {
            panic!("{NOT_CHUNKED}");
        };
        *total_chunks = 4;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::ChunkedHeaderMalformed(_))
        ));
    }

    #[test]
    fn reassemble_rejects_chunk_index_out_of_range() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        assert_eq!(chunks.len(), 3);
        // Valid indices are 0..3; push the last chunk's index just past them.
        let StringLayerHeader::Chunked { chunk_index, .. } = &mut chunks[2].header else {
            panic!("{NOT_CHUNKED}");
        };
        *chunk_index = 3;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::ChunkedHeaderMalformed(_))
        ));
    }

    #[test]
    fn reassemble_rejects_cross_chunk_hash_mismatch() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        // Flip a byte inside the FIRST chunk's payload — this falls in
        // the bytecode region, so the recomputed SHA-256 will differ.
        chunks[0].fragment[0] ^= 0x01;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::CrossChunkHashMismatch)
        ));
    }

    #[test]
    fn reassemble_rejects_tampered_hash_bytes() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        // The final byte of the final fragment is the last hash byte; the
        // bytecode is untouched, so only the stored hash is wrong.
        let last = chunks.last_mut().expect("split yields at least one chunk");
        let tail = last
            .fragment
            .last_mut()
            .expect("final fragment carries hash bytes");
        *tail ^= 0x01;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::CrossChunkHashMismatch)
        ));
    }

    #[test]
    fn reassemble_rejects_duplicate_chunk_index() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        // Force two chunks to claim chunk_index = 0.
        let StringLayerHeader::Chunked { chunk_index, .. } = &mut chunks[1].header else {
            panic!("{NOT_CHUNKED}");
        };
        *chunk_index = 0;
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::ChunkedHeaderMalformed(_))
        ));
    }

    #[test]
    fn reassemble_rejects_missing_chunk() {
        let bc = fixture_bytecode(150);
        let mut chunks = split_into_chunks(&bc, 0).unwrap();
        // Drop the last chunk; reassembly must reject.
        chunks.pop();
        assert!(matches!(
            reassemble_from_chunks(chunks),
            Err(Error::ChunkedHeaderMalformed(_))
        ));
    }

    #[test]
    fn reassemble_rejects_empty_chunk_list() {
        assert!(matches!(
            reassemble_from_chunks(vec![]),
            Err(Error::ChunkedHeaderMalformed(_))
        ));
    }

    #[test]
    fn split_one_chunk_when_stream_fits_in_53_bytes() {
        // Bytecode 49 + 4-byte hash = 53 bytes → exactly fills one fragment.
        let bc = fixture_bytecode(49);
        let chunks = split_into_chunks(&bc, 0).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].fragment.len(), 53);
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }

    #[test]
    fn split_handles_empty_bytecode() {
        // Degenerate but defined: 0 bytes → stream is just the 4-byte hash.
        let bc: Vec<u8> = vec![];
        let chunks = split_into_chunks(&bc, 0).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].fragment.len(), CROSS_CHUNK_HASH_BYTES);
        let recovered = reassemble_from_chunks(chunks).unwrap();
        assert_eq!(recovered, bc);
    }
}