Skip to main content

mk_codec/string_layer/
pipeline.rs

1//! Public encode/decode entry points: `KeyCard` ↔ `Vec<String>`.
2//!
3//! The encoder is the layer-3 boundary at which the canonical bytecode
4//! produced by [`crate::bytecode::encode_bytecode`] is wrapped in
5//! BCH-checksummed mk1 strings. Two emission paths:
6//!
7//! - **Single string** — bytecode fits in
8//!   [`crate::consts::SINGLE_STRING_LONG_BYTES`] (= 56). Emits a single
9//!   `mk1`-prefixed string with a 2-symbol header and no cross-chunk hash.
10//! - **Chunked** — bytecode exceeds the single-string ceiling. Appends the
11//!   4-byte `cross_chunk_hash`, splits the resulting stream into chunks
12//!   of at most [`crate::consts::CHUNKED_FRAGMENT_LONG_BYTES`] (= 53)
13//!   bytes, and emits one `mk1` string per chunk with an 8-symbol header.
14//!
15//! v0.1 emit policy: each emitted string's per-chunk BCH code variant
16//! (regular vs long) is auto-selected by
17//! [`crate::string_layer::bch::encode_5bit_to_string`] from the resulting
18//! 5-bit-symbol data-part length. For typical mk1 cards (≈84 bytes
19//! bytecode → 88-byte stream → fragments of 53 + 35 bytes), this means
20//! chunk 0 lands in long-code territory and the trailing short chunk
21//! falls back to regular code. Decoders accept either per-chunk
22//! variant — mixed-code emit is wire-permitted by design.
23
24use crate::bytecode::{decode_bytecode, encode_bytecode};
25use crate::consts::SINGLE_STRING_LONG_BYTES;
26use crate::error::{Error, Result};
27use crate::key_card::KeyCard;
28use crate::string_layer::bch::{
29    bytes_to_5bit, decode_string, encode_5bit_to_string, five_bit_to_bytes,
30};
31use crate::string_layer::chunk::{ChunkFragment, reassemble_from_chunks, split_into_chunks};
32use crate::string_layer::header::{MAX_CHUNK_SET_ID, StringLayerHeader, VERSION_V0_1};
33
34/// Draw a fresh 20-bit `chunk_set_id` from the system CSPRNG via
35/// [`getrandom`]. The OS entropy source is used to avoid pulling a
36/// full RNG framework into the codec — `getrandom` is the same crate
37/// that backs `rand`'s `OsRng`, so the entropy quality is identical.
38///
39/// Per closure Q-5, the `chunk_set_id` is opaque and only used for
40/// reassembly mismatch detection, so any uniformly-distributed 20-bit
41/// value is sufficient. Failure to read entropy is treated as an
42/// unrecoverable system error and panics; this matches the failure
43/// mode of `rand::thread_rng()` and is acceptable for an encode call
44/// because no key material has been emitted at the point of failure.
45fn fresh_chunk_set_id() -> u32 {
46    let mut buf = [0u8; 4];
47    getrandom::getrandom(&mut buf).expect("OS CSPRNG must be available for mk1 encode");
48    u32::from_be_bytes(buf) & MAX_CHUNK_SET_ID
49}
50
51/// Encode a `KeyCard` into one or more `mk1`-prefixed strings.
52///
53/// Multi-chunk encodings draw a fresh 20-bit `chunk_set_id` from the
54/// system CSPRNG (`OsRng`). Use [`encode_with_chunk_set_id`] to pin the
55/// value for deterministic output (vector regeneration, conformance tests).
56pub fn encode(card: &KeyCard) -> Result<Vec<String>> {
57    let bytecode = encode_bytecode(card)?;
58    encode_bytecode_stream(&bytecode, None)
59}
60
61/// Like [`encode`], but with an explicit `chunk_set_id` override.
62///
63/// `chunk_set_id` MUST fit in 20 bits (`0..=0x000F_FFFF`); otherwise
64/// returns [`Error::ChunkedHeaderMalformed`]. The override is only
65/// consulted on the chunked path; single-string encodings have no
66/// `chunk_set_id` field, so the value is silently ignored.
67pub fn encode_with_chunk_set_id(card: &KeyCard, chunk_set_id: u32) -> Result<Vec<String>> {
68    let bytecode = encode_bytecode(card)?;
69    encode_bytecode_stream(&bytecode, Some(chunk_set_id))
70}
71
72fn encode_bytecode_stream(bytecode: &[u8], chunk_set_id: Option<u32>) -> Result<Vec<String>> {
73    if bytecode.len() <= SINGLE_STRING_LONG_BYTES {
74        // SingleString path: 2-symbol header + bytes_to_5bit(bytecode).
75        let header = StringLayerHeader::SingleString {
76            version: VERSION_V0_1,
77        };
78        let mut data_5bit = header.to_5bit_symbols();
79        data_5bit.extend(bytes_to_5bit(bytecode));
80        let s = encode_5bit_to_string(&data_5bit)?;
81        return Ok(vec![s]);
82    }
83
84    // Chunked path: derive (or use override) chunk_set_id, then split.
85    let csid = match chunk_set_id {
86        Some(v) => {
87            if v > MAX_CHUNK_SET_ID {
88                return Err(Error::ChunkedHeaderMalformed(format!(
89                    "chunk_set_id {v:#x} exceeds 20-bit field"
90                )));
91            }
92            v
93        }
94        None => fresh_chunk_set_id(),
95    };
96
97    let chunks = split_into_chunks(bytecode, csid)?;
98    let mut strings = Vec::with_capacity(chunks.len());
99    for chunk in chunks {
100        let mut data_5bit = chunk.header.to_5bit_symbols();
101        data_5bit.extend(bytes_to_5bit(&chunk.fragment));
102        strings.push(encode_5bit_to_string(&data_5bit)?);
103    }
104    Ok(strings)
105}
106
107/// Decode one or more `mk1`-prefixed strings into a `KeyCard`.
108///
109/// Supports both single-string and chunked inputs:
110/// - One string with `SingleString` header → decode bytecode directly.
111/// - One or more strings with `Chunked` headers → reassemble with
112///   cross-chunk-hash verification, then decode the bytecode.
113///
114/// Mixing `SingleString` and `Chunked` headers across a multi-string
115/// input is rejected with [`Error::MixedHeaderTypes`]. (An empty input
116/// list is rejected with [`Error::ChunkedHeaderMalformed`] — that's the
117/// "no input at all" case, distinct from "header types disagree.")
118pub fn decode(strings: &[&str]) -> Result<KeyCard> {
119    if strings.is_empty() {
120        return Err(Error::ChunkedHeaderMalformed(
121            "empty input string list".to_string(),
122        ));
123    }
124
125    // Decode each string at the BCH layer; collect (header, fragment_bytes).
126    let mut parsed: Vec<(StringLayerHeader, Vec<u8>)> = Vec::with_capacity(strings.len());
127    for s in strings {
128        let decoded = decode_string(s)?;
129        let data_5bit = decoded.data();
130        let (header, consumed) = StringLayerHeader::from_5bit_symbols(data_5bit)?;
131        let payload_5bit = &data_5bit[consumed..];
132        let fragment = five_bit_to_bytes(payload_5bit).ok_or(Error::MalformedPayloadPadding)?;
133        parsed.push((header, fragment));
134    }
135
136    let first_is_single = matches!(parsed[0].0, StringLayerHeader::SingleString { .. });
137    if first_is_single {
138        if parsed.len() != 1 {
139            return Err(Error::MixedHeaderTypes);
140        }
141        let (_, bytecode) = parsed.into_iter().next().expect("len == 1");
142        return decode_bytecode(&bytecode);
143    }
144
145    // Chunked path: consume all into ChunkFragment list and reassemble.
146    let chunks: Vec<ChunkFragment> = parsed
147        .into_iter()
148        .map(|(header, fragment)| ChunkFragment { header, fragment })
149        .collect();
150    let bytecode = reassemble_from_chunks(chunks)?;
151    decode_bytecode(&bytecode)
152}
153
154#[cfg(test)]
155mod tests {
156    use super::*;
157    use crate::bytecode::test_helpers::synthetic_xpub;
158    use bitcoin::bip32::{DerivationPath, Fingerprint};
159    use std::str::FromStr;
160
161    fn fixture_card_typical_chunked() -> KeyCard {
162        // 1 stub + std-table indicator + fingerprint + 73-byte compact xpub
163        // = 84 bytes; this exceeds SINGLE_STRING_LONG_BYTES (= 56) and
164        // therefore lands in the chunked path. (`xpub_compact` alone is
165        // already 73 bytes, so no realistic mk1 card fits in a single
166        // string — SingleString remains reachable only through hand-
167        // constructed sub-card test inputs.) The "singlestring_fits" name
168        // is historical and predates the closure-locked compact-73 form.
169        let path = DerivationPath::from_str("48'/0'/0'/2'").unwrap();
170        KeyCard {
171            policy_id_stubs: vec![[0x11, 0x22, 0x33, 0x44]],
172            origin_fingerprint: Some(Fingerprint::from([0xAA, 0xBB, 0xCC, 0xDD])),
173            origin_path: path.clone(),
174            xpub: synthetic_xpub(&path),
175        }
176    }
177
178    fn fixture_card_explicit_path_long() -> KeyCard {
179        // Explicit-path forces a longer bytecode; tests multi-chunk path
180        // explicitly even though typical cards already chunk.
181        let path = DerivationPath::from_str("9999'/1234'/56'/7'/0/1/2/3").unwrap();
182        KeyCard {
183            policy_id_stubs: vec![[0xDE, 0xAD, 0xBE, 0xEF]],
184            origin_fingerprint: Some(Fingerprint::from([0x01, 0x02, 0x03, 0x04])),
185            origin_path: path.clone(),
186            xpub: synthetic_xpub(&path),
187        }
188    }
189
190    #[test]
191    fn round_trip_typical_card_chunked() {
192        let card = fixture_card_typical_chunked();
193        let strings = encode_with_chunk_set_id(&card, 0x12345).unwrap();
194        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
195        let recovered = decode(&parts).unwrap();
196        assert_eq!(recovered, card);
197    }
198
199    #[test]
200    fn round_trip_explicit_path_chunked() {
201        let card = fixture_card_explicit_path_long();
202        let strings = encode_with_chunk_set_id(&card, 0xABCDE).unwrap();
203        assert!(strings.len() >= 2, "explicit-path card must chunk");
204        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
205        let recovered = decode(&parts).unwrap();
206        assert_eq!(recovered, card);
207    }
208
209    #[test]
210    fn deterministic_encoding_with_explicit_chunk_set_id() {
211        // encode_with_chunk_set_id MUST be byte-deterministic; this is the
212        // property Phase 6 vector regeneration depends on.
213        let card = fixture_card_typical_chunked();
214        let s1 = encode_with_chunk_set_id(&card, 0x12345).unwrap();
215        let s2 = encode_with_chunk_set_id(&card, 0x12345).unwrap();
216        assert_eq!(s1, s2);
217    }
218
219    #[test]
220    fn random_chunk_set_id_decodes_round_trip() {
221        // encode (CSPRNG-derived chunk_set_id) round-trips even though we
222        // don't pin the chunk_set_id value — the decoder doesn't care
223        // about the value, only that it's consistent across chunks.
224        let card = fixture_card_typical_chunked();
225        let strings = encode(&card).unwrap();
226        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
227        let recovered = decode(&parts).unwrap();
228        assert_eq!(recovered, card);
229    }
230
231    #[test]
232    fn random_chunk_set_id_fits_20_bits() {
233        // Inspect the produced strings' chunk_set_id field; assert it's
234        // masked to 20 bits, no spillover from a u32 RNG.
235        let card = fixture_card_typical_chunked();
236        let strings = encode(&card).unwrap();
237        // The first chunk's parsed header carries the chunk_set_id.
238        let s0 = &strings[0];
239        let decoded = decode_string(s0).unwrap();
240        let (header, _consumed) = StringLayerHeader::from_5bit_symbols(decoded.data()).unwrap();
241        match header {
242            StringLayerHeader::Chunked { chunk_set_id, .. } => {
243                assert!(
244                    chunk_set_id <= MAX_CHUNK_SET_ID,
245                    "chunk_set_id {chunk_set_id:#x} > 20-bit max"
246                );
247            }
248            StringLayerHeader::SingleString { .. } => {
249                // Card unexpectedly fit in single-string; nothing to check.
250            }
251        }
252    }
253
254    #[test]
255    fn encode_with_chunk_set_id_rejects_oversized_value() {
256        let card = fixture_card_typical_chunked();
257        let r = encode_with_chunk_set_id(&card, 0x10_0000);
258        assert!(matches!(r, Err(Error::ChunkedHeaderMalformed(_))));
259    }
260
261    #[test]
262    fn decode_rejects_chunk_set_id_mismatch() {
263        let card = fixture_card_typical_chunked();
264        let strings = encode_with_chunk_set_id(&card, 0x12345).unwrap();
265        // Re-encode under a different chunk_set_id and splice in chunk 1.
266        let other = encode_with_chunk_set_id(&card, 0x67890).unwrap();
267        let mixed: Vec<&str> = vec![strings[0].as_str(), other[1].as_str()];
268        assert!(matches!(decode(&mixed), Err(Error::ChunkSetIdMismatch)));
269    }
270
271    #[test]
272    fn decode_rejects_5_symbol_burst_in_last_chunk_data_part() {
273        // Perturb at the 5-bit-symbol layer of an already-encoded chunked
274        // string set (no fresh BCH-checksum computation on the perturbed
275        // payload — the decoder must reject or correct the original
276        // codeword's checksum against the modified data).
277        //
278        // BCH(108,93,8) (long) and BCH(93,80,8) (regular) both cover up
279        // to 4 substitutions exactly (`t = 4`); a 5-symbol burst always
280        // exceeds the correction radius. For the typical 84-byte card,
281        // the last chunk is the regular-code chunk (35-byte fragment →
282        // 64-symbol data part + 13-symbol checksum = 77 chars, in
283        // regular-code range), so the BCH-`t = 4` argument applies via
284        // BCH(93,80,8). The decoder must surface one of:
285        //
286        // - `Err(BchUncorrectable(_))` — BM/Forney can't fit a degree-≤4
287        //   error-locator polynomial; rejection is direct.
288        // - `Err(CrossChunkHashMismatch)` — BCH finds a wrong-but-valid
289        //   degree-≤4 fit, applies it, and yields a "corrected" payload
290        //   that decodes through structurally but whose recomputed
291        //   SHA-256 disagrees with the recovered trailing hash.
292        //
293        // Both are acceptable — the property under test is "this
294        // perturbation was caught," not "caught via a specific variant."
295        // Earlier (v0.1.0) test perturbed at the byte level and recomputed
296        // the BCH checksum, which sidestepped the BCH-decode path entirely
297        // and only ever exercised the cross-chunk-hash rejection. The
298        // new test exercises both decoder rejection paths and proves the
299        // 5-symbol-burst > BCH-`t = 4` discipline holds.
300        let card = fixture_card_typical_chunked();
301        let strings = encode_with_chunk_set_id(&card, 0).unwrap();
302        assert!(
303            strings.len() >= 2,
304            "fixture must produce a multi-chunk encoding"
305        );
306
307        // Perturb 5 consecutive characters in the LAST chunk's data part,
308        // **past the 8-symbol chunked header**. The 8-symbol chunked
309        // header occupies string char-indices 3..11 (after the 3-char
310        // `mk1` HRP+separator); the bytecode-fragment region begins at
311        // char-index 11. We perturb char-indices 11..16 — the first 5
312        // fragment symbols. This places the burst inside the bytecode-
313        // fragment region (5 fragment symbols = 25 bits ≈ 3 bytes of
314        // fragment data, which for the typical 84-byte card maps to
315        // bytecode bytes 53..56), so any wrong-but-valid BCH correction
316        // produces corrupted bytecode whose recomputed SHA-256 mismatches
317        // the unperturbed trailing hash → `CrossChunkHashMismatch`.
318        //
319        // Restricting the burst to the post-header region rules out
320        // header-decode rejection paths
321        // (`UnsupportedVersion`, `UnsupportedCardType`,
322        // `ChunkedHeaderMalformed`, `ChunkSetIdMismatch`) that BCH
323        // could otherwise produce by "correcting" 5 errors in the
324        // header into a malformed-but-parseable header.
325        let mut perturbed = strings.last().expect("multi-chunk fixture").clone();
326        let mut chars: Vec<char> = perturbed.chars().collect();
327        // Char-indices 11..16 (5 chars) — past the 3-char `mk1` prefix
328        // and past the 8-symbol chunked header (string indices 3..11).
329        for c in chars.iter_mut().take(16).skip(11) {
330            // Substitute with a different bech32 char to guarantee a
331            // non-zero 5-bit XOR at each position. 'q' is the value-0
332            // symbol; any other char gives a non-zero perturbation.
333            *c = if *c == 'q' { 'p' } else { 'q' };
334        }
335        perturbed = chars.into_iter().collect();
336
337        let mut perturbed_strings: Vec<String> = strings[..strings.len() - 1].to_vec();
338        perturbed_strings.push(perturbed);
339        let parts: Vec<&str> = perturbed_strings.iter().map(|s| s.as_str()).collect();
340
341        match decode(&parts) {
342            Err(Error::CrossChunkHashMismatch) | Err(Error::BchUncorrectable(_)) => (),
343            other => panic!(
344                "5-symbol burst must produce CrossChunkHashMismatch or BchUncorrectable, \
345                 got {other:?}"
346            ),
347        }
348    }
349
350    /// Build a synthetic `SingleString`-shaped mk1 string from arbitrary
351    /// bytecode bytes. v0.1 encoders never emit `SingleString` (smallest
352    /// valid bytecode = 80 bytes > 56-byte single-string capacity per
353    /// SPEC §2.4), so this helper exists purely for tests that need a
354    /// `SingleString`-headered string to exercise the header-types-
355    /// disagree rejection paths.
356    fn synthetic_singlestring(bytecode: &[u8]) -> String {
357        let header = StringLayerHeader::SingleString {
358            version: VERSION_V0_1,
359        };
360        let mut data_5bit = header.to_5bit_symbols();
361        data_5bit.extend(bytes_to_5bit(bytecode));
362        encode_5bit_to_string(&data_5bit).expect("synthetic singlestring encode")
363    }
364
365    #[test]
366    fn decode_rejects_singlestring_then_chunked() {
367        // Forward direction: first string carries a `SingleString` header,
368        // additional strings follow. `pipeline::decode` catches this in
369        // its early branch (`first_is_single && parsed.len() != 1`) and
370        // returns `MixedHeaderTypes` (was `ChunkedHeaderMalformed` in
371        // v0.1.0; renamed in v0.1.1 for precise discrimination).
372        let single = synthetic_singlestring(&[0x42u8; 8]);
373        let card = fixture_card_typical_chunked();
374        let chunked = encode_with_chunk_set_id(&card, 0).unwrap();
375        let parts: Vec<&str> = vec![single.as_str(), chunked[0].as_str()];
376        assert!(matches!(decode(&parts), Err(Error::MixedHeaderTypes)));
377    }
378
379    #[test]
380    fn decode_rejects_chunked_then_singlestring() {
381        // Reverse direction: first chunk is `Chunked` (so `pipeline::decode`
382        // falls into the chunked branch), but a later chunk is
383        // `SingleString`. `chunk::reassemble_from_chunks` catches this
384        // in its per-chunk loop and returns `MixedHeaderTypes`. Symmetric
385        // to the forward-direction case above.
386        let card = fixture_card_typical_chunked();
387        let mut strings = encode_with_chunk_set_id(&card, 0).unwrap();
388        assert!(strings.len() >= 2, "fixture must produce ≥ 2 chunks");
389        // Replace chunk[1] (a Chunked header) with a synthetic SingleString.
390        // The Chunked chunk[0] declares total_chunks = strings.len(), so
391        // the chunk-count check in reassemble_from_chunks passes and the
392        // loop reaches the SingleString chunk's match arm.
393        strings[1] = synthetic_singlestring(&[0xAAu8; 8]);
394        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
395        assert!(matches!(decode(&parts), Err(Error::MixedHeaderTypes)));
396    }
397
398    #[test]
399    fn decode_rejects_singlestring_padding_bits_nonzero() {
400        // Construct a SingleString-style mk1 string whose 5-bit payload
401        // doesn't byte-align (trailing pad bits non-zero).
402        // Use a bytecode of 1 byte, then pad with a stray 5-bit symbol that
403        // sets the pad bits non-zero.
404        let header = StringLayerHeader::SingleString {
405            version: VERSION_V0_1,
406        };
407        // 1 byte (e.g., 0x00) → 2 5-bit symbols (00, 00).  Adding a third
408        // 5-bit symbol with non-zero low 2 bits inflates the data to 3
409        // payload symbols whose final pad bits are non-zero.
410        let mut data_5bit = header.to_5bit_symbols();
411        data_5bit.extend([0u8, 0u8, 0b00011u8]); // last symbol's low 2 bits = 11
412        let s = encode_5bit_to_string(&data_5bit).unwrap();
413        let r = decode(&[&s]);
414        assert!(matches!(r, Err(Error::MalformedPayloadPadding)));
415    }
416
417    #[test]
418    fn decode_rejects_empty_input() {
419        assert!(matches!(decode(&[]), Err(Error::ChunkedHeaderMalformed(_))));
420    }
421}