mk_codec/string_layer/pipeline.rs
1//! Public encode/decode entry points: `KeyCard` ↔ `Vec<String>`.
2//!
3//! The encoder is the layer-3 boundary at which the canonical bytecode
4//! produced by [`crate::bytecode::encode_bytecode`] is wrapped in
5//! BCH-checksummed mk1 strings. Two emission paths:
6//!
7//! - **Single string** — bytecode fits in
8//! [`crate::consts::SINGLE_STRING_LONG_BYTES`] (= 56). Emits a single
9//! `mk1`-prefixed string with a 2-symbol header and no cross-chunk hash.
10//! - **Chunked** — bytecode exceeds the single-string ceiling. Appends the
11//! 4-byte `cross_chunk_hash`, splits the resulting stream into chunks
12//! of at most [`crate::consts::CHUNKED_FRAGMENT_LONG_BYTES`] (= 53)
13//! bytes, and emits one `mk1` string per chunk with an 8-symbol header.
14//!
15//! v0.1 emit policy: each emitted string's per-chunk BCH code variant
16//! (regular vs long) is auto-selected by
17//! [`crate::string_layer::bch::encode_5bit_to_string`] from the resulting
18//! 5-bit-symbol data-part length. For typical mk1 cards (≈84 bytes
19//! bytecode → 88-byte stream → fragments of 53 + 35 bytes), this means
20//! chunk 0 lands in long-code territory and the trailing short chunk
21//! falls back to regular code. Decoders accept either per-chunk
22//! variant — mixed-code emit is wire-permitted by design.
23
24use crate::bytecode::{decode_bytecode, encode_bytecode};
25use crate::consts::SINGLE_STRING_LONG_BYTES;
26use crate::error::{Error, Result};
27use crate::key_card::KeyCard;
28use crate::string_layer::bch::{
29 bytes_to_5bit, decode_string, encode_5bit_to_string, five_bit_to_bytes,
30};
31use crate::string_layer::chunk::{ChunkFragment, reassemble_from_chunks, split_into_chunks};
32use crate::string_layer::header::{MAX_CHUNK_SET_ID, StringLayerHeader, VERSION_V0_1};
33
34/// Draw a fresh 20-bit `chunk_set_id` from the system CSPRNG via
35/// [`getrandom`]. The OS entropy source is used to avoid pulling a
36/// full RNG framework into the codec — `getrandom` is the same crate
37/// that backs `rand`'s `OsRng`, so the entropy quality is identical.
38///
39/// Per closure Q-5, the `chunk_set_id` is opaque and only used for
40/// reassembly mismatch detection, so any uniformly-distributed 20-bit
41/// value is sufficient. Failure to read entropy is treated as an
42/// unrecoverable system error and panics; this matches the failure
43/// mode of `rand::thread_rng()` and is acceptable for an encode call
44/// because no key material has been emitted at the point of failure.
45fn fresh_chunk_set_id() -> u32 {
46 let mut buf = [0u8; 4];
47 getrandom::getrandom(&mut buf).expect("OS CSPRNG must be available for mk1 encode");
48 u32::from_be_bytes(buf) & MAX_CHUNK_SET_ID
49}
50
51/// Encode a `KeyCard` into one or more `mk1`-prefixed strings.
52///
53/// Multi-chunk encodings draw a fresh 20-bit `chunk_set_id` from the
54/// system CSPRNG (`OsRng`). Use [`encode_with_chunk_set_id`] to pin the
55/// value for deterministic output (vector regeneration, conformance tests).
56pub fn encode(card: &KeyCard) -> Result<Vec<String>> {
57 let bytecode = encode_bytecode(card)?;
58 encode_bytecode_stream(&bytecode, None)
59}
60
61/// Like [`encode`], but with an explicit `chunk_set_id` override.
62///
63/// `chunk_set_id` MUST fit in 20 bits (`0..=0x000F_FFFF`); otherwise
64/// returns [`Error::ChunkedHeaderMalformed`]. The override is only
65/// consulted on the chunked path; single-string encodings have no
66/// `chunk_set_id` field, so the value is silently ignored.
67pub fn encode_with_chunk_set_id(card: &KeyCard, chunk_set_id: u32) -> Result<Vec<String>> {
68 let bytecode = encode_bytecode(card)?;
69 encode_bytecode_stream(&bytecode, Some(chunk_set_id))
70}
71
72fn encode_bytecode_stream(bytecode: &[u8], chunk_set_id: Option<u32>) -> Result<Vec<String>> {
73 if bytecode.len() <= SINGLE_STRING_LONG_BYTES {
74 // SingleString path: 2-symbol header + bytes_to_5bit(bytecode).
75 let header = StringLayerHeader::SingleString {
76 version: VERSION_V0_1,
77 };
78 let mut data_5bit = header.to_5bit_symbols();
79 data_5bit.extend(bytes_to_5bit(bytecode));
80 let s = encode_5bit_to_string(&data_5bit)?;
81 return Ok(vec![s]);
82 }
83
84 // Chunked path: derive (or use override) chunk_set_id, then split.
85 let csid = match chunk_set_id {
86 Some(v) => {
87 if v > MAX_CHUNK_SET_ID {
88 return Err(Error::ChunkedHeaderMalformed(format!(
89 "chunk_set_id {v:#x} exceeds 20-bit field"
90 )));
91 }
92 v
93 }
94 None => fresh_chunk_set_id(),
95 };
96
97 let chunks = split_into_chunks(bytecode, csid)?;
98 let mut strings = Vec::with_capacity(chunks.len());
99 for chunk in chunks {
100 let mut data_5bit = chunk.header.to_5bit_symbols();
101 data_5bit.extend(bytes_to_5bit(&chunk.fragment));
102 strings.push(encode_5bit_to_string(&data_5bit)?);
103 }
104 Ok(strings)
105}
106
107/// Decode one or more `mk1`-prefixed strings into a `KeyCard`.
108///
109/// Supports both single-string and chunked inputs:
110/// - One string with `SingleString` header → decode bytecode directly.
111/// - One or more strings with `Chunked` headers → reassemble with
112/// cross-chunk-hash verification, then decode the bytecode.
113///
114/// Mixing `SingleString` and `Chunked` headers across a multi-string
115/// input is rejected with [`Error::MixedHeaderTypes`]. (An empty input
116/// list is rejected with [`Error::ChunkedHeaderMalformed`] — that's the
117/// "no input at all" case, distinct from "header types disagree.")
118pub fn decode(strings: &[&str]) -> Result<KeyCard> {
119 if strings.is_empty() {
120 return Err(Error::ChunkedHeaderMalformed(
121 "empty input string list".to_string(),
122 ));
123 }
124
125 // Decode each string at the BCH layer; collect (header, fragment_bytes).
126 let mut parsed: Vec<(StringLayerHeader, Vec<u8>)> = Vec::with_capacity(strings.len());
127 for s in strings {
128 let decoded = decode_string(s)?;
129 let data_5bit = decoded.data();
130 let (header, consumed) = StringLayerHeader::from_5bit_symbols(data_5bit)?;
131 let payload_5bit = &data_5bit[consumed..];
132 let fragment = five_bit_to_bytes(payload_5bit).ok_or(Error::MalformedPayloadPadding)?;
133 parsed.push((header, fragment));
134 }
135
136 let first_is_single = matches!(parsed[0].0, StringLayerHeader::SingleString { .. });
137 if first_is_single {
138 if parsed.len() != 1 {
139 return Err(Error::MixedHeaderTypes);
140 }
141 let (_, bytecode) = parsed.into_iter().next().expect("len == 1");
142 return decode_bytecode(&bytecode);
143 }
144
145 // Chunked path: consume all into ChunkFragment list and reassemble.
146 let chunks: Vec<ChunkFragment> = parsed
147 .into_iter()
148 .map(|(header, fragment)| ChunkFragment { header, fragment })
149 .collect();
150 let bytecode = reassemble_from_chunks(chunks)?;
151 decode_bytecode(&bytecode)
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bytecode::test_helpers::synthetic_xpub;
    use bitcoin::bip32::{DerivationPath, Fingerprint};
    use std::str::FromStr;

    /// Typical card fixture; its bytecode always takes the chunked path.
    fn fixture_card_typical_chunked() -> KeyCard {
        // 1 stub + std-table indicator + fingerprint + 73-byte compact xpub
        // = 84 bytes; this exceeds SINGLE_STRING_LONG_BYTES (= 56) and
        // therefore lands in the chunked path. (`xpub_compact` alone is
        // already 73 bytes, so no realistic mk1 card fits in a single
        // string — SingleString remains reachable only through hand-
        // constructed sub-card test inputs.)
        //
        // Historical note: this fixture used to carry a "singlestring_fits"
        // name, which predates the closure-locked compact-73 form and no
        // longer describes it.
        let path = DerivationPath::from_str("48'/0'/0'/2'").unwrap();
        KeyCard {
            policy_id_stubs: vec![[0x11, 0x22, 0x33, 0x44]],
            origin_fingerprint: Some(Fingerprint::from([0xAA, 0xBB, 0xCC, 0xDD])),
            origin_path: path.clone(),
            xpub: synthetic_xpub(&path),
        }
    }

    /// Card fixture with an explicit (non-table) origin path, giving a
    /// longer bytecode than the typical fixture.
    fn fixture_card_explicit_path_long() -> KeyCard {
        // Explicit-path forces a longer bytecode; tests multi-chunk path
        // explicitly even though typical cards already chunk.
        let path = DerivationPath::from_str("9999'/1234'/56'/7'/0/1/2/3").unwrap();
        KeyCard {
            policy_id_stubs: vec![[0xDE, 0xAD, 0xBE, 0xEF]],
            origin_fingerprint: Some(Fingerprint::from([0x01, 0x02, 0x03, 0x04])),
            origin_path: path.clone(),
            xpub: synthetic_xpub(&path),
        }
    }

    #[test]
    fn round_trip_typical_card_chunked() {
        let card = fixture_card_typical_chunked();
        let strings = encode_with_chunk_set_id(&card, 0x12345).unwrap();
        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
        let recovered = decode(&parts).unwrap();
        assert_eq!(recovered, card);
    }

    #[test]
    fn round_trip_explicit_path_chunked() {
        let card = fixture_card_explicit_path_long();
        let strings = encode_with_chunk_set_id(&card, 0xABCDE).unwrap();
        assert!(strings.len() >= 2, "explicit-path card must chunk");
        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
        let recovered = decode(&parts).unwrap();
        assert_eq!(recovered, card);
    }

    #[test]
    fn deterministic_encoding_with_explicit_chunk_set_id() {
        // encode_with_chunk_set_id MUST be byte-deterministic; this is the
        // property Phase 6 vector regeneration depends on.
        let card = fixture_card_typical_chunked();
        let s1 = encode_with_chunk_set_id(&card, 0x12345).unwrap();
        let s2 = encode_with_chunk_set_id(&card, 0x12345).unwrap();
        assert_eq!(s1, s2);
    }

    #[test]
    fn random_chunk_set_id_decodes_round_trip() {
        // encode (CSPRNG-derived chunk_set_id) round-trips even though we
        // don't pin the chunk_set_id value — the decoder doesn't care
        // about the value, only that it's consistent across chunks.
        let card = fixture_card_typical_chunked();
        let strings = encode(&card).unwrap();
        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
        let recovered = decode(&parts).unwrap();
        assert_eq!(recovered, card);
    }

    #[test]
    fn random_chunk_set_id_fits_20_bits() {
        // Inspect the produced strings' chunk_set_id field; assert it's
        // masked to 20 bits, no spillover from a u32 RNG.
        let card = fixture_card_typical_chunked();
        let strings = encode(&card).unwrap();
        // The first chunk's parsed header carries the chunk_set_id.
        let s0 = &strings[0];
        let decoded = decode_string(s0).unwrap();
        let (header, _consumed) = StringLayerHeader::from_5bit_symbols(decoded.data()).unwrap();
        match header {
            StringLayerHeader::Chunked { chunk_set_id, .. } => {
                assert!(
                    chunk_set_id <= MAX_CHUNK_SET_ID,
                    "chunk_set_id {chunk_set_id:#x} > 20-bit max"
                );
            }
            StringLayerHeader::SingleString { .. } => {
                // The fixture is built to exceed the single-string
                // ceiling. Reaching this arm would mean the test checked
                // nothing, so fail loudly instead of passing vacuously.
                panic!("typical fixture must take the chunked path, got a SingleString header");
            }
        }
    }

    #[test]
    fn encode_with_chunk_set_id_rejects_oversized_value() {
        let card = fixture_card_typical_chunked();
        let r = encode_with_chunk_set_id(&card, 0x10_0000);
        assert!(matches!(r, Err(Error::ChunkedHeaderMalformed(_))));
    }

    #[test]
    fn decode_rejects_chunk_set_id_mismatch() {
        let card = fixture_card_typical_chunked();
        let strings = encode_with_chunk_set_id(&card, 0x12345).unwrap();
        // Re-encode under a different chunk_set_id and splice in chunk 1.
        let other = encode_with_chunk_set_id(&card, 0x67890).unwrap();
        let mixed: Vec<&str> = vec![strings[0].as_str(), other[1].as_str()];
        assert!(matches!(decode(&mixed), Err(Error::ChunkSetIdMismatch)));
    }

    #[test]
    fn decode_rejects_5_symbol_burst_in_last_chunk_data_part() {
        // Perturb at the 5-bit-symbol layer of an already-encoded chunked
        // string set (no fresh BCH-checksum computation on the perturbed
        // payload — the decoder must reject or correct the original
        // codeword's checksum against the modified data).
        //
        // BCH(108,93,8) (long) and BCH(93,80,8) (regular) both cover up
        // to 4 substitutions exactly (`t = 4`); a 5-symbol burst always
        // exceeds the correction radius. For the typical 84-byte card,
        // the last chunk is the regular-code chunk (35-byte fragment →
        // 64-symbol data part + 13-symbol checksum = 77 chars, in
        // regular-code range), so the BCH-`t = 4` argument applies via
        // BCH(93,80,8). The decoder must surface one of:
        //
        // - `Err(BchUncorrectable(_))` — BM/Forney can't fit a degree-≤4
        //   error-locator polynomial; rejection is direct.
        // - `Err(CrossChunkHashMismatch)` — BCH finds a wrong-but-valid
        //   degree-≤4 fit, applies it, and yields a "corrected" payload
        //   that decodes through structurally but whose recomputed
        //   SHA-256 disagrees with the recovered trailing hash.
        //
        // Both are acceptable — the property under test is "this
        // perturbation was caught," not "caught via a specific variant."
        // Earlier (v0.1.0) test perturbed at the byte level and recomputed
        // the BCH checksum, which sidestepped the BCH-decode path entirely
        // and only ever exercised the cross-chunk-hash rejection. The
        // new test exercises both decoder rejection paths and proves the
        // 5-symbol-burst > BCH-`t = 4` discipline holds.
        let card = fixture_card_typical_chunked();
        let strings = encode_with_chunk_set_id(&card, 0).unwrap();
        assert!(
            strings.len() >= 2,
            "fixture must produce a multi-chunk encoding"
        );

        // Perturb 5 consecutive characters in the LAST chunk's data part,
        // **past the 8-symbol chunked header**. The 8-symbol chunked
        // header occupies string char-indices 3..11 (after the 3-char
        // `mk1` HRP+separator); the bytecode-fragment region begins at
        // char-index 11. We perturb char-indices 11..16 — the first 5
        // fragment symbols. This places the burst inside the bytecode-
        // fragment region (5 fragment symbols = 25 bits ≈ 3 bytes of
        // fragment data, which for the typical 84-byte card maps to
        // bytecode bytes 53..56), so any wrong-but-valid BCH correction
        // produces corrupted bytecode whose recomputed SHA-256 mismatches
        // the unperturbed trailing hash → `CrossChunkHashMismatch`.
        //
        // Restricting the burst to the post-header region rules out
        // header-decode rejection paths
        // (`UnsupportedVersion`, `UnsupportedCardType`,
        // `ChunkedHeaderMalformed`, `ChunkSetIdMismatch`) that BCH
        // could otherwise produce by "correcting" 5 errors in the
        // header into a malformed-but-parseable header.
        let last = strings.last().expect("multi-chunk fixture");
        let mut chars: Vec<char> = last.chars().collect();
        // Char-indices 11..16 (5 chars) — past the 3-char `mk1` prefix
        // and past the 8-symbol chunked header (string indices 3..11).
        for c in chars.iter_mut().take(16).skip(11) {
            // Substitute with a different bech32 char to guarantee a
            // non-zero 5-bit XOR at each position. 'q' is the value-0
            // symbol; any other char gives a non-zero perturbation.
            *c = if *c == 'q' { 'p' } else { 'q' };
        }
        let perturbed: String = chars.into_iter().collect();

        // All chunks but the last stay untouched; the last is swapped for
        // its perturbed copy.
        let mut perturbed_strings: Vec<String> = strings[..strings.len() - 1].to_vec();
        perturbed_strings.push(perturbed);
        let parts: Vec<&str> = perturbed_strings.iter().map(|s| s.as_str()).collect();

        match decode(&parts) {
            Err(Error::CrossChunkHashMismatch) | Err(Error::BchUncorrectable(_)) => (),
            other => panic!(
                "5-symbol burst must produce CrossChunkHashMismatch or BchUncorrectable, \
                 got {other:?}"
            ),
        }
    }

    /// Build a synthetic `SingleString`-shaped mk1 string from arbitrary
    /// bytecode bytes. v0.1 encoders never emit `SingleString` (smallest
    /// valid bytecode = 80 bytes > 56-byte single-string capacity per
    /// SPEC §2.4), so this helper exists purely for tests that need a
    /// `SingleString`-headered string to exercise the header-types-
    /// disagree rejection paths.
    fn synthetic_singlestring(bytecode: &[u8]) -> String {
        let header = StringLayerHeader::SingleString {
            version: VERSION_V0_1,
        };
        let mut data_5bit = header.to_5bit_symbols();
        data_5bit.extend(bytes_to_5bit(bytecode));
        encode_5bit_to_string(&data_5bit).expect("synthetic singlestring encode")
    }

    #[test]
    fn decode_rejects_singlestring_then_chunked() {
        // Forward direction: first string carries a `SingleString` header,
        // additional strings follow. `pipeline::decode` catches this in
        // its early branch (first header is `SingleString` while more than
        // one string was supplied) and returns `MixedHeaderTypes` (was
        // `ChunkedHeaderMalformed` in v0.1.0; renamed in v0.1.1 for
        // precise discrimination).
        let single = synthetic_singlestring(&[0x42u8; 8]);
        let card = fixture_card_typical_chunked();
        let chunked = encode_with_chunk_set_id(&card, 0).unwrap();
        let parts: Vec<&str> = vec![single.as_str(), chunked[0].as_str()];
        assert!(matches!(decode(&parts), Err(Error::MixedHeaderTypes)));
    }

    #[test]
    fn decode_rejects_chunked_then_singlestring() {
        // Reverse direction: first chunk is `Chunked` (so `pipeline::decode`
        // falls into the chunked branch), but a later chunk is
        // `SingleString`. `chunk::reassemble_from_chunks` catches this
        // in its per-chunk loop and returns `MixedHeaderTypes`. Symmetric
        // to the forward-direction case above.
        let card = fixture_card_typical_chunked();
        let mut strings = encode_with_chunk_set_id(&card, 0).unwrap();
        assert!(strings.len() >= 2, "fixture must produce ≥ 2 chunks");
        // Replace chunk[1] (a Chunked header) with a synthetic SingleString.
        // The Chunked chunk[0] declares total_chunks = strings.len(), so
        // the chunk-count check in reassemble_from_chunks passes and the
        // loop reaches the SingleString chunk's match arm.
        strings[1] = synthetic_singlestring(&[0xAAu8; 8]);
        let parts: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
        assert!(matches!(decode(&parts), Err(Error::MixedHeaderTypes)));
    }

    #[test]
    fn decode_rejects_singlestring_padding_bits_nonzero() {
        // Construct a SingleString-style mk1 string whose 5-bit payload
        // doesn't byte-align cleanly: the bits left over after the last
        // whole byte are non-zero.
        let header = StringLayerHeader::SingleString {
            version: VERSION_V0_1,
        };
        // Payload = 3 symbols = 15 bits = one whole byte (0x00) plus 7
        // leftover bits `00_00011`, which contain the two set bits of the
        // final symbol.
        // NOTE(review): with 7 leftover bits, `five_bit_to_bytes` may be
        // rejecting on "too many pad bits" rather than "non-zero pad
        // bits" — confirm against its implementation; a 2-symbol payload
        // `[0, 0b00011]` would isolate the non-zero-pad rule.
        let mut data_5bit = header.to_5bit_symbols();
        data_5bit.extend([0u8, 0u8, 0b00011u8]); // last symbol's low 2 bits = 11
        let s = encode_5bit_to_string(&data_5bit).unwrap();
        let r = decode(&[&s]);
        assert!(matches!(r, Err(Error::MalformedPayloadPadding)));
    }

    #[test]
    fn decode_rejects_empty_input() {
        assert!(matches!(decode(&[]), Err(Error::ChunkedHeaderMalformed(_))));
    }
}