mk_codec/string_layer/bch.rs
1//! BCH primitives for the mk1 string layer: bech32 alphabet conversion and
2//! syndrome-based error correction.
3//!
4//! Forked from `md-codec` v0.4.x (`crates/md-codec/src/encoding.rs`) at the
5//! start of the mk1 v0.1 implementation per `design/DECISIONS.md` D-13. The
6//! BCH polynomials and field arithmetic are shared with the sibling md1
7//! format (both reuse BIP 93's `BCH(93,80,8)` regular code and
8//! `BCH(108,93,8)` long code); the only mk1-specific knobs are the HRP
9//! (`"mk"`) and the NUMS-derived target residues ([`crate::consts::MK_REGULAR_CONST`]
10//! / [`crate::consts::MK_LONG_CONST`]).
11//!
12//! Unlike md-codec's encoding module, this file does **not** expose a
13//! top-level `encode_string` / `decode_string`: mk1's string-layer header
14//! lives at the 5-bit symbol layer (per closure Q-5 — 2 symbols for
15//! `SingleString`, 8 symbols for `Chunked`) rather than the byte-aligned
16//! layer md1 uses. The mk1 `string_layer/mod.rs` builds string-level
17//! encode/decode on top of the BCH primitives here.
18
19use super::bch_decode;
20use crate::consts::{HRP, MK_LONG_CONST, MK_REGULAR_CONST};
21
22/// Which BCH code variant a string uses.
23///
24/// Determined by the total data-part length: regular for ≤93 chars,
25/// long for 96–108 chars. Lengths 94–95 are reserved-invalid.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
27pub enum BchCode {
28 /// Regular code: BCH(93,80,8). 13-char checksum.
29 Regular,
30 /// Long code: BCH(108,93,8). 15-char checksum.
31 Long,
32}
33
34/// The bech32 32-character alphabet, in 5-bit-value order.
35///
36/// `q=0, p=1, z=2, r=3, y=4, 9=5, x=6, 8=7, g=8, f=9, 2=10, t=11, v=12,
37/// d=13, w=14, 0=15, s=16, 3=17, j=18, n=19, 5=20, 4=21, k=22, h=23,
38/// c=24, e=25, 6=26, m=27, u=28, a=29, 7=30, l=31`.
39pub const ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
40
41/// Inverse lookup: char (lowercase ASCII) -> 5-bit value, or 0xFF if not in alphabet.
42const ALPHABET_INV: [u8; 128] = build_alphabet_inv();
43
44const fn build_alphabet_inv() -> [u8; 128] {
45 let mut inv = [0xFFu8; 128];
46 let mut i = 0;
47 while i < 32 {
48 inv[ALPHABET[i] as usize] = i as u8;
49 i += 1;
50 }
51 inv
52}
53
54/// Convert a sequence of 8-bit bytes to a sequence of 5-bit values
55/// (padded with zero bits at the end if the bit count is not a multiple of 5).
56pub fn bytes_to_5bit(bytes: &[u8]) -> Vec<u8> {
57 let mut acc: u32 = 0;
58 let mut bits = 0u32;
59 let mut out = Vec::with_capacity((bytes.len() * 8).div_ceil(5));
60 for &b in bytes {
61 acc = (acc << 8) | b as u32;
62 bits += 8;
63 while bits >= 5 {
64 bits -= 5;
65 out.push(((acc >> bits) & 0x1F) as u8);
66 }
67 }
68 if bits > 0 {
69 out.push(((acc << (5 - bits)) & 0x1F) as u8);
70 }
71 out
72}
73
74/// Convert a sequence of 5-bit values back to 8-bit bytes.
75///
76/// Returns `None` if any value in `values` is ≥ 32 (out of 5-bit range),
77/// or if the trailing padding bits are non-zero.
78pub fn five_bit_to_bytes(values: &[u8]) -> Option<Vec<u8>> {
79 let mut acc: u32 = 0;
80 let mut bits = 0u32;
81 let mut out = Vec::with_capacity(values.len() * 5 / 8);
82 for &v in values {
83 if v >= 32 {
84 return None;
85 }
86 acc = (acc << 5) | v as u32;
87 bits += 5;
88 if bits >= 8 {
89 bits -= 8;
90 out.push(((acc >> bits) & 0xFF) as u8);
91 }
92 }
93 // Any remaining bits must be zero (padding).
94 if bits >= 5 {
95 return None;
96 }
97 if (acc & ((1 << bits) - 1)) != 0 {
98 return None;
99 }
100 Some(out)
101}
102
103/// The bech32 separator character between HRP and data-part (BIP 173 §3).
104///
105/// Re-exported by [`crate::consts::HRP`] is `"mk"`; this module's
106/// BCH-checksum helpers consume the HRP through their `hrp` parameter so
107/// that the same primitives can verify any single-HRP codex32-derived
108/// string. Production callers MUST pass [`crate::consts::HRP`].
109pub const SEPARATOR: char = '1';
110
111/// Determine the BchCode variant from a total data-part length.
112///
113/// Boundaries are from BIP 93 (codex32): regular code `BCH(93,80,8)` caps at 93,
114/// long code `BCH(108,93,8)` runs 96–108, and lengths 94–95 are explicitly
115/// reserved-invalid to prevent ambiguity in code-variant selection. Lengths
116/// below 14 or above 108 are also rejected.
117pub fn bch_code_for_length(data_part_len: usize) -> Option<BchCode> {
118 match data_part_len {
119 14..=93 => Some(BchCode::Regular),
120 94..=95 => None,
121 96..=108 => Some(BchCode::Long),
122 _ => None,
123 }
124}
125
126/// Check whether a string is all-lowercase, all-uppercase, or mixed.
127///
128/// Only ASCII letters are considered; non-ASCII characters (digits, punctuation,
129/// Unicode letters) are treated as neither case. This is appropriate for MD
130/// strings, whose alphabet is a subset of ASCII. An empty string or one with
131/// no ASCII letters returns [`CaseStatus::Lower`].
132pub fn case_check(s: &str) -> CaseStatus {
133 let mut has_lower = false;
134 let mut has_upper = false;
135 for c in s.chars() {
136 if c.is_ascii_lowercase() {
137 has_lower = true;
138 } else if c.is_ascii_uppercase() {
139 has_upper = true;
140 }
141 if has_lower && has_upper {
142 break;
143 }
144 }
145 match (has_lower, has_upper) {
146 (true, true) => CaseStatus::Mixed,
147 (true, false) => CaseStatus::Lower,
148 (false, true) => CaseStatus::Upper,
149 (false, false) => CaseStatus::Lower, // empty / no letters; treat as lower
150 }
151}
152
153/// Result of a case check.
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum CaseStatus {
156 /// All-lowercase or no letters.
157 Lower,
158 /// All-uppercase.
159 Upper,
160 /// Both lowercase and uppercase letters present (invalid).
161 Mixed,
162}
163
164/// BCH polymod constants for the regular checksum (BCH(93,80,8)).
165///
166/// Source: BIP 93 (codex32) reference implementation, `ms32_polymod` function.
167/// These five values are XORed into the running residue based on the top 5 bits
168/// of the residue at each step. The polymod operation uses a 65-bit residue
169/// (top 5 bits = current `b`, bottom 60 bits = masked state).
170///
171/// Verified against the canonical reference at
172/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
173pub const GEN_REGULAR: [u128; 5] = [
174 0x19dc500ce73fde210,
175 0x1bfae00def77fe529,
176 0x1fbd920fffe7bee52,
177 0x1739640bdeee3fdad,
178 0x07729a039cfc75f5a,
179];
180
181/// Initial residue value for both the regular and long polymod algorithms (BIP 93).
182///
183/// Both `ms32_polymod` and `ms32_long_polymod` start with this residue before
184/// processing any input characters.
185pub const POLYMOD_INIT: u128 = 0x23181b3;
186
187/// Right-shift amount to extract the top 5 bits from a 65-bit regular-code residue.
188///
189/// Usage: `b = residue >> REGULAR_SHIFT` gives the 5-bit feedback selector
190/// for the polymod algorithm.
191pub const REGULAR_SHIFT: u32 = 60;
192
193/// Mask preserving the low 60 bits of a 65-bit regular-code residue.
194pub const REGULAR_MASK: u128 = 0x0fffffffffffffff;
195
196/// BCH polymod constants for the long checksum (BCH(108,93,8)).
197///
198/// Source: BIP 93 (codex32) reference implementation, `ms32_long_polymod` function.
199/// The long polymod uses a 75-bit residue (top 5 bits = `b`, bottom 70 bits = masked state).
200///
201/// Verified against the canonical reference at
202/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
203pub const GEN_LONG: [u128; 5] = [
204 0x3d59d273535ea62d897,
205 0x7a9becb6361c6c51507,
206 0x543f9b7e6c38d8a2a0e,
207 0x0c577eaeccf1990d13c,
208 0x1887f74f8dc71b10651,
209];
210
211/// Right-shift amount to extract the top 5 bits from a 75-bit long-code residue.
212///
213/// Usage: `b = residue >> LONG_SHIFT` gives the 5-bit feedback selector
214/// for the polymod algorithm.
215pub const LONG_SHIFT: u32 = 70;
216
217/// Mask preserving the low 70 bits of a 75-bit long-code residue.
218pub const LONG_MASK: u128 = 0x3fffffffffffffffff;
219
220/// One step of the BCH polymod algorithm from BIP 93.
221///
222/// Updates the running `residue` to incorporate the next 5-bit input `value`
223/// using the polynomial defined by `gen`, shift width `shift`, and mask `mask`.
224/// The same function is used for both the regular and long codes; pass
225/// `(GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK)` for the regular code and
226/// `(GEN_LONG, LONG_SHIFT, LONG_MASK)` for the long code.
227///
228/// Returns the updated residue after incorporating `value`. The top 5 bits of
229/// the returned residue feed the next iteration's `b` selector.
230///
231/// This is a direct port of BIP 93's `ms32_polymod` / `ms32_long_polymod` inner
232/// loop. See <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki> .
233fn polymod_step(residue: u128, value: u128, r#gen: &[u128; 5], shift: u32, mask: u128) -> u128 {
234 let b = residue >> shift;
235 let mut new_residue = ((residue & mask) << 5) ^ value;
236 for (i, &g) in r#gen.iter().enumerate() {
237 if (b >> i) & 1 != 0 {
238 new_residue ^= g;
239 }
240 }
241 new_residue
242}
243
244/// BIP 173-style HRP-expansion: produces the 5-bit-symbol prelude that gets
245/// prepended to the data part before running the BCH polymod.
246///
247/// For each HRP character `c`, emits `c >> 5` (high 3 bits); then emits a
248/// single 0 separator; then emits each character's `c & 31` (low 5 bits).
249/// The result has length `2 * hrp.len() + 1` for ASCII HRPs.
250///
251/// For `hrp_expand("md")` this returns `[3, 3, 0, 13, 4]`.
252pub fn hrp_expand(hrp: &str) -> Vec<u8> {
253 let bytes = hrp.as_bytes();
254 let mut out = Vec::with_capacity(bytes.len() * 2 + 1);
255 for &c in bytes {
256 out.push(c >> 5);
257 }
258 out.push(0);
259 for &c in bytes {
260 out.push(c & 31);
261 }
262 out
263}
264
265/// Run polymod over a sequence of 5-bit values using the parameters for
266/// either the regular or long BCH code, starting from POLYMOD_INIT.
267///
268/// `pub(in crate::string_layer)` so the sibling `bch_decode` module's test
269/// suite can validate field arithmetic against the production primitive
270/// without duplicating it (test-helper drift would otherwise mask real
271/// regressions when both copies agree on a bug).
272pub(in crate::string_layer) fn polymod_run(
273 values: &[u8],
274 r#gen: &[u128; 5],
275 shift: u32,
276 mask: u128,
277) -> u128 {
278 let mut residue = POLYMOD_INIT;
279 for &v in values {
280 residue = polymod_step(residue, v as u128, r#gen, shift, mask);
281 }
282 residue
283}
284
285/// Compute the 13-character BCH checksum for the regular code over the
286/// HRP-expanded preamble plus the data part.
287///
288/// `data` is the sequence of 5-bit values for the data part (header + payload),
289/// not including the checksum. Returns the 13-element checksum array, ready
290/// to append to `data` to form the full data-part-plus-checksum.
291///
292/// The algorithm runs polymod over `hrp_expand(hrp) || data || [0; 13]`,
293/// then XORs the result with [`MK_REGULAR_CONST`] to extract the checksum.
294pub fn bch_create_checksum_regular(hrp: &str, data: &[u8]) -> [u8; 13] {
295 // Regular code: 13-symbol checksum (0..=12), pad/array/extraction all use 13.
296 let mut input = hrp_expand(hrp);
297 input.extend_from_slice(data);
298 input.extend(std::iter::repeat_n(0, 13));
299 let polymod = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
300 let mut out = [0u8; 13];
301 for (i, slot) in out.iter_mut().enumerate() {
302 *slot = ((polymod >> (5 * (12 - i))) & 0x1F) as u8;
303 }
304 out
305}
306
307/// Verify a regular-code BCH checksum.
308///
309/// `data_with_checksum` is the full data part including the trailing 13
310/// checksum characters. Returns `true` iff the polymod over
311/// `hrp_expand(hrp) || data_with_checksum` equals [`MK_REGULAR_CONST`].
312pub fn bch_verify_regular(hrp: &str, data_with_checksum: &[u8]) -> bool {
313 if data_with_checksum.len() < 13 {
314 return false;
315 }
316 let mut input = hrp_expand(hrp);
317 input.extend_from_slice(data_with_checksum);
318 polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) == MK_REGULAR_CONST
319}
320
321/// Compute the 15-character BCH checksum for the long code.
322///
323/// Same algorithm as [`bch_create_checksum_regular`] but uses the long-code
324/// polymod parameters (`GEN_LONG`, `LONG_SHIFT`, `LONG_MASK`) and target
325/// constant ([`MK_LONG_CONST`]). Produces a 15-element checksum array.
326pub fn bch_create_checksum_long(hrp: &str, data: &[u8]) -> [u8; 15] {
327 // Long code: 15-symbol checksum (0..=14), pad/array/extraction all use 15.
328 let mut input = hrp_expand(hrp);
329 input.extend_from_slice(data);
330 input.extend(std::iter::repeat_n(0, 15));
331 let polymod = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
332 let mut out = [0u8; 15];
333 for (i, slot) in out.iter_mut().enumerate() {
334 *slot = ((polymod >> (5 * (14 - i))) & 0x1F) as u8;
335 }
336 out
337}
338
339/// Verify a long-code BCH checksum.
340///
341/// Same algorithm as [`bch_verify_regular`] with long-code parameters.
342/// Returns false if `data_with_checksum` is shorter than 15 symbols.
343pub fn bch_verify_long(hrp: &str, data_with_checksum: &[u8]) -> bool {
344 if data_with_checksum.len() < 15 {
345 return false;
346 }
347 let mut input = hrp_expand(hrp);
348 input.extend_from_slice(data_with_checksum);
349 polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) == MK_LONG_CONST
350}
351
352/// Result of a successful BCH decode + correct attempt.
353///
354/// Returned by [`bch_correct_regular`] / [`bch_correct_long`] when correction
355/// succeeds. `corrections_applied == 0` means the input was already valid;
356/// `> 0` means substitutions were applied at the indicated positions.
357///
358/// Marked `#[non_exhaustive]` to allow future fields (e.g., confidence
359/// score, syndrome metadata) without breaking downstream struct-literal
360/// construction. Construct via the [`bch_correct_regular`] /
361/// [`bch_correct_long`] APIs.
362#[non_exhaustive]
363#[derive(Debug, Clone, PartialEq, Eq)]
364pub struct CorrectionResult {
365 /// The corrected `data_with_checksum` slice (input may have been modified).
366 pub data: Vec<u8>,
367 /// Number of substitutions applied (0 = clean input).
368 pub corrections_applied: usize,
369 /// Indices into `data` of the substituted positions.
370 pub corrected_positions: Vec<usize>,
371}
372
373/// Attempt to correct a regular-code BCH-checksummed string with up to four
374/// substitutions, the full t = 4 capacity of the BCH(93, 80, 8) code.
375///
376/// Implements the standard syndrome-based BCH decoder pipeline: syndrome
377/// computation in `GF(1024) = GF(32²)`, Berlekamp–Massey for the
378/// error-locator polynomial, Chien search for error positions, Forney's
379/// algorithm for error magnitudes. After applying the proposed corrections,
380/// the result is re-verified via [`bch_verify_regular`]; the decoder rejects
381/// any output that does not produce a valid codeword (defensive guard
382/// against pathological 5+-error inputs whose syndromes happen to factor as
383/// a degree-≤ 4 locator).
384///
385/// Returns `Ok(CorrectionResult)` if the input is clean or up to four
386/// substitutions repair it. Returns `Err(Error::BchUncorrectable)` otherwise.
387///
388/// # Algorithm details
389///
390/// See the private `bch_decode` submodule for the algorithm and the
391/// `GF(1024)` field representation.
392pub fn bch_correct_regular(
393 hrp: &str,
394 data_with_checksum: &[u8],
395) -> Result<CorrectionResult, crate::Error> {
396 if bch_verify_regular(hrp, data_with_checksum) {
397 return Ok(CorrectionResult {
398 data: data_with_checksum.to_vec(),
399 corrections_applied: 0,
400 corrected_positions: vec![],
401 });
402 }
403 // Compute polymod over hrp_expand(hrp) || data_with_checksum, XOR with
404 // the MD target constant. The result is congruent to the error
405 // polynomial E(x) modulo g_regular(x).
406 let mut input = hrp_expand(hrp);
407 input.extend_from_slice(data_with_checksum);
408 let residue = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
409
410 if let Some((positions, magnitudes)) =
411 bch_decode::decode_regular_errors(residue, data_with_checksum.len())
412 {
413 if positions.is_empty() {
414 // Should be unreachable (caller already verified); guard anyway.
415 return Ok(CorrectionResult {
416 data: data_with_checksum.to_vec(),
417 corrections_applied: 0,
418 corrected_positions: vec![],
419 });
420 }
421 let mut corrected = data_with_checksum.to_vec();
422 for (&p, &m) in positions.iter().zip(&magnitudes) {
423 if p >= corrected.len() {
424 return Err(crate::Error::BchUncorrectable(format!(
425 "decoder reported error position {p} outside data ({} symbols)",
426 corrected.len()
427 )));
428 }
429 corrected[p] ^= m;
430 }
431 // Defensive: re-verify. Catches the 5+-error edge case.
432 if bch_verify_regular(hrp, &corrected) {
433 return Ok(CorrectionResult {
434 corrections_applied: positions.len(),
435 corrected_positions: positions,
436 data: corrected,
437 });
438 }
439 }
440 Err(crate::Error::BchUncorrectable(
441 "regular code: more than 4 substitutions or pathological pattern".into(),
442 ))
443}
444
445/// Long-code analog of [`bch_correct_regular`].
446///
447/// Implements the same BM/Chien/Forney pipeline against the long-code
448/// generator polynomial, reaching the full t = 4 capacity of
449/// `BCH(108, 93, 8)`.
450pub fn bch_correct_long(
451 hrp: &str,
452 data_with_checksum: &[u8],
453) -> Result<CorrectionResult, crate::Error> {
454 if bch_verify_long(hrp, data_with_checksum) {
455 return Ok(CorrectionResult {
456 data: data_with_checksum.to_vec(),
457 corrections_applied: 0,
458 corrected_positions: vec![],
459 });
460 }
461 let mut input = hrp_expand(hrp);
462 input.extend_from_slice(data_with_checksum);
463 let residue = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
464
465 if let Some((positions, magnitudes)) =
466 bch_decode::decode_long_errors(residue, data_with_checksum.len())
467 {
468 if positions.is_empty() {
469 return Ok(CorrectionResult {
470 data: data_with_checksum.to_vec(),
471 corrections_applied: 0,
472 corrected_positions: vec![],
473 });
474 }
475 let mut corrected = data_with_checksum.to_vec();
476 for (&p, &m) in positions.iter().zip(&magnitudes) {
477 if p >= corrected.len() {
478 return Err(crate::Error::BchUncorrectable(format!(
479 "decoder reported error position {p} outside data ({} symbols)",
480 corrected.len()
481 )));
482 }
483 corrected[p] ^= m;
484 }
485 if bch_verify_long(hrp, &corrected) {
486 return Ok(CorrectionResult {
487 corrections_applied: positions.len(),
488 corrected_positions: positions,
489 data: corrected,
490 });
491 }
492 }
493 Err(crate::Error::BchUncorrectable(
494 "long code: more than 4 substitutions or pathological pattern".into(),
495 ))
496}
497
498/// Encode a 5-bit-symbol data stream as a complete mk1 string.
499///
500/// The data stream is the concatenation `header_symbols || bytes_to_5bit(payload_bytes)`
501/// where `header_symbols` is the 2-symbol single-string header or the
502/// 8-symbol chunked header (closure Q-5). The BCH code variant (regular or
503/// long) is auto-selected from the resulting data-part length per BIP 93:
504/// regular for ≤93-symbol data parts, long for 96–108-symbol data parts.
505/// Lengths in the reserved-invalid 94–95 gap or outside the BIP 93 valid
506/// range return [`Error::InvalidStringLength`].
507///
508/// Per the v0.1 emit policy described in `design/IMPLEMENTATION_PLAN_mk_v0_1.md`
509/// §5.4, callers control fragment sizing so that each chunked fragment lands
510/// within long-code territory. Single-string mk1 may pick regular or long
511/// based on bytecode size.
512///
513/// Returns the full string starting with [`crate::consts::HRP`] and the
514/// BIP 173 separator (`"mk1"`).
515pub fn encode_5bit_to_string(data_5bit: &[u8]) -> Result<String, crate::Error> {
516 use crate::Error;
517
518 // Auto-determine code from the eventual data-part length (data_5bit + checksum).
519 let regular_total = data_5bit.len() + 13;
520 let long_total = data_5bit.len() + 15;
521 let code = match (
522 bch_code_for_length(regular_total),
523 bch_code_for_length(long_total),
524 ) {
525 (Some(BchCode::Regular), _) => BchCode::Regular,
526 (_, Some(BchCode::Long)) => BchCode::Long,
527 // Neither code variant accepts this data-part length: too short, in
528 // the 94–95 reserved-invalid gap, or too long for v0.1.
529 _ => {
530 // Pick the closest length to report — long_total is always larger,
531 // so report that as the "actual length you tried to produce".
532 return Err(Error::InvalidStringLength(long_total));
533 }
534 };
535
536 let checksum: Vec<u8> = match code {
537 BchCode::Regular => bch_create_checksum_regular(HRP, data_5bit).to_vec(),
538 BchCode::Long => bch_create_checksum_long(HRP, data_5bit).to_vec(),
539 };
540
541 let mut full = String::with_capacity(HRP.len() + 1 + data_5bit.len() + checksum.len());
542 full.push_str(HRP);
543 full.push(SEPARATOR);
544 for &v in data_5bit {
545 full.push(ALPHABET[v as usize] as char);
546 }
547 for v in checksum {
548 full.push(ALPHABET[v as usize] as char);
549 }
550 Ok(full)
551}
552
553/// Result of a successful mk1 string decode at the BCH layer.
554///
555/// Use [`Self::data`] to access the data part as 5-bit values (header
556/// symbols + payload, checksum stripped); the string-layer reassembler
557/// in `crate::string_layer` splits header symbols off and feeds the
558/// remaining payload through [`five_bit_to_bytes`] to recover the original
559/// fragment bytes.
560///
561/// The full post-correction 5-bit symbol sequence (data **plus** the trailing
562/// 13- or 15-char checksum) is retained internally as [`Self::data_with_checksum`]
563/// and can be queried by [`Self::corrected_char_at`] for any position in
564/// the data part — including positions that fall inside the checksum region.
565/// The decoder-report layer uses this to surface the real corrected
566/// character when BCH ECC repairs a substitution inside the checksum
567/// (parallels md-codec's `Correction.corrected` field).
568#[non_exhaustive]
569#[derive(Debug, Clone, PartialEq, Eq)]
570pub struct DecodedString {
571 /// Detected BCH code variant.
572 pub code: BchCode,
573 /// Number of substitution errors corrected (0 = clean input, 1 = recovered).
574 pub corrections_applied: usize,
575 /// Indices into the data-part (chars after `"md1"`) of any corrected positions.
576 pub corrected_positions: Vec<usize>,
577 /// Full post-correction 5-bit symbol sequence (data part + checksum), in
578 /// the same coordinate system as [`Self::corrected_positions`].
579 ///
580 /// Length is `data().len() + 13` (regular code) or `data().len() + 15`
581 /// (long code). Indices `0..data().len()` mirror [`Self::data`] symbol-for-symbol;
582 /// indices `data().len()..` are the corrected checksum symbols. Use
583 /// [`Self::corrected_char_at`] for the human-readable bech32 character at
584 /// any position.
585 pub data_with_checksum: Vec<u8>,
586}
587
588impl DecodedString {
589 /// Data part as 5-bit values, with the trailing checksum stripped.
590 ///
591 /// Returns a slice into [`Self::data_with_checksum`] — the data part is
592 /// `data_with_checksum[..len - checksum_len]`, where `checksum_len` is 13
593 /// for [`BchCode::Regular`] and 15 for [`BchCode::Long`].
594 pub fn data(&self) -> &[u8] {
595 let checksum_len = match self.code {
596 BchCode::Regular => 13,
597 BchCode::Long => 15,
598 };
599 &self.data_with_checksum[..self.data_with_checksum.len() - checksum_len]
600 }
601
602 /// Look up the corrected bech32 character at the given position in the
603 /// data part (chars after the `"md1"` HRP+separator).
604 ///
605 /// `char_position` is 0-indexed. Positions `0..data().len()` are in the
606 /// data region; positions `data().len()..data().len() + checksum_len` are
607 /// inside the BCH checksum (13 chars for [`BchCode::Regular`], 15 for
608 /// [`BchCode::Long`]). All positions return the post-correction
609 /// character — i.e., what the symbol *should* be after BCH repair, which
610 /// is exactly what [`Correction.corrected`][crate::Correction::corrected]
611 /// is documented to report.
612 ///
613 /// # Panics
614 ///
615 /// Panics if `char_position >= data_with_checksum.len()`. Callers are
616 /// responsible for clamping the position to a valid range; in the decode
617 /// pipeline this is guaranteed by the BCH layer (it never reports a
618 /// `corrected_position` outside `data_with_checksum`). Note that
619 /// `data_with_checksum` includes the checksum region; "outside the data
620 /// part" elsewhere in this crate excludes the checksum and is a tighter
621 /// bound than what this method requires.
622 pub fn corrected_char_at(&self, char_position: usize) -> char {
623 let v = self.data_with_checksum[char_position];
624 ALPHABET[v as usize] as char
625 }
626}
627
628/// Decode an mk1 string, validating HRP, case, length, and checksum.
629///
630/// Performs full BCH error correction up to four substitutions
631/// (`t = 4` capacity of the BCH(93, 80, 8) regular code and the
632/// BCH(108, 93, 8) long code), via syndrome-based Berlekamp–Massey +
633/// Forney decoding (implemented in the sibling `bch_decode` module).
634///
635/// Errors:
636/// - [`Error::MixedCase`] if the string mixes upper and lower case.
637/// - [`Error::InvalidHrp`] if the HRP is missing or not [`crate::consts::HRP`].
638/// - [`Error::InvalidStringLength`] if the data-part length isn't a valid mk1 length.
639/// - [`Error::InvalidChar`] if the data part contains a non-bech32 character.
640/// - [`Error::BchUncorrectable`] if the checksum can't be repaired within
641/// the BCH `t = 4` correction radius.
642///
643/// [`Error::MixedCase`]: crate::Error::MixedCase
644/// [`Error::InvalidHrp`]: crate::Error::InvalidHrp
645/// [`Error::InvalidStringLength`]: crate::Error::InvalidStringLength
646/// [`Error::InvalidChar`]: crate::Error::InvalidChar
647/// [`Error::BchUncorrectable`]: crate::Error::BchUncorrectable
648pub fn decode_string(s: &str) -> Result<DecodedString, crate::Error> {
649 use crate::Error;
650
651 if matches!(case_check(s), CaseStatus::Mixed) {
652 return Err(Error::MixedCase);
653 }
654 let s_lower = s.to_lowercase();
655
656 let sep_pos = s_lower
657 .rfind(SEPARATOR)
658 .ok_or_else(|| Error::InvalidHrp(s_lower.clone()))?;
659 let (hrp, rest) = s_lower.split_at(sep_pos);
660 let data_part = &rest[1..]; // skip the '1' separator
661
662 if hrp != HRP {
663 return Err(Error::InvalidHrp(hrp.to_string()));
664 }
665
666 let code =
667 bch_code_for_length(data_part.len()).ok_or(Error::InvalidStringLength(data_part.len()))?;
668
669 let mut values: Vec<u8> = Vec::with_capacity(data_part.len());
670 for (i, c) in data_part.chars().enumerate() {
671 if !c.is_ascii() {
672 return Err(Error::InvalidChar { ch: c, position: i });
673 }
674 let v = ALPHABET_INV[c as usize];
675 if v == 0xFF {
676 return Err(Error::InvalidChar { ch: c, position: i });
677 }
678 values.push(v);
679 }
680
681 let correction = match code {
682 BchCode::Regular => bch_correct_regular(hrp, &values),
683 BchCode::Long => bch_correct_long(hrp, &values),
684 };
685 let result = correction?;
686
687 Ok(DecodedString {
688 code,
689 corrections_applied: result.corrections_applied,
690 corrected_positions: result.corrected_positions,
691 data_with_checksum: result.data,
692 })
693}
694
695#[cfg(test)]
696mod tests {
697 use super::*;
698
699 #[test]
700 fn bch_code_equality() {
701 assert_eq!(BchCode::Regular, BchCode::Regular);
702 assert_ne!(BchCode::Regular, BchCode::Long);
703 }
704
705 #[test]
706 fn bch_code_can_be_hashed() {
707 use std::collections::HashSet;
708 let mut set = HashSet::new();
709 set.insert(BchCode::Regular);
710 set.insert(BchCode::Long);
711 set.insert(BchCode::Regular);
712 assert_eq!(set.len(), 2);
713 }
714
715 #[test]
716 fn alphabet_is_32_unique_chars() {
717 let mut seen = std::collections::HashSet::new();
718 for &c in ALPHABET {
719 assert!(seen.insert(c), "duplicate char in alphabet: {}", c as char);
720 }
721 assert_eq!(seen.len(), 32);
722 }
723
724 #[test]
725 fn bytes_to_5bit_round_trip_zero() {
726 let bytes = vec![0x00];
727 let fives = bytes_to_5bit(&bytes);
728 assert_eq!(fives, vec![0, 0]);
729 let back = five_bit_to_bytes(&fives).unwrap();
730 assert_eq!(back, bytes);
731 }
732
733 #[test]
734 fn bytes_to_5bit_round_trip_known_value() {
735 // 0xFF = binary 11111111. Splits as 11111 (=31) and 111 (padded with 00 to 11100=28).
736 let bytes = vec![0xFF];
737 let fives = bytes_to_5bit(&bytes);
738 assert_eq!(fives, vec![31, 28]);
739 }
740
741 #[test]
742 fn bytes_to_5bit_round_trip_multibyte() {
743 // 3 bytes = 24 bits → 5 five-bit groups (25 bits, 1 pad bit).
744 let bytes = vec![0xDE, 0xAD, 0xBE];
745 let back = five_bit_to_bytes(&bytes_to_5bit(&bytes)).unwrap();
746 assert_eq!(back, bytes);
747 }
748
749 #[test]
750 fn five_bit_to_bytes_rejects_nonzero_padding() {
751 // Two 5-bit values = 10 bits, of which 8 form a byte and 2 are padding.
752 // If padding bits are nonzero, decode must fail.
753 // 31 = 11111, 1 = 00001. Last 2 bits (= 01) are nonzero padding.
754 assert!(five_bit_to_bytes(&[31, 1]).is_none());
755 }
756
757 #[test]
758 fn five_bit_to_bytes_rejects_value_out_of_range() {
759 assert!(five_bit_to_bytes(&[32]).is_none());
760 }
761
762 #[test]
763 fn bch_code_for_length_regular() {
764 assert_eq!(bch_code_for_length(14), Some(BchCode::Regular));
765 assert_eq!(bch_code_for_length(93), Some(BchCode::Regular));
766 }
767
768 #[test]
769 fn bch_code_for_length_long() {
770 assert_eq!(bch_code_for_length(96), Some(BchCode::Long));
771 assert_eq!(bch_code_for_length(108), Some(BchCode::Long));
772 }
773
774 #[test]
775 fn bch_code_for_length_rejects_94_and_95() {
776 assert_eq!(bch_code_for_length(94), None);
777 assert_eq!(bch_code_for_length(95), None);
778 }
779
780 #[test]
781 fn bch_code_for_length_rejects_extremes() {
782 assert_eq!(bch_code_for_length(0), None);
783 assert_eq!(bch_code_for_length(13), None);
784 assert_eq!(bch_code_for_length(109), None);
785 assert_eq!(bch_code_for_length(1000), None);
786 }
787
788 #[test]
789 fn case_check_lowercase() {
790 assert_eq!(case_check("md1qq"), CaseStatus::Lower);
791 }
792
793 #[test]
794 fn case_check_uppercase() {
795 assert_eq!(case_check("MD1QQ"), CaseStatus::Upper);
796 }
797
798 #[test]
799 fn case_check_mixed() {
800 assert_eq!(case_check("mD1qq"), CaseStatus::Mixed);
801 }
802
803 #[test]
804 fn case_check_empty_string_is_lower() {
805 assert_eq!(case_check(""), CaseStatus::Lower);
806 }
807
808 #[test]
809 fn case_check_digits_only_is_lower() {
810 // Digits have no case; result must be Lower (BIP 173: no-letter strings are lower).
811 assert_eq!(case_check("1234"), CaseStatus::Lower);
812 }
813
814 #[test]
815 fn gen_regular_has_5_entries() {
816 assert_eq!(GEN_REGULAR.len(), 5);
817 }
818
819 #[test]
820 fn gen_long_has_5_entries() {
821 assert_eq!(GEN_LONG.len(), 5);
822 }
823
824 #[test]
825 fn gen_regular_matches_bip93_canonical_values() {
826 // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
827 // ms32_polymod GEN array. If this fails, the constants drifted from the BIP.
828 assert_eq!(GEN_REGULAR[0], 0x19dc500ce73fde210);
829 assert_eq!(GEN_REGULAR[1], 0x1bfae00def77fe529);
830 assert_eq!(GEN_REGULAR[2], 0x1fbd920fffe7bee52);
831 assert_eq!(GEN_REGULAR[3], 0x1739640bdeee3fdad);
832 assert_eq!(GEN_REGULAR[4], 0x07729a039cfc75f5a);
833 }
834
835 #[test]
836 fn gen_long_matches_bip93_canonical_values() {
837 // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
838 // ms32_long_polymod GEN array.
839 assert_eq!(GEN_LONG[0], 0x3d59d273535ea62d897);
840 assert_eq!(GEN_LONG[1], 0x7a9becb6361c6c51507);
841 assert_eq!(GEN_LONG[2], 0x543f9b7e6c38d8a2a0e);
842 assert_eq!(GEN_LONG[3], 0x0c577eaeccf1990d13c);
843 assert_eq!(GEN_LONG[4], 0x1887f74f8dc71b10651);
844 }
845
846 #[test]
847 fn polymod_init_matches_bip93() {
848 // POLYMOD_INIT is unchanged from BIP 93; the GEN_REGULAR / GEN_LONG
849 // constants have their own value-equality tests.
850 assert_eq!(POLYMOD_INIT, 0x23181b3);
851 }
852
853 // (NUMS-derivation reproducer for `MK_REGULAR_CONST` / `MK_LONG_CONST`
854 // lives in `crate::consts::tests::nums_constants_reproduce_from_domain`,
855 // which uses the mk1-specific domain `b"shibbolethnumskey"`. Duplicating
856 // it here would risk drift if either side were updated in isolation.)
857
858 #[test]
859 fn polymod_masks_are_consistent_with_shifts() {
860 // The mask must be (1 << shift) - 1 so that masking preserves bits below
861 // the shift boundary, exactly matching the BIP 93 algorithm.
862 assert_eq!(REGULAR_MASK, (1u128 << REGULAR_SHIFT) - 1);
863 assert_eq!(LONG_MASK, (1u128 << LONG_SHIFT) - 1);
864 assert_eq!(REGULAR_SHIFT, 60);
865 assert_eq!(LONG_SHIFT, 70);
866 }
867
868 #[test]
869 fn polymod_step_zero_residue_zero_value() {
870 // Both residue and value zero, no GEN XORs since b = 0.
871 assert_eq!(
872 polymod_step(0, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
873 0
874 );
875 }
876
877 #[test]
878 fn polymod_step_value_only_xor_when_residue_zero() {
879 // Residue 0, value 7 → result is 7 (XORed into the shifted-zero residue).
880 assert_eq!(
881 polymod_step(0, 7, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
882 7
883 );
884 }
885
886 #[test]
887 fn polymod_step_isolates_each_gen_entry() {
888 // Setting just bit `shift+i` in the residue → b = 1<<i → only GEN[i] is XORed.
889 for i in 0..5 {
890 let r = 1u128 << (REGULAR_SHIFT + i);
891 assert_eq!(
892 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
893 GEN_REGULAR[i as usize],
894 "bit {} of b should isolate GEN_REGULAR[{}]",
895 i,
896 i
897 );
898 }
899 }
900
901 #[test]
902 fn polymod_step_xors_multiple_gens_when_multiple_b_bits_set() {
903 // b = 0b00011 → XOR GEN[0] and GEN[1].
904 let r = 0b00011u128 << REGULAR_SHIFT;
905 assert_eq!(
906 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
907 GEN_REGULAR[0] ^ GEN_REGULAR[1]
908 );
909 // b = 0b11111 → XOR all 5.
910 let r = 0b11111u128 << REGULAR_SHIFT;
911 let expected =
912 GEN_REGULAR[0] ^ GEN_REGULAR[1] ^ GEN_REGULAR[2] ^ GEN_REGULAR[3] ^ GEN_REGULAR[4];
913 assert_eq!(
914 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
915 expected
916 );
917 }
918
919 #[test]
920 fn polymod_step_works_for_long_code() {
921 // Same parameterization works for the long code (shift=70, mask=LONG_MASK).
922 let r = 1u128 << LONG_SHIFT;
923 assert_eq!(
924 polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
925 GEN_LONG[0]
926 );
927 // b = 0b11111 → XOR all 5 long GENs.
928 let r = 0b11111u128 << LONG_SHIFT;
929 let expected = GEN_LONG[0] ^ GEN_LONG[1] ^ GEN_LONG[2] ^ GEN_LONG[3] ^ GEN_LONG[4];
930 assert_eq!(
931 polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
932 expected
933 );
934 }
935
936 #[test]
937 fn polymod_step_init_residue_first_iteration() {
938 // POLYMOD_INIT < 2^60 so b = 0 in the first iteration; only the shift+xor happens.
939 // Verify: polymod_step(POLYMOD_INIT, 0) = POLYMOD_INIT << 5.
940 assert_eq!(
941 polymod_step(POLYMOD_INIT, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
942 POLYMOD_INIT << 5
943 );
944 // And with value=v: polymod_step(POLYMOD_INIT, v) = (POLYMOD_INIT << 5) ^ v.
945 assert_eq!(
946 polymod_step(POLYMOD_INIT, 31, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
947 (POLYMOD_INIT << 5) ^ 31
948 );
949 }
950
951 #[test]
952 fn polymod_step_value_and_gen_xor_combined() {
953 // Both effects active: b = 1 (bit 0 of b set) AND value = 5.
954 // Expected: ((residue & mask) << 5) ^ value ^ GEN[0]
955 // = (0 << 5) ^ 5 ^ GEN[0]
956 // = GEN_REGULAR[0] ^ 5
957 let r = 1u128 << REGULAR_SHIFT;
958 assert_eq!(
959 polymod_step(r, 5, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
960 GEN_REGULAR[0] ^ 5
961 );
962 }
963
964 #[test]
965 fn hrp_expand_mk_matches_spec() {
966 // BIP 173 hrp_expand for the MK HRP. Each ASCII byte contributes
967 // its high 3 bits then (after the [0] separator) its low 5 bits.
968 // 'm' = 0x6D → high 3 bits = 3, low 5 bits = 13.
969 // 'k' = 0x6B → high 3 bits = 3, low 5 bits = 11.
970 // Result: [3, 3, 0, 13, 11]. Documented in the BIP draft §"Checksum".
971 assert_eq!(hrp_expand(crate::consts::HRP), vec![3, 3, 0, 13, 11]);
972 }
973
974 #[test]
975 fn hrp_expand_empty_returns_just_separator() {
976 // Edge case: empty HRP yields just the [0] separator.
977 assert_eq!(hrp_expand(""), vec![0]);
978 }
979
980 #[test]
981 fn bch_round_trip_regular() {
982 // Encode then verify a small data part. The verify call sees the
983 // full data + checksum, so polymod returns MK_REGULAR_CONST exactly.
984 let hrp = "mk";
985 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
986 let checksum = bch_create_checksum_regular(hrp, &data);
987 assert_eq!(checksum.len(), 13);
988
989 let mut full = data.clone();
990 full.extend_from_slice(&checksum);
991 assert!(bch_verify_regular(hrp, &full));
992 }
993
994 #[test]
995 fn bch_verify_rejects_single_char_tampering_regular() {
996 // Flipping one bit in one symbol breaks verification.
997 // (Spot check; BCH detects all single-symbol errors by construction.)
998 let hrp = "mk";
999 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1000 let checksum = bch_create_checksum_regular(hrp, &data);
1001 let mut full = data.clone();
1002 full.extend_from_slice(&checksum);
1003 full[5] ^= 0x01;
1004 assert!(!bch_verify_regular(hrp, &full));
1005 }
1006
1007 #[test]
1008 fn bch_verify_rejects_too_short_input_regular() {
1009 // Less than 13 symbols cannot hold a checksum.
1010 assert!(!bch_verify_regular("mk", &[0, 1, 2]));
1011 assert!(!bch_verify_regular("mk", &[]));
1012 }
1013
1014 // (mk1-specific pinned-checksum vectors are deferred to Phase 6 vector
1015 // corpus generation, which writes both regular- and long-code conformance
1016 // points to disk under `crates/mk-codec/src/test_vectors/v0.1.json`.
1017 // Forking md-codec's pinned vectors verbatim would record the wrong
1018 // values: mk1's HRP and target constants both differ.)
1019
1020 #[test]
1021 fn bch_zero_data_does_not_self_validate_regular() {
1022 // The all-zeros data + all-zeros checksum must NOT validate, because
1023 // MK_REGULAR_CONST was chosen NUMS-style to avoid this trivial case.
1024 // Data length 8 is arbitrary; any non-empty zero-fill exhibits the same
1025 // negative result. 8 echoes the regular-code known-vector data length.
1026 let mut zero = vec![0u8; 8];
1027 zero.extend(std::iter::repeat_n(0, 13));
1028 assert!(!bch_verify_regular("mk", &zero));
1029 }
1030
1031 #[test]
1032 fn bch_round_trip_empty_data_regular() {
1033 // Empty data part is a degenerate but valid input: the checksum
1034 // covers only the HRP preamble. encode → verify must round-trip.
1035 let checksum = bch_create_checksum_regular("mk", &[]);
1036 assert!(bch_verify_regular("mk", &checksum));
1037 }
1038
1039 #[test]
1040 fn bch_round_trip_long() {
1041 let hrp = "mk";
1042 let data: Vec<u8> = (0..16).collect();
1043 let checksum = bch_create_checksum_long(hrp, &data);
1044 assert_eq!(checksum.len(), 15);
1045 let mut full = data.clone();
1046 full.extend_from_slice(&checksum);
1047 assert!(bch_verify_long(hrp, &full));
1048 }
1049
1050 #[test]
1051 fn bch_verify_rejects_single_char_tampering_long() {
1052 // Flipping one bit in one symbol breaks verification.
1053 // (Spot check; BCH detects all single-symbol errors by construction.)
1054 let hrp = "mk";
1055 let data: Vec<u8> = (0..16).collect();
1056 let checksum = bch_create_checksum_long(hrp, &data);
1057 let mut full = data.clone();
1058 full.extend_from_slice(&checksum);
1059 full[7] ^= 0x01;
1060 assert!(!bch_verify_long(hrp, &full));
1061 }
1062
1063 #[test]
1064 fn bch_verify_rejects_too_short_input_long() {
1065 // Less than 15 symbols cannot hold a long-code checksum.
1066 assert!(!bch_verify_long("mk", &[0; 14]));
1067 assert!(!bch_verify_long("mk", &[]));
1068 }
1069
1070 #[test]
1071 fn bch_zero_data_does_not_self_validate_long() {
1072 // All-zeros must not validate, by NUMS construction of MK_LONG_CONST.
1073 // Data length 16 is arbitrary; any non-empty zero-fill exhibits the same
1074 // negative result. 16 echoes the long-code known-vector data length.
1075 let mut zero = vec![0u8; 16];
1076 zero.extend(std::iter::repeat_n(0, 15));
1077 assert!(!bch_verify_long("mk", &zero));
1078 }
1079
1080 #[test]
1081 fn bch_round_trip_empty_data_long() {
1082 // Degenerate but valid: checksum covers only the HRP preamble.
1083 let checksum = bch_create_checksum_long("mk", &[]);
1084 assert!(bch_verify_long("mk", &checksum));
1085 }
1086
1087 #[test]
1088 fn bch_correct_regular_clean_input() {
1089 // Clean input → 0 corrections, identity result.
1090 let hrp = "mk";
1091 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1092 let checksum = bch_create_checksum_regular(hrp, &data);
1093 let mut full = data.clone();
1094 full.extend_from_slice(&checksum);
1095 let r = bch_correct_regular(hrp, &full).unwrap();
1096 assert_eq!(r.corrections_applied, 0);
1097 assert!(r.corrected_positions.is_empty());
1098 assert_eq!(r.data, full);
1099 }
1100
1101 #[test]
1102 fn bch_correct_regular_one_error() {
1103 // Single-symbol corruption is recoverable.
1104 let hrp = "mk";
1105 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1106 let checksum = bch_create_checksum_regular(hrp, &data);
1107 let mut full = data.clone();
1108 full.extend_from_slice(&checksum);
1109 let original = full.clone();
1110 full[3] = (full[3] + 1) & 0x1F;
1111 let r = bch_correct_regular(hrp, &full).unwrap();
1112 assert_eq!(r.corrections_applied, 1);
1113 assert_eq!(r.corrected_positions, vec![3]);
1114 assert_eq!(r.data, original);
1115 }
1116
1117 #[test]
1118 fn bch_correct_regular_two_errors_recovered_v0_2() {
1119 // v0.2 BM/Forney decoder reaches the BCH(93,80,8) full t = 4
1120 // capacity. A 2-error pattern is now recoverable. This test was
1121 // `..._uncorrectable_v0_1` in v0.1; flipped sign in v0.2.
1122 let hrp = "mk";
1123 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1124 let checksum = bch_create_checksum_regular(hrp, &data);
1125 let mut full = data.clone();
1126 full.extend_from_slice(&checksum);
1127 let original = full.clone();
1128 full[3] = (full[3] + 1) & 0x1F;
1129 full[7] = (full[7] + 1) & 0x1F;
1130 let r = bch_correct_regular(hrp, &full).unwrap();
1131 assert_eq!(r.corrections_applied, 2);
1132 assert!(r.corrected_positions.contains(&3));
1133 assert!(r.corrected_positions.contains(&7));
1134 assert_eq!(r.data, original);
1135 }
1136
1137 #[test]
1138 fn bch_correct_long_clean_input() {
1139 let hrp = "mk";
1140 let data: Vec<u8> = (0..16).collect();
1141 let checksum = bch_create_checksum_long(hrp, &data);
1142 let mut full = data.clone();
1143 full.extend_from_slice(&checksum);
1144 let r = bch_correct_long(hrp, &full).unwrap();
1145 assert_eq!(r.corrections_applied, 0);
1146 }
1147
1148 #[test]
1149 fn bch_correct_long_one_error() {
1150 let hrp = "mk";
1151 let data: Vec<u8> = (0..16).collect();
1152 let checksum = bch_create_checksum_long(hrp, &data);
1153 let mut full = data.clone();
1154 full.extend_from_slice(&checksum);
1155 let original = full.clone();
1156 full[5] = (full[5] + 1) & 0x1F;
1157 let r = bch_correct_long(hrp, &full).unwrap();
1158 assert_eq!(r.corrections_applied, 1);
1159 assert_eq!(r.corrected_positions, vec![5]);
1160 assert_eq!(r.data, original);
1161 }
1162
1163 #[test]
1164 fn bch_correct_returns_correction_result_with_position() {
1165 // Verify the API contract: a successful 1-error correction reports
1166 // exactly the position that was changed.
1167 let hrp = "mk";
1168 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
1169 let checksum = bch_create_checksum_regular(hrp, &data);
1170 let mut full = data.clone();
1171 full.extend_from_slice(&checksum);
1172 // Damage the second checksum byte (position 9 from start).
1173 full[9] = (full[9] + 7) & 0x1F;
1174 let r = bch_correct_regular(hrp, &full).unwrap();
1175 assert_eq!(r.corrected_positions, vec![9]);
1176 }
1177
1178 /// Build a fake mk1 5-bit data stream for round-trip tests:
1179 /// `[v0, v1, ...]` are 2 bech32-symbol single-string-style header
1180 /// symbols; `payload_bytes` is the byte-level fragment.
1181 fn build_5bit_data(header_symbols: &[u8], payload_bytes: &[u8]) -> Vec<u8> {
1182 let mut out = Vec::with_capacity(header_symbols.len() + payload_bytes.len() * 2);
1183 out.extend_from_slice(header_symbols);
1184 out.extend(bytes_to_5bit(payload_bytes));
1185 out
1186 }
1187
1188 #[test]
1189 fn encode_5bit_to_string_round_trip_regular() {
1190 // 2-symbol single-string header + 4-byte payload → 7 5-bit symbols
1191 // (header [v=0, t=0] || bytes_to_5bit(4 bytes) = 2 + 7 = 9 symbols).
1192 // 9 + 13 regular checksum = 22-char data part — well within regular range.
1193 let header_symbols = [0u8, 0u8];
1194 let payload = vec![0xDE, 0xAD, 0xBE, 0xEF];
1195 let data_5bit = build_5bit_data(&header_symbols, &payload);
1196 let s = encode_5bit_to_string(&data_5bit).unwrap();
1197 assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1198
1199 let decoded = decode_string(&s).unwrap();
1200 assert_eq!(decoded.code, BchCode::Regular);
1201 assert_eq!(decoded.corrections_applied, 0);
1202 assert!(decoded.corrected_positions.is_empty());
1203 assert_eq!(decoded.data(), data_5bit.as_slice());
1204
1205 // Recover the payload by stripping the 2-symbol header and byte-decoding.
1206 let payload_5bit = &decoded.data()[2..];
1207 let recovered = five_bit_to_bytes(payload_5bit).unwrap();
1208 assert_eq!(recovered, payload);
1209 }
1210
1211 #[test]
1212 fn encode_5bit_to_string_round_trip_long() {
1213 // Force a long-code path with an 8-symbol chunked-style header +
1214 // a 53-byte fragment: data_5bit.len() = 8 + ceil(53*8/5) = 8 + 85 = 93,
1215 // + 15 long checksum = 108 — exact long-code upper bound.
1216 let header_symbols = [0u8; 8];
1217 let payload = vec![0xA5u8; 53];
1218 let data_5bit = build_5bit_data(&header_symbols, &payload);
1219 assert_eq!(
1220 data_5bit.len(),
1221 93,
1222 "fixture invariant: 8 header + 85 payload symbols"
1223 );
1224 let s = encode_5bit_to_string(&data_5bit).unwrap();
1225 assert!(s.starts_with("mk1"));
1226 let decoded = decode_string(&s).unwrap();
1227 assert_eq!(decoded.code, BchCode::Long);
1228 assert_eq!(decoded.data(), data_5bit.as_slice());
1229
1230 let recovered = five_bit_to_bytes(&decoded.data()[8..]).unwrap();
1231 assert_eq!(recovered, payload);
1232 }
1233
1234 #[test]
1235 fn encode_starts_with_hrp_and_separator() {
1236 // Minimum-shape input: 1 5-bit symbol + 13 regular checksum = 14 — the
1237 // tightest valid regular-code data-part length.
1238 let s = encode_5bit_to_string(&[1u8]).unwrap();
1239 assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1240 }
1241
1242 #[test]
1243 fn decode_rejects_invalid_hrp() {
1244 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1245 let bad = s.replacen("mk", "bt", 1);
1246 assert!(matches!(
1247 decode_string(&bad),
1248 Err(crate::Error::InvalidHrp(_))
1249 ));
1250 }
1251
1252 #[test]
1253 fn decode_rejects_mixed_case() {
1254 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1255 let bad: String = s
1256 .chars()
1257 .enumerate()
1258 .map(|(i, c)| if i == 5 { c.to_ascii_uppercase() } else { c })
1259 .collect();
1260 assert!(matches!(decode_string(&bad), Err(crate::Error::MixedCase)));
1261 }
1262
1263 #[test]
1264 fn decode_rejects_invalid_char() {
1265 // 'b' is excluded from the bech32 alphabet; substitute one in the data
1266 // part to force a parse-time character rejection.
1267 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1268 // s looks like "mk1...". Splice 'b' at index 5 (definitely past "mk1").
1269 let mut chars: Vec<char> = s.chars().collect();
1270 chars[5] = 'b';
1271 let bad: String = chars.into_iter().collect();
1272 assert!(matches!(
1273 decode_string(&bad),
1274 Err(crate::Error::InvalidChar { .. })
1275 ));
1276 }
1277
1278 #[test]
1279 fn decode_rejects_missing_separator() {
1280 // No '1' at all in the string. rfind('1') returns None → InvalidHrp.
1281 let bad = "mknoseparatorhere";
1282 assert!(matches!(
1283 decode_string(bad),
1284 Err(crate::Error::InvalidHrp(_))
1285 ));
1286 }
1287
1288 #[test]
1289 fn decode_recovers_one_error() {
1290 // Encode, corrupt one char in the data part, decode should auto-correct.
1291 let data_5bit = vec![0u8, 0u8, 1, 2, 3, 4, 5];
1292 let s = encode_5bit_to_string(&data_5bit).unwrap();
1293
1294 let mut chars: Vec<char> = s.chars().collect();
1295 // Corrupt position 6 (past "mk1", well within the data part).
1296 let original_char = chars[6];
1297 chars[6] = if original_char == 'q' { 'p' } else { 'q' };
1298 let corrupted: String = chars.into_iter().collect();
1299
1300 let decoded = decode_string(&corrupted).unwrap();
1301 assert_eq!(decoded.corrections_applied, 1);
1302 assert_eq!(decoded.corrected_positions.len(), 1);
1303 assert_eq!(decoded.data(), data_5bit.as_slice());
1304 }
1305
1306 #[test]
1307 fn encode_rejects_data_part_in_reserved_invalid_length_range() {
1308 // For 5-bit data-part lengths 0..=12 (so data_part = 13..=25 with regular
1309 // checksum, or 15..=27 with long), `bch_code_for_length` rejects below 14.
1310 // Empty input → data_5bit.len()=0 → regular_total=13 → None; long_total=15
1311 // → Regular. Wait — regular range starts at 14 not 13.
1312 //
1313 // Actual invariant test: len 0 → regular_total=13 (None, below 14) and
1314 // long_total=15 (Regular). So it falls back to long->regular ladder ...
1315 // Re-checking encode_5bit_to_string: only fails when both miss [14..=93]
1316 // and [96..=108]. For data_5bit.len()=79, regular_total=92 → Regular ✓.
1317 // The provable reserved-invalid case is a length that misses both
1318 // ranges; the BIP 93 BCH ladder leaves no such gap below 109 because
1319 // [14..=93] ∪ [96..=108] only excludes {0..=13, 94..=95, ≥109}. The
1320 // smallest input length that produces invalid data-part lengths in
1321 // BOTH the regular and long branches is therefore data_5bit.len() ≥ 94
1322 // (regular_total ≥ 107 in invalid territory, long_total ≥ 109 too long).
1323 let too_long = vec![0u8; 94];
1324 let result = encode_5bit_to_string(&too_long);
1325 assert!(matches!(result, Err(crate::Error::InvalidStringLength(_))));
1326 }
1327}