mk_codec/string_layer/bch.rs
1//! BCH primitives for the mk1 string layer: bech32 alphabet conversion and
2//! syndrome-based error correction.
3//!
4//! Forked from `md-codec` v0.4.x (`crates/md-codec/src/encoding.rs`) at the
5//! start of the mk1 v0.1 implementation per `design/DECISIONS.md` D-13. The
6//! BCH polynomials and field arithmetic are shared with the sibling md1
7//! format (both reuse BIP 93's `BCH(93,80,8)` regular code and
8//! `BCH(108,93,8)` long code); the only mk1-specific knobs are the HRP
9//! (`"mk"`) and the NUMS-derived target residues ([`crate::consts::MK_REGULAR_CONST`]
10//! / [`crate::consts::MK_LONG_CONST`]).
11//!
12//! Unlike md-codec's encoding module, this file does **not** expose a
13//! top-level `encode_string` / `decode_string`: mk1's string-layer header
14//! lives at the 5-bit symbol layer (per closure Q-5 — 2 symbols for
15//! `SingleString`, 8 symbols for `Chunked`) rather than the byte-aligned
16//! layer md1 uses. The mk1 `string_layer/mod.rs` builds string-level
17//! encode/decode on top of the BCH primitives here.
18
19use super::bch_decode;
20use crate::consts::{HRP, MK_LONG_CONST, MK_REGULAR_CONST};
21
22/// Which BCH code variant a string uses.
23///
24/// Determined by the total data-part length: regular for ≤93 chars,
25/// long for 96–108 chars. Lengths 94–95 are reserved-invalid.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
27pub enum BchCode {
28 /// Regular code: BCH(93,80,8). 13-char checksum.
29 Regular,
30 /// Long code: BCH(108,93,8). 15-char checksum.
31 Long,
32}
33
34/// The bech32 32-character alphabet, in 5-bit-value order.
35///
36/// `q=0, p=1, z=2, r=3, y=4, 9=5, x=6, 8=7, g=8, f=9, 2=10, t=11, v=12,
37/// d=13, w=14, 0=15, s=16, 3=17, j=18, n=19, 5=20, 4=21, k=22, h=23,
38/// c=24, e=25, 6=26, m=27, u=28, a=29, 7=30, l=31`.
39pub const ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
40
41/// Inverse lookup: char (lowercase ASCII) -> 5-bit value, or 0xFF if not in alphabet.
42const ALPHABET_INV: [u8; 128] = build_alphabet_inv();
43
44const fn build_alphabet_inv() -> [u8; 128] {
45 let mut inv = [0xFFu8; 128];
46 let mut i = 0;
47 while i < 32 {
48 inv[ALPHABET[i] as usize] = i as u8;
49 i += 1;
50 }
51 inv
52}
53
54/// Convert a sequence of 8-bit bytes to a sequence of 5-bit values
55/// (padded with zero bits at the end if the bit count is not a multiple of 5).
56pub fn bytes_to_5bit(bytes: &[u8]) -> Vec<u8> {
57 let mut acc: u32 = 0;
58 let mut bits = 0u32;
59 let mut out = Vec::with_capacity((bytes.len() * 8).div_ceil(5));
60 for &b in bytes {
61 acc = (acc << 8) | b as u32;
62 bits += 8;
63 while bits >= 5 {
64 bits -= 5;
65 out.push(((acc >> bits) & 0x1F) as u8);
66 }
67 }
68 if bits > 0 {
69 out.push(((acc << (5 - bits)) & 0x1F) as u8);
70 }
71 out
72}
73
74/// Convert a sequence of 5-bit values back to 8-bit bytes.
75///
76/// Returns `None` if any value in `values` is ≥ 32 (out of 5-bit range),
77/// or if the trailing padding bits are non-zero.
78pub fn five_bit_to_bytes(values: &[u8]) -> Option<Vec<u8>> {
79 let mut acc: u32 = 0;
80 let mut bits = 0u32;
81 let mut out = Vec::with_capacity(values.len() * 5 / 8);
82 for &v in values {
83 if v >= 32 {
84 return None;
85 }
86 acc = (acc << 5) | v as u32;
87 bits += 5;
88 if bits >= 8 {
89 bits -= 8;
90 out.push(((acc >> bits) & 0xFF) as u8);
91 }
92 }
93 // Any remaining bits must be zero (padding).
94 if bits >= 5 {
95 return None;
96 }
97 if (acc & ((1 << bits) - 1)) != 0 {
98 return None;
99 }
100 Some(out)
101}
102
103/// The bech32 separator character between HRP and data-part (BIP 173 §3).
104///
105/// Re-exported by [`crate::consts::HRP`] is `"mk"`; this module's
106/// BCH-checksum helpers consume the HRP through their `hrp` parameter so
107/// that the same primitives can verify any single-HRP codex32-derived
108/// string. Production callers MUST pass [`crate::consts::HRP`].
109pub const SEPARATOR: char = '1';
110
111/// Determine the BchCode variant from a total data-part length.
112///
113/// Boundaries are from BIP 93 (codex32): regular code `BCH(93,80,8)` caps at 93,
114/// long code `BCH(108,93,8)` runs 96–108, and lengths 94–95 are explicitly
115/// reserved-invalid to prevent ambiguity in code-variant selection. Lengths
116/// below 14 or above 108 are also rejected.
117pub fn bch_code_for_length(data_part_len: usize) -> Option<BchCode> {
118 match data_part_len {
119 14..=93 => Some(BchCode::Regular),
120 94..=95 => None,
121 96..=108 => Some(BchCode::Long),
122 _ => None,
123 }
124}
125
126/// Check whether a string is all-lowercase, all-uppercase, or mixed.
127///
128/// Only ASCII letters are considered; non-ASCII characters (digits, punctuation,
129/// Unicode letters) are treated as neither case. This is appropriate for MD
130/// strings, whose alphabet is a subset of ASCII. An empty string or one with
131/// no ASCII letters returns [`CaseStatus::Lower`].
132pub fn case_check(s: &str) -> CaseStatus {
133 let mut has_lower = false;
134 let mut has_upper = false;
135 for c in s.chars() {
136 if c.is_ascii_lowercase() {
137 has_lower = true;
138 } else if c.is_ascii_uppercase() {
139 has_upper = true;
140 }
141 if has_lower && has_upper {
142 break;
143 }
144 }
145 match (has_lower, has_upper) {
146 (true, true) => CaseStatus::Mixed,
147 (true, false) => CaseStatus::Lower,
148 (false, true) => CaseStatus::Upper,
149 (false, false) => CaseStatus::Lower, // empty / no letters; treat as lower
150 }
151}
152
153/// Result of a case check.
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum CaseStatus {
156 /// All-lowercase or no letters.
157 Lower,
158 /// All-uppercase.
159 Upper,
160 /// Both lowercase and uppercase letters present (invalid).
161 Mixed,
162}
163
164/// BCH polymod constants for the regular checksum (BCH(93,80,8)).
165///
166/// Source: BIP 93 (codex32) reference implementation, `ms32_polymod` function.
167/// These five values are XORed into the running residue based on the top 5 bits
168/// of the residue at each step. The polymod operation uses a 65-bit residue
169/// (top 5 bits = current `b`, bottom 60 bits = masked state).
170///
171/// Verified against the canonical reference at
172/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
173pub const GEN_REGULAR: [u128; 5] = [
174 0x19dc500ce73fde210,
175 0x1bfae00def77fe529,
176 0x1fbd920fffe7bee52,
177 0x1739640bdeee3fdad,
178 0x07729a039cfc75f5a,
179];
180
181/// Initial residue value for both the regular and long polymod algorithms (BIP 93).
182///
183/// Both `ms32_polymod` and `ms32_long_polymod` start with this residue before
184/// processing any input characters.
185pub const POLYMOD_INIT: u128 = 0x23181b3;
186
187/// Right-shift amount to extract the top 5 bits from a 65-bit regular-code residue.
188///
189/// Usage: `b = residue >> REGULAR_SHIFT` gives the 5-bit feedback selector
190/// for the polymod algorithm.
191pub const REGULAR_SHIFT: u32 = 60;
192
193/// Mask preserving the low 60 bits of a 65-bit regular-code residue.
194pub const REGULAR_MASK: u128 = 0x0fffffffffffffff;
195
196/// BCH polymod constants for the long checksum (BCH(108,93,8)).
197///
198/// Source: BIP 93 (codex32) reference implementation, `ms32_long_polymod` function.
199/// The long polymod uses a 75-bit residue (top 5 bits = `b`, bottom 70 bits = masked state).
200///
201/// Verified against the canonical reference at
202/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
203pub const GEN_LONG: [u128; 5] = [
204 0x3d59d273535ea62d897,
205 0x7a9becb6361c6c51507,
206 0x543f9b7e6c38d8a2a0e,
207 0x0c577eaeccf1990d13c,
208 0x1887f74f8dc71b10651,
209];
210
211/// Right-shift amount to extract the top 5 bits from a 75-bit long-code residue.
212///
213/// Usage: `b = residue >> LONG_SHIFT` gives the 5-bit feedback selector
214/// for the polymod algorithm.
215pub const LONG_SHIFT: u32 = 70;
216
217/// Mask preserving the low 70 bits of a 75-bit long-code residue.
218pub const LONG_MASK: u128 = 0x3fffffffffffffffff;
219
220/// One step of the BCH polymod algorithm from BIP 93.
221///
222/// Updates the running `residue` to incorporate the next 5-bit input `value`
223/// using the polynomial defined by `gen`, shift width `shift`, and mask `mask`.
224/// The same function is used for both the regular and long codes; pass
225/// `(GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK)` for the regular code and
226/// `(GEN_LONG, LONG_SHIFT, LONG_MASK)` for the long code.
227///
228/// Returns the updated residue after incorporating `value`. The top 5 bits of
229/// the returned residue feed the next iteration's `b` selector.
230///
231/// This is a direct port of BIP 93's `ms32_polymod` / `ms32_long_polymod` inner
232/// loop. See <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki> .
233fn polymod_step(residue: u128, value: u128, r#gen: &[u128; 5], shift: u32, mask: u128) -> u128 {
234 let b = residue >> shift;
235 let mut new_residue = ((residue & mask) << 5) ^ value;
236 for (i, &g) in r#gen.iter().enumerate() {
237 if (b >> i) & 1 != 0 {
238 new_residue ^= g;
239 }
240 }
241 new_residue
242}
243
244/// BIP 173-style HRP-expansion: produces the 5-bit-symbol prelude that gets
245/// prepended to the data part before running the BCH polymod.
246///
247/// For each HRP character `c`, emits `c >> 5` (high 3 bits); then emits a
248/// single 0 separator; then emits each character's `c & 31` (low 5 bits).
249/// The result has length `2 * hrp.len() + 1` for ASCII HRPs.
250///
251/// For `hrp_expand("md")` this returns `[3, 3, 0, 13, 4]`.
252pub fn hrp_expand(hrp: &str) -> Vec<u8> {
253 let bytes = hrp.as_bytes();
254 let mut out = Vec::with_capacity(bytes.len() * 2 + 1);
255 for &c in bytes {
256 out.push(c >> 5);
257 }
258 out.push(0);
259 for &c in bytes {
260 out.push(c & 31);
261 }
262 out
263}
264
265/// Run polymod over a sequence of 5-bit values using the parameters for
266/// either the regular or long BCH code, starting from POLYMOD_INIT.
267///
268/// v0.3.1: promoted from `pub(in crate::string_layer)` to `pub` so
269/// downstream consumers (toolkit `repair` feature) can compute polymod
270/// residues against ms / md / mk target constants (all 3 share the
271/// BIP-93 BCH(93,80,8) generator). Test-helper-drift concern remains
272/// resolved by the sibling `bch_decode` module using THIS function
273/// directly rather than re-implementing.
274pub fn polymod_run(
275 values: &[u8],
276 r#gen: &[u128; 5],
277 shift: u32,
278 mask: u128,
279) -> u128 {
280 let mut residue = POLYMOD_INIT;
281 for &v in values {
282 residue = polymod_step(residue, v as u128, r#gen, shift, mask);
283 }
284 residue
285}
286
287/// Compute the 13-character BCH checksum for the regular code over the
288/// HRP-expanded preamble plus the data part.
289///
290/// `data` is the sequence of 5-bit values for the data part (header + payload),
291/// not including the checksum. Returns the 13-element checksum array, ready
292/// to append to `data` to form the full data-part-plus-checksum.
293///
294/// The algorithm runs polymod over `hrp_expand(hrp) || data || [0; 13]`,
295/// then XORs the result with [`MK_REGULAR_CONST`] to extract the checksum.
296pub fn bch_create_checksum_regular(hrp: &str, data: &[u8]) -> [u8; 13] {
297 // Regular code: 13-symbol checksum (0..=12), pad/array/extraction all use 13.
298 let mut input = hrp_expand(hrp);
299 input.extend_from_slice(data);
300 input.extend(std::iter::repeat_n(0, 13));
301 let polymod = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
302 let mut out = [0u8; 13];
303 for (i, slot) in out.iter_mut().enumerate() {
304 *slot = ((polymod >> (5 * (12 - i))) & 0x1F) as u8;
305 }
306 out
307}
308
309/// Verify a regular-code BCH checksum.
310///
311/// `data_with_checksum` is the full data part including the trailing 13
312/// checksum characters. Returns `true` iff the polymod over
313/// `hrp_expand(hrp) || data_with_checksum` equals [`MK_REGULAR_CONST`].
314pub fn bch_verify_regular(hrp: &str, data_with_checksum: &[u8]) -> bool {
315 if data_with_checksum.len() < 13 {
316 return false;
317 }
318 let mut input = hrp_expand(hrp);
319 input.extend_from_slice(data_with_checksum);
320 polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) == MK_REGULAR_CONST
321}
322
323/// Compute the 15-character BCH checksum for the long code.
324///
325/// Same algorithm as [`bch_create_checksum_regular`] but uses the long-code
326/// polymod parameters (`GEN_LONG`, `LONG_SHIFT`, `LONG_MASK`) and target
327/// constant ([`MK_LONG_CONST`]). Produces a 15-element checksum array.
328pub fn bch_create_checksum_long(hrp: &str, data: &[u8]) -> [u8; 15] {
329 // Long code: 15-symbol checksum (0..=14), pad/array/extraction all use 15.
330 let mut input = hrp_expand(hrp);
331 input.extend_from_slice(data);
332 input.extend(std::iter::repeat_n(0, 15));
333 let polymod = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
334 let mut out = [0u8; 15];
335 for (i, slot) in out.iter_mut().enumerate() {
336 *slot = ((polymod >> (5 * (14 - i))) & 0x1F) as u8;
337 }
338 out
339}
340
341/// Verify a long-code BCH checksum.
342///
343/// Same algorithm as [`bch_verify_regular`] with long-code parameters.
344/// Returns false if `data_with_checksum` is shorter than 15 symbols.
345pub fn bch_verify_long(hrp: &str, data_with_checksum: &[u8]) -> bool {
346 if data_with_checksum.len() < 15 {
347 return false;
348 }
349 let mut input = hrp_expand(hrp);
350 input.extend_from_slice(data_with_checksum);
351 polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) == MK_LONG_CONST
352}
353
354/// Result of a successful BCH decode + correct attempt.
355///
356/// Returned by [`bch_correct_regular`] / [`bch_correct_long`] when correction
357/// succeeds. `corrections_applied == 0` means the input was already valid;
358/// `> 0` means substitutions were applied at the indicated positions.
359///
360/// Marked `#[non_exhaustive]` to allow future fields (e.g., confidence
361/// score, syndrome metadata) without breaking downstream struct-literal
362/// construction. Construct via the [`bch_correct_regular`] /
363/// [`bch_correct_long`] APIs.
364#[non_exhaustive]
365#[derive(Debug, Clone, PartialEq, Eq)]
366pub struct CorrectionResult {
367 /// The corrected `data_with_checksum` slice (input may have been modified).
368 pub data: Vec<u8>,
369 /// Number of substitutions applied (0 = clean input).
370 pub corrections_applied: usize,
371 /// Indices into `data` of the substituted positions.
372 pub corrected_positions: Vec<usize>,
373}
374
375/// Attempt to correct a regular-code BCH-checksummed string with up to four
376/// substitutions, the full t = 4 capacity of the BCH(93, 80, 8) code.
377///
378/// Implements the standard syndrome-based BCH decoder pipeline: syndrome
379/// computation in `GF(1024) = GF(32²)`, Berlekamp–Massey for the
380/// error-locator polynomial, Chien search for error positions, Forney's
381/// algorithm for error magnitudes. After applying the proposed corrections,
382/// the result is re-verified via [`bch_verify_regular`]; the decoder rejects
383/// any output that does not produce a valid codeword (defensive guard
384/// against pathological 5+-error inputs whose syndromes happen to factor as
385/// a degree-≤ 4 locator).
386///
387/// Returns `Ok(CorrectionResult)` if the input is clean or up to four
388/// substitutions repair it. Returns `Err(Error::BchUncorrectable)` otherwise.
389///
390/// # Algorithm details
391///
392/// See the private `bch_decode` submodule for the algorithm and the
393/// `GF(1024)` field representation.
394pub fn bch_correct_regular(
395 hrp: &str,
396 data_with_checksum: &[u8],
397) -> Result<CorrectionResult, crate::Error> {
398 if bch_verify_regular(hrp, data_with_checksum) {
399 return Ok(CorrectionResult {
400 data: data_with_checksum.to_vec(),
401 corrections_applied: 0,
402 corrected_positions: vec![],
403 });
404 }
405 // Compute polymod over hrp_expand(hrp) || data_with_checksum, XOR with
406 // the MD target constant. The result is congruent to the error
407 // polynomial E(x) modulo g_regular(x).
408 let mut input = hrp_expand(hrp);
409 input.extend_from_slice(data_with_checksum);
410 let residue = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
411
412 if let Some((positions, magnitudes)) =
413 bch_decode::decode_regular_errors(residue, data_with_checksum.len())
414 {
415 if positions.is_empty() {
416 // Should be unreachable (caller already verified); guard anyway.
417 return Ok(CorrectionResult {
418 data: data_with_checksum.to_vec(),
419 corrections_applied: 0,
420 corrected_positions: vec![],
421 });
422 }
423 let mut corrected = data_with_checksum.to_vec();
424 for (&p, &m) in positions.iter().zip(&magnitudes) {
425 if p >= corrected.len() {
426 return Err(crate::Error::BchUncorrectable(format!(
427 "decoder reported error position {p} outside data ({} symbols)",
428 corrected.len()
429 )));
430 }
431 corrected[p] ^= m;
432 }
433 // Defensive: re-verify. Catches the 5+-error edge case.
434 if bch_verify_regular(hrp, &corrected) {
435 return Ok(CorrectionResult {
436 corrections_applied: positions.len(),
437 corrected_positions: positions,
438 data: corrected,
439 });
440 }
441 }
442 Err(crate::Error::BchUncorrectable(
443 "regular code: more than 4 substitutions or pathological pattern".into(),
444 ))
445}
446
447/// Long-code analog of [`bch_correct_regular`].
448///
449/// Implements the same BM/Chien/Forney pipeline against the long-code
450/// generator polynomial, reaching the full t = 4 capacity of
451/// `BCH(108, 93, 8)`.
452pub fn bch_correct_long(
453 hrp: &str,
454 data_with_checksum: &[u8],
455) -> Result<CorrectionResult, crate::Error> {
456 if bch_verify_long(hrp, data_with_checksum) {
457 return Ok(CorrectionResult {
458 data: data_with_checksum.to_vec(),
459 corrections_applied: 0,
460 corrected_positions: vec![],
461 });
462 }
463 let mut input = hrp_expand(hrp);
464 input.extend_from_slice(data_with_checksum);
465 let residue = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
466
467 if let Some((positions, magnitudes)) =
468 bch_decode::decode_long_errors(residue, data_with_checksum.len())
469 {
470 if positions.is_empty() {
471 return Ok(CorrectionResult {
472 data: data_with_checksum.to_vec(),
473 corrections_applied: 0,
474 corrected_positions: vec![],
475 });
476 }
477 let mut corrected = data_with_checksum.to_vec();
478 for (&p, &m) in positions.iter().zip(&magnitudes) {
479 if p >= corrected.len() {
480 return Err(crate::Error::BchUncorrectable(format!(
481 "decoder reported error position {p} outside data ({} symbols)",
482 corrected.len()
483 )));
484 }
485 corrected[p] ^= m;
486 }
487 if bch_verify_long(hrp, &corrected) {
488 return Ok(CorrectionResult {
489 corrections_applied: positions.len(),
490 corrected_positions: positions,
491 data: corrected,
492 });
493 }
494 }
495 Err(crate::Error::BchUncorrectable(
496 "long code: more than 4 substitutions or pathological pattern".into(),
497 ))
498}
499
500/// Encode a 5-bit-symbol data stream as a complete mk1 string.
501///
502/// The data stream is the concatenation `header_symbols || bytes_to_5bit(payload_bytes)`
503/// where `header_symbols` is the 2-symbol single-string header or the
504/// 8-symbol chunked header (closure Q-5). The BCH code variant (regular or
505/// long) is auto-selected from the resulting data-part length per BIP 93:
506/// regular for ≤93-symbol data parts, long for 96–108-symbol data parts.
507/// Lengths in the reserved-invalid 94–95 gap or outside the BIP 93 valid
508/// range return [`Error::InvalidStringLength`].
509///
510/// Per the v0.1 emit policy described in `design/IMPLEMENTATION_PLAN_mk_v0_1.md`
511/// §5.4, callers control fragment sizing so that each chunked fragment lands
512/// within long-code territory. Single-string mk1 may pick regular or long
513/// based on bytecode size.
514///
515/// Returns the full string starting with [`crate::consts::HRP`] and the
516/// BIP 173 separator (`"mk1"`).
517pub fn encode_5bit_to_string(data_5bit: &[u8]) -> Result<String, crate::Error> {
518 use crate::Error;
519
520 // Auto-determine code from the eventual data-part length (data_5bit + checksum).
521 let regular_total = data_5bit.len() + 13;
522 let long_total = data_5bit.len() + 15;
523 let code = match (
524 bch_code_for_length(regular_total),
525 bch_code_for_length(long_total),
526 ) {
527 (Some(BchCode::Regular), _) => BchCode::Regular,
528 (_, Some(BchCode::Long)) => BchCode::Long,
529 // Neither code variant accepts this data-part length: too short, in
530 // the 94–95 reserved-invalid gap, or too long for v0.1.
531 _ => {
532 // Pick the closest length to report — long_total is always larger,
533 // so report that as the "actual length you tried to produce".
534 return Err(Error::InvalidStringLength(long_total));
535 }
536 };
537
538 let checksum: Vec<u8> = match code {
539 BchCode::Regular => bch_create_checksum_regular(HRP, data_5bit).to_vec(),
540 BchCode::Long => bch_create_checksum_long(HRP, data_5bit).to_vec(),
541 };
542
543 let mut full = String::with_capacity(HRP.len() + 1 + data_5bit.len() + checksum.len());
544 full.push_str(HRP);
545 full.push(SEPARATOR);
546 for &v in data_5bit {
547 full.push(ALPHABET[v as usize] as char);
548 }
549 for v in checksum {
550 full.push(ALPHABET[v as usize] as char);
551 }
552 Ok(full)
553}
554
555/// Result of a successful mk1 string decode at the BCH layer.
556///
557/// Use [`Self::data`] to access the data part as 5-bit values (header
558/// symbols + payload, checksum stripped); the string-layer reassembler
559/// in `crate::string_layer` splits header symbols off and feeds the
560/// remaining payload through [`five_bit_to_bytes`] to recover the original
561/// fragment bytes.
562///
563/// The full post-correction 5-bit symbol sequence (data **plus** the trailing
564/// 13- or 15-char checksum) is retained internally as [`Self::data_with_checksum`]
565/// and can be queried by [`Self::corrected_char_at`] for any position in
566/// the data part — including positions that fall inside the checksum region.
567/// The decoder-report layer uses this to surface the real corrected
568/// character when BCH ECC repairs a substitution inside the checksum
569/// (parallels md-codec's `Correction.corrected` field).
570#[non_exhaustive]
571#[derive(Debug, Clone, PartialEq, Eq)]
572pub struct DecodedString {
573 /// Detected BCH code variant.
574 pub code: BchCode,
575 /// Number of substitution errors corrected (0 = clean input, 1 = recovered).
576 pub corrections_applied: usize,
577 /// Indices into the data-part (chars after `"md1"`) of any corrected positions.
578 pub corrected_positions: Vec<usize>,
579 /// Full post-correction 5-bit symbol sequence (data part + checksum), in
580 /// the same coordinate system as [`Self::corrected_positions`].
581 ///
582 /// Length is `data().len() + 13` (regular code) or `data().len() + 15`
583 /// (long code). Indices `0..data().len()` mirror [`Self::data`] symbol-for-symbol;
584 /// indices `data().len()..` are the corrected checksum symbols. Use
585 /// [`Self::corrected_char_at`] for the human-readable bech32 character at
586 /// any position.
587 pub data_with_checksum: Vec<u8>,
588}
589
590impl DecodedString {
591 /// Data part as 5-bit values, with the trailing checksum stripped.
592 ///
593 /// Returns a slice into [`Self::data_with_checksum`] — the data part is
594 /// `data_with_checksum[..len - checksum_len]`, where `checksum_len` is 13
595 /// for [`BchCode::Regular`] and 15 for [`BchCode::Long`].
596 pub fn data(&self) -> &[u8] {
597 let checksum_len = match self.code {
598 BchCode::Regular => 13,
599 BchCode::Long => 15,
600 };
601 &self.data_with_checksum[..self.data_with_checksum.len() - checksum_len]
602 }
603
604 /// Look up the corrected bech32 character at the given position in the
605 /// data part (chars after the `"md1"` HRP+separator).
606 ///
607 /// `char_position` is 0-indexed. Positions `0..data().len()` are in the
608 /// data region; positions `data().len()..data().len() + checksum_len` are
609 /// inside the BCH checksum (13 chars for [`BchCode::Regular`], 15 for
610 /// [`BchCode::Long`]). All positions return the post-correction
611 /// character — i.e., what the symbol *should* be after BCH repair, which
612 /// is exactly what [`Correction.corrected`][crate::Correction::corrected]
613 /// is documented to report.
614 ///
615 /// # Panics
616 ///
617 /// Panics if `char_position >= data_with_checksum.len()`. Callers are
618 /// responsible for clamping the position to a valid range; in the decode
619 /// pipeline this is guaranteed by the BCH layer (it never reports a
620 /// `corrected_position` outside `data_with_checksum`). Note that
621 /// `data_with_checksum` includes the checksum region; "outside the data
622 /// part" elsewhere in this crate excludes the checksum and is a tighter
623 /// bound than what this method requires.
624 pub fn corrected_char_at(&self, char_position: usize) -> char {
625 let v = self.data_with_checksum[char_position];
626 ALPHABET[v as usize] as char
627 }
628}
629
630/// Decode an mk1 string, validating HRP, case, length, and checksum.
631///
632/// Performs full BCH error correction up to four substitutions
633/// (`t = 4` capacity of the BCH(93, 80, 8) regular code and the
634/// BCH(108, 93, 8) long code), via syndrome-based Berlekamp–Massey +
635/// Forney decoding (implemented in the sibling `bch_decode` module).
636///
637/// Errors:
638/// - [`Error::MixedCase`] if the string mixes upper and lower case.
639/// - [`Error::InvalidHrp`] if the HRP is missing or not [`crate::consts::HRP`].
640/// - [`Error::InvalidStringLength`] if the data-part length isn't a valid mk1 length.
641/// - [`Error::InvalidChar`] if the data part contains a non-bech32 character.
642/// - [`Error::BchUncorrectable`] if the checksum can't be repaired within
643/// the BCH `t = 4` correction radius.
644///
645/// [`Error::MixedCase`]: crate::Error::MixedCase
646/// [`Error::InvalidHrp`]: crate::Error::InvalidHrp
647/// [`Error::InvalidStringLength`]: crate::Error::InvalidStringLength
648/// [`Error::InvalidChar`]: crate::Error::InvalidChar
649/// [`Error::BchUncorrectable`]: crate::Error::BchUncorrectable
650pub fn decode_string(s: &str) -> Result<DecodedString, crate::Error> {
651 use crate::Error;
652
653 if matches!(case_check(s), CaseStatus::Mixed) {
654 return Err(Error::MixedCase);
655 }
656 let s_lower = s.to_lowercase();
657
658 let sep_pos = s_lower
659 .rfind(SEPARATOR)
660 .ok_or_else(|| Error::InvalidHrp(s_lower.clone()))?;
661 let (hrp, rest) = s_lower.split_at(sep_pos);
662 let data_part = &rest[1..]; // skip the '1' separator
663
664 if hrp != HRP {
665 return Err(Error::InvalidHrp(hrp.to_string()));
666 }
667
668 let code =
669 bch_code_for_length(data_part.len()).ok_or(Error::InvalidStringLength(data_part.len()))?;
670
671 let mut values: Vec<u8> = Vec::with_capacity(data_part.len());
672 for (i, c) in data_part.chars().enumerate() {
673 if !c.is_ascii() {
674 return Err(Error::InvalidChar { ch: c, position: i });
675 }
676 let v = ALPHABET_INV[c as usize];
677 if v == 0xFF {
678 return Err(Error::InvalidChar { ch: c, position: i });
679 }
680 values.push(v);
681 }
682
683 let correction = match code {
684 BchCode::Regular => bch_correct_regular(hrp, &values),
685 BchCode::Long => bch_correct_long(hrp, &values),
686 };
687 let result = correction?;
688
689 Ok(DecodedString {
690 code,
691 corrections_applied: result.corrections_applied,
692 corrected_positions: result.corrected_positions,
693 data_with_checksum: result.data,
694 })
695}
696
697#[cfg(test)]
698mod tests {
699 use super::*;
700
701 #[test]
702 fn bch_code_equality() {
703 assert_eq!(BchCode::Regular, BchCode::Regular);
704 assert_ne!(BchCode::Regular, BchCode::Long);
705 }
706
707 #[test]
708 fn bch_code_can_be_hashed() {
709 use std::collections::HashSet;
710 let mut set = HashSet::new();
711 set.insert(BchCode::Regular);
712 set.insert(BchCode::Long);
713 set.insert(BchCode::Regular);
714 assert_eq!(set.len(), 2);
715 }
716
717 #[test]
718 fn alphabet_is_32_unique_chars() {
719 let mut seen = std::collections::HashSet::new();
720 for &c in ALPHABET {
721 assert!(seen.insert(c), "duplicate char in alphabet: {}", c as char);
722 }
723 assert_eq!(seen.len(), 32);
724 }
725
726 #[test]
727 fn bytes_to_5bit_round_trip_zero() {
728 let bytes = vec![0x00];
729 let fives = bytes_to_5bit(&bytes);
730 assert_eq!(fives, vec![0, 0]);
731 let back = five_bit_to_bytes(&fives).unwrap();
732 assert_eq!(back, bytes);
733 }
734
735 #[test]
736 fn bytes_to_5bit_round_trip_known_value() {
737 // 0xFF = binary 11111111. Splits as 11111 (=31) and 111 (padded with 00 to 11100=28).
738 let bytes = vec![0xFF];
739 let fives = bytes_to_5bit(&bytes);
740 assert_eq!(fives, vec![31, 28]);
741 }
742
743 #[test]
744 fn bytes_to_5bit_round_trip_multibyte() {
745 // 3 bytes = 24 bits → 5 five-bit groups (25 bits, 1 pad bit).
746 let bytes = vec![0xDE, 0xAD, 0xBE];
747 let back = five_bit_to_bytes(&bytes_to_5bit(&bytes)).unwrap();
748 assert_eq!(back, bytes);
749 }
750
751 #[test]
752 fn five_bit_to_bytes_rejects_nonzero_padding() {
753 // Two 5-bit values = 10 bits, of which 8 form a byte and 2 are padding.
754 // If padding bits are nonzero, decode must fail.
755 // 31 = 11111, 1 = 00001. Last 2 bits (= 01) are nonzero padding.
756 assert!(five_bit_to_bytes(&[31, 1]).is_none());
757 }
758
759 #[test]
760 fn five_bit_to_bytes_rejects_value_out_of_range() {
761 assert!(five_bit_to_bytes(&[32]).is_none());
762 }
763
764 #[test]
765 fn bch_code_for_length_regular() {
766 assert_eq!(bch_code_for_length(14), Some(BchCode::Regular));
767 assert_eq!(bch_code_for_length(93), Some(BchCode::Regular));
768 }
769
770 #[test]
771 fn bch_code_for_length_long() {
772 assert_eq!(bch_code_for_length(96), Some(BchCode::Long));
773 assert_eq!(bch_code_for_length(108), Some(BchCode::Long));
774 }
775
776 #[test]
777 fn bch_code_for_length_rejects_94_and_95() {
778 assert_eq!(bch_code_for_length(94), None);
779 assert_eq!(bch_code_for_length(95), None);
780 }
781
782 #[test]
783 fn bch_code_for_length_rejects_extremes() {
784 assert_eq!(bch_code_for_length(0), None);
785 assert_eq!(bch_code_for_length(13), None);
786 assert_eq!(bch_code_for_length(109), None);
787 assert_eq!(bch_code_for_length(1000), None);
788 }
789
790 #[test]
791 fn case_check_lowercase() {
792 assert_eq!(case_check("md1qq"), CaseStatus::Lower);
793 }
794
795 #[test]
796 fn case_check_uppercase() {
797 assert_eq!(case_check("MD1QQ"), CaseStatus::Upper);
798 }
799
800 #[test]
801 fn case_check_mixed() {
802 assert_eq!(case_check("mD1qq"), CaseStatus::Mixed);
803 }
804
805 #[test]
806 fn case_check_empty_string_is_lower() {
807 assert_eq!(case_check(""), CaseStatus::Lower);
808 }
809
810 #[test]
811 fn case_check_digits_only_is_lower() {
812 // Digits have no case; result must be Lower (BIP 173: no-letter strings are lower).
813 assert_eq!(case_check("1234"), CaseStatus::Lower);
814 }
815
816 #[test]
817 fn gen_regular_has_5_entries() {
818 assert_eq!(GEN_REGULAR.len(), 5);
819 }
820
821 #[test]
822 fn gen_long_has_5_entries() {
823 assert_eq!(GEN_LONG.len(), 5);
824 }
825
826 #[test]
827 fn gen_regular_matches_bip93_canonical_values() {
828 // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
829 // ms32_polymod GEN array. If this fails, the constants drifted from the BIP.
830 assert_eq!(GEN_REGULAR[0], 0x19dc500ce73fde210);
831 assert_eq!(GEN_REGULAR[1], 0x1bfae00def77fe529);
832 assert_eq!(GEN_REGULAR[2], 0x1fbd920fffe7bee52);
833 assert_eq!(GEN_REGULAR[3], 0x1739640bdeee3fdad);
834 assert_eq!(GEN_REGULAR[4], 0x07729a039cfc75f5a);
835 }
836
837 #[test]
838 fn gen_long_matches_bip93_canonical_values() {
839 // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
840 // ms32_long_polymod GEN array.
841 assert_eq!(GEN_LONG[0], 0x3d59d273535ea62d897);
842 assert_eq!(GEN_LONG[1], 0x7a9becb6361c6c51507);
843 assert_eq!(GEN_LONG[2], 0x543f9b7e6c38d8a2a0e);
844 assert_eq!(GEN_LONG[3], 0x0c577eaeccf1990d13c);
845 assert_eq!(GEN_LONG[4], 0x1887f74f8dc71b10651);
846 }
847
848 #[test]
849 fn polymod_init_matches_bip93() {
850 // POLYMOD_INIT is unchanged from BIP 93; the GEN_REGULAR / GEN_LONG
851 // constants have their own value-equality tests.
852 assert_eq!(POLYMOD_INIT, 0x23181b3);
853 }
854
855 // (NUMS-derivation reproducer for `MK_REGULAR_CONST` / `MK_LONG_CONST`
856 // lives in `crate::consts::tests::nums_constants_reproduce_from_domain`,
857 // which uses the mk1-specific domain `b"shibbolethnumskey"`. Duplicating
858 // it here would risk drift if either side were updated in isolation.)
859
860 #[test]
861 fn polymod_masks_are_consistent_with_shifts() {
862 // The mask must be (1 << shift) - 1 so that masking preserves bits below
863 // the shift boundary, exactly matching the BIP 93 algorithm.
864 assert_eq!(REGULAR_MASK, (1u128 << REGULAR_SHIFT) - 1);
865 assert_eq!(LONG_MASK, (1u128 << LONG_SHIFT) - 1);
866 assert_eq!(REGULAR_SHIFT, 60);
867 assert_eq!(LONG_SHIFT, 70);
868 }
869
870 #[test]
871 fn polymod_step_zero_residue_zero_value() {
872 // Both residue and value zero, no GEN XORs since b = 0.
873 assert_eq!(
874 polymod_step(0, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
875 0
876 );
877 }
878
879 #[test]
880 fn polymod_step_value_only_xor_when_residue_zero() {
881 // Residue 0, value 7 → result is 7 (XORed into the shifted-zero residue).
882 assert_eq!(
883 polymod_step(0, 7, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
884 7
885 );
886 }
887
888 #[test]
889 fn polymod_step_isolates_each_gen_entry() {
890 // Setting just bit `shift+i` in the residue → b = 1<<i → only GEN[i] is XORed.
891 for i in 0..5 {
892 let r = 1u128 << (REGULAR_SHIFT + i);
893 assert_eq!(
894 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
895 GEN_REGULAR[i as usize],
896 "bit {} of b should isolate GEN_REGULAR[{}]",
897 i,
898 i
899 );
900 }
901 }
902
903 #[test]
904 fn polymod_step_xors_multiple_gens_when_multiple_b_bits_set() {
905 // b = 0b00011 → XOR GEN[0] and GEN[1].
906 let r = 0b00011u128 << REGULAR_SHIFT;
907 assert_eq!(
908 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
909 GEN_REGULAR[0] ^ GEN_REGULAR[1]
910 );
911 // b = 0b11111 → XOR all 5.
912 let r = 0b11111u128 << REGULAR_SHIFT;
913 let expected =
914 GEN_REGULAR[0] ^ GEN_REGULAR[1] ^ GEN_REGULAR[2] ^ GEN_REGULAR[3] ^ GEN_REGULAR[4];
915 assert_eq!(
916 polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
917 expected
918 );
919 }
920
921 #[test]
922 fn polymod_step_works_for_long_code() {
923 // Same parameterization works for the long code (shift=70, mask=LONG_MASK).
924 let r = 1u128 << LONG_SHIFT;
925 assert_eq!(
926 polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
927 GEN_LONG[0]
928 );
929 // b = 0b11111 → XOR all 5 long GENs.
930 let r = 0b11111u128 << LONG_SHIFT;
931 let expected = GEN_LONG[0] ^ GEN_LONG[1] ^ GEN_LONG[2] ^ GEN_LONG[3] ^ GEN_LONG[4];
932 assert_eq!(
933 polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
934 expected
935 );
936 }
937
938 #[test]
939 fn polymod_step_init_residue_first_iteration() {
940 // POLYMOD_INIT < 2^60 so b = 0 in the first iteration; only the shift+xor happens.
941 // Verify: polymod_step(POLYMOD_INIT, 0) = POLYMOD_INIT << 5.
942 assert_eq!(
943 polymod_step(POLYMOD_INIT, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
944 POLYMOD_INIT << 5
945 );
946 // And with value=v: polymod_step(POLYMOD_INIT, v) = (POLYMOD_INIT << 5) ^ v.
947 assert_eq!(
948 polymod_step(POLYMOD_INIT, 31, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
949 (POLYMOD_INIT << 5) ^ 31
950 );
951 }
952
953 #[test]
954 fn polymod_step_value_and_gen_xor_combined() {
955 // Both effects active: b = 1 (bit 0 of b set) AND value = 5.
956 // Expected: ((residue & mask) << 5) ^ value ^ GEN[0]
957 // = (0 << 5) ^ 5 ^ GEN[0]
958 // = GEN_REGULAR[0] ^ 5
959 let r = 1u128 << REGULAR_SHIFT;
960 assert_eq!(
961 polymod_step(r, 5, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
962 GEN_REGULAR[0] ^ 5
963 );
964 }
965
966 #[test]
967 fn hrp_expand_mk_matches_spec() {
968 // BIP 173 hrp_expand for the MK HRP. Each ASCII byte contributes
969 // its high 3 bits then (after the [0] separator) its low 5 bits.
970 // 'm' = 0x6D → high 3 bits = 3, low 5 bits = 13.
971 // 'k' = 0x6B → high 3 bits = 3, low 5 bits = 11.
972 // Result: [3, 3, 0, 13, 11]. Documented in the BIP draft §"Checksum".
973 assert_eq!(hrp_expand(crate::consts::HRP), vec![3, 3, 0, 13, 11]);
974 }
975
976 #[test]
977 fn hrp_expand_empty_returns_just_separator() {
978 // Edge case: empty HRP yields just the [0] separator.
979 assert_eq!(hrp_expand(""), vec![0]);
980 }
981
982 #[test]
983 fn bch_round_trip_regular() {
984 // Encode then verify a small data part. The verify call sees the
985 // full data + checksum, so polymod returns MK_REGULAR_CONST exactly.
986 let hrp = "mk";
987 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
988 let checksum = bch_create_checksum_regular(hrp, &data);
989 assert_eq!(checksum.len(), 13);
990
991 let mut full = data.clone();
992 full.extend_from_slice(&checksum);
993 assert!(bch_verify_regular(hrp, &full));
994 }
995
996 #[test]
997 fn bch_verify_rejects_single_char_tampering_regular() {
998 // Flipping one bit in one symbol breaks verification.
999 // (Spot check; BCH detects all single-symbol errors by construction.)
1000 let hrp = "mk";
1001 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1002 let checksum = bch_create_checksum_regular(hrp, &data);
1003 let mut full = data.clone();
1004 full.extend_from_slice(&checksum);
1005 full[5] ^= 0x01;
1006 assert!(!bch_verify_regular(hrp, &full));
1007 }
1008
1009 #[test]
1010 fn bch_verify_rejects_too_short_input_regular() {
1011 // Less than 13 symbols cannot hold a checksum.
1012 assert!(!bch_verify_regular("mk", &[0, 1, 2]));
1013 assert!(!bch_verify_regular("mk", &[]));
1014 }
1015
1016 // (mk1-specific pinned-checksum vectors are deferred to Phase 6 vector
1017 // corpus generation, which writes both regular- and long-code conformance
1018 // points to disk under `crates/mk-codec/src/test_vectors/v0.1.json`.
1019 // Forking md-codec's pinned vectors verbatim would record the wrong
1020 // values: mk1's HRP and target constants both differ.)
1021
1022 #[test]
1023 fn bch_zero_data_does_not_self_validate_regular() {
1024 // The all-zeros data + all-zeros checksum must NOT validate, because
1025 // MK_REGULAR_CONST was chosen NUMS-style to avoid this trivial case.
1026 // Data length 8 is arbitrary; any non-empty zero-fill exhibits the same
1027 // negative result. 8 echoes the regular-code known-vector data length.
1028 let mut zero = vec![0u8; 8];
1029 zero.extend(std::iter::repeat_n(0, 13));
1030 assert!(!bch_verify_regular("mk", &zero));
1031 }
1032
1033 #[test]
1034 fn bch_round_trip_empty_data_regular() {
1035 // Empty data part is a degenerate but valid input: the checksum
1036 // covers only the HRP preamble. encode → verify must round-trip.
1037 let checksum = bch_create_checksum_regular("mk", &[]);
1038 assert!(bch_verify_regular("mk", &checksum));
1039 }
1040
1041 #[test]
1042 fn bch_round_trip_long() {
1043 let hrp = "mk";
1044 let data: Vec<u8> = (0..16).collect();
1045 let checksum = bch_create_checksum_long(hrp, &data);
1046 assert_eq!(checksum.len(), 15);
1047 let mut full = data.clone();
1048 full.extend_from_slice(&checksum);
1049 assert!(bch_verify_long(hrp, &full));
1050 }
1051
1052 #[test]
1053 fn bch_verify_rejects_single_char_tampering_long() {
1054 // Flipping one bit in one symbol breaks verification.
1055 // (Spot check; BCH detects all single-symbol errors by construction.)
1056 let hrp = "mk";
1057 let data: Vec<u8> = (0..16).collect();
1058 let checksum = bch_create_checksum_long(hrp, &data);
1059 let mut full = data.clone();
1060 full.extend_from_slice(&checksum);
1061 full[7] ^= 0x01;
1062 assert!(!bch_verify_long(hrp, &full));
1063 }
1064
1065 #[test]
1066 fn bch_verify_rejects_too_short_input_long() {
1067 // Less than 15 symbols cannot hold a long-code checksum.
1068 assert!(!bch_verify_long("mk", &[0; 14]));
1069 assert!(!bch_verify_long("mk", &[]));
1070 }
1071
1072 #[test]
1073 fn bch_zero_data_does_not_self_validate_long() {
1074 // All-zeros must not validate, by NUMS construction of MK_LONG_CONST.
1075 // Data length 16 is arbitrary; any non-empty zero-fill exhibits the same
1076 // negative result. 16 echoes the long-code known-vector data length.
1077 let mut zero = vec![0u8; 16];
1078 zero.extend(std::iter::repeat_n(0, 15));
1079 assert!(!bch_verify_long("mk", &zero));
1080 }
1081
1082 #[test]
1083 fn bch_round_trip_empty_data_long() {
1084 // Degenerate but valid: checksum covers only the HRP preamble.
1085 let checksum = bch_create_checksum_long("mk", &[]);
1086 assert!(bch_verify_long("mk", &checksum));
1087 }
1088
1089 #[test]
1090 fn bch_correct_regular_clean_input() {
1091 // Clean input → 0 corrections, identity result.
1092 let hrp = "mk";
1093 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1094 let checksum = bch_create_checksum_regular(hrp, &data);
1095 let mut full = data.clone();
1096 full.extend_from_slice(&checksum);
1097 let r = bch_correct_regular(hrp, &full).unwrap();
1098 assert_eq!(r.corrections_applied, 0);
1099 assert!(r.corrected_positions.is_empty());
1100 assert_eq!(r.data, full);
1101 }
1102
1103 #[test]
1104 fn bch_correct_regular_one_error() {
1105 // Single-symbol corruption is recoverable.
1106 let hrp = "mk";
1107 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1108 let checksum = bch_create_checksum_regular(hrp, &data);
1109 let mut full = data.clone();
1110 full.extend_from_slice(&checksum);
1111 let original = full.clone();
1112 full[3] = (full[3] + 1) & 0x1F;
1113 let r = bch_correct_regular(hrp, &full).unwrap();
1114 assert_eq!(r.corrections_applied, 1);
1115 assert_eq!(r.corrected_positions, vec![3]);
1116 assert_eq!(r.data, original);
1117 }
1118
1119 #[test]
1120 fn bch_correct_regular_two_errors_recovered_v0_2() {
1121 // v0.2 BM/Forney decoder reaches the BCH(93,80,8) full t = 4
1122 // capacity. A 2-error pattern is now recoverable. This test was
1123 // `..._uncorrectable_v0_1` in v0.1; flipped sign in v0.2.
1124 let hrp = "mk";
1125 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1126 let checksum = bch_create_checksum_regular(hrp, &data);
1127 let mut full = data.clone();
1128 full.extend_from_slice(&checksum);
1129 let original = full.clone();
1130 full[3] = (full[3] + 1) & 0x1F;
1131 full[7] = (full[7] + 1) & 0x1F;
1132 let r = bch_correct_regular(hrp, &full).unwrap();
1133 assert_eq!(r.corrections_applied, 2);
1134 assert!(r.corrected_positions.contains(&3));
1135 assert!(r.corrected_positions.contains(&7));
1136 assert_eq!(r.data, original);
1137 }
1138
1139 #[test]
1140 fn bch_correct_long_clean_input() {
1141 let hrp = "mk";
1142 let data: Vec<u8> = (0..16).collect();
1143 let checksum = bch_create_checksum_long(hrp, &data);
1144 let mut full = data.clone();
1145 full.extend_from_slice(&checksum);
1146 let r = bch_correct_long(hrp, &full).unwrap();
1147 assert_eq!(r.corrections_applied, 0);
1148 }
1149
1150 #[test]
1151 fn bch_correct_long_one_error() {
1152 let hrp = "mk";
1153 let data: Vec<u8> = (0..16).collect();
1154 let checksum = bch_create_checksum_long(hrp, &data);
1155 let mut full = data.clone();
1156 full.extend_from_slice(&checksum);
1157 let original = full.clone();
1158 full[5] = (full[5] + 1) & 0x1F;
1159 let r = bch_correct_long(hrp, &full).unwrap();
1160 assert_eq!(r.corrections_applied, 1);
1161 assert_eq!(r.corrected_positions, vec![5]);
1162 assert_eq!(r.data, original);
1163 }
1164
1165 #[test]
1166 fn bch_correct_returns_correction_result_with_position() {
1167 // Verify the API contract: a successful 1-error correction reports
1168 // exactly the position that was changed.
1169 let hrp = "mk";
1170 let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
1171 let checksum = bch_create_checksum_regular(hrp, &data);
1172 let mut full = data.clone();
1173 full.extend_from_slice(&checksum);
1174 // Damage the second checksum byte (position 9 from start).
1175 full[9] = (full[9] + 7) & 0x1F;
1176 let r = bch_correct_regular(hrp, &full).unwrap();
1177 assert_eq!(r.corrected_positions, vec![9]);
1178 }
1179
1180 /// Build a fake mk1 5-bit data stream for round-trip tests:
1181 /// `[v0, v1, ...]` are 2 bech32-symbol single-string-style header
1182 /// symbols; `payload_bytes` is the byte-level fragment.
1183 fn build_5bit_data(header_symbols: &[u8], payload_bytes: &[u8]) -> Vec<u8> {
1184 let mut out = Vec::with_capacity(header_symbols.len() + payload_bytes.len() * 2);
1185 out.extend_from_slice(header_symbols);
1186 out.extend(bytes_to_5bit(payload_bytes));
1187 out
1188 }
1189
1190 #[test]
1191 fn encode_5bit_to_string_round_trip_regular() {
1192 // 2-symbol single-string header + 4-byte payload → 7 5-bit symbols
1193 // (header [v=0, t=0] || bytes_to_5bit(4 bytes) = 2 + 7 = 9 symbols).
1194 // 9 + 13 regular checksum = 22-char data part — well within regular range.
1195 let header_symbols = [0u8, 0u8];
1196 let payload = vec![0xDE, 0xAD, 0xBE, 0xEF];
1197 let data_5bit = build_5bit_data(&header_symbols, &payload);
1198 let s = encode_5bit_to_string(&data_5bit).unwrap();
1199 assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1200
1201 let decoded = decode_string(&s).unwrap();
1202 assert_eq!(decoded.code, BchCode::Regular);
1203 assert_eq!(decoded.corrections_applied, 0);
1204 assert!(decoded.corrected_positions.is_empty());
1205 assert_eq!(decoded.data(), data_5bit.as_slice());
1206
1207 // Recover the payload by stripping the 2-symbol header and byte-decoding.
1208 let payload_5bit = &decoded.data()[2..];
1209 let recovered = five_bit_to_bytes(payload_5bit).unwrap();
1210 assert_eq!(recovered, payload);
1211 }
1212
1213 #[test]
1214 fn encode_5bit_to_string_round_trip_long() {
1215 // Force a long-code path with an 8-symbol chunked-style header +
1216 // a 53-byte fragment: data_5bit.len() = 8 + ceil(53*8/5) = 8 + 85 = 93,
1217 // + 15 long checksum = 108 — exact long-code upper bound.
1218 let header_symbols = [0u8; 8];
1219 let payload = vec![0xA5u8; 53];
1220 let data_5bit = build_5bit_data(&header_symbols, &payload);
1221 assert_eq!(
1222 data_5bit.len(),
1223 93,
1224 "fixture invariant: 8 header + 85 payload symbols"
1225 );
1226 let s = encode_5bit_to_string(&data_5bit).unwrap();
1227 assert!(s.starts_with("mk1"));
1228 let decoded = decode_string(&s).unwrap();
1229 assert_eq!(decoded.code, BchCode::Long);
1230 assert_eq!(decoded.data(), data_5bit.as_slice());
1231
1232 let recovered = five_bit_to_bytes(&decoded.data()[8..]).unwrap();
1233 assert_eq!(recovered, payload);
1234 }
1235
1236 #[test]
1237 fn encode_starts_with_hrp_and_separator() {
1238 // Minimum-shape input: 1 5-bit symbol + 13 regular checksum = 14 — the
1239 // tightest valid regular-code data-part length.
1240 let s = encode_5bit_to_string(&[1u8]).unwrap();
1241 assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1242 }
1243
1244 #[test]
1245 fn decode_rejects_invalid_hrp() {
1246 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1247 let bad = s.replacen("mk", "bt", 1);
1248 assert!(matches!(
1249 decode_string(&bad),
1250 Err(crate::Error::InvalidHrp(_))
1251 ));
1252 }
1253
1254 #[test]
1255 fn decode_rejects_mixed_case() {
1256 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1257 let bad: String = s
1258 .chars()
1259 .enumerate()
1260 .map(|(i, c)| if i == 5 { c.to_ascii_uppercase() } else { c })
1261 .collect();
1262 assert!(matches!(decode_string(&bad), Err(crate::Error::MixedCase)));
1263 }
1264
1265 #[test]
1266 fn decode_rejects_invalid_char() {
1267 // 'b' is excluded from the bech32 alphabet; substitute one in the data
1268 // part to force a parse-time character rejection.
1269 let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1270 // s looks like "mk1...". Splice 'b' at index 5 (definitely past "mk1").
1271 let mut chars: Vec<char> = s.chars().collect();
1272 chars[5] = 'b';
1273 let bad: String = chars.into_iter().collect();
1274 assert!(matches!(
1275 decode_string(&bad),
1276 Err(crate::Error::InvalidChar { .. })
1277 ));
1278 }
1279
1280 #[test]
1281 fn decode_rejects_missing_separator() {
1282 // No '1' at all in the string. rfind('1') returns None → InvalidHrp.
1283 let bad = "mknoseparatorhere";
1284 assert!(matches!(
1285 decode_string(bad),
1286 Err(crate::Error::InvalidHrp(_))
1287 ));
1288 }
1289
1290 #[test]
1291 fn decode_recovers_one_error() {
1292 // Encode, corrupt one char in the data part, decode should auto-correct.
1293 let data_5bit = vec![0u8, 0u8, 1, 2, 3, 4, 5];
1294 let s = encode_5bit_to_string(&data_5bit).unwrap();
1295
1296 let mut chars: Vec<char> = s.chars().collect();
1297 // Corrupt position 6 (past "mk1", well within the data part).
1298 let original_char = chars[6];
1299 chars[6] = if original_char == 'q' { 'p' } else { 'q' };
1300 let corrupted: String = chars.into_iter().collect();
1301
1302 let decoded = decode_string(&corrupted).unwrap();
1303 assert_eq!(decoded.corrections_applied, 1);
1304 assert_eq!(decoded.corrected_positions.len(), 1);
1305 assert_eq!(decoded.data(), data_5bit.as_slice());
1306 }
1307
1308 #[test]
1309 fn encode_rejects_data_part_in_reserved_invalid_length_range() {
1310 // For 5-bit data-part lengths 0..=12 (so data_part = 13..=25 with regular
1311 // checksum, or 15..=27 with long), `bch_code_for_length` rejects below 14.
1312 // Empty input → data_5bit.len()=0 → regular_total=13 → None; long_total=15
1313 // → Regular. Wait — regular range starts at 14 not 13.
1314 //
1315 // Actual invariant test: len 0 → regular_total=13 (None, below 14) and
1316 // long_total=15 (Regular). So it falls back to long->regular ladder ...
1317 // Re-checking encode_5bit_to_string: only fails when both miss [14..=93]
1318 // and [96..=108]. For data_5bit.len()=79, regular_total=92 → Regular ✓.
1319 // The provable reserved-invalid case is a length that misses both
1320 // ranges; the BIP 93 BCH ladder leaves no such gap below 109 because
1321 // [14..=93] ∪ [96..=108] only excludes {0..=13, 94..=95, ≥109}. The
1322 // smallest input length that produces invalid data-part lengths in
1323 // BOTH the regular and long branches is therefore data_5bit.len() ≥ 94
1324 // (regular_total ≥ 107 in invalid territory, long_total ≥ 109 too long).
1325 let too_long = vec![0u8; 94];
1326 let result = encode_5bit_to_string(&too_long);
1327 assert!(matches!(result, Err(crate::Error::InvalidStringLength(_))));
1328 }
1329}