Skip to main content

mk_codec/string_layer/
bch.rs

1//! BCH primitives for the mk1 string layer: bech32 alphabet conversion and
2//! syndrome-based error correction.
3//!
4//! Forked from `md-codec` v0.4.x (`crates/md-codec/src/encoding.rs`) at the
5//! start of the mk1 v0.1 implementation per `design/DECISIONS.md` D-13. The
6//! BCH polynomials and field arithmetic are shared with the sibling md1
7//! format (both reuse BIP 93's `BCH(93,80,8)` regular code and
8//! `BCH(108,93,8)` long code); the only mk1-specific knobs are the HRP
9//! (`"mk"`) and the NUMS-derived target residues ([`crate::consts::MK_REGULAR_CONST`]
10//! / [`crate::consts::MK_LONG_CONST`]).
11//!
12//! Unlike md-codec's encoding module, this file does **not** expose a
13//! top-level `encode_string` / `decode_string`: mk1's string-layer header
14//! lives at the 5-bit symbol layer (per closure Q-5 — 2 symbols for
15//! `SingleString`, 8 symbols for `Chunked`) rather than the byte-aligned
16//! layer md1 uses. The mk1 `string_layer/mod.rs` builds string-level
17//! encode/decode on top of the BCH primitives here.
18
19use super::bch_decode;
20use crate::consts::{HRP, MK_LONG_CONST, MK_REGULAR_CONST};
21
22/// Which BCH code variant a string uses.
23///
24/// Determined by the total data-part length: regular for ≤93 chars,
25/// long for 96–108 chars. Lengths 94–95 are reserved-invalid.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
27pub enum BchCode {
28    /// Regular code: BCH(93,80,8). 13-char checksum.
29    Regular,
30    /// Long code: BCH(108,93,8). 15-char checksum.
31    Long,
32}
33
34/// The bech32 32-character alphabet, in 5-bit-value order.
35///
36/// `q=0, p=1, z=2, r=3, y=4, 9=5, x=6, 8=7, g=8, f=9, 2=10, t=11, v=12,
37///  d=13, w=14, 0=15, s=16, 3=17, j=18, n=19, 5=20, 4=21, k=22, h=23,
38///  c=24, e=25, 6=26, m=27, u=28, a=29, 7=30, l=31`.
39pub const ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
40
41/// Inverse lookup: char (lowercase ASCII) -> 5-bit value, or 0xFF if not in alphabet.
42const ALPHABET_INV: [u8; 128] = build_alphabet_inv();
43
44const fn build_alphabet_inv() -> [u8; 128] {
45    let mut inv = [0xFFu8; 128];
46    let mut i = 0;
47    while i < 32 {
48        inv[ALPHABET[i] as usize] = i as u8;
49        i += 1;
50    }
51    inv
52}
53
54/// Convert a sequence of 8-bit bytes to a sequence of 5-bit values
55/// (padded with zero bits at the end if the bit count is not a multiple of 5).
56pub fn bytes_to_5bit(bytes: &[u8]) -> Vec<u8> {
57    let mut acc: u32 = 0;
58    let mut bits = 0u32;
59    let mut out = Vec::with_capacity((bytes.len() * 8).div_ceil(5));
60    for &b in bytes {
61        acc = (acc << 8) | b as u32;
62        bits += 8;
63        while bits >= 5 {
64            bits -= 5;
65            out.push(((acc >> bits) & 0x1F) as u8);
66        }
67    }
68    if bits > 0 {
69        out.push(((acc << (5 - bits)) & 0x1F) as u8);
70    }
71    out
72}
73
74/// Convert a sequence of 5-bit values back to 8-bit bytes.
75///
76/// Returns `None` if any value in `values` is ≥ 32 (out of 5-bit range),
77/// or if the trailing padding bits are non-zero.
78pub fn five_bit_to_bytes(values: &[u8]) -> Option<Vec<u8>> {
79    let mut acc: u32 = 0;
80    let mut bits = 0u32;
81    let mut out = Vec::with_capacity(values.len() * 5 / 8);
82    for &v in values {
83        if v >= 32 {
84            return None;
85        }
86        acc = (acc << 5) | v as u32;
87        bits += 5;
88        if bits >= 8 {
89            bits -= 8;
90            out.push(((acc >> bits) & 0xFF) as u8);
91        }
92    }
93    // Any remaining bits must be zero (padding).
94    if bits >= 5 {
95        return None;
96    }
97    if (acc & ((1 << bits) - 1)) != 0 {
98        return None;
99    }
100    Some(out)
101}
102
103/// The bech32 separator character between HRP and data-part (BIP 173 §3).
104///
105/// Re-exported by [`crate::consts::HRP`] is `"mk"`; this module's
106/// BCH-checksum helpers consume the HRP through their `hrp` parameter so
107/// that the same primitives can verify any single-HRP codex32-derived
108/// string. Production callers MUST pass [`crate::consts::HRP`].
109pub const SEPARATOR: char = '1';
110
111/// Determine the BchCode variant from a total data-part length.
112///
113/// Boundaries are from BIP 93 (codex32): regular code `BCH(93,80,8)` caps at 93,
114/// long code `BCH(108,93,8)` runs 96–108, and lengths 94–95 are explicitly
115/// reserved-invalid to prevent ambiguity in code-variant selection. Lengths
116/// below 14 or above 108 are also rejected.
117pub fn bch_code_for_length(data_part_len: usize) -> Option<BchCode> {
118    match data_part_len {
119        14..=93 => Some(BchCode::Regular),
120        94..=95 => None,
121        96..=108 => Some(BchCode::Long),
122        _ => None,
123    }
124}
125
126/// Check whether a string is all-lowercase, all-uppercase, or mixed.
127///
128/// Only ASCII letters are considered; non-ASCII characters (digits, punctuation,
129/// Unicode letters) are treated as neither case. This is appropriate for MD
130/// strings, whose alphabet is a subset of ASCII. An empty string or one with
131/// no ASCII letters returns [`CaseStatus::Lower`].
132pub fn case_check(s: &str) -> CaseStatus {
133    let mut has_lower = false;
134    let mut has_upper = false;
135    for c in s.chars() {
136        if c.is_ascii_lowercase() {
137            has_lower = true;
138        } else if c.is_ascii_uppercase() {
139            has_upper = true;
140        }
141        if has_lower && has_upper {
142            break;
143        }
144    }
145    match (has_lower, has_upper) {
146        (true, true) => CaseStatus::Mixed,
147        (true, false) => CaseStatus::Lower,
148        (false, true) => CaseStatus::Upper,
149        (false, false) => CaseStatus::Lower, // empty / no letters; treat as lower
150    }
151}
152
153/// Result of a case check.
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum CaseStatus {
156    /// All-lowercase or no letters.
157    Lower,
158    /// All-uppercase.
159    Upper,
160    /// Both lowercase and uppercase letters present (invalid).
161    Mixed,
162}
163
164/// BCH polymod constants for the regular checksum (BCH(93,80,8)).
165///
166/// Source: BIP 93 (codex32) reference implementation, `ms32_polymod` function.
167/// These five values are XORed into the running residue based on the top 5 bits
168/// of the residue at each step. The polymod operation uses a 65-bit residue
169/// (top 5 bits = current `b`, bottom 60 bits = masked state).
170///
171/// Verified against the canonical reference at
172/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
173pub const GEN_REGULAR: [u128; 5] = [
174    0x19dc500ce73fde210,
175    0x1bfae00def77fe529,
176    0x1fbd920fffe7bee52,
177    0x1739640bdeee3fdad,
178    0x07729a039cfc75f5a,
179];
180
181/// Initial residue value for both the regular and long polymod algorithms (BIP 93).
182///
183/// Both `ms32_polymod` and `ms32_long_polymod` start with this residue before
184/// processing any input characters.
185pub const POLYMOD_INIT: u128 = 0x23181b3;
186
187/// Right-shift amount to extract the top 5 bits from a 65-bit regular-code residue.
188///
189/// Usage: `b = residue >> REGULAR_SHIFT` gives the 5-bit feedback selector
190/// for the polymod algorithm.
191pub const REGULAR_SHIFT: u32 = 60;
192
193/// Mask preserving the low 60 bits of a 65-bit regular-code residue.
194pub const REGULAR_MASK: u128 = 0x0fffffffffffffff;
195
196/// BCH polymod constants for the long checksum (BCH(108,93,8)).
197///
198/// Source: BIP 93 (codex32) reference implementation, `ms32_long_polymod` function.
199/// The long polymod uses a 75-bit residue (top 5 bits = `b`, bottom 70 bits = masked state).
200///
201/// Verified against the canonical reference at
202/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
203pub const GEN_LONG: [u128; 5] = [
204    0x3d59d273535ea62d897,
205    0x7a9becb6361c6c51507,
206    0x543f9b7e6c38d8a2a0e,
207    0x0c577eaeccf1990d13c,
208    0x1887f74f8dc71b10651,
209];
210
211/// Right-shift amount to extract the top 5 bits from a 75-bit long-code residue.
212///
213/// Usage: `b = residue >> LONG_SHIFT` gives the 5-bit feedback selector
214/// for the polymod algorithm.
215pub const LONG_SHIFT: u32 = 70;
216
217/// Mask preserving the low 70 bits of a 75-bit long-code residue.
218pub const LONG_MASK: u128 = 0x3fffffffffffffffff;
219
220/// One step of the BCH polymod algorithm from BIP 93.
221///
222/// Updates the running `residue` to incorporate the next 5-bit input `value`
223/// using the polynomial defined by `gen`, shift width `shift`, and mask `mask`.
224/// The same function is used for both the regular and long codes; pass
225/// `(GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK)` for the regular code and
226/// `(GEN_LONG, LONG_SHIFT, LONG_MASK)` for the long code.
227///
228/// Returns the updated residue after incorporating `value`. The top 5 bits of
229/// the returned residue feed the next iteration's `b` selector.
230///
231/// This is a direct port of BIP 93's `ms32_polymod` / `ms32_long_polymod` inner
232/// loop. See <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki> .
233fn polymod_step(residue: u128, value: u128, r#gen: &[u128; 5], shift: u32, mask: u128) -> u128 {
234    let b = residue >> shift;
235    let mut new_residue = ((residue & mask) << 5) ^ value;
236    for (i, &g) in r#gen.iter().enumerate() {
237        if (b >> i) & 1 != 0 {
238            new_residue ^= g;
239        }
240    }
241    new_residue
242}
243
244/// BIP 173-style HRP-expansion: produces the 5-bit-symbol prelude that gets
245/// prepended to the data part before running the BCH polymod.
246///
247/// For each HRP character `c`, emits `c >> 5` (high 3 bits); then emits a
248/// single 0 separator; then emits each character's `c & 31` (low 5 bits).
249/// The result has length `2 * hrp.len() + 1` for ASCII HRPs.
250///
251/// For `hrp_expand("md")` this returns `[3, 3, 0, 13, 4]`.
252pub fn hrp_expand(hrp: &str) -> Vec<u8> {
253    let bytes = hrp.as_bytes();
254    let mut out = Vec::with_capacity(bytes.len() * 2 + 1);
255    for &c in bytes {
256        out.push(c >> 5);
257    }
258    out.push(0);
259    for &c in bytes {
260        out.push(c & 31);
261    }
262    out
263}
264
265/// Run polymod over a sequence of 5-bit values using the parameters for
266/// either the regular or long BCH code, starting from POLYMOD_INIT.
267///
268/// v0.3.1: promoted from `pub(in crate::string_layer)` to `pub` so
269/// downstream consumers (toolkit `repair` feature) can compute polymod
270/// residues against ms / md / mk target constants (all 3 share the
271/// BIP-93 BCH(93,80,8) generator). Test-helper-drift concern remains
272/// resolved by the sibling `bch_decode` module using THIS function
273/// directly rather than re-implementing.
274pub fn polymod_run(
275    values: &[u8],
276    r#gen: &[u128; 5],
277    shift: u32,
278    mask: u128,
279) -> u128 {
280    let mut residue = POLYMOD_INIT;
281    for &v in values {
282        residue = polymod_step(residue, v as u128, r#gen, shift, mask);
283    }
284    residue
285}
286
287/// Compute the 13-character BCH checksum for the regular code over the
288/// HRP-expanded preamble plus the data part.
289///
290/// `data` is the sequence of 5-bit values for the data part (header + payload),
291/// not including the checksum. Returns the 13-element checksum array, ready
292/// to append to `data` to form the full data-part-plus-checksum.
293///
294/// The algorithm runs polymod over `hrp_expand(hrp) || data || [0; 13]`,
295/// then XORs the result with [`MK_REGULAR_CONST`] to extract the checksum.
296pub fn bch_create_checksum_regular(hrp: &str, data: &[u8]) -> [u8; 13] {
297    // Regular code: 13-symbol checksum (0..=12), pad/array/extraction all use 13.
298    let mut input = hrp_expand(hrp);
299    input.extend_from_slice(data);
300    input.extend(std::iter::repeat_n(0, 13));
301    let polymod = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
302    let mut out = [0u8; 13];
303    for (i, slot) in out.iter_mut().enumerate() {
304        *slot = ((polymod >> (5 * (12 - i))) & 0x1F) as u8;
305    }
306    out
307}
308
309/// Verify a regular-code BCH checksum.
310///
311/// `data_with_checksum` is the full data part including the trailing 13
312/// checksum characters. Returns `true` iff the polymod over
313/// `hrp_expand(hrp) || data_with_checksum` equals [`MK_REGULAR_CONST`].
314pub fn bch_verify_regular(hrp: &str, data_with_checksum: &[u8]) -> bool {
315    if data_with_checksum.len() < 13 {
316        return false;
317    }
318    let mut input = hrp_expand(hrp);
319    input.extend_from_slice(data_with_checksum);
320    polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) == MK_REGULAR_CONST
321}
322
323/// Compute the 15-character BCH checksum for the long code.
324///
325/// Same algorithm as [`bch_create_checksum_regular`] but uses the long-code
326/// polymod parameters (`GEN_LONG`, `LONG_SHIFT`, `LONG_MASK`) and target
327/// constant ([`MK_LONG_CONST`]). Produces a 15-element checksum array.
328pub fn bch_create_checksum_long(hrp: &str, data: &[u8]) -> [u8; 15] {
329    // Long code: 15-symbol checksum (0..=14), pad/array/extraction all use 15.
330    let mut input = hrp_expand(hrp);
331    input.extend_from_slice(data);
332    input.extend(std::iter::repeat_n(0, 15));
333    let polymod = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
334    let mut out = [0u8; 15];
335    for (i, slot) in out.iter_mut().enumerate() {
336        *slot = ((polymod >> (5 * (14 - i))) & 0x1F) as u8;
337    }
338    out
339}
340
341/// Verify a long-code BCH checksum.
342///
343/// Same algorithm as [`bch_verify_regular`] with long-code parameters.
344/// Returns false if `data_with_checksum` is shorter than 15 symbols.
345pub fn bch_verify_long(hrp: &str, data_with_checksum: &[u8]) -> bool {
346    if data_with_checksum.len() < 15 {
347        return false;
348    }
349    let mut input = hrp_expand(hrp);
350    input.extend_from_slice(data_with_checksum);
351    polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) == MK_LONG_CONST
352}
353
354/// Result of a successful BCH decode + correct attempt.
355///
356/// Returned by [`bch_correct_regular`] / [`bch_correct_long`] when correction
357/// succeeds. `corrections_applied == 0` means the input was already valid;
358/// `> 0` means substitutions were applied at the indicated positions.
359///
360/// Marked `#[non_exhaustive]` to allow future fields (e.g., confidence
361/// score, syndrome metadata) without breaking downstream struct-literal
362/// construction. Construct via the [`bch_correct_regular`] /
363/// [`bch_correct_long`] APIs.
364#[non_exhaustive]
365#[derive(Debug, Clone, PartialEq, Eq)]
366pub struct CorrectionResult {
367    /// The corrected `data_with_checksum` slice (input may have been modified).
368    pub data: Vec<u8>,
369    /// Number of substitutions applied (0 = clean input).
370    pub corrections_applied: usize,
371    /// Indices into `data` of the substituted positions.
372    pub corrected_positions: Vec<usize>,
373}
374
375/// Attempt to correct a regular-code BCH-checksummed string with up to four
376/// substitutions, the full t = 4 capacity of the BCH(93, 80, 8) code.
377///
378/// Implements the standard syndrome-based BCH decoder pipeline: syndrome
379/// computation in `GF(1024) = GF(32²)`, Berlekamp–Massey for the
380/// error-locator polynomial, Chien search for error positions, Forney's
381/// algorithm for error magnitudes. After applying the proposed corrections,
382/// the result is re-verified via [`bch_verify_regular`]; the decoder rejects
383/// any output that does not produce a valid codeword (defensive guard
384/// against pathological 5+-error inputs whose syndromes happen to factor as
385/// a degree-≤ 4 locator).
386///
387/// Returns `Ok(CorrectionResult)` if the input is clean or up to four
388/// substitutions repair it. Returns `Err(Error::BchUncorrectable)` otherwise.
389///
390/// # Algorithm details
391///
392/// See the private `bch_decode` submodule for the algorithm and the
393/// `GF(1024)` field representation.
394pub fn bch_correct_regular(
395    hrp: &str,
396    data_with_checksum: &[u8],
397) -> Result<CorrectionResult, crate::Error> {
398    if bch_verify_regular(hrp, data_with_checksum) {
399        return Ok(CorrectionResult {
400            data: data_with_checksum.to_vec(),
401            corrections_applied: 0,
402            corrected_positions: vec![],
403        });
404    }
405    // Compute polymod over hrp_expand(hrp) || data_with_checksum, XOR with
406    // the MD target constant. The result is congruent to the error
407    // polynomial E(x) modulo g_regular(x).
408    let mut input = hrp_expand(hrp);
409    input.extend_from_slice(data_with_checksum);
410    let residue = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
411
412    if let Some((positions, magnitudes)) =
413        bch_decode::decode_regular_errors(residue, data_with_checksum.len())
414    {
415        if positions.is_empty() {
416            // Should be unreachable (caller already verified); guard anyway.
417            return Ok(CorrectionResult {
418                data: data_with_checksum.to_vec(),
419                corrections_applied: 0,
420                corrected_positions: vec![],
421            });
422        }
423        let mut corrected = data_with_checksum.to_vec();
424        for (&p, &m) in positions.iter().zip(&magnitudes) {
425            if p >= corrected.len() {
426                return Err(crate::Error::BchUncorrectable(format!(
427                    "decoder reported error position {p} outside data ({} symbols)",
428                    corrected.len()
429                )));
430            }
431            corrected[p] ^= m;
432        }
433        // Defensive: re-verify. Catches the 5+-error edge case.
434        if bch_verify_regular(hrp, &corrected) {
435            return Ok(CorrectionResult {
436                corrections_applied: positions.len(),
437                corrected_positions: positions,
438                data: corrected,
439            });
440        }
441    }
442    Err(crate::Error::BchUncorrectable(
443        "regular code: more than 4 substitutions or pathological pattern".into(),
444    ))
445}
446
447/// Long-code analog of [`bch_correct_regular`].
448///
449/// Implements the same BM/Chien/Forney pipeline against the long-code
450/// generator polynomial, reaching the full t = 4 capacity of
451/// `BCH(108, 93, 8)`.
452pub fn bch_correct_long(
453    hrp: &str,
454    data_with_checksum: &[u8],
455) -> Result<CorrectionResult, crate::Error> {
456    if bch_verify_long(hrp, data_with_checksum) {
457        return Ok(CorrectionResult {
458            data: data_with_checksum.to_vec(),
459            corrections_applied: 0,
460            corrected_positions: vec![],
461        });
462    }
463    let mut input = hrp_expand(hrp);
464    input.extend_from_slice(data_with_checksum);
465    let residue = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
466
467    if let Some((positions, magnitudes)) =
468        bch_decode::decode_long_errors(residue, data_with_checksum.len())
469    {
470        if positions.is_empty() {
471            return Ok(CorrectionResult {
472                data: data_with_checksum.to_vec(),
473                corrections_applied: 0,
474                corrected_positions: vec![],
475            });
476        }
477        let mut corrected = data_with_checksum.to_vec();
478        for (&p, &m) in positions.iter().zip(&magnitudes) {
479            if p >= corrected.len() {
480                return Err(crate::Error::BchUncorrectable(format!(
481                    "decoder reported error position {p} outside data ({} symbols)",
482                    corrected.len()
483                )));
484            }
485            corrected[p] ^= m;
486        }
487        if bch_verify_long(hrp, &corrected) {
488            return Ok(CorrectionResult {
489                corrections_applied: positions.len(),
490                corrected_positions: positions,
491                data: corrected,
492            });
493        }
494    }
495    Err(crate::Error::BchUncorrectable(
496        "long code: more than 4 substitutions or pathological pattern".into(),
497    ))
498}
499
500/// Encode a 5-bit-symbol data stream as a complete mk1 string.
501///
502/// The data stream is the concatenation `header_symbols || bytes_to_5bit(payload_bytes)`
503/// where `header_symbols` is the 2-symbol single-string header or the
504/// 8-symbol chunked header (closure Q-5). The BCH code variant (regular or
505/// long) is auto-selected from the resulting data-part length per BIP 93:
506/// regular for ≤93-symbol data parts, long for 96–108-symbol data parts.
507/// Lengths in the reserved-invalid 94–95 gap or outside the BIP 93 valid
508/// range return [`Error::InvalidStringLength`].
509///
510/// Per the v0.1 emit policy described in `design/IMPLEMENTATION_PLAN_mk_v0_1.md`
511/// §5.4, callers control fragment sizing so that each chunked fragment lands
512/// within long-code territory. Single-string mk1 may pick regular or long
513/// based on bytecode size.
514///
515/// Returns the full string starting with [`crate::consts::HRP`] and the
516/// BIP 173 separator (`"mk1"`).
517pub fn encode_5bit_to_string(data_5bit: &[u8]) -> Result<String, crate::Error> {
518    use crate::Error;
519
520    // Auto-determine code from the eventual data-part length (data_5bit + checksum).
521    let regular_total = data_5bit.len() + 13;
522    let long_total = data_5bit.len() + 15;
523    let code = match (
524        bch_code_for_length(regular_total),
525        bch_code_for_length(long_total),
526    ) {
527        (Some(BchCode::Regular), _) => BchCode::Regular,
528        (_, Some(BchCode::Long)) => BchCode::Long,
529        // Neither code variant accepts this data-part length: too short, in
530        // the 94–95 reserved-invalid gap, or too long for v0.1.
531        _ => {
532            // Pick the closest length to report — long_total is always larger,
533            // so report that as the "actual length you tried to produce".
534            return Err(Error::InvalidStringLength(long_total));
535        }
536    };
537
538    let checksum: Vec<u8> = match code {
539        BchCode::Regular => bch_create_checksum_regular(HRP, data_5bit).to_vec(),
540        BchCode::Long => bch_create_checksum_long(HRP, data_5bit).to_vec(),
541    };
542
543    let mut full = String::with_capacity(HRP.len() + 1 + data_5bit.len() + checksum.len());
544    full.push_str(HRP);
545    full.push(SEPARATOR);
546    for &v in data_5bit {
547        full.push(ALPHABET[v as usize] as char);
548    }
549    for v in checksum {
550        full.push(ALPHABET[v as usize] as char);
551    }
552    Ok(full)
553}
554
555/// Result of a successful mk1 string decode at the BCH layer.
556///
557/// Use [`Self::data`] to access the data part as 5-bit values (header
558/// symbols + payload, checksum stripped); the string-layer reassembler
559/// in `crate::string_layer` splits header symbols off and feeds the
560/// remaining payload through [`five_bit_to_bytes`] to recover the original
561/// fragment bytes.
562///
563/// The full post-correction 5-bit symbol sequence (data **plus** the trailing
564/// 13- or 15-char checksum) is retained internally as [`Self::data_with_checksum`]
565/// and can be queried by [`Self::corrected_char_at`] for any position in
566/// the data part — including positions that fall inside the checksum region.
567/// The decoder-report layer uses this to surface the real corrected
568/// character when BCH ECC repairs a substitution inside the checksum
569/// (parallels md-codec's `Correction.corrected` field).
570#[non_exhaustive]
571#[derive(Debug, Clone, PartialEq, Eq)]
572pub struct DecodedString {
573    /// Detected BCH code variant.
574    pub code: BchCode,
575    /// Number of substitution errors corrected (0 = clean input, 1 = recovered).
576    pub corrections_applied: usize,
577    /// Indices into the data-part (chars after `"md1"`) of any corrected positions.
578    pub corrected_positions: Vec<usize>,
579    /// Full post-correction 5-bit symbol sequence (data part + checksum), in
580    /// the same coordinate system as [`Self::corrected_positions`].
581    ///
582    /// Length is `data().len() + 13` (regular code) or `data().len() + 15`
583    /// (long code). Indices `0..data().len()` mirror [`Self::data`] symbol-for-symbol;
584    /// indices `data().len()..` are the corrected checksum symbols. Use
585    /// [`Self::corrected_char_at`] for the human-readable bech32 character at
586    /// any position.
587    pub data_with_checksum: Vec<u8>,
588}
589
590impl DecodedString {
591    /// Data part as 5-bit values, with the trailing checksum stripped.
592    ///
593    /// Returns a slice into [`Self::data_with_checksum`] — the data part is
594    /// `data_with_checksum[..len - checksum_len]`, where `checksum_len` is 13
595    /// for [`BchCode::Regular`] and 15 for [`BchCode::Long`].
596    pub fn data(&self) -> &[u8] {
597        let checksum_len = match self.code {
598            BchCode::Regular => 13,
599            BchCode::Long => 15,
600        };
601        &self.data_with_checksum[..self.data_with_checksum.len() - checksum_len]
602    }
603
604    /// Look up the corrected bech32 character at the given position in the
605    /// data part (chars after the `"md1"` HRP+separator).
606    ///
607    /// `char_position` is 0-indexed. Positions `0..data().len()` are in the
608    /// data region; positions `data().len()..data().len() + checksum_len` are
609    /// inside the BCH checksum (13 chars for [`BchCode::Regular`], 15 for
610    /// [`BchCode::Long`]). All positions return the post-correction
611    /// character — i.e., what the symbol *should* be after BCH repair, which
612    /// is exactly what [`Correction.corrected`][crate::Correction::corrected]
613    /// is documented to report.
614    ///
615    /// # Panics
616    ///
617    /// Panics if `char_position >= data_with_checksum.len()`. Callers are
618    /// responsible for clamping the position to a valid range; in the decode
619    /// pipeline this is guaranteed by the BCH layer (it never reports a
620    /// `corrected_position` outside `data_with_checksum`). Note that
621    /// `data_with_checksum` includes the checksum region; "outside the data
622    /// part" elsewhere in this crate excludes the checksum and is a tighter
623    /// bound than what this method requires.
624    pub fn corrected_char_at(&self, char_position: usize) -> char {
625        let v = self.data_with_checksum[char_position];
626        ALPHABET[v as usize] as char
627    }
628}
629
630/// Decode an mk1 string, validating HRP, case, length, and checksum.
631///
632/// Performs full BCH error correction up to four substitutions
633/// (`t = 4` capacity of the BCH(93, 80, 8) regular code and the
634/// BCH(108, 93, 8) long code), via syndrome-based Berlekamp–Massey +
635/// Forney decoding (implemented in the sibling `bch_decode` module).
636///
637/// Errors:
638/// - [`Error::MixedCase`] if the string mixes upper and lower case.
639/// - [`Error::InvalidHrp`] if the HRP is missing or not [`crate::consts::HRP`].
640/// - [`Error::InvalidStringLength`] if the data-part length isn't a valid mk1 length.
641/// - [`Error::InvalidChar`] if the data part contains a non-bech32 character.
642/// - [`Error::BchUncorrectable`] if the checksum can't be repaired within
643///   the BCH `t = 4` correction radius.
644///
645/// [`Error::MixedCase`]: crate::Error::MixedCase
646/// [`Error::InvalidHrp`]: crate::Error::InvalidHrp
647/// [`Error::InvalidStringLength`]: crate::Error::InvalidStringLength
648/// [`Error::InvalidChar`]: crate::Error::InvalidChar
649/// [`Error::BchUncorrectable`]: crate::Error::BchUncorrectable
650pub fn decode_string(s: &str) -> Result<DecodedString, crate::Error> {
651    use crate::Error;
652
653    if matches!(case_check(s), CaseStatus::Mixed) {
654        return Err(Error::MixedCase);
655    }
656    let s_lower = s.to_lowercase();
657
658    let sep_pos = s_lower
659        .rfind(SEPARATOR)
660        .ok_or_else(|| Error::InvalidHrp(s_lower.clone()))?;
661    let (hrp, rest) = s_lower.split_at(sep_pos);
662    let data_part = &rest[1..]; // skip the '1' separator
663
664    if hrp != HRP {
665        return Err(Error::InvalidHrp(hrp.to_string()));
666    }
667
668    let code =
669        bch_code_for_length(data_part.len()).ok_or(Error::InvalidStringLength(data_part.len()))?;
670
671    let mut values: Vec<u8> = Vec::with_capacity(data_part.len());
672    for (i, c) in data_part.chars().enumerate() {
673        if !c.is_ascii() {
674            return Err(Error::InvalidChar { ch: c, position: i });
675        }
676        let v = ALPHABET_INV[c as usize];
677        if v == 0xFF {
678            return Err(Error::InvalidChar { ch: c, position: i });
679        }
680        values.push(v);
681    }
682
683    let correction = match code {
684        BchCode::Regular => bch_correct_regular(hrp, &values),
685        BchCode::Long => bch_correct_long(hrp, &values),
686    };
687    let result = correction?;
688
689    Ok(DecodedString {
690        code,
691        corrections_applied: result.corrections_applied,
692        corrected_positions: result.corrected_positions,
693        data_with_checksum: result.data,
694    })
695}
696
697#[cfg(test)]
698mod tests {
699    use super::*;
700
701    #[test]
702    fn bch_code_equality() {
703        assert_eq!(BchCode::Regular, BchCode::Regular);
704        assert_ne!(BchCode::Regular, BchCode::Long);
705    }
706
707    #[test]
708    fn bch_code_can_be_hashed() {
709        use std::collections::HashSet;
710        let mut set = HashSet::new();
711        set.insert(BchCode::Regular);
712        set.insert(BchCode::Long);
713        set.insert(BchCode::Regular);
714        assert_eq!(set.len(), 2);
715    }
716
717    #[test]
718    fn alphabet_is_32_unique_chars() {
719        let mut seen = std::collections::HashSet::new();
720        for &c in ALPHABET {
721            assert!(seen.insert(c), "duplicate char in alphabet: {}", c as char);
722        }
723        assert_eq!(seen.len(), 32);
724    }
725
726    #[test]
727    fn bytes_to_5bit_round_trip_zero() {
728        let bytes = vec![0x00];
729        let fives = bytes_to_5bit(&bytes);
730        assert_eq!(fives, vec![0, 0]);
731        let back = five_bit_to_bytes(&fives).unwrap();
732        assert_eq!(back, bytes);
733    }
734
735    #[test]
736    fn bytes_to_5bit_round_trip_known_value() {
737        // 0xFF = binary 11111111. Splits as 11111 (=31) and 111 (padded with 00 to 11100=28).
738        let bytes = vec![0xFF];
739        let fives = bytes_to_5bit(&bytes);
740        assert_eq!(fives, vec![31, 28]);
741    }
742
743    #[test]
744    fn bytes_to_5bit_round_trip_multibyte() {
745        // 3 bytes = 24 bits → 5 five-bit groups (25 bits, 1 pad bit).
746        let bytes = vec![0xDE, 0xAD, 0xBE];
747        let back = five_bit_to_bytes(&bytes_to_5bit(&bytes)).unwrap();
748        assert_eq!(back, bytes);
749    }
750
751    #[test]
752    fn five_bit_to_bytes_rejects_nonzero_padding() {
753        // Two 5-bit values = 10 bits, of which 8 form a byte and 2 are padding.
754        // If padding bits are nonzero, decode must fail.
755        // 31 = 11111, 1 = 00001. Last 2 bits (= 01) are nonzero padding.
756        assert!(five_bit_to_bytes(&[31, 1]).is_none());
757    }
758
759    #[test]
760    fn five_bit_to_bytes_rejects_value_out_of_range() {
761        assert!(five_bit_to_bytes(&[32]).is_none());
762    }
763
764    #[test]
765    fn bch_code_for_length_regular() {
766        assert_eq!(bch_code_for_length(14), Some(BchCode::Regular));
767        assert_eq!(bch_code_for_length(93), Some(BchCode::Regular));
768    }
769
770    #[test]
771    fn bch_code_for_length_long() {
772        assert_eq!(bch_code_for_length(96), Some(BchCode::Long));
773        assert_eq!(bch_code_for_length(108), Some(BchCode::Long));
774    }
775
776    #[test]
777    fn bch_code_for_length_rejects_94_and_95() {
778        assert_eq!(bch_code_for_length(94), None);
779        assert_eq!(bch_code_for_length(95), None);
780    }
781
782    #[test]
783    fn bch_code_for_length_rejects_extremes() {
784        assert_eq!(bch_code_for_length(0), None);
785        assert_eq!(bch_code_for_length(13), None);
786        assert_eq!(bch_code_for_length(109), None);
787        assert_eq!(bch_code_for_length(1000), None);
788    }
789
790    #[test]
791    fn case_check_lowercase() {
792        assert_eq!(case_check("md1qq"), CaseStatus::Lower);
793    }
794
795    #[test]
796    fn case_check_uppercase() {
797        assert_eq!(case_check("MD1QQ"), CaseStatus::Upper);
798    }
799
800    #[test]
801    fn case_check_mixed() {
802        assert_eq!(case_check("mD1qq"), CaseStatus::Mixed);
803    }
804
805    #[test]
806    fn case_check_empty_string_is_lower() {
807        assert_eq!(case_check(""), CaseStatus::Lower);
808    }
809
810    #[test]
811    fn case_check_digits_only_is_lower() {
812        // Digits have no case; result must be Lower (BIP 173: no-letter strings are lower).
813        assert_eq!(case_check("1234"), CaseStatus::Lower);
814    }
815
816    #[test]
817    fn gen_regular_has_5_entries() {
818        assert_eq!(GEN_REGULAR.len(), 5);
819    }
820
821    #[test]
822    fn gen_long_has_5_entries() {
823        assert_eq!(GEN_LONG.len(), 5);
824    }
825
826    #[test]
827    fn gen_regular_matches_bip93_canonical_values() {
828        // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
829        // ms32_polymod GEN array. If this fails, the constants drifted from the BIP.
830        assert_eq!(GEN_REGULAR[0], 0x19dc500ce73fde210);
831        assert_eq!(GEN_REGULAR[1], 0x1bfae00def77fe529);
832        assert_eq!(GEN_REGULAR[2], 0x1fbd920fffe7bee52);
833        assert_eq!(GEN_REGULAR[3], 0x1739640bdeee3fdad);
834        assert_eq!(GEN_REGULAR[4], 0x07729a039cfc75f5a);
835    }
836
837    #[test]
838    fn gen_long_matches_bip93_canonical_values() {
839        // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
840        // ms32_long_polymod GEN array.
841        assert_eq!(GEN_LONG[0], 0x3d59d273535ea62d897);
842        assert_eq!(GEN_LONG[1], 0x7a9becb6361c6c51507);
843        assert_eq!(GEN_LONG[2], 0x543f9b7e6c38d8a2a0e);
844        assert_eq!(GEN_LONG[3], 0x0c577eaeccf1990d13c);
845        assert_eq!(GEN_LONG[4], 0x1887f74f8dc71b10651);
846    }
847
848    #[test]
849    fn polymod_init_matches_bip93() {
850        // POLYMOD_INIT is unchanged from BIP 93; the GEN_REGULAR / GEN_LONG
851        // constants have their own value-equality tests.
852        assert_eq!(POLYMOD_INIT, 0x23181b3);
853    }
854
855    // (NUMS-derivation reproducer for `MK_REGULAR_CONST` / `MK_LONG_CONST`
856    // lives in `crate::consts::tests::nums_constants_reproduce_from_domain`,
857    // which uses the mk1-specific domain `b"shibbolethnumskey"`. Duplicating
858    // it here would risk drift if either side were updated in isolation.)
859
860    #[test]
861    fn polymod_masks_are_consistent_with_shifts() {
862        // The mask must be (1 << shift) - 1 so that masking preserves bits below
863        // the shift boundary, exactly matching the BIP 93 algorithm.
864        assert_eq!(REGULAR_MASK, (1u128 << REGULAR_SHIFT) - 1);
865        assert_eq!(LONG_MASK, (1u128 << LONG_SHIFT) - 1);
866        assert_eq!(REGULAR_SHIFT, 60);
867        assert_eq!(LONG_SHIFT, 70);
868    }
869
870    #[test]
871    fn polymod_step_zero_residue_zero_value() {
872        // Both residue and value zero, no GEN XORs since b = 0.
873        assert_eq!(
874            polymod_step(0, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
875            0
876        );
877    }
878
879    #[test]
880    fn polymod_step_value_only_xor_when_residue_zero() {
881        // Residue 0, value 7 → result is 7 (XORed into the shifted-zero residue).
882        assert_eq!(
883            polymod_step(0, 7, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
884            7
885        );
886    }
887
888    #[test]
889    fn polymod_step_isolates_each_gen_entry() {
890        // Setting just bit `shift+i` in the residue → b = 1<<i → only GEN[i] is XORed.
891        for i in 0..5 {
892            let r = 1u128 << (REGULAR_SHIFT + i);
893            assert_eq!(
894                polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
895                GEN_REGULAR[i as usize],
896                "bit {} of b should isolate GEN_REGULAR[{}]",
897                i,
898                i
899            );
900        }
901    }
902
903    #[test]
904    fn polymod_step_xors_multiple_gens_when_multiple_b_bits_set() {
905        // b = 0b00011 → XOR GEN[0] and GEN[1].
906        let r = 0b00011u128 << REGULAR_SHIFT;
907        assert_eq!(
908            polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
909            GEN_REGULAR[0] ^ GEN_REGULAR[1]
910        );
911        // b = 0b11111 → XOR all 5.
912        let r = 0b11111u128 << REGULAR_SHIFT;
913        let expected =
914            GEN_REGULAR[0] ^ GEN_REGULAR[1] ^ GEN_REGULAR[2] ^ GEN_REGULAR[3] ^ GEN_REGULAR[4];
915        assert_eq!(
916            polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
917            expected
918        );
919    }
920
921    #[test]
922    fn polymod_step_works_for_long_code() {
923        // Same parameterization works for the long code (shift=70, mask=LONG_MASK).
924        let r = 1u128 << LONG_SHIFT;
925        assert_eq!(
926            polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
927            GEN_LONG[0]
928        );
929        // b = 0b11111 → XOR all 5 long GENs.
930        let r = 0b11111u128 << LONG_SHIFT;
931        let expected = GEN_LONG[0] ^ GEN_LONG[1] ^ GEN_LONG[2] ^ GEN_LONG[3] ^ GEN_LONG[4];
932        assert_eq!(
933            polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
934            expected
935        );
936    }
937
938    #[test]
939    fn polymod_step_init_residue_first_iteration() {
940        // POLYMOD_INIT < 2^60 so b = 0 in the first iteration; only the shift+xor happens.
941        // Verify: polymod_step(POLYMOD_INIT, 0) = POLYMOD_INIT << 5.
942        assert_eq!(
943            polymod_step(POLYMOD_INIT, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
944            POLYMOD_INIT << 5
945        );
946        // And with value=v: polymod_step(POLYMOD_INIT, v) = (POLYMOD_INIT << 5) ^ v.
947        assert_eq!(
948            polymod_step(POLYMOD_INIT, 31, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
949            (POLYMOD_INIT << 5) ^ 31
950        );
951    }
952
953    #[test]
954    fn polymod_step_value_and_gen_xor_combined() {
955        // Both effects active: b = 1 (bit 0 of b set) AND value = 5.
956        // Expected: ((residue & mask) << 5) ^ value ^ GEN[0]
957        //         = (0 << 5) ^ 5 ^ GEN[0]
958        //         = GEN_REGULAR[0] ^ 5
959        let r = 1u128 << REGULAR_SHIFT;
960        assert_eq!(
961            polymod_step(r, 5, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
962            GEN_REGULAR[0] ^ 5
963        );
964    }
965
966    #[test]
967    fn hrp_expand_mk_matches_spec() {
968        // BIP 173 hrp_expand for the MK HRP. Each ASCII byte contributes
969        // its high 3 bits then (after the [0] separator) its low 5 bits.
970        // 'm' = 0x6D → high 3 bits = 3, low 5 bits = 13.
971        // 'k' = 0x6B → high 3 bits = 3, low 5 bits = 11.
972        // Result: [3, 3, 0, 13, 11]. Documented in the BIP draft §"Checksum".
973        assert_eq!(hrp_expand(crate::consts::HRP), vec![3, 3, 0, 13, 11]);
974    }
975
976    #[test]
977    fn hrp_expand_empty_returns_just_separator() {
978        // Edge case: empty HRP yields just the [0] separator.
979        assert_eq!(hrp_expand(""), vec![0]);
980    }
981
982    #[test]
983    fn bch_round_trip_regular() {
984        // Encode then verify a small data part. The verify call sees the
985        // full data + checksum, so polymod returns MK_REGULAR_CONST exactly.
986        let hrp = "mk";
987        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
988        let checksum = bch_create_checksum_regular(hrp, &data);
989        assert_eq!(checksum.len(), 13);
990
991        let mut full = data.clone();
992        full.extend_from_slice(&checksum);
993        assert!(bch_verify_regular(hrp, &full));
994    }
995
996    #[test]
997    fn bch_verify_rejects_single_char_tampering_regular() {
998        // Flipping one bit in one symbol breaks verification.
999        // (Spot check; BCH detects all single-symbol errors by construction.)
1000        let hrp = "mk";
1001        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1002        let checksum = bch_create_checksum_regular(hrp, &data);
1003        let mut full = data.clone();
1004        full.extend_from_slice(&checksum);
1005        full[5] ^= 0x01;
1006        assert!(!bch_verify_regular(hrp, &full));
1007    }
1008
1009    #[test]
1010    fn bch_verify_rejects_too_short_input_regular() {
1011        // Less than 13 symbols cannot hold a checksum.
1012        assert!(!bch_verify_regular("mk", &[0, 1, 2]));
1013        assert!(!bch_verify_regular("mk", &[]));
1014    }
1015
1016    // (mk1-specific pinned-checksum vectors are deferred to Phase 6 vector
1017    // corpus generation, which writes both regular- and long-code conformance
1018    // points to disk under `crates/mk-codec/src/test_vectors/v0.1.json`.
1019    // Forking md-codec's pinned vectors verbatim would record the wrong
1020    // values: mk1's HRP and target constants both differ.)
1021
1022    #[test]
1023    fn bch_zero_data_does_not_self_validate_regular() {
1024        // The all-zeros data + all-zeros checksum must NOT validate, because
1025        // MK_REGULAR_CONST was chosen NUMS-style to avoid this trivial case.
1026        // Data length 8 is arbitrary; any non-empty zero-fill exhibits the same
1027        // negative result. 8 echoes the regular-code known-vector data length.
1028        let mut zero = vec![0u8; 8];
1029        zero.extend(std::iter::repeat_n(0, 13));
1030        assert!(!bch_verify_regular("mk", &zero));
1031    }
1032
1033    #[test]
1034    fn bch_round_trip_empty_data_regular() {
1035        // Empty data part is a degenerate but valid input: the checksum
1036        // covers only the HRP preamble. encode → verify must round-trip.
1037        let checksum = bch_create_checksum_regular("mk", &[]);
1038        assert!(bch_verify_regular("mk", &checksum));
1039    }
1040
1041    #[test]
1042    fn bch_round_trip_long() {
1043        let hrp = "mk";
1044        let data: Vec<u8> = (0..16).collect();
1045        let checksum = bch_create_checksum_long(hrp, &data);
1046        assert_eq!(checksum.len(), 15);
1047        let mut full = data.clone();
1048        full.extend_from_slice(&checksum);
1049        assert!(bch_verify_long(hrp, &full));
1050    }
1051
1052    #[test]
1053    fn bch_verify_rejects_single_char_tampering_long() {
1054        // Flipping one bit in one symbol breaks verification.
1055        // (Spot check; BCH detects all single-symbol errors by construction.)
1056        let hrp = "mk";
1057        let data: Vec<u8> = (0..16).collect();
1058        let checksum = bch_create_checksum_long(hrp, &data);
1059        let mut full = data.clone();
1060        full.extend_from_slice(&checksum);
1061        full[7] ^= 0x01;
1062        assert!(!bch_verify_long(hrp, &full));
1063    }
1064
1065    #[test]
1066    fn bch_verify_rejects_too_short_input_long() {
1067        // Less than 15 symbols cannot hold a long-code checksum.
1068        assert!(!bch_verify_long("mk", &[0; 14]));
1069        assert!(!bch_verify_long("mk", &[]));
1070    }
1071
1072    #[test]
1073    fn bch_zero_data_does_not_self_validate_long() {
1074        // All-zeros must not validate, by NUMS construction of MK_LONG_CONST.
1075        // Data length 16 is arbitrary; any non-empty zero-fill exhibits the same
1076        // negative result. 16 echoes the long-code known-vector data length.
1077        let mut zero = vec![0u8; 16];
1078        zero.extend(std::iter::repeat_n(0, 15));
1079        assert!(!bch_verify_long("mk", &zero));
1080    }
1081
1082    #[test]
1083    fn bch_round_trip_empty_data_long() {
1084        // Degenerate but valid: checksum covers only the HRP preamble.
1085        let checksum = bch_create_checksum_long("mk", &[]);
1086        assert!(bch_verify_long("mk", &checksum));
1087    }
1088
1089    #[test]
1090    fn bch_correct_regular_clean_input() {
1091        // Clean input → 0 corrections, identity result.
1092        let hrp = "mk";
1093        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1094        let checksum = bch_create_checksum_regular(hrp, &data);
1095        let mut full = data.clone();
1096        full.extend_from_slice(&checksum);
1097        let r = bch_correct_regular(hrp, &full).unwrap();
1098        assert_eq!(r.corrections_applied, 0);
1099        assert!(r.corrected_positions.is_empty());
1100        assert_eq!(r.data, full);
1101    }
1102
1103    #[test]
1104    fn bch_correct_regular_one_error() {
1105        // Single-symbol corruption is recoverable.
1106        let hrp = "mk";
1107        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1108        let checksum = bch_create_checksum_regular(hrp, &data);
1109        let mut full = data.clone();
1110        full.extend_from_slice(&checksum);
1111        let original = full.clone();
1112        full[3] = (full[3] + 1) & 0x1F;
1113        let r = bch_correct_regular(hrp, &full).unwrap();
1114        assert_eq!(r.corrections_applied, 1);
1115        assert_eq!(r.corrected_positions, vec![3]);
1116        assert_eq!(r.data, original);
1117    }
1118
1119    #[test]
1120    fn bch_correct_regular_two_errors_recovered_v0_2() {
1121        // v0.2 BM/Forney decoder reaches the BCH(93,80,8) full t = 4
1122        // capacity. A 2-error pattern is now recoverable. This test was
1123        // `..._uncorrectable_v0_1` in v0.1; flipped sign in v0.2.
1124        let hrp = "mk";
1125        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1126        let checksum = bch_create_checksum_regular(hrp, &data);
1127        let mut full = data.clone();
1128        full.extend_from_slice(&checksum);
1129        let original = full.clone();
1130        full[3] = (full[3] + 1) & 0x1F;
1131        full[7] = (full[7] + 1) & 0x1F;
1132        let r = bch_correct_regular(hrp, &full).unwrap();
1133        assert_eq!(r.corrections_applied, 2);
1134        assert!(r.corrected_positions.contains(&3));
1135        assert!(r.corrected_positions.contains(&7));
1136        assert_eq!(r.data, original);
1137    }
1138
1139    #[test]
1140    fn bch_correct_long_clean_input() {
1141        let hrp = "mk";
1142        let data: Vec<u8> = (0..16).collect();
1143        let checksum = bch_create_checksum_long(hrp, &data);
1144        let mut full = data.clone();
1145        full.extend_from_slice(&checksum);
1146        let r = bch_correct_long(hrp, &full).unwrap();
1147        assert_eq!(r.corrections_applied, 0);
1148    }
1149
1150    #[test]
1151    fn bch_correct_long_one_error() {
1152        let hrp = "mk";
1153        let data: Vec<u8> = (0..16).collect();
1154        let checksum = bch_create_checksum_long(hrp, &data);
1155        let mut full = data.clone();
1156        full.extend_from_slice(&checksum);
1157        let original = full.clone();
1158        full[5] = (full[5] + 1) & 0x1F;
1159        let r = bch_correct_long(hrp, &full).unwrap();
1160        assert_eq!(r.corrections_applied, 1);
1161        assert_eq!(r.corrected_positions, vec![5]);
1162        assert_eq!(r.data, original);
1163    }
1164
1165    #[test]
1166    fn bch_correct_returns_correction_result_with_position() {
1167        // Verify the API contract: a successful 1-error correction reports
1168        // exactly the position that was changed.
1169        let hrp = "mk";
1170        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
1171        let checksum = bch_create_checksum_regular(hrp, &data);
1172        let mut full = data.clone();
1173        full.extend_from_slice(&checksum);
1174        // Damage the second checksum byte (position 9 from start).
1175        full[9] = (full[9] + 7) & 0x1F;
1176        let r = bch_correct_regular(hrp, &full).unwrap();
1177        assert_eq!(r.corrected_positions, vec![9]);
1178    }
1179
1180    /// Build a fake mk1 5-bit data stream for round-trip tests:
1181    /// `[v0, v1, ...]` are 2 bech32-symbol single-string-style header
1182    /// symbols; `payload_bytes` is the byte-level fragment.
1183    fn build_5bit_data(header_symbols: &[u8], payload_bytes: &[u8]) -> Vec<u8> {
1184        let mut out = Vec::with_capacity(header_symbols.len() + payload_bytes.len() * 2);
1185        out.extend_from_slice(header_symbols);
1186        out.extend(bytes_to_5bit(payload_bytes));
1187        out
1188    }
1189
1190    #[test]
1191    fn encode_5bit_to_string_round_trip_regular() {
1192        // 2-symbol single-string header + 4-byte payload → 7 5-bit symbols
1193        // (header [v=0, t=0] || bytes_to_5bit(4 bytes) = 2 + 7 = 9 symbols).
1194        // 9 + 13 regular checksum = 22-char data part — well within regular range.
1195        let header_symbols = [0u8, 0u8];
1196        let payload = vec![0xDE, 0xAD, 0xBE, 0xEF];
1197        let data_5bit = build_5bit_data(&header_symbols, &payload);
1198        let s = encode_5bit_to_string(&data_5bit).unwrap();
1199        assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1200
1201        let decoded = decode_string(&s).unwrap();
1202        assert_eq!(decoded.code, BchCode::Regular);
1203        assert_eq!(decoded.corrections_applied, 0);
1204        assert!(decoded.corrected_positions.is_empty());
1205        assert_eq!(decoded.data(), data_5bit.as_slice());
1206
1207        // Recover the payload by stripping the 2-symbol header and byte-decoding.
1208        let payload_5bit = &decoded.data()[2..];
1209        let recovered = five_bit_to_bytes(payload_5bit).unwrap();
1210        assert_eq!(recovered, payload);
1211    }
1212
1213    #[test]
1214    fn encode_5bit_to_string_round_trip_long() {
1215        // Force a long-code path with an 8-symbol chunked-style header +
1216        // a 53-byte fragment: data_5bit.len() = 8 + ceil(53*8/5) = 8 + 85 = 93,
1217        // + 15 long checksum = 108 — exact long-code upper bound.
1218        let header_symbols = [0u8; 8];
1219        let payload = vec![0xA5u8; 53];
1220        let data_5bit = build_5bit_data(&header_symbols, &payload);
1221        assert_eq!(
1222            data_5bit.len(),
1223            93,
1224            "fixture invariant: 8 header + 85 payload symbols"
1225        );
1226        let s = encode_5bit_to_string(&data_5bit).unwrap();
1227        assert!(s.starts_with("mk1"));
1228        let decoded = decode_string(&s).unwrap();
1229        assert_eq!(decoded.code, BchCode::Long);
1230        assert_eq!(decoded.data(), data_5bit.as_slice());
1231
1232        let recovered = five_bit_to_bytes(&decoded.data()[8..]).unwrap();
1233        assert_eq!(recovered, payload);
1234    }
1235
1236    #[test]
1237    fn encode_starts_with_hrp_and_separator() {
1238        // Minimum-shape input: 1 5-bit symbol + 13 regular checksum = 14 — the
1239        // tightest valid regular-code data-part length.
1240        let s = encode_5bit_to_string(&[1u8]).unwrap();
1241        assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1242    }
1243
1244    #[test]
1245    fn decode_rejects_invalid_hrp() {
1246        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1247        let bad = s.replacen("mk", "bt", 1);
1248        assert!(matches!(
1249            decode_string(&bad),
1250            Err(crate::Error::InvalidHrp(_))
1251        ));
1252    }
1253
1254    #[test]
1255    fn decode_rejects_mixed_case() {
1256        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1257        let bad: String = s
1258            .chars()
1259            .enumerate()
1260            .map(|(i, c)| if i == 5 { c.to_ascii_uppercase() } else { c })
1261            .collect();
1262        assert!(matches!(decode_string(&bad), Err(crate::Error::MixedCase)));
1263    }
1264
1265    #[test]
1266    fn decode_rejects_invalid_char() {
1267        // 'b' is excluded from the bech32 alphabet; substitute one in the data
1268        // part to force a parse-time character rejection.
1269        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1270        // s looks like "mk1...". Splice 'b' at index 5 (definitely past "mk1").
1271        let mut chars: Vec<char> = s.chars().collect();
1272        chars[5] = 'b';
1273        let bad: String = chars.into_iter().collect();
1274        assert!(matches!(
1275            decode_string(&bad),
1276            Err(crate::Error::InvalidChar { .. })
1277        ));
1278    }
1279
1280    #[test]
1281    fn decode_rejects_missing_separator() {
1282        // No '1' at all in the string. rfind('1') returns None → InvalidHrp.
1283        let bad = "mknoseparatorhere";
1284        assert!(matches!(
1285            decode_string(bad),
1286            Err(crate::Error::InvalidHrp(_))
1287        ));
1288    }
1289
1290    #[test]
1291    fn decode_recovers_one_error() {
1292        // Encode, corrupt one char in the data part, decode should auto-correct.
1293        let data_5bit = vec![0u8, 0u8, 1, 2, 3, 4, 5];
1294        let s = encode_5bit_to_string(&data_5bit).unwrap();
1295
1296        let mut chars: Vec<char> = s.chars().collect();
1297        // Corrupt position 6 (past "mk1", well within the data part).
1298        let original_char = chars[6];
1299        chars[6] = if original_char == 'q' { 'p' } else { 'q' };
1300        let corrupted: String = chars.into_iter().collect();
1301
1302        let decoded = decode_string(&corrupted).unwrap();
1303        assert_eq!(decoded.corrections_applied, 1);
1304        assert_eq!(decoded.corrected_positions.len(), 1);
1305        assert_eq!(decoded.data(), data_5bit.as_slice());
1306    }
1307
1308    #[test]
1309    fn encode_rejects_data_part_in_reserved_invalid_length_range() {
1310        // For 5-bit data-part lengths 0..=12 (so data_part = 13..=25 with regular
1311        // checksum, or 15..=27 with long), `bch_code_for_length` rejects below 14.
1312        // Empty input → data_5bit.len()=0 → regular_total=13 → None; long_total=15
1313        // → Regular. Wait — regular range starts at 14 not 13.
1314        //
1315        // Actual invariant test: len 0 → regular_total=13 (None, below 14) and
1316        // long_total=15 (Regular). So it falls back to long->regular ladder ...
1317        // Re-checking encode_5bit_to_string: only fails when both miss [14..=93]
1318        // and [96..=108]. For data_5bit.len()=79, regular_total=92 → Regular ✓.
1319        // The provable reserved-invalid case is a length that misses both
1320        // ranges; the BIP 93 BCH ladder leaves no such gap below 109 because
1321        // [14..=93] ∪ [96..=108] only excludes {0..=13, 94..=95, ≥109}. The
1322        // smallest input length that produces invalid data-part lengths in
1323        // BOTH the regular and long branches is therefore data_5bit.len() ≥ 94
1324        // (regular_total ≥ 107 in invalid territory, long_total ≥ 109 too long).
1325        let too_long = vec![0u8; 94];
1326        let result = encode_5bit_to_string(&too_long);
1327        assert!(matches!(result, Err(crate::Error::InvalidStringLength(_))));
1328    }
1329}