Skip to main content

mk_codec/string_layer/
bch.rs

1//! BCH primitives for the mk1 string layer: bech32 alphabet conversion and
2//! syndrome-based error correction.
3//!
4//! Forked from `md-codec` v0.4.x (`crates/md-codec/src/encoding.rs`) at the
5//! start of the mk1 v0.1 implementation per `design/DECISIONS.md` D-13. The
6//! BCH polynomials and field arithmetic are shared with the sibling md1
7//! format (both reuse BIP 93's `BCH(93,80,8)` regular code and
8//! `BCH(108,93,8)` long code); the only mk1-specific knobs are the HRP
9//! (`"mk"`) and the NUMS-derived target residues ([`crate::consts::MK_REGULAR_CONST`]
10//! / [`crate::consts::MK_LONG_CONST`]).
11//!
12//! Unlike md-codec's encoding module, this file does **not** expose a
13//! top-level `encode_string` / `decode_string`: mk1's string-layer header
14//! lives at the 5-bit symbol layer (per closure Q-5 — 2 symbols for
15//! `SingleString`, 8 symbols for `Chunked`) rather than the byte-aligned
16//! layer md1 uses. The mk1 `string_layer/mod.rs` builds string-level
17//! encode/decode on top of the BCH primitives here.
18
19use super::bch_decode;
20use crate::consts::{HRP, MK_LONG_CONST, MK_REGULAR_CONST};
21
22/// Which BCH code variant a string uses.
23///
24/// Determined by the total data-part length: regular for ≤93 chars,
25/// long for 96–108 chars. Lengths 94–95 are reserved-invalid.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
27pub enum BchCode {
28    /// Regular code: BCH(93,80,8). 13-char checksum.
29    Regular,
30    /// Long code: BCH(108,93,8). 15-char checksum.
31    Long,
32}
33
34/// The bech32 32-character alphabet, in 5-bit-value order.
35///
36/// `q=0, p=1, z=2, r=3, y=4, 9=5, x=6, 8=7, g=8, f=9, 2=10, t=11, v=12,
37///  d=13, w=14, 0=15, s=16, 3=17, j=18, n=19, 5=20, 4=21, k=22, h=23,
38///  c=24, e=25, 6=26, m=27, u=28, a=29, 7=30, l=31`.
39pub const ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
40
41/// Inverse lookup: char (lowercase ASCII) -> 5-bit value, or 0xFF if not in alphabet.
42const ALPHABET_INV: [u8; 128] = build_alphabet_inv();
43
44const fn build_alphabet_inv() -> [u8; 128] {
45    let mut inv = [0xFFu8; 128];
46    let mut i = 0;
47    while i < 32 {
48        inv[ALPHABET[i] as usize] = i as u8;
49        i += 1;
50    }
51    inv
52}
53
54/// Convert a sequence of 8-bit bytes to a sequence of 5-bit values
55/// (padded with zero bits at the end if the bit count is not a multiple of 5).
56pub fn bytes_to_5bit(bytes: &[u8]) -> Vec<u8> {
57    let mut acc: u32 = 0;
58    let mut bits = 0u32;
59    let mut out = Vec::with_capacity((bytes.len() * 8).div_ceil(5));
60    for &b in bytes {
61        acc = (acc << 8) | b as u32;
62        bits += 8;
63        while bits >= 5 {
64            bits -= 5;
65            out.push(((acc >> bits) & 0x1F) as u8);
66        }
67    }
68    if bits > 0 {
69        out.push(((acc << (5 - bits)) & 0x1F) as u8);
70    }
71    out
72}
73
74/// Convert a sequence of 5-bit values back to 8-bit bytes.
75///
76/// Returns `None` if any value in `values` is ≥ 32 (out of 5-bit range),
77/// or if the trailing padding bits are non-zero.
78pub fn five_bit_to_bytes(values: &[u8]) -> Option<Vec<u8>> {
79    let mut acc: u32 = 0;
80    let mut bits = 0u32;
81    let mut out = Vec::with_capacity(values.len() * 5 / 8);
82    for &v in values {
83        if v >= 32 {
84            return None;
85        }
86        acc = (acc << 5) | v as u32;
87        bits += 5;
88        if bits >= 8 {
89            bits -= 8;
90            out.push(((acc >> bits) & 0xFF) as u8);
91        }
92    }
93    // Any remaining bits must be zero (padding).
94    if bits >= 5 {
95        return None;
96    }
97    if (acc & ((1 << bits) - 1)) != 0 {
98        return None;
99    }
100    Some(out)
101}
102
103/// The bech32 separator character between HRP and data-part (BIP 173 §3).
104///
105/// Re-exported by [`crate::consts::HRP`] is `"mk"`; this module's
106/// BCH-checksum helpers consume the HRP through their `hrp` parameter so
107/// that the same primitives can verify any single-HRP codex32-derived
108/// string. Production callers MUST pass [`crate::consts::HRP`].
109pub const SEPARATOR: char = '1';
110
111/// Determine the BchCode variant from a total data-part length.
112///
113/// Boundaries are from BIP 93 (codex32): regular code `BCH(93,80,8)` caps at 93,
114/// long code `BCH(108,93,8)` runs 96–108, and lengths 94–95 are explicitly
115/// reserved-invalid to prevent ambiguity in code-variant selection. Lengths
116/// below 14 or above 108 are also rejected.
117pub fn bch_code_for_length(data_part_len: usize) -> Option<BchCode> {
118    match data_part_len {
119        14..=93 => Some(BchCode::Regular),
120        94..=95 => None,
121        96..=108 => Some(BchCode::Long),
122        _ => None,
123    }
124}
125
126/// Check whether a string is all-lowercase, all-uppercase, or mixed.
127///
128/// Only ASCII letters are considered; non-ASCII characters (digits, punctuation,
129/// Unicode letters) are treated as neither case. This is appropriate for MD
130/// strings, whose alphabet is a subset of ASCII. An empty string or one with
131/// no ASCII letters returns [`CaseStatus::Lower`].
132pub fn case_check(s: &str) -> CaseStatus {
133    let mut has_lower = false;
134    let mut has_upper = false;
135    for c in s.chars() {
136        if c.is_ascii_lowercase() {
137            has_lower = true;
138        } else if c.is_ascii_uppercase() {
139            has_upper = true;
140        }
141        if has_lower && has_upper {
142            break;
143        }
144    }
145    match (has_lower, has_upper) {
146        (true, true) => CaseStatus::Mixed,
147        (true, false) => CaseStatus::Lower,
148        (false, true) => CaseStatus::Upper,
149        (false, false) => CaseStatus::Lower, // empty / no letters; treat as lower
150    }
151}
152
153/// Result of a case check.
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum CaseStatus {
156    /// All-lowercase or no letters.
157    Lower,
158    /// All-uppercase.
159    Upper,
160    /// Both lowercase and uppercase letters present (invalid).
161    Mixed,
162}
163
164/// BCH polymod constants for the regular checksum (BCH(93,80,8)).
165///
166/// Source: BIP 93 (codex32) reference implementation, `ms32_polymod` function.
167/// These five values are XORed into the running residue based on the top 5 bits
168/// of the residue at each step. The polymod operation uses a 65-bit residue
169/// (top 5 bits = current `b`, bottom 60 bits = masked state).
170///
171/// Verified against the canonical reference at
172/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
173pub const GEN_REGULAR: [u128; 5] = [
174    0x19dc500ce73fde210,
175    0x1bfae00def77fe529,
176    0x1fbd920fffe7bee52,
177    0x1739640bdeee3fdad,
178    0x07729a039cfc75f5a,
179];
180
181/// Initial residue value for both the regular and long polymod algorithms (BIP 93).
182///
183/// Both `ms32_polymod` and `ms32_long_polymod` start with this residue before
184/// processing any input characters.
185pub const POLYMOD_INIT: u128 = 0x23181b3;
186
187/// Right-shift amount to extract the top 5 bits from a 65-bit regular-code residue.
188///
189/// Usage: `b = residue >> REGULAR_SHIFT` gives the 5-bit feedback selector
190/// for the polymod algorithm.
191pub const REGULAR_SHIFT: u32 = 60;
192
193/// Mask preserving the low 60 bits of a 65-bit regular-code residue.
194pub const REGULAR_MASK: u128 = 0x0fffffffffffffff;
195
196/// BCH polymod constants for the long checksum (BCH(108,93,8)).
197///
198/// Source: BIP 93 (codex32) reference implementation, `ms32_long_polymod` function.
199/// The long polymod uses a 75-bit residue (top 5 bits = `b`, bottom 70 bits = masked state).
200///
201/// Verified against the canonical reference at
202/// <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki>.
203pub const GEN_LONG: [u128; 5] = [
204    0x3d59d273535ea62d897,
205    0x7a9becb6361c6c51507,
206    0x543f9b7e6c38d8a2a0e,
207    0x0c577eaeccf1990d13c,
208    0x1887f74f8dc71b10651,
209];
210
211/// Right-shift amount to extract the top 5 bits from a 75-bit long-code residue.
212///
213/// Usage: `b = residue >> LONG_SHIFT` gives the 5-bit feedback selector
214/// for the polymod algorithm.
215pub const LONG_SHIFT: u32 = 70;
216
217/// Mask preserving the low 70 bits of a 75-bit long-code residue.
218pub const LONG_MASK: u128 = 0x3fffffffffffffffff;
219
220/// One step of the BCH polymod algorithm from BIP 93.
221///
222/// Updates the running `residue` to incorporate the next 5-bit input `value`
223/// using the polynomial defined by `gen`, shift width `shift`, and mask `mask`.
224/// The same function is used for both the regular and long codes; pass
225/// `(GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK)` for the regular code and
226/// `(GEN_LONG, LONG_SHIFT, LONG_MASK)` for the long code.
227///
228/// Returns the updated residue after incorporating `value`. The top 5 bits of
229/// the returned residue feed the next iteration's `b` selector.
230///
231/// This is a direct port of BIP 93's `ms32_polymod` / `ms32_long_polymod` inner
232/// loop. See <https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki> .
233fn polymod_step(residue: u128, value: u128, r#gen: &[u128; 5], shift: u32, mask: u128) -> u128 {
234    let b = residue >> shift;
235    let mut new_residue = ((residue & mask) << 5) ^ value;
236    for (i, &g) in r#gen.iter().enumerate() {
237        if (b >> i) & 1 != 0 {
238            new_residue ^= g;
239        }
240    }
241    new_residue
242}
243
244/// BIP 173-style HRP-expansion: produces the 5-bit-symbol prelude that gets
245/// prepended to the data part before running the BCH polymod.
246///
247/// For each HRP character `c`, emits `c >> 5` (high 3 bits); then emits a
248/// single 0 separator; then emits each character's `c & 31` (low 5 bits).
249/// The result has length `2 * hrp.len() + 1` for ASCII HRPs.
250///
251/// For `hrp_expand("md")` this returns `[3, 3, 0, 13, 4]`.
252pub fn hrp_expand(hrp: &str) -> Vec<u8> {
253    let bytes = hrp.as_bytes();
254    let mut out = Vec::with_capacity(bytes.len() * 2 + 1);
255    for &c in bytes {
256        out.push(c >> 5);
257    }
258    out.push(0);
259    for &c in bytes {
260        out.push(c & 31);
261    }
262    out
263}
264
265/// Run polymod over a sequence of 5-bit values using the parameters for
266/// either the regular or long BCH code, starting from POLYMOD_INIT.
267///
268/// `pub(in crate::string_layer)` so the sibling `bch_decode` module's test
269/// suite can validate field arithmetic against the production primitive
270/// without duplicating it (test-helper drift would otherwise mask real
271/// regressions when both copies agree on a bug).
272pub(in crate::string_layer) fn polymod_run(
273    values: &[u8],
274    r#gen: &[u128; 5],
275    shift: u32,
276    mask: u128,
277) -> u128 {
278    let mut residue = POLYMOD_INIT;
279    for &v in values {
280        residue = polymod_step(residue, v as u128, r#gen, shift, mask);
281    }
282    residue
283}
284
285/// Compute the 13-character BCH checksum for the regular code over the
286/// HRP-expanded preamble plus the data part.
287///
288/// `data` is the sequence of 5-bit values for the data part (header + payload),
289/// not including the checksum. Returns the 13-element checksum array, ready
290/// to append to `data` to form the full data-part-plus-checksum.
291///
292/// The algorithm runs polymod over `hrp_expand(hrp) || data || [0; 13]`,
293/// then XORs the result with [`MK_REGULAR_CONST`] to extract the checksum.
294pub fn bch_create_checksum_regular(hrp: &str, data: &[u8]) -> [u8; 13] {
295    // Regular code: 13-symbol checksum (0..=12), pad/array/extraction all use 13.
296    let mut input = hrp_expand(hrp);
297    input.extend_from_slice(data);
298    input.extend(std::iter::repeat_n(0, 13));
299    let polymod = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
300    let mut out = [0u8; 13];
301    for (i, slot) in out.iter_mut().enumerate() {
302        *slot = ((polymod >> (5 * (12 - i))) & 0x1F) as u8;
303    }
304    out
305}
306
307/// Verify a regular-code BCH checksum.
308///
309/// `data_with_checksum` is the full data part including the trailing 13
310/// checksum characters. Returns `true` iff the polymod over
311/// `hrp_expand(hrp) || data_with_checksum` equals [`MK_REGULAR_CONST`].
312pub fn bch_verify_regular(hrp: &str, data_with_checksum: &[u8]) -> bool {
313    if data_with_checksum.len() < 13 {
314        return false;
315    }
316    let mut input = hrp_expand(hrp);
317    input.extend_from_slice(data_with_checksum);
318    polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) == MK_REGULAR_CONST
319}
320
321/// Compute the 15-character BCH checksum for the long code.
322///
323/// Same algorithm as [`bch_create_checksum_regular`] but uses the long-code
324/// polymod parameters (`GEN_LONG`, `LONG_SHIFT`, `LONG_MASK`) and target
325/// constant ([`MK_LONG_CONST`]). Produces a 15-element checksum array.
326pub fn bch_create_checksum_long(hrp: &str, data: &[u8]) -> [u8; 15] {
327    // Long code: 15-symbol checksum (0..=14), pad/array/extraction all use 15.
328    let mut input = hrp_expand(hrp);
329    input.extend_from_slice(data);
330    input.extend(std::iter::repeat_n(0, 15));
331    let polymod = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
332    let mut out = [0u8; 15];
333    for (i, slot) in out.iter_mut().enumerate() {
334        *slot = ((polymod >> (5 * (14 - i))) & 0x1F) as u8;
335    }
336    out
337}
338
339/// Verify a long-code BCH checksum.
340///
341/// Same algorithm as [`bch_verify_regular`] with long-code parameters.
342/// Returns false if `data_with_checksum` is shorter than 15 symbols.
343pub fn bch_verify_long(hrp: &str, data_with_checksum: &[u8]) -> bool {
344    if data_with_checksum.len() < 15 {
345        return false;
346    }
347    let mut input = hrp_expand(hrp);
348    input.extend_from_slice(data_with_checksum);
349    polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) == MK_LONG_CONST
350}
351
352/// Result of a successful BCH decode + correct attempt.
353///
354/// Returned by [`bch_correct_regular`] / [`bch_correct_long`] when correction
355/// succeeds. `corrections_applied == 0` means the input was already valid;
356/// `> 0` means substitutions were applied at the indicated positions.
357///
358/// Marked `#[non_exhaustive]` to allow future fields (e.g., confidence
359/// score, syndrome metadata) without breaking downstream struct-literal
360/// construction. Construct via the [`bch_correct_regular`] /
361/// [`bch_correct_long`] APIs.
362#[non_exhaustive]
363#[derive(Debug, Clone, PartialEq, Eq)]
364pub struct CorrectionResult {
365    /// The corrected `data_with_checksum` slice (input may have been modified).
366    pub data: Vec<u8>,
367    /// Number of substitutions applied (0 = clean input).
368    pub corrections_applied: usize,
369    /// Indices into `data` of the substituted positions.
370    pub corrected_positions: Vec<usize>,
371}
372
373/// Attempt to correct a regular-code BCH-checksummed string with up to four
374/// substitutions, the full t = 4 capacity of the BCH(93, 80, 8) code.
375///
376/// Implements the standard syndrome-based BCH decoder pipeline: syndrome
377/// computation in `GF(1024) = GF(32²)`, Berlekamp–Massey for the
378/// error-locator polynomial, Chien search for error positions, Forney's
379/// algorithm for error magnitudes. After applying the proposed corrections,
380/// the result is re-verified via [`bch_verify_regular`]; the decoder rejects
381/// any output that does not produce a valid codeword (defensive guard
382/// against pathological 5+-error inputs whose syndromes happen to factor as
383/// a degree-≤ 4 locator).
384///
385/// Returns `Ok(CorrectionResult)` if the input is clean or up to four
386/// substitutions repair it. Returns `Err(Error::BchUncorrectable)` otherwise.
387///
388/// # Algorithm details
389///
390/// See the private `bch_decode` submodule for the algorithm and the
391/// `GF(1024)` field representation.
392pub fn bch_correct_regular(
393    hrp: &str,
394    data_with_checksum: &[u8],
395) -> Result<CorrectionResult, crate::Error> {
396    if bch_verify_regular(hrp, data_with_checksum) {
397        return Ok(CorrectionResult {
398            data: data_with_checksum.to_vec(),
399            corrections_applied: 0,
400            corrected_positions: vec![],
401        });
402    }
403    // Compute polymod over hrp_expand(hrp) || data_with_checksum, XOR with
404    // the MD target constant. The result is congruent to the error
405    // polynomial E(x) modulo g_regular(x).
406    let mut input = hrp_expand(hrp);
407    input.extend_from_slice(data_with_checksum);
408    let residue = polymod_run(&input, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK) ^ MK_REGULAR_CONST;
409
410    if let Some((positions, magnitudes)) =
411        bch_decode::decode_regular_errors(residue, data_with_checksum.len())
412    {
413        if positions.is_empty() {
414            // Should be unreachable (caller already verified); guard anyway.
415            return Ok(CorrectionResult {
416                data: data_with_checksum.to_vec(),
417                corrections_applied: 0,
418                corrected_positions: vec![],
419            });
420        }
421        let mut corrected = data_with_checksum.to_vec();
422        for (&p, &m) in positions.iter().zip(&magnitudes) {
423            if p >= corrected.len() {
424                return Err(crate::Error::BchUncorrectable(format!(
425                    "decoder reported error position {p} outside data ({} symbols)",
426                    corrected.len()
427                )));
428            }
429            corrected[p] ^= m;
430        }
431        // Defensive: re-verify. Catches the 5+-error edge case.
432        if bch_verify_regular(hrp, &corrected) {
433            return Ok(CorrectionResult {
434                corrections_applied: positions.len(),
435                corrected_positions: positions,
436                data: corrected,
437            });
438        }
439    }
440    Err(crate::Error::BchUncorrectable(
441        "regular code: more than 4 substitutions or pathological pattern".into(),
442    ))
443}
444
445/// Long-code analog of [`bch_correct_regular`].
446///
447/// Implements the same BM/Chien/Forney pipeline against the long-code
448/// generator polynomial, reaching the full t = 4 capacity of
449/// `BCH(108, 93, 8)`.
450pub fn bch_correct_long(
451    hrp: &str,
452    data_with_checksum: &[u8],
453) -> Result<CorrectionResult, crate::Error> {
454    if bch_verify_long(hrp, data_with_checksum) {
455        return Ok(CorrectionResult {
456            data: data_with_checksum.to_vec(),
457            corrections_applied: 0,
458            corrected_positions: vec![],
459        });
460    }
461    let mut input = hrp_expand(hrp);
462    input.extend_from_slice(data_with_checksum);
463    let residue = polymod_run(&input, &GEN_LONG, LONG_SHIFT, LONG_MASK) ^ MK_LONG_CONST;
464
465    if let Some((positions, magnitudes)) =
466        bch_decode::decode_long_errors(residue, data_with_checksum.len())
467    {
468        if positions.is_empty() {
469            return Ok(CorrectionResult {
470                data: data_with_checksum.to_vec(),
471                corrections_applied: 0,
472                corrected_positions: vec![],
473            });
474        }
475        let mut corrected = data_with_checksum.to_vec();
476        for (&p, &m) in positions.iter().zip(&magnitudes) {
477            if p >= corrected.len() {
478                return Err(crate::Error::BchUncorrectable(format!(
479                    "decoder reported error position {p} outside data ({} symbols)",
480                    corrected.len()
481                )));
482            }
483            corrected[p] ^= m;
484        }
485        if bch_verify_long(hrp, &corrected) {
486            return Ok(CorrectionResult {
487                corrections_applied: positions.len(),
488                corrected_positions: positions,
489                data: corrected,
490            });
491        }
492    }
493    Err(crate::Error::BchUncorrectable(
494        "long code: more than 4 substitutions or pathological pattern".into(),
495    ))
496}
497
498/// Encode a 5-bit-symbol data stream as a complete mk1 string.
499///
500/// The data stream is the concatenation `header_symbols || bytes_to_5bit(payload_bytes)`
501/// where `header_symbols` is the 2-symbol single-string header or the
502/// 8-symbol chunked header (closure Q-5). The BCH code variant (regular or
503/// long) is auto-selected from the resulting data-part length per BIP 93:
504/// regular for ≤93-symbol data parts, long for 96–108-symbol data parts.
505/// Lengths in the reserved-invalid 94–95 gap or outside the BIP 93 valid
506/// range return [`Error::InvalidStringLength`].
507///
508/// Per the v0.1 emit policy described in `design/IMPLEMENTATION_PLAN_mk_v0_1.md`
509/// §5.4, callers control fragment sizing so that each chunked fragment lands
510/// within long-code territory. Single-string mk1 may pick regular or long
511/// based on bytecode size.
512///
513/// Returns the full string starting with [`crate::consts::HRP`] and the
514/// BIP 173 separator (`"mk1"`).
515pub fn encode_5bit_to_string(data_5bit: &[u8]) -> Result<String, crate::Error> {
516    use crate::Error;
517
518    // Auto-determine code from the eventual data-part length (data_5bit + checksum).
519    let regular_total = data_5bit.len() + 13;
520    let long_total = data_5bit.len() + 15;
521    let code = match (
522        bch_code_for_length(regular_total),
523        bch_code_for_length(long_total),
524    ) {
525        (Some(BchCode::Regular), _) => BchCode::Regular,
526        (_, Some(BchCode::Long)) => BchCode::Long,
527        // Neither code variant accepts this data-part length: too short, in
528        // the 94–95 reserved-invalid gap, or too long for v0.1.
529        _ => {
530            // Pick the closest length to report — long_total is always larger,
531            // so report that as the "actual length you tried to produce".
532            return Err(Error::InvalidStringLength(long_total));
533        }
534    };
535
536    let checksum: Vec<u8> = match code {
537        BchCode::Regular => bch_create_checksum_regular(HRP, data_5bit).to_vec(),
538        BchCode::Long => bch_create_checksum_long(HRP, data_5bit).to_vec(),
539    };
540
541    let mut full = String::with_capacity(HRP.len() + 1 + data_5bit.len() + checksum.len());
542    full.push_str(HRP);
543    full.push(SEPARATOR);
544    for &v in data_5bit {
545        full.push(ALPHABET[v as usize] as char);
546    }
547    for v in checksum {
548        full.push(ALPHABET[v as usize] as char);
549    }
550    Ok(full)
551}
552
553/// Result of a successful mk1 string decode at the BCH layer.
554///
555/// Use [`Self::data`] to access the data part as 5-bit values (header
556/// symbols + payload, checksum stripped); the string-layer reassembler
557/// in `crate::string_layer` splits header symbols off and feeds the
558/// remaining payload through [`five_bit_to_bytes`] to recover the original
559/// fragment bytes.
560///
561/// The full post-correction 5-bit symbol sequence (data **plus** the trailing
562/// 13- or 15-char checksum) is retained internally as [`Self::data_with_checksum`]
563/// and can be queried by [`Self::corrected_char_at`] for any position in
564/// the data part — including positions that fall inside the checksum region.
565/// The decoder-report layer uses this to surface the real corrected
566/// character when BCH ECC repairs a substitution inside the checksum
567/// (parallels md-codec's `Correction.corrected` field).
568#[non_exhaustive]
569#[derive(Debug, Clone, PartialEq, Eq)]
570pub struct DecodedString {
571    /// Detected BCH code variant.
572    pub code: BchCode,
573    /// Number of substitution errors corrected (0 = clean input, 1 = recovered).
574    pub corrections_applied: usize,
575    /// Indices into the data-part (chars after `"md1"`) of any corrected positions.
576    pub corrected_positions: Vec<usize>,
577    /// Full post-correction 5-bit symbol sequence (data part + checksum), in
578    /// the same coordinate system as [`Self::corrected_positions`].
579    ///
580    /// Length is `data().len() + 13` (regular code) or `data().len() + 15`
581    /// (long code). Indices `0..data().len()` mirror [`Self::data`] symbol-for-symbol;
582    /// indices `data().len()..` are the corrected checksum symbols. Use
583    /// [`Self::corrected_char_at`] for the human-readable bech32 character at
584    /// any position.
585    pub data_with_checksum: Vec<u8>,
586}
587
588impl DecodedString {
589    /// Data part as 5-bit values, with the trailing checksum stripped.
590    ///
591    /// Returns a slice into [`Self::data_with_checksum`] — the data part is
592    /// `data_with_checksum[..len - checksum_len]`, where `checksum_len` is 13
593    /// for [`BchCode::Regular`] and 15 for [`BchCode::Long`].
594    pub fn data(&self) -> &[u8] {
595        let checksum_len = match self.code {
596            BchCode::Regular => 13,
597            BchCode::Long => 15,
598        };
599        &self.data_with_checksum[..self.data_with_checksum.len() - checksum_len]
600    }
601
602    /// Look up the corrected bech32 character at the given position in the
603    /// data part (chars after the `"md1"` HRP+separator).
604    ///
605    /// `char_position` is 0-indexed. Positions `0..data().len()` are in the
606    /// data region; positions `data().len()..data().len() + checksum_len` are
607    /// inside the BCH checksum (13 chars for [`BchCode::Regular`], 15 for
608    /// [`BchCode::Long`]). All positions return the post-correction
609    /// character — i.e., what the symbol *should* be after BCH repair, which
610    /// is exactly what [`Correction.corrected`][crate::Correction::corrected]
611    /// is documented to report.
612    ///
613    /// # Panics
614    ///
615    /// Panics if `char_position >= data_with_checksum.len()`. Callers are
616    /// responsible for clamping the position to a valid range; in the decode
617    /// pipeline this is guaranteed by the BCH layer (it never reports a
618    /// `corrected_position` outside `data_with_checksum`). Note that
619    /// `data_with_checksum` includes the checksum region; "outside the data
620    /// part" elsewhere in this crate excludes the checksum and is a tighter
621    /// bound than what this method requires.
622    pub fn corrected_char_at(&self, char_position: usize) -> char {
623        let v = self.data_with_checksum[char_position];
624        ALPHABET[v as usize] as char
625    }
626}
627
628/// Decode an mk1 string, validating HRP, case, length, and checksum.
629///
630/// Performs full BCH error correction up to four substitutions
631/// (`t = 4` capacity of the BCH(93, 80, 8) regular code and the
632/// BCH(108, 93, 8) long code), via syndrome-based Berlekamp–Massey +
633/// Forney decoding (implemented in the sibling `bch_decode` module).
634///
635/// Errors:
636/// - [`Error::MixedCase`] if the string mixes upper and lower case.
637/// - [`Error::InvalidHrp`] if the HRP is missing or not [`crate::consts::HRP`].
638/// - [`Error::InvalidStringLength`] if the data-part length isn't a valid mk1 length.
639/// - [`Error::InvalidChar`] if the data part contains a non-bech32 character.
640/// - [`Error::BchUncorrectable`] if the checksum can't be repaired within
641///   the BCH `t = 4` correction radius.
642///
643/// [`Error::MixedCase`]: crate::Error::MixedCase
644/// [`Error::InvalidHrp`]: crate::Error::InvalidHrp
645/// [`Error::InvalidStringLength`]: crate::Error::InvalidStringLength
646/// [`Error::InvalidChar`]: crate::Error::InvalidChar
647/// [`Error::BchUncorrectable`]: crate::Error::BchUncorrectable
648pub fn decode_string(s: &str) -> Result<DecodedString, crate::Error> {
649    use crate::Error;
650
651    if matches!(case_check(s), CaseStatus::Mixed) {
652        return Err(Error::MixedCase);
653    }
654    let s_lower = s.to_lowercase();
655
656    let sep_pos = s_lower
657        .rfind(SEPARATOR)
658        .ok_or_else(|| Error::InvalidHrp(s_lower.clone()))?;
659    let (hrp, rest) = s_lower.split_at(sep_pos);
660    let data_part = &rest[1..]; // skip the '1' separator
661
662    if hrp != HRP {
663        return Err(Error::InvalidHrp(hrp.to_string()));
664    }
665
666    let code =
667        bch_code_for_length(data_part.len()).ok_or(Error::InvalidStringLength(data_part.len()))?;
668
669    let mut values: Vec<u8> = Vec::with_capacity(data_part.len());
670    for (i, c) in data_part.chars().enumerate() {
671        if !c.is_ascii() {
672            return Err(Error::InvalidChar { ch: c, position: i });
673        }
674        let v = ALPHABET_INV[c as usize];
675        if v == 0xFF {
676            return Err(Error::InvalidChar { ch: c, position: i });
677        }
678        values.push(v);
679    }
680
681    let correction = match code {
682        BchCode::Regular => bch_correct_regular(hrp, &values),
683        BchCode::Long => bch_correct_long(hrp, &values),
684    };
685    let result = correction?;
686
687    Ok(DecodedString {
688        code,
689        corrections_applied: result.corrections_applied,
690        corrected_positions: result.corrected_positions,
691        data_with_checksum: result.data,
692    })
693}
694
695#[cfg(test)]
696mod tests {
697    use super::*;
698
699    #[test]
700    fn bch_code_equality() {
701        assert_eq!(BchCode::Regular, BchCode::Regular);
702        assert_ne!(BchCode::Regular, BchCode::Long);
703    }
704
705    #[test]
706    fn bch_code_can_be_hashed() {
707        use std::collections::HashSet;
708        let mut set = HashSet::new();
709        set.insert(BchCode::Regular);
710        set.insert(BchCode::Long);
711        set.insert(BchCode::Regular);
712        assert_eq!(set.len(), 2);
713    }
714
715    #[test]
716    fn alphabet_is_32_unique_chars() {
717        let mut seen = std::collections::HashSet::new();
718        for &c in ALPHABET {
719            assert!(seen.insert(c), "duplicate char in alphabet: {}", c as char);
720        }
721        assert_eq!(seen.len(), 32);
722    }
723
724    #[test]
725    fn bytes_to_5bit_round_trip_zero() {
726        let bytes = vec![0x00];
727        let fives = bytes_to_5bit(&bytes);
728        assert_eq!(fives, vec![0, 0]);
729        let back = five_bit_to_bytes(&fives).unwrap();
730        assert_eq!(back, bytes);
731    }
732
733    #[test]
734    fn bytes_to_5bit_round_trip_known_value() {
735        // 0xFF = binary 11111111. Splits as 11111 (=31) and 111 (padded with 00 to 11100=28).
736        let bytes = vec![0xFF];
737        let fives = bytes_to_5bit(&bytes);
738        assert_eq!(fives, vec![31, 28]);
739    }
740
741    #[test]
742    fn bytes_to_5bit_round_trip_multibyte() {
743        // 3 bytes = 24 bits → 5 five-bit groups (25 bits, 1 pad bit).
744        let bytes = vec![0xDE, 0xAD, 0xBE];
745        let back = five_bit_to_bytes(&bytes_to_5bit(&bytes)).unwrap();
746        assert_eq!(back, bytes);
747    }
748
749    #[test]
750    fn five_bit_to_bytes_rejects_nonzero_padding() {
751        // Two 5-bit values = 10 bits, of which 8 form a byte and 2 are padding.
752        // If padding bits are nonzero, decode must fail.
753        // 31 = 11111, 1 = 00001. Last 2 bits (= 01) are nonzero padding.
754        assert!(five_bit_to_bytes(&[31, 1]).is_none());
755    }
756
757    #[test]
758    fn five_bit_to_bytes_rejects_value_out_of_range() {
759        assert!(five_bit_to_bytes(&[32]).is_none());
760    }
761
762    #[test]
763    fn bch_code_for_length_regular() {
764        assert_eq!(bch_code_for_length(14), Some(BchCode::Regular));
765        assert_eq!(bch_code_for_length(93), Some(BchCode::Regular));
766    }
767
768    #[test]
769    fn bch_code_for_length_long() {
770        assert_eq!(bch_code_for_length(96), Some(BchCode::Long));
771        assert_eq!(bch_code_for_length(108), Some(BchCode::Long));
772    }
773
774    #[test]
775    fn bch_code_for_length_rejects_94_and_95() {
776        assert_eq!(bch_code_for_length(94), None);
777        assert_eq!(bch_code_for_length(95), None);
778    }
779
780    #[test]
781    fn bch_code_for_length_rejects_extremes() {
782        assert_eq!(bch_code_for_length(0), None);
783        assert_eq!(bch_code_for_length(13), None);
784        assert_eq!(bch_code_for_length(109), None);
785        assert_eq!(bch_code_for_length(1000), None);
786    }
787
788    #[test]
789    fn case_check_lowercase() {
790        assert_eq!(case_check("md1qq"), CaseStatus::Lower);
791    }
792
793    #[test]
794    fn case_check_uppercase() {
795        assert_eq!(case_check("MD1QQ"), CaseStatus::Upper);
796    }
797
798    #[test]
799    fn case_check_mixed() {
800        assert_eq!(case_check("mD1qq"), CaseStatus::Mixed);
801    }
802
803    #[test]
804    fn case_check_empty_string_is_lower() {
805        assert_eq!(case_check(""), CaseStatus::Lower);
806    }
807
808    #[test]
809    fn case_check_digits_only_is_lower() {
810        // Digits have no case; result must be Lower (BIP 173: no-letter strings are lower).
811        assert_eq!(case_check("1234"), CaseStatus::Lower);
812    }
813
814    #[test]
815    fn gen_regular_has_5_entries() {
816        assert_eq!(GEN_REGULAR.len(), 5);
817    }
818
819    #[test]
820    fn gen_long_has_5_entries() {
821        assert_eq!(GEN_LONG.len(), 5);
822    }
823
824    #[test]
825    fn gen_regular_matches_bip93_canonical_values() {
826        // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
827        // ms32_polymod GEN array. If this fails, the constants drifted from the BIP.
828        assert_eq!(GEN_REGULAR[0], 0x19dc500ce73fde210);
829        assert_eq!(GEN_REGULAR[1], 0x1bfae00def77fe529);
830        assert_eq!(GEN_REGULAR[2], 0x1fbd920fffe7bee52);
831        assert_eq!(GEN_REGULAR[3], 0x1739640bdeee3fdad);
832        assert_eq!(GEN_REGULAR[4], 0x07729a039cfc75f5a);
833    }
834
835    #[test]
836    fn gen_long_matches_bip93_canonical_values() {
837        // Cross-checked against https://github.com/bitcoin/bips/blob/master/bip-0093.mediawiki
838        // ms32_long_polymod GEN array.
839        assert_eq!(GEN_LONG[0], 0x3d59d273535ea62d897);
840        assert_eq!(GEN_LONG[1], 0x7a9becb6361c6c51507);
841        assert_eq!(GEN_LONG[2], 0x543f9b7e6c38d8a2a0e);
842        assert_eq!(GEN_LONG[3], 0x0c577eaeccf1990d13c);
843        assert_eq!(GEN_LONG[4], 0x1887f74f8dc71b10651);
844    }
845
846    #[test]
847    fn polymod_init_matches_bip93() {
848        // POLYMOD_INIT is unchanged from BIP 93; the GEN_REGULAR / GEN_LONG
849        // constants have their own value-equality tests.
850        assert_eq!(POLYMOD_INIT, 0x23181b3);
851    }
852
853    // (NUMS-derivation reproducer for `MK_REGULAR_CONST` / `MK_LONG_CONST`
854    // lives in `crate::consts::tests::nums_constants_reproduce_from_domain`,
855    // which uses the mk1-specific domain `b"shibbolethnumskey"`. Duplicating
856    // it here would risk drift if either side were updated in isolation.)
857
858    #[test]
859    fn polymod_masks_are_consistent_with_shifts() {
860        // The mask must be (1 << shift) - 1 so that masking preserves bits below
861        // the shift boundary, exactly matching the BIP 93 algorithm.
862        assert_eq!(REGULAR_MASK, (1u128 << REGULAR_SHIFT) - 1);
863        assert_eq!(LONG_MASK, (1u128 << LONG_SHIFT) - 1);
864        assert_eq!(REGULAR_SHIFT, 60);
865        assert_eq!(LONG_SHIFT, 70);
866    }
867
868    #[test]
869    fn polymod_step_zero_residue_zero_value() {
870        // Both residue and value zero, no GEN XORs since b = 0.
871        assert_eq!(
872            polymod_step(0, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
873            0
874        );
875    }
876
877    #[test]
878    fn polymod_step_value_only_xor_when_residue_zero() {
879        // Residue 0, value 7 → result is 7 (XORed into the shifted-zero residue).
880        assert_eq!(
881            polymod_step(0, 7, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
882            7
883        );
884    }
885
886    #[test]
887    fn polymod_step_isolates_each_gen_entry() {
888        // Setting just bit `shift+i` in the residue → b = 1<<i → only GEN[i] is XORed.
889        for i in 0..5 {
890            let r = 1u128 << (REGULAR_SHIFT + i);
891            assert_eq!(
892                polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
893                GEN_REGULAR[i as usize],
894                "bit {} of b should isolate GEN_REGULAR[{}]",
895                i,
896                i
897            );
898        }
899    }
900
901    #[test]
902    fn polymod_step_xors_multiple_gens_when_multiple_b_bits_set() {
903        // b = 0b00011 → XOR GEN[0] and GEN[1].
904        let r = 0b00011u128 << REGULAR_SHIFT;
905        assert_eq!(
906            polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
907            GEN_REGULAR[0] ^ GEN_REGULAR[1]
908        );
909        // b = 0b11111 → XOR all 5.
910        let r = 0b11111u128 << REGULAR_SHIFT;
911        let expected =
912            GEN_REGULAR[0] ^ GEN_REGULAR[1] ^ GEN_REGULAR[2] ^ GEN_REGULAR[3] ^ GEN_REGULAR[4];
913        assert_eq!(
914            polymod_step(r, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
915            expected
916        );
917    }
918
919    #[test]
920    fn polymod_step_works_for_long_code() {
921        // Same parameterization works for the long code (shift=70, mask=LONG_MASK).
922        let r = 1u128 << LONG_SHIFT;
923        assert_eq!(
924            polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
925            GEN_LONG[0]
926        );
927        // b = 0b11111 → XOR all 5 long GENs.
928        let r = 0b11111u128 << LONG_SHIFT;
929        let expected = GEN_LONG[0] ^ GEN_LONG[1] ^ GEN_LONG[2] ^ GEN_LONG[3] ^ GEN_LONG[4];
930        assert_eq!(
931            polymod_step(r, 0, &GEN_LONG, LONG_SHIFT, LONG_MASK),
932            expected
933        );
934    }
935
936    #[test]
937    fn polymod_step_init_residue_first_iteration() {
938        // POLYMOD_INIT < 2^60 so b = 0 in the first iteration; only the shift+xor happens.
939        // Verify: polymod_step(POLYMOD_INIT, 0) = POLYMOD_INIT << 5.
940        assert_eq!(
941            polymod_step(POLYMOD_INIT, 0, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
942            POLYMOD_INIT << 5
943        );
944        // And with value=v: polymod_step(POLYMOD_INIT, v) = (POLYMOD_INIT << 5) ^ v.
945        assert_eq!(
946            polymod_step(POLYMOD_INIT, 31, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
947            (POLYMOD_INIT << 5) ^ 31
948        );
949    }
950
951    #[test]
952    fn polymod_step_value_and_gen_xor_combined() {
953        // Both effects active: b = 1 (bit 0 of b set) AND value = 5.
954        // Expected: ((residue & mask) << 5) ^ value ^ GEN[0]
955        //         = (0 << 5) ^ 5 ^ GEN[0]
956        //         = GEN_REGULAR[0] ^ 5
957        let r = 1u128 << REGULAR_SHIFT;
958        assert_eq!(
959            polymod_step(r, 5, &GEN_REGULAR, REGULAR_SHIFT, REGULAR_MASK),
960            GEN_REGULAR[0] ^ 5
961        );
962    }
963
964    #[test]
965    fn hrp_expand_mk_matches_spec() {
966        // BIP 173 hrp_expand for the MK HRP. Each ASCII byte contributes
967        // its high 3 bits then (after the [0] separator) its low 5 bits.
968        // 'm' = 0x6D → high 3 bits = 3, low 5 bits = 13.
969        // 'k' = 0x6B → high 3 bits = 3, low 5 bits = 11.
970        // Result: [3, 3, 0, 13, 11]. Documented in the BIP draft §"Checksum".
971        assert_eq!(hrp_expand(crate::consts::HRP), vec![3, 3, 0, 13, 11]);
972    }
973
974    #[test]
975    fn hrp_expand_empty_returns_just_separator() {
976        // Edge case: empty HRP yields just the [0] separator.
977        assert_eq!(hrp_expand(""), vec![0]);
978    }
979
980    #[test]
981    fn bch_round_trip_regular() {
982        // Encode then verify a small data part. The verify call sees the
983        // full data + checksum, so polymod returns MK_REGULAR_CONST exactly.
984        let hrp = "mk";
985        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
986        let checksum = bch_create_checksum_regular(hrp, &data);
987        assert_eq!(checksum.len(), 13);
988
989        let mut full = data.clone();
990        full.extend_from_slice(&checksum);
991        assert!(bch_verify_regular(hrp, &full));
992    }
993
994    #[test]
995    fn bch_verify_rejects_single_char_tampering_regular() {
996        // Flipping one bit in one symbol breaks verification.
997        // (Spot check; BCH detects all single-symbol errors by construction.)
998        let hrp = "mk";
999        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1000        let checksum = bch_create_checksum_regular(hrp, &data);
1001        let mut full = data.clone();
1002        full.extend_from_slice(&checksum);
1003        full[5] ^= 0x01;
1004        assert!(!bch_verify_regular(hrp, &full));
1005    }
1006
1007    #[test]
1008    fn bch_verify_rejects_too_short_input_regular() {
1009        // Less than 13 symbols cannot hold a checksum.
1010        assert!(!bch_verify_regular("mk", &[0, 1, 2]));
1011        assert!(!bch_verify_regular("mk", &[]));
1012    }
1013
1014    // (mk1-specific pinned-checksum vectors are deferred to Phase 6 vector
1015    // corpus generation, which writes both regular- and long-code conformance
1016    // points to disk under `crates/mk-codec/src/test_vectors/v0.1.json`.
1017    // Forking md-codec's pinned vectors verbatim would record the wrong
1018    // values: mk1's HRP and target constants both differ.)
1019
1020    #[test]
1021    fn bch_zero_data_does_not_self_validate_regular() {
1022        // The all-zeros data + all-zeros checksum must NOT validate, because
1023        // MK_REGULAR_CONST was chosen NUMS-style to avoid this trivial case.
1024        // Data length 8 is arbitrary; any non-empty zero-fill exhibits the same
1025        // negative result. 8 echoes the regular-code known-vector data length.
1026        let mut zero = vec![0u8; 8];
1027        zero.extend(std::iter::repeat_n(0, 13));
1028        assert!(!bch_verify_regular("mk", &zero));
1029    }
1030
1031    #[test]
1032    fn bch_round_trip_empty_data_regular() {
1033        // Empty data part is a degenerate but valid input: the checksum
1034        // covers only the HRP preamble. encode → verify must round-trip.
1035        let checksum = bch_create_checksum_regular("mk", &[]);
1036        assert!(bch_verify_regular("mk", &checksum));
1037    }
1038
1039    #[test]
1040    fn bch_round_trip_long() {
1041        let hrp = "mk";
1042        let data: Vec<u8> = (0..16).collect();
1043        let checksum = bch_create_checksum_long(hrp, &data);
1044        assert_eq!(checksum.len(), 15);
1045        let mut full = data.clone();
1046        full.extend_from_slice(&checksum);
1047        assert!(bch_verify_long(hrp, &full));
1048    }
1049
1050    #[test]
1051    fn bch_verify_rejects_single_char_tampering_long() {
1052        // Flipping one bit in one symbol breaks verification.
1053        // (Spot check; BCH detects all single-symbol errors by construction.)
1054        let hrp = "mk";
1055        let data: Vec<u8> = (0..16).collect();
1056        let checksum = bch_create_checksum_long(hrp, &data);
1057        let mut full = data.clone();
1058        full.extend_from_slice(&checksum);
1059        full[7] ^= 0x01;
1060        assert!(!bch_verify_long(hrp, &full));
1061    }
1062
1063    #[test]
1064    fn bch_verify_rejects_too_short_input_long() {
1065        // Less than 15 symbols cannot hold a long-code checksum.
1066        assert!(!bch_verify_long("mk", &[0; 14]));
1067        assert!(!bch_verify_long("mk", &[]));
1068    }
1069
1070    #[test]
1071    fn bch_zero_data_does_not_self_validate_long() {
1072        // All-zeros must not validate, by NUMS construction of MK_LONG_CONST.
1073        // Data length 16 is arbitrary; any non-empty zero-fill exhibits the same
1074        // negative result. 16 echoes the long-code known-vector data length.
1075        let mut zero = vec![0u8; 16];
1076        zero.extend(std::iter::repeat_n(0, 15));
1077        assert!(!bch_verify_long("mk", &zero));
1078    }
1079
1080    #[test]
1081    fn bch_round_trip_empty_data_long() {
1082        // Degenerate but valid: checksum covers only the HRP preamble.
1083        let checksum = bch_create_checksum_long("mk", &[]);
1084        assert!(bch_verify_long("mk", &checksum));
1085    }
1086
1087    #[test]
1088    fn bch_correct_regular_clean_input() {
1089        // Clean input → 0 corrections, identity result.
1090        let hrp = "mk";
1091        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1092        let checksum = bch_create_checksum_regular(hrp, &data);
1093        let mut full = data.clone();
1094        full.extend_from_slice(&checksum);
1095        let r = bch_correct_regular(hrp, &full).unwrap();
1096        assert_eq!(r.corrections_applied, 0);
1097        assert!(r.corrected_positions.is_empty());
1098        assert_eq!(r.data, full);
1099    }
1100
1101    #[test]
1102    fn bch_correct_regular_one_error() {
1103        // Single-symbol corruption is recoverable.
1104        let hrp = "mk";
1105        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1106        let checksum = bch_create_checksum_regular(hrp, &data);
1107        let mut full = data.clone();
1108        full.extend_from_slice(&checksum);
1109        let original = full.clone();
1110        full[3] = (full[3] + 1) & 0x1F;
1111        let r = bch_correct_regular(hrp, &full).unwrap();
1112        assert_eq!(r.corrections_applied, 1);
1113        assert_eq!(r.corrected_positions, vec![3]);
1114        assert_eq!(r.data, original);
1115    }
1116
1117    #[test]
1118    fn bch_correct_regular_two_errors_recovered_v0_2() {
1119        // v0.2 BM/Forney decoder reaches the BCH(93,80,8) full t = 4
1120        // capacity. A 2-error pattern is now recoverable. This test was
1121        // `..._uncorrectable_v0_1` in v0.1; flipped sign in v0.2.
1122        let hrp = "mk";
1123        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1124        let checksum = bch_create_checksum_regular(hrp, &data);
1125        let mut full = data.clone();
1126        full.extend_from_slice(&checksum);
1127        let original = full.clone();
1128        full[3] = (full[3] + 1) & 0x1F;
1129        full[7] = (full[7] + 1) & 0x1F;
1130        let r = bch_correct_regular(hrp, &full).unwrap();
1131        assert_eq!(r.corrections_applied, 2);
1132        assert!(r.corrected_positions.contains(&3));
1133        assert!(r.corrected_positions.contains(&7));
1134        assert_eq!(r.data, original);
1135    }
1136
1137    #[test]
1138    fn bch_correct_long_clean_input() {
1139        let hrp = "mk";
1140        let data: Vec<u8> = (0..16).collect();
1141        let checksum = bch_create_checksum_long(hrp, &data);
1142        let mut full = data.clone();
1143        full.extend_from_slice(&checksum);
1144        let r = bch_correct_long(hrp, &full).unwrap();
1145        assert_eq!(r.corrections_applied, 0);
1146    }
1147
1148    #[test]
1149    fn bch_correct_long_one_error() {
1150        let hrp = "mk";
1151        let data: Vec<u8> = (0..16).collect();
1152        let checksum = bch_create_checksum_long(hrp, &data);
1153        let mut full = data.clone();
1154        full.extend_from_slice(&checksum);
1155        let original = full.clone();
1156        full[5] = (full[5] + 1) & 0x1F;
1157        let r = bch_correct_long(hrp, &full).unwrap();
1158        assert_eq!(r.corrections_applied, 1);
1159        assert_eq!(r.corrected_positions, vec![5]);
1160        assert_eq!(r.data, original);
1161    }
1162
1163    #[test]
1164    fn bch_correct_returns_correction_result_with_position() {
1165        // Verify the API contract: a successful 1-error correction reports
1166        // exactly the position that was changed.
1167        let hrp = "mk";
1168        let data: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
1169        let checksum = bch_create_checksum_regular(hrp, &data);
1170        let mut full = data.clone();
1171        full.extend_from_slice(&checksum);
1172        // Damage the second checksum byte (position 9 from start).
1173        full[9] = (full[9] + 7) & 0x1F;
1174        let r = bch_correct_regular(hrp, &full).unwrap();
1175        assert_eq!(r.corrected_positions, vec![9]);
1176    }
1177
1178    /// Build a fake mk1 5-bit data stream for round-trip tests:
1179    /// `[v0, v1, ...]` are 2 bech32-symbol single-string-style header
1180    /// symbols; `payload_bytes` is the byte-level fragment.
1181    fn build_5bit_data(header_symbols: &[u8], payload_bytes: &[u8]) -> Vec<u8> {
1182        let mut out = Vec::with_capacity(header_symbols.len() + payload_bytes.len() * 2);
1183        out.extend_from_slice(header_symbols);
1184        out.extend(bytes_to_5bit(payload_bytes));
1185        out
1186    }
1187
1188    #[test]
1189    fn encode_5bit_to_string_round_trip_regular() {
1190        // 2-symbol single-string header + 4-byte payload → 7 5-bit symbols
1191        // (header [v=0, t=0] || bytes_to_5bit(4 bytes) = 2 + 7 = 9 symbols).
1192        // 9 + 13 regular checksum = 22-char data part — well within regular range.
1193        let header_symbols = [0u8, 0u8];
1194        let payload = vec![0xDE, 0xAD, 0xBE, 0xEF];
1195        let data_5bit = build_5bit_data(&header_symbols, &payload);
1196        let s = encode_5bit_to_string(&data_5bit).unwrap();
1197        assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1198
1199        let decoded = decode_string(&s).unwrap();
1200        assert_eq!(decoded.code, BchCode::Regular);
1201        assert_eq!(decoded.corrections_applied, 0);
1202        assert!(decoded.corrected_positions.is_empty());
1203        assert_eq!(decoded.data(), data_5bit.as_slice());
1204
1205        // Recover the payload by stripping the 2-symbol header and byte-decoding.
1206        let payload_5bit = &decoded.data()[2..];
1207        let recovered = five_bit_to_bytes(payload_5bit).unwrap();
1208        assert_eq!(recovered, payload);
1209    }
1210
1211    #[test]
1212    fn encode_5bit_to_string_round_trip_long() {
1213        // Force a long-code path with an 8-symbol chunked-style header +
1214        // a 53-byte fragment: data_5bit.len() = 8 + ceil(53*8/5) = 8 + 85 = 93,
1215        // + 15 long checksum = 108 — exact long-code upper bound.
1216        let header_symbols = [0u8; 8];
1217        let payload = vec![0xA5u8; 53];
1218        let data_5bit = build_5bit_data(&header_symbols, &payload);
1219        assert_eq!(
1220            data_5bit.len(),
1221            93,
1222            "fixture invariant: 8 header + 85 payload symbols"
1223        );
1224        let s = encode_5bit_to_string(&data_5bit).unwrap();
1225        assert!(s.starts_with("mk1"));
1226        let decoded = decode_string(&s).unwrap();
1227        assert_eq!(decoded.code, BchCode::Long);
1228        assert_eq!(decoded.data(), data_5bit.as_slice());
1229
1230        let recovered = five_bit_to_bytes(&decoded.data()[8..]).unwrap();
1231        assert_eq!(recovered, payload);
1232    }
1233
1234    #[test]
1235    fn encode_starts_with_hrp_and_separator() {
1236        // Minimum-shape input: 1 5-bit symbol + 13 regular checksum = 14 — the
1237        // tightest valid regular-code data-part length.
1238        let s = encode_5bit_to_string(&[1u8]).unwrap();
1239        assert!(s.starts_with("mk1"), "string did not start with mk1: {}", s);
1240    }
1241
1242    #[test]
1243    fn decode_rejects_invalid_hrp() {
1244        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1245        let bad = s.replacen("mk", "bt", 1);
1246        assert!(matches!(
1247            decode_string(&bad),
1248            Err(crate::Error::InvalidHrp(_))
1249        ));
1250    }
1251
1252    #[test]
1253    fn decode_rejects_mixed_case() {
1254        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1255        let bad: String = s
1256            .chars()
1257            .enumerate()
1258            .map(|(i, c)| if i == 5 { c.to_ascii_uppercase() } else { c })
1259            .collect();
1260        assert!(matches!(decode_string(&bad), Err(crate::Error::MixedCase)));
1261    }
1262
1263    #[test]
1264    fn decode_rejects_invalid_char() {
1265        // 'b' is excluded from the bech32 alphabet; substitute one in the data
1266        // part to force a parse-time character rejection.
1267        let s = encode_5bit_to_string(&[0u8; 10]).unwrap();
1268        // s looks like "mk1...". Splice 'b' at index 5 (definitely past "mk1").
1269        let mut chars: Vec<char> = s.chars().collect();
1270        chars[5] = 'b';
1271        let bad: String = chars.into_iter().collect();
1272        assert!(matches!(
1273            decode_string(&bad),
1274            Err(crate::Error::InvalidChar { .. })
1275        ));
1276    }
1277
1278    #[test]
1279    fn decode_rejects_missing_separator() {
1280        // No '1' at all in the string. rfind('1') returns None → InvalidHrp.
1281        let bad = "mknoseparatorhere";
1282        assert!(matches!(
1283            decode_string(bad),
1284            Err(crate::Error::InvalidHrp(_))
1285        ));
1286    }
1287
1288    #[test]
1289    fn decode_recovers_one_error() {
1290        // Encode, corrupt one char in the data part, decode should auto-correct.
1291        let data_5bit = vec![0u8, 0u8, 1, 2, 3, 4, 5];
1292        let s = encode_5bit_to_string(&data_5bit).unwrap();
1293
1294        let mut chars: Vec<char> = s.chars().collect();
1295        // Corrupt position 6 (past "mk1", well within the data part).
1296        let original_char = chars[6];
1297        chars[6] = if original_char == 'q' { 'p' } else { 'q' };
1298        let corrupted: String = chars.into_iter().collect();
1299
1300        let decoded = decode_string(&corrupted).unwrap();
1301        assert_eq!(decoded.corrections_applied, 1);
1302        assert_eq!(decoded.corrected_positions.len(), 1);
1303        assert_eq!(decoded.data(), data_5bit.as_slice());
1304    }
1305
1306    #[test]
1307    fn encode_rejects_data_part_in_reserved_invalid_length_range() {
1308        // For 5-bit data-part lengths 0..=12 (so data_part = 13..=25 with regular
1309        // checksum, or 15..=27 with long), `bch_code_for_length` rejects below 14.
1310        // Empty input → data_5bit.len()=0 → regular_total=13 → None; long_total=15
1311        // → Regular. Wait — regular range starts at 14 not 13.
1312        //
1313        // Actual invariant test: len 0 → regular_total=13 (None, below 14) and
1314        // long_total=15 (Regular). So it falls back to long->regular ladder ...
1315        // Re-checking encode_5bit_to_string: only fails when both miss [14..=93]
1316        // and [96..=108]. For data_5bit.len()=79, regular_total=92 → Regular ✓.
1317        // The provable reserved-invalid case is a length that misses both
1318        // ranges; the BIP 93 BCH ladder leaves no such gap below 109 because
1319        // [14..=93] ∪ [96..=108] only excludes {0..=13, 94..=95, ≥109}. The
1320        // smallest input length that produces invalid data-part lengths in
1321        // BOTH the regular and long branches is therefore data_5bit.len() ≥ 94
1322        // (regular_total ≥ 107 in invalid territory, long_total ≥ 109 too long).
1323        let too_long = vec![0u8; 94];
1324        let result = encode_5bit_to_string(&too_long);
1325        assert!(matches!(result, Err(crate::Error::InvalidStringLength(_))));
1326    }
1327}