Skip to main content

md_codec/
codex32.rs

//! v0.11 ↔ codex32 BCH layer adapter, symbol-aligned per spec §3.1 / D7.
//!
//! Bypasses v0.x's byte-oriented `encode_string` / `decode_string`, whose
//! byte-padding would add an extra codex32 character to every encoding.
//! Instead uses v0.x's lower-level BCH primitives
//! (`bch_create_checksum_regular`, `bch_verify_regular`), which operate on
//! `&[u8]` slices of 5-bit symbols.
7
8use crate::bitstream::{BitReader, BitWriter};
9use crate::error::Error;
10
/// The 32-character codex32 alphabet (the lowercase BIP 173 character set).
/// A character's position in this table is its 5-bit symbol value.
const CODEX32_ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";

/// Human-readable part of a v0.11 string (same as v0.x).
const HRP: &str = "md";

/// Length of the regular-BCH checksum, counted in 5-bit symbols.
pub(crate) const REGULAR_CHECKSUM_SYMBOLS: usize = 13;
19
20/// Pack `bit_count` bits from `payload_bytes` into 5-bit symbols. Pads the
21/// final symbol with zeros if `bit_count` is not a multiple of 5. Returns
22/// `ceil(bit_count / 5)` symbols. Each output u8 contains a 5-bit value.
23fn bits_to_symbols(payload_bytes: &[u8], bit_count: usize) -> Result<Vec<u8>, Error> {
24    let symbol_count = (bit_count + 4) / 5;
25    let mut r = BitReader::with_bit_limit(payload_bytes, bit_count);
26    let mut symbols = Vec::with_capacity(symbol_count);
27    for _ in 0..symbol_count {
28        let take = r.remaining_bits().min(5);
29        let val = if take == 0 {
30            0
31        } else {
32            r.read_bits(take)? as u8
33        };
34        // Left-justify within 5 bits if final symbol is short. (For decoder
35        // round-trip purposes the spec defines bit-packing MSB-first into
36        // 5-bit symbols, so zero-padding the LOW bits of the final symbol is
37        // the canonical form.)
38        let symbol = (val << (5 - take as u32)) & 0x1F;
39        symbols.push(symbol);
40    }
41    Ok(symbols)
42}
43
44/// Convert a stream of 5-bit symbols back into byte-padded bytes (MSB-first).
45fn symbols_to_bytes(symbols: &[u8]) -> Vec<u8> {
46    let mut w = BitWriter::new();
47    for &s in symbols {
48        w.write_bits((s & 0x1F) as u64, 5);
49    }
50    w.into_bytes()
51}
52
53fn symbol_to_char(s: u8) -> char {
54    CODEX32_ALPHABET[(s & 0x1F) as usize] as char
55}
56
57fn char_to_symbol(c: char) -> Option<u8> {
58    let lc = c.to_ascii_lowercase() as u8;
59    CODEX32_ALPHABET
60        .iter()
61        .position(|&b| b == lc)
62        .map(|i| i as u8)
63}
64
65/// Wrap a v0.11 payload bit stream (byte-padded with exact `bit_count`)
66/// into a complete codex32 md1 string with HRP and BCH checksum, symbol-aligned.
67pub fn wrap_payload(payload_bytes: &[u8], bit_count: usize) -> Result<String, Error> {
68    let data_symbols = bits_to_symbols(payload_bytes, bit_count)?;
69    // v0.x exposes `bch_create_checksum_regular(hrp: &str, data: &[u8]) -> [u8; 13]`.
70    let checksum: [u8; 13] = crate::bch::bch_create_checksum_regular(HRP, &data_symbols);
71
72    let mut s =
73        String::with_capacity(HRP.len() + 1 + data_symbols.len() + REGULAR_CHECKSUM_SYMBOLS);
74    s.push_str(HRP);
75    s.push('1'); // BIP 173-style HRP separator
76    for sym in &data_symbols {
77        s.push(symbol_to_char(*sym));
78    }
79    for sym in checksum.iter() {
80        s.push(symbol_to_char(*sym));
81    }
82    Ok(s)
83}
84
85/// Unwrap a v0.11 md1 string into (byte-padded payload bytes, symbol-aligned bit count).
86///
87/// The returned `symbol_aligned_bit_count = 5 × data_symbol_count`. This is
88/// the EXACT bit length carried by the codex32 BCH layer (rounded up to the
89/// next 5-bit boundary from the actual payload). The caller uses this as
90/// `decode_payload`'s `bit_len` so the v11 decoder's TLV-rollback only sees
91/// ≤4 bits of trailing zero-padding (well under the 7-bit threshold).
92pub fn unwrap_string(s: &str) -> Result<(Vec<u8>, usize), Error> {
93    // 1. Strip HRP + separator.
94    let prefix = format!("{}1", HRP);
95    if !s.to_ascii_lowercase().starts_with(&prefix) {
96        return Err(Error::Codex32DecodeError(format!(
97            "string does not start with HRP {prefix}"
98        )));
99    }
100    let symbols_str = &s[prefix.len()..];
101
102    // 2. Char-to-symbol decode (tolerate visual separators per D11).
103    let mut symbols = Vec::with_capacity(symbols_str.len());
104    for c in symbols_str.chars() {
105        if c.is_whitespace() || c == '-' {
106            continue;
107        }
108        let sym = char_to_symbol(c).ok_or_else(|| {
109            Error::Codex32DecodeError(format!("character {c:?} not in codex32 alphabet"))
110        })?;
111        symbols.push(sym);
112    }
113
114    // 3. BCH-verify.
115    if !crate::bch::bch_verify_regular(HRP, &symbols) {
116        return Err(Error::Codex32DecodeError(
117            "BCH checksum verification failed".into(),
118        ));
119    }
120
121    // 4. Strip the 13-symbol checksum.
122    if symbols.len() < REGULAR_CHECKSUM_SYMBOLS {
123        return Err(Error::Codex32DecodeError(
124            "string too short for BCH checksum".into(),
125        ));
126    }
127    let data_symbols = &symbols[..symbols.len() - REGULAR_CHECKSUM_SYMBOLS];
128    let bit_count = 5 * data_symbols.len();
129
130    // 5. Convert symbols → byte-padded bytes.
131    Ok((symbols_to_bytes(data_symbols), bit_count))
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// A 57-bit payload survives wrap → unwrap, modulo symbol padding.
    #[test]
    fn wrap_unwrap_round_trip_57_bits() {
        // Synthetic 57-bit payload (mimics BIP 84 single-sig length).
        let mut writer = BitWriter::new();
        writer.write_bits(0xDEAD_BEEF_CAFE_BABE_u64 >> 7, 57);
        let payload = writer.into_bytes();

        let encoded = wrap_payload(&payload, 57).unwrap();
        assert!(encoded.starts_with("md1"));
        // "md1" (3 chars) + 12 data symbols + 13 checksum symbols = 28 chars.
        assert_eq!(encoded.len(), 28);

        let (decoded, decoded_bits) = unwrap_string(&encoded).unwrap();
        // 12 data symbols × 5 bits = 60, i.e. 57 plus 3 padding bits.
        assert_eq!(decoded_bits, 60);
        // The first 7 bytes are fully populated; in the last byte only the
        // top bit carries payload, the rest is padding.
        assert_eq!(&decoded[..7], &payload[..7]);
        assert_eq!(decoded[7] & 0x80, payload[7] & 0x80);
    }

    /// Critical: covers an N-byte chunk whose round-trip would mismatch under
    /// byte-aligned `bytes.len() * 8` accounting. N=3 is the smallest such
    /// case: the encoder writes 8 bytes, symbol-aligned packing yields 13
    /// symbols which unpack to 9 bytes, yet the symbol-aligned bit count of
    /// 65 remains the correct reference.
    #[test]
    fn wrap_unwrap_n3_chunk_byte_count_recovers_correctly() {
        // Chunk-format wire: 37-bit header + 8*3 = 24-bit payload = 61 bits.
        const HEADER_BITS: usize = 37;
        const PAYLOAD_BITS: usize = 24;
        let bit_count = HEADER_BITS + PAYLOAD_BITS;

        let mut writer = BitWriter::new();
        // Arbitrary header bits.
        // NOTE(review): this literal has more than 37 significant bits, so
        // the test relies on `write_bits` discarding the excess — confirm.
        writer.write_bits(0x1FFF_FFFF_FFFF_u64, 37);
        writer.write_bits(0x00AA_BBCC_u64, 24);
        let wire = writer.into_bytes();
        assert_eq!(wire.len(), 8); // ceil(61/8)

        let encoded = wrap_payload(&wire, bit_count).unwrap();
        let (_decoded, decoded_bits) = unwrap_string(&encoded).unwrap();
        // 5 * ceil(61/5) = 5 * 13 = 65.
        assert_eq!(decoded_bits, 65);
        // (65 - 37) / 8 = 3 → three chunk-payload bytes recovered.
        let recovered_payload_byte_count = (decoded_bits - HEADER_BITS) / 8;
        assert_eq!(recovered_payload_byte_count, 3);
    }

    /// A string with a foreign HRP is refused.
    #[test]
    fn unwrap_rejects_non_md_string() {
        let result = unwrap_string("xx1qpz9r4cy7");
        assert!(result.is_err());
    }

    /// `-` and whitespace sprinkled through the symbols do not break decoding.
    #[test]
    fn unwrap_tolerates_visual_separators() {
        let mut writer = BitWriter::new();
        writer.write_bits(0b1010, 4);
        let payload = writer.into_bytes();
        let encoded = wrap_payload(&payload, 4).unwrap();

        // Insert a dash after the 4th character and a space after the 9th.
        let mut grouped = String::new();
        for (idx, ch) in encoded.chars().enumerate() {
            grouped.push(ch);
            match idx {
                3 => grouped.push('-'),
                8 => grouped.push(' '),
                _ => {}
            }
        }
        let _ = unwrap_string(&grouped).unwrap();
    }
}
202}