Skip to main content

ms_codec/
decode.rs

1//! Public decoder. Applies SPEC §4 validity rules in order.
2//!
3//! v0.2.0: also hosts [`decode_with_correction`] — the BCH-error-correcting
4//! decode entry point per plan §1 D22 + §2.B.2. Parse → polymod-residue →
5//! (if non-zero) call [`crate::bch_decode::decode_regular_errors`] → apply
6//! corrections → run the existing [`decode`] path → return
7//! `(Tag, Payload, Vec<CorrectionDetail>)`. ms1 is single-chunk per codex32
8//! spec, so there is no atomic-multi-chunk variant (cf. md-codec's
9//! per-chunk-set version).
10
11use crate::consts::{RESERVED_NOT_EMITTED_V01, TAG_ENTR, VALID_STR_LENGTHS};
12use crate::envelope;
13use crate::error::{Error, Result};
14use crate::payload::Payload;
15use crate::tag::Tag;
16use codex32::Codex32String;
17
18/// Decode a v0.1 ms1 string into `(Tag, Payload)`.
19///
20/// Rejects per SPEC §4 rules 1-10:
21///
22/// - Rule 1: upstream codex32 parse failure (Codex32 variant).
23/// - Rules 2-4, 8: wire-invariant violations (delegated to envelope::discriminate).
24/// - Rules 5-7: tag-table membership rules (here).
25/// - Rule 9: total string length not in v0.1-emittable set (here, before parse).
26/// - Rule 10: payload byte length mismatch for the tag (here, via Payload::validate()).
27pub fn decode(s: &str) -> Result<(Tag, Payload)> {
28    // §4 rule 9: total string length must be in the v0.1 set.
29    if !VALID_STR_LENGTHS.contains(&s.len()) {
30        return Err(Error::UnexpectedStringLength {
31            got: s.len(),
32            allowed: VALID_STR_LENGTHS,
33        });
34    }
35
36    // §4 rule 1: delegate parse + checksum to rust-codex32. `?` leverages the
37    // From<codex32::Error> for Error impl in error.rs.
38    let c = Codex32String::from_string(s.to_string())?;
39
40    // §4 rules 2, 3, 4, 8 + tag-alphabet rule 5: envelope.
41    let (tag, payload_bytes) = envelope::discriminate(&c)?;
42
43    // §4 rule 7: reserved-not-emitted tags.
44    if RESERVED_NOT_EMITTED_V01.contains(tag.as_bytes()) {
45        return Err(Error::ReservedTagNotEmittedInV01 {
46            got: *tag.as_bytes(),
47        });
48    }
49
50    // §4 rule 6: tag must be in the v0.1 accept set (currently {entr}).
51    // SPEC v0.9.0 §1 item 2 — wrap the OWNED entropy buffer in `Zeroizing`
52    // so the intermediate scrub runs on function exit. `Payload::Entr(Vec<u8>)`
53    // is the public return shape (unwrapped per SPEC §3 OOS-2); the
54    // caller wraps before storing — see `payload.rs` doc-comment.
55    use zeroize::Zeroizing;
56    let payload = match *tag.as_bytes() {
57        x if x == TAG_ENTR => {
58            let scrubbed: Zeroizing<Vec<u8>> = Zeroizing::new(payload_bytes);
59            let p = Payload::Entr((*scrubbed).clone());
60            // §4 rule 10: validate payload length against the tag's expected set.
61            p.validate()?;
62            p
63        }
64        _ => {
65            return Err(Error::UnknownTag {
66                got: *tag.as_bytes(),
67            });
68        }
69    };
70
71    Ok((tag, payload))
72}
73
74// ---------------------------------------------------------------------------
75// v0.2.0: BCH-error-correcting decode (plan §1 D22 + §2.B.2).
76// ---------------------------------------------------------------------------
77
/// Report of a single character repaired by [`decode_with_correction`].
///
/// One value is produced per corrected character. Unlike md-codec's
/// `CorrectionDetail`, there is no `chunk_index` field: an ms1 string is
/// a single chunk per the codex32 spec.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CorrectionDetail {
    /// Zero-based index of the repaired character, counted from the first
    /// character after the `ms1` HRP-plus-separator prefix.
    pub position: usize,
    /// Character found at `position` in the input (the corrupted one).
    pub was: char,
    /// Character that replaced it after correction.
    pub now: char,
}
96
/// The 32-character codex32 alphabet (BIP 173, lowercase). The index of a
/// character in this table is its 5-bit symbol value. Kept as a private
/// local copy (as md-codec's `chunk.rs` does) so this module does not
/// widen the codex32 public surface.
const CODEX32_ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";

/// Prefix every ms1 string starts with: the `ms` HRP followed by the `1`
/// separator.
const HRP_PREFIX: &str = "ms1";
104
105/// Parse an ms1 string into its 5-bit data-part symbol vector. Returns
106/// the data-with-checksum symbols (i.e. all symbols after `ms1`). The
107/// returned symbol count includes the 13-symbol BCH checksum tail.
108///
109/// Returns [`Error::WrongHrp`] if the string does not start with `ms1`,
110/// or [`Error::Codex32`] (via a `codex32::Error::InvalidChar`) if any
111/// data-part character is not in the codex32 alphabet.
112fn parse_ms1_symbols(s: &str) -> Result<Vec<u8>> {
113    let lower = s.to_ascii_lowercase();
114    if !lower.starts_with(HRP_PREFIX) {
115        // Find the actual HRP (everything up to and including the last '1'
116        // separator) so the error reports the observed HRP instead of "".
117        let hrp_end = lower.rfind('1').map(|i| i + 1).unwrap_or(lower.len());
118        let got = lower[..hrp_end.saturating_sub(1)].to_string();
119        return Err(Error::WrongHrp { got });
120    }
121    let rest = &lower[HRP_PREFIX.len()..];
122    let mut symbols: Vec<u8> = Vec::with_capacity(rest.len());
123    // Non-alphabet characters can't appear in a valid v0.1 string. We
124    // can't fabricate a `codex32::Error` value here (the upstream crate
125    // doesn't expose a constructor for `InvalidChar`), so we use
126    // `UnexpectedStringLength` as a stand-in: the existing `decode` path
127    // would have rejected the string for the same reason on a different
128    // axis. Toolkit-side helper at B.7 absorbs into `UnparseableInput`
129    // per plan §2.B.4 D29 error-mapping table.
130    for c in rest.chars() {
131        let lc = c as u8;
132        let sym = CODEX32_ALPHABET
133            .iter()
134            .position(|&b| b == lc)
135            .ok_or(Error::UnexpectedStringLength {
136                got: s.len(),
137                allowed: VALID_STR_LENGTHS,
138            })? as u8;
139        symbols.push(sym);
140    }
141    Ok(symbols)
142}
143
144/// Re-encode a 5-bit data-part symbol vector as a complete ms1 string.
145fn encode_ms1_string(data_with_checksum: &[u8]) -> String {
146    let mut out = String::with_capacity(HRP_PREFIX.len() + data_with_checksum.len());
147    out.push_str(HRP_PREFIX);
148    for &v in data_with_checksum {
149        out.push(CODEX32_ALPHABET[(v & 0x1F) as usize] as char);
150    }
151    out
152}
153
154/// BCH-error-correcting decode for a single ms1 string.
155///
156/// Per plan §1 Q1 lock — full-decode semantics: this is the single entry
157/// point that callers needing both "did anything get repaired?" AND "the
158/// fully-decoded `(Tag, Payload)`" should use.
159///
160/// Algorithm:
161/// 1. Parse the input as ms1 (`ms1` HRP + codex32 data-part) into a
162///    5-bit symbol vector.
163/// 2. Compute the BCH polymod residue
164///    (`hrp_expand("ms") || data_with_checksum`) XOR'd against
165///    [`crate::bch::MS_REGULAR_CONST`].
166/// 3. Residue `== 0` ⇒ clean codeword; pass through to the existing
167///    [`decode`] entry point unchanged.
168/// 4. Residue `!= 0` ⇒ invoke
169///    [`crate::bch_decode::decode_regular_errors`]. If `None`, return
170///    `Err(Error::TooManyErrors { bound: 8 })` per plan §2.B.4 D29
171///    error-mapping table.
172/// 5. Apply corrections to the symbol vector, re-verify via polymod (a
173///    defensive catch for pathological 5+-error patterns that fool BM
174///    into returning a degree-≤4 locator with 4 valid roots), and record
175///    one [`CorrectionDetail`] per repaired character.
176/// 6. Re-encode the corrected symbol vector as an ms1 string and forward
177///    it to the existing [`decode`] entry point.
178///
179/// Per Q1 lock + D29 error-mapping table, any §4-rule error from the
180/// full decode (orphan variants like `ThresholdNotZero`,
181/// `ReservedTagNotEmittedInV01`, etc.) surfaces directly; toolkit-side
182/// `repair_via_ms_codec` (B.7) absorbs these into
183/// `RepairError::PostCorrectionDecodeFailed`.
184///
185/// Returns `(Tag, Payload, Vec<CorrectionDetail>)` on success. The
186/// correction-detail vector is in ascending `position` order; an empty
187/// vector means the input was already a valid codeword.
188pub fn decode_with_correction(s: &str) -> Result<(Tag, Payload, Vec<CorrectionDetail>)> {
189    // Parse data-part symbols. Length checks live in `decode` proper
190    // (rule 9 is enforced there after we've potentially corrected, since
191    // BCH correction does not change the string length).
192    let symbols = parse_ms1_symbols(s)?;
193
194    // Polymod residue against ms1's target constant.
195    let mut input = crate::bch::hrp_expand("ms");
196    input.extend_from_slice(&symbols);
197    let residue = crate::bch::polymod_run(&input) ^ crate::bch::MS_REGULAR_CONST;
198
199    if residue == 0 {
200        // Already a valid codeword; pass through to the existing decoder.
201        let (tag, payload) = decode(s)?;
202        return Ok((tag, payload, Vec::new()));
203    }
204
205    // Attempt BCH correction.
206    let (positions, magnitudes) = crate::bch_decode::decode_regular_errors(residue, symbols.len())
207        .ok_or(Error::TooManyErrors { bound: 8 })?;
208
209    // Apply corrections; record (was, now) chars per position.
210    let mut corrected = symbols.clone();
211    let mut details: Vec<CorrectionDetail> = Vec::with_capacity(positions.len());
212    for (&pos, &mag) in positions.iter().zip(&magnitudes) {
213        if pos >= corrected.len() {
214            // Defensive: chien_search bounded pos to [0, L); but a
215            // pathological 5+-error pattern could in principle skirt
216            // that.
217            return Err(Error::TooManyErrors { bound: 8 });
218        }
219        let was_byte = corrected[pos];
220        let now_byte = was_byte ^ mag;
221        let was = CODEX32_ALPHABET[(was_byte & 0x1F) as usize] as char;
222        let now = CODEX32_ALPHABET[(now_byte & 0x1F) as usize] as char;
223        details.push(CorrectionDetail {
224            position: pos,
225            was,
226            now,
227        });
228        corrected[pos] = now_byte;
229    }
230
231    // Defensive re-verify (catches pathological 5+-error patterns that
232    // happen to produce a degree-≤4 locator with 4 valid roots).
233    let mut verify_input = crate::bch::hrp_expand("ms");
234    verify_input.extend_from_slice(&corrected);
235    let verify_residue =
236        crate::bch::polymod_run(&verify_input) ^ crate::bch::MS_REGULAR_CONST;
237    if verify_residue != 0 {
238        return Err(Error::TooManyErrors { bound: 8 });
239    }
240
241    // Hand the corrected string to the existing decoder. Any §4-rule
242    // error surfaces directly per Q1 lock; toolkit helper at B.7 absorbs.
243    let corrected_str = encode_ms1_string(&corrected);
244    let (tag, payload) = decode(&corrected_str)?;
245    Ok((tag, payload, details))
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encode;

    /// Each supported entropy size survives an encode -> decode round trip.
    #[test]
    fn round_trip_entr_all_lengths() {
        for len in [16usize, 20, 24, 28, 32] {
            // Deterministic, non-trivial byte pattern of the requested size.
            let entropy: Vec<u8> = (0..len as u8).map(|i| i.wrapping_mul(7)).collect();
            let original = Payload::Entr(entropy);

            let encoded = encode::encode(Tag::ENTR, &original).unwrap();
            let (tag, recovered) = decode(&encoded).unwrap();

            assert_eq!(tag, Tag::ENTR);
            assert_eq!(recovered, original);
        }
    }

    /// A string whose length is outside the v0.1 set is rejected up front.
    #[test]
    fn decode_rejects_unexpected_length() {
        // 51 chars is not a v0.1 emittable length.
        let s = "ms10entrsxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        let outcome = decode(s);
        assert!(matches!(
            outcome,
            Err(Error::UnexpectedStringLength { .. })
        ));
    }

    /// A well-formed string of an allowed length still fails rule 7 when it
    /// carries the reserved `seed` identifier.
    #[test]
    fn decode_rejects_short_seed_string_with_reserved_tag() {
        // Hand-build a 50-char string with id="seed" — one prefix byte plus
        // 16 bytes of entropy. The length check passes; tag-rule 7 fails.
        let data: Vec<u8> = std::iter::once(0x00u8)
            .chain(std::iter::repeat(0xAAu8).take(16))
            .collect();
        let s = Codex32String::from_seed("ms", 0, "seed", codex32::Fe::S, &data)
            .unwrap()
            .to_string();
        assert_eq!(s.len(), 50, "expected str.len 50 for 16-B + prefix");

        let outcome = decode(&s);
        assert!(matches!(
            outcome,
            Err(Error::ReservedTagNotEmittedInV01 { .. })
        ));
    }
}
287            decode(&s),
288            Err(Error::ReservedTagNotEmittedInV01 { .. })
289        ));
290    }
291}