ms_codec/decode.rs
1//! Public decoder. Applies SPEC §4 validity rules in order.
2//!
3//! v0.2.0: also hosts [`decode_with_correction`] — the BCH-error-correcting
4//! decode entry point per plan §1 D22 + §2.B.2. Parse → polymod-residue →
5//! (if non-zero) call [`crate::bch_decode::decode_regular_errors`] → apply
6//! corrections → run the existing [`decode`] path → return
7//! `(Tag, Payload, Vec<CorrectionDetail>)`. ms1 is single-chunk per codex32
8//! spec, so there is no atomic-multi-chunk variant (cf. md-codec's
9//! per-chunk-set version).
10
11use crate::consts::{RESERVED_NOT_EMITTED_V01, TAG_ENTR, VALID_STR_LENGTHS};
12use crate::envelope;
13use crate::error::{Error, Result};
14use crate::payload::Payload;
15use crate::tag::Tag;
16use codex32::Codex32String;
17
18/// Decode a v0.1 ms1 string into `(Tag, Payload)`.
19///
20/// Rejects per SPEC §4 rules 1-10:
21///
22/// - Rule 1: upstream codex32 parse failure (Codex32 variant).
23/// - Rules 2-4, 8: wire-invariant violations (delegated to envelope::discriminate).
24/// - Rules 5-7: tag-table membership rules (here).
25/// - Rule 9: total string length not in v0.1-emittable set (here, before parse).
26/// - Rule 10: payload byte length mismatch for the tag (here, via Payload::validate()).
27pub fn decode(s: &str) -> Result<(Tag, Payload)> {
28 // §4 rule 9: total string length must be in the v0.1 set.
29 if !VALID_STR_LENGTHS.contains(&s.len()) {
30 return Err(Error::UnexpectedStringLength {
31 got: s.len(),
32 allowed: VALID_STR_LENGTHS,
33 });
34 }
35
36 // §4 rule 1: delegate parse + checksum to rust-codex32. `?` leverages the
37 // From<codex32::Error> for Error impl in error.rs.
38 let c = Codex32String::from_string(s.to_string())?;
39
40 // §4 rules 2, 3, 4, 8 + tag-alphabet rule 5: envelope.
41 let (tag, payload_bytes) = envelope::discriminate(&c)?;
42
43 // §4 rule 7: reserved-not-emitted tags.
44 if RESERVED_NOT_EMITTED_V01.contains(tag.as_bytes()) {
45 return Err(Error::ReservedTagNotEmittedInV01 {
46 got: *tag.as_bytes(),
47 });
48 }
49
50 // §4 rule 6: tag must be in the v0.1 accept set (currently {entr}).
51 // SPEC v0.9.0 §1 item 2 — wrap the OWNED entropy buffer in `Zeroizing`
52 // so the intermediate scrub runs on function exit. `Payload::Entr(Vec<u8>)`
53 // is the public return shape (unwrapped per SPEC §3 OOS-2); the
54 // caller wraps before storing — see `payload.rs` doc-comment.
55 use zeroize::Zeroizing;
56 let payload = match *tag.as_bytes() {
57 x if x == TAG_ENTR => {
58 let scrubbed: Zeroizing<Vec<u8>> = Zeroizing::new(payload_bytes);
59 let p = Payload::Entr((*scrubbed).clone());
60 // §4 rule 10: validate payload length against the tag's expected set.
61 p.validate()?;
62 p
63 }
64 _ => {
65 return Err(Error::UnknownTag {
66 got: *tag.as_bytes(),
67 });
68 }
69 };
70
71 Ok((tag, payload))
72}
73
74// ---------------------------------------------------------------------------
75// v0.2.0: BCH-error-correcting decode (plan §1 D22 + §2.B.2).
76// ---------------------------------------------------------------------------
77
/// One repaired character, as reported by [`decode_with_correction`].
///
/// A report is produced for every character the corrector changed.
/// Positions count from zero within the codex32 data-part, i.e. the
/// characters that follow the `ms1` HRP + separator.
///
/// Because ms1 is single-chunk per the codex32 spec, no `chunk_index`
/// field is carried (cf. md-codec's `CorrectionDetail`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CorrectionDetail {
    /// Zero-based index of the repaired character inside the codex32
    /// data-part (after the HRP and separator).
    pub position: usize,
    /// Character found at `position` in the (corrupted) input.
    pub was: char,
    /// Character that replaces it after correction.
    pub now: char,
}
96
/// Module-private copy of the codex32 character set (the lowercase
/// BIP 173 alphabet). The byte at index `i` is the character that encodes
/// the 5-bit symbol `i`. Kept private, mirroring md-codec's `chunk.rs`
/// copy, so this module does not widen the codex32 public surface.
const CODEX32_ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
101
/// Leading text of every ms1 string: the `ms` human-readable part followed
/// by the `1` separator.
const HRP_PREFIX: &str = "ms1";
104
105/// Parse an ms1 string into its 5-bit data-part symbol vector. Returns
106/// the data-with-checksum symbols (i.e. all symbols after `ms1`). The
107/// returned symbol count includes the 13-symbol BCH checksum tail.
108///
109/// Returns [`Error::WrongHrp`] if the string does not start with `ms1`,
110/// or [`Error::Codex32`] (via a `codex32::Error::InvalidChar`) if any
111/// data-part character is not in the codex32 alphabet.
112fn parse_ms1_symbols(s: &str) -> Result<Vec<u8>> {
113 let lower = s.to_ascii_lowercase();
114 if !lower.starts_with(HRP_PREFIX) {
115 // Find the actual HRP (everything up to and including the last '1'
116 // separator) so the error reports the observed HRP instead of "".
117 let hrp_end = lower.rfind('1').map(|i| i + 1).unwrap_or(lower.len());
118 let got = lower[..hrp_end.saturating_sub(1)].to_string();
119 return Err(Error::WrongHrp { got });
120 }
121 let rest = &lower[HRP_PREFIX.len()..];
122 let mut symbols: Vec<u8> = Vec::with_capacity(rest.len());
123 // Non-alphabet characters can't appear in a valid v0.1 string. We
124 // can't fabricate a `codex32::Error` value here (the upstream crate
125 // doesn't expose a constructor for `InvalidChar`), so we use
126 // `UnexpectedStringLength` as a stand-in: the existing `decode` path
127 // would have rejected the string for the same reason on a different
128 // axis. Toolkit-side helper at B.7 absorbs into `UnparseableInput`
129 // per plan §2.B.4 D29 error-mapping table.
130 for c in rest.chars() {
131 let lc = c as u8;
132 let sym = CODEX32_ALPHABET
133 .iter()
134 .position(|&b| b == lc)
135 .ok_or(Error::UnexpectedStringLength {
136 got: s.len(),
137 allowed: VALID_STR_LENGTHS,
138 })? as u8;
139 symbols.push(sym);
140 }
141 Ok(symbols)
142}
143
144/// Re-encode a 5-bit data-part symbol vector as a complete ms1 string.
145fn encode_ms1_string(data_with_checksum: &[u8]) -> String {
146 let mut out = String::with_capacity(HRP_PREFIX.len() + data_with_checksum.len());
147 out.push_str(HRP_PREFIX);
148 for &v in data_with_checksum {
149 out.push(CODEX32_ALPHABET[(v & 0x1F) as usize] as char);
150 }
151 out
152}
153
154/// BCH-error-correcting decode for a single ms1 string.
155///
156/// Per plan §1 Q1 lock — full-decode semantics: this is the single entry
157/// point that callers needing both "did anything get repaired?" AND "the
158/// fully-decoded `(Tag, Payload)`" should use.
159///
160/// Algorithm:
161/// 1. Parse the input as ms1 (`ms1` HRP + codex32 data-part) into a
162/// 5-bit symbol vector.
163/// 2. Compute the BCH polymod residue
164/// (`hrp_expand("ms") || data_with_checksum`) XOR'd against
165/// [`crate::bch::MS_REGULAR_CONST`].
166/// 3. Residue `== 0` ⇒ clean codeword; pass through to the existing
167/// [`decode`] entry point unchanged.
168/// 4. Residue `!= 0` ⇒ invoke
169/// [`crate::bch_decode::decode_regular_errors`]. If `None`, return
170/// `Err(Error::TooManyErrors { bound: 8 })` per plan §2.B.4 D29
171/// error-mapping table.
172/// 5. Apply corrections to the symbol vector, re-verify via polymod (a
173/// defensive catch for pathological 5+-error patterns that fool BM
174/// into returning a degree-≤4 locator with 4 valid roots), and record
175/// one [`CorrectionDetail`] per repaired character.
176/// 6. Re-encode the corrected symbol vector as an ms1 string and forward
177/// it to the existing [`decode`] entry point.
178///
179/// Per Q1 lock + D29 error-mapping table, any §4-rule error from the
180/// full decode (orphan variants like `ThresholdNotZero`,
181/// `ReservedTagNotEmittedInV01`, etc.) surfaces directly; toolkit-side
182/// `repair_via_ms_codec` (B.7) absorbs these into
183/// `RepairError::PostCorrectionDecodeFailed`.
184///
185/// Returns `(Tag, Payload, Vec<CorrectionDetail>)` on success. The
186/// correction-detail vector is in ascending `position` order; an empty
187/// vector means the input was already a valid codeword.
188pub fn decode_with_correction(s: &str) -> Result<(Tag, Payload, Vec<CorrectionDetail>)> {
189 // Parse data-part symbols. Length checks live in `decode` proper
190 // (rule 9 is enforced there after we've potentially corrected, since
191 // BCH correction does not change the string length).
192 let symbols = parse_ms1_symbols(s)?;
193
194 // Polymod residue against ms1's target constant.
195 let mut input = crate::bch::hrp_expand("ms");
196 input.extend_from_slice(&symbols);
197 let residue = crate::bch::polymod_run(&input) ^ crate::bch::MS_REGULAR_CONST;
198
199 if residue == 0 {
200 // Already a valid codeword; pass through to the existing decoder.
201 let (tag, payload) = decode(s)?;
202 return Ok((tag, payload, Vec::new()));
203 }
204
205 // Attempt BCH correction.
206 let (positions, magnitudes) = crate::bch_decode::decode_regular_errors(residue, symbols.len())
207 .ok_or(Error::TooManyErrors { bound: 8 })?;
208
209 // Apply corrections; record (was, now) chars per position.
210 let mut corrected = symbols.clone();
211 let mut details: Vec<CorrectionDetail> = Vec::with_capacity(positions.len());
212 for (&pos, &mag) in positions.iter().zip(&magnitudes) {
213 if pos >= corrected.len() {
214 // Defensive: chien_search bounded pos to [0, L); but a
215 // pathological 5+-error pattern could in principle skirt
216 // that.
217 return Err(Error::TooManyErrors { bound: 8 });
218 }
219 let was_byte = corrected[pos];
220 let now_byte = was_byte ^ mag;
221 let was = CODEX32_ALPHABET[(was_byte & 0x1F) as usize] as char;
222 let now = CODEX32_ALPHABET[(now_byte & 0x1F) as usize] as char;
223 details.push(CorrectionDetail {
224 position: pos,
225 was,
226 now,
227 });
228 corrected[pos] = now_byte;
229 }
230
231 // Defensive re-verify (catches pathological 5+-error patterns that
232 // happen to produce a degree-≤4 locator with 4 valid roots).
233 let mut verify_input = crate::bch::hrp_expand("ms");
234 verify_input.extend_from_slice(&corrected);
235 let verify_residue =
236 crate::bch::polymod_run(&verify_input) ^ crate::bch::MS_REGULAR_CONST;
237 if verify_residue != 0 {
238 return Err(Error::TooManyErrors { bound: 8 });
239 }
240
241 // Hand the corrected string to the existing decoder. Any §4-rule
242 // error surfaces directly per Q1 lock; toolkit helper at B.7 absorbs.
243 let corrected_str = encode_ms1_string(&corrected);
244 let (tag, payload) = decode(&corrected_str)?;
245 Ok((tag, payload, details))
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encode;

    /// Encode → decode must return the same tag and payload for every
    /// entropy length exercised here.
    #[test]
    fn round_trip_entr_all_lengths() {
        for len in [16u8, 20, 24, 28, 32] {
            // Deterministic, non-trivial filler bytes.
            let entropy: Vec<u8> = (0..len).map(|i| i.wrapping_mul(7)).collect();
            let original = Payload::Entr(entropy);
            let encoded = encode::encode(Tag::ENTR, &original).unwrap();

            let (tag, recovered) = decode(&encoded).unwrap();
            assert_eq!(tag, Tag::ENTR);
            assert_eq!(recovered, original);
        }
    }

    /// A string whose length is outside the v0.1 set is rejected by the
    /// up-front length check.
    #[test]
    fn decode_rejects_unexpected_length() {
        // 51 chars is not a v0.1 emittable length.
        let input = "ms10entrsxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        let outcome = decode(input);
        assert!(matches!(
            outcome,
            Err(Error::UnexpectedStringLength { .. })
        ));
    }

    /// A well-formed 50-char string carrying the reserved `seed` id passes
    /// the length check but is rejected by tag rule 7.
    #[test]
    fn decode_rejects_short_seed_string_with_reserved_tag() {
        // One 0x00 prefix byte followed by 16 bytes of 0xAA entropy.
        let mut data = vec![0xAAu8; 17];
        data[0] = 0x00;

        let encoded = Codex32String::from_seed("ms", 0, "seed", codex32::Fe::S, &data)
            .unwrap()
            .to_string();
        assert_eq!(encoded.len(), 50, "expected str.len 50 for 16-B + prefix");

        let outcome = decode(&encoded);
        assert!(matches!(
            outcome,
            Err(Error::ReservedTagNotEmittedInV01 { .. })
        ));
    }
}