subtle_encoding/
bech32.rs

1//! Bech32 (BIP-173) checksummed Base32 data encoding (WARNING: preview!)
2//!
3//! NOTE: This implementation is not yet constant time, but we intend to make
4//! it such. It is provided as a preview of an upcoming feature, and is
5//! not enabled by default.
6//!
7//! To enable it, add the following cargo feature: `bech32-preview`
8
9use zeroize::Zeroize;
10
11mod base32;
12mod checksum;
13
14use self::checksum::{Checksum, CHECKSUM_SIZE};
15use crate::error::Error;
16use alloc::{string::String, vec::Vec};
17
/// Character placed between the human-readable part and the data part by default
pub const DEFAULT_SEPARATOR: char = '1';

/// Lower-case Bech32 alphabet: the character at index `i` encodes the 5-bit value `i`
pub const DEFAULT_CHARSET: [char; 32] = [
    'q', 'p', 'z', 'r', 'y', '9', 'x', '8', // values 0..=7
    'g', 'f', '2', 't', 'v', 'd', 'w', '0', // values 8..=15
    's', '3', 'j', 'n', '5', '4', 'k', 'h', // values 16..=23
    'c', 'e', '6', 'm', 'u', 'a', '7', 'l', // values 24..=31
];

/// Upper-case Bech32 alphabet: same symbol order as [`DEFAULT_CHARSET`], letters upper-cased
pub const DEFAULT_CHARSET_UPCASE: [char; 32] = [
    'Q', 'P', 'Z', 'R', 'Y', '9', 'X', '8', // values 0..=7
    'G', 'F', '2', 'T', 'V', 'D', 'W', '0', // values 8..=15
    'S', '3', 'J', 'N', '5', '4', 'K', 'H', // values 16..=23
    'C', 'E', '6', 'M', 'U', 'A', '7', 'L', // values 24..=31
];
32
33/// Encode the given data as lower-case Bech32, returning a `String`
34pub fn encode<S, D>(hrp: S, data: D) -> String
35where
36    S: AsRef<str>,
37    D: AsRef<[u8]>,
38{
39    Bech32::lower_case().encode(hrp, data)
40}
41
42/// Decode the given data from lower-case Bech32, returning a 2-tuple of the
43/// "human readable part" of the message as a `String` and a `Vec<u8>` of data,
44/// or an `Error` if decoding failed.
45pub fn decode<S>(encoded: S) -> Result<(String, Vec<u8>), Error>
46where
47    S: AsRef<str>,
48{
49    Bech32::lower_case().decode(encoded)
50}
51
52/// Encode the given data as upper-case Bech32, returning a `Vec<u8>`
53pub fn encode_upper<S, D>(hrp: S, data: D) -> String
54where
55    S: AsRef<str>,
56    D: AsRef<[u8]>,
57{
58    Bech32::upper_case().encode(hrp, data)
59}
60
61/// Decode the given data from upper-case Bech32, returning a 2-tuple of the
62/// "human readable part" of the message as a `String` and a `Vec<u8>` of data,
63/// or an `Error` if decoding failed.
64pub fn decode_upper<S>(encoded: S) -> Result<(String, Vec<u8>), Error>
65where
66    S: AsRef<str>,
67{
68    Bech32::upper_case().decode(encoded)
69}
70
/// Bech32 encoder/decoder
///
/// Bundles the 32-symbol alphabet, its reverse lookup table and the separator
/// character. Instances are built with `Bech32::new`, `Bech32::lower_case`,
/// `Bech32::upper_case` or `Default::default`.
pub struct Bech32 {
    /// Encoding character set: the character at index `i` represents the 5-bit value `i`
    charset: [char; 32],

    /// Inverse alphabet used to decode: indexed by the byte value of an encoded
    /// character, holding `Some(value)` for characters in `charset` and `None` otherwise
    charset_inverse: [Option<u8>; 128],

    /// Separator between the human readable and base32-encoded parts of a Bech32 string
    separator: char,
}
82
83impl Default for Bech32 {
84    fn default() -> Self {
85        Bech32::lower_case()
86    }
87}
88
89impl Bech32 {
90    /// Decode lower case Bech32 strings
91    pub fn lower_case() -> Self {
92        Self::new(DEFAULT_CHARSET, DEFAULT_SEPARATOR)
93    }
94
95    /// Decode upper case Bech32 strings
96    pub fn upper_case() -> Self {
97        Self::new(DEFAULT_CHARSET_UPCASE, DEFAULT_SEPARATOR)
98    }
99
100    /// Create a `Bech32` encoder with the given separator character
101    ///
102    /// Panics if the separator character is invalid
103    pub fn new(charset: [char; 32], separator: char) -> Self {
104        // Check separator validity
105        match separator {
106            '1' | 'B' | 'I' | 'O' | 'b' | 'i' | 'o' => (),
107            '0'..='9' | 'A'..='Z' | 'a'..='z' => panic!("invalid separator: {:?}", separator),
108            _ => (),
109        }
110
111        let mut charset_inverse = [None; 128];
112
113        for (i, char) in charset.iter().enumerate() {
114            let mut byte = [0u8];
115            char.encode_utf8(byte.as_mut());
116            charset_inverse[byte[0] as usize] = Some(i as u8);
117        }
118
119        Self {
120            charset,
121            charset_inverse,
122            separator,
123        }
124    }
125
    /// Return the separator character currently in use
    ///
    /// This is the value that was passed to `Bech32::new` when the codec was built.
    pub fn separator(&self) -> char {
        self.separator
    }
130
131    /// Encode a bech32 string from a human-readable part (hrp) and binary data
132    pub fn encode<S, D>(&self, hrp: S, data: D) -> String
133    where
134        S: AsRef<str>,
135        D: AsRef<[u8]>,
136    {
137        let mut base32_data = base32::encode(data.as_ref());
138        let mut result =
139            String::with_capacity(hrp.as_ref().len() + 1 + base32_data.len() + CHECKSUM_SIZE);
140
141        result.push_str(hrp.as_ref());
142        result.push(self.separator);
143
144        let checksum = Checksum::new(hrp.as_ref().as_bytes(), &base32_data);
145        for byte in base32_data.iter().chain(checksum.as_ref().iter()) {
146            let c = self
147                .charset
148                .get(*byte as usize)
149                .expect("out of range character for alphabet");
150
151            result.push(*c);
152        }
153
154        // Clear any potential secrets
155        base32_data.as_mut_slice().zeroize();
156
157        result
158    }
159
160    /// Decode a bech32 string to a human-readable part (HRP) and binary data
161    pub fn decode<S>(&self, encoded: S) -> Result<(String, Vec<u8>), Error>
162    where
163        S: AsRef<str>,
164    {
165        let encoded_str = encoded.as_ref();
166
167        // TODO: constant-time whitespace tolerance
168        if encoded_str
169            .chars()
170            .last()
171            .map(|c| c.is_whitespace())
172            .unwrap_or(false)
173        {
174            return Err(Error::TrailingWhitespace);
175        }
176
177        let pos = encoded_str
178            .rfind(self.separator)
179            .ok_or_else(|| Error::EncodingInvalid)?;
180
181        if pos == encoded_str.len() {
182            return Err(Error::EncodingInvalid);
183        }
184
185        let hrp = encoded_str[..pos].to_lowercase();
186
187        if hrp.is_empty() {
188            return Err(Error::EncodingInvalid);
189        }
190
191        // Ensure all characters in the human readable part are in a valid range
192        for c in hrp.chars() {
193            match c {
194                '!'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~' => (),
195                _ => return Err(Error::EncodingInvalid),
196            }
197        }
198
199        let encoded_data = &encoded_str[(pos + 1)..];
200
201        if encoded_data.len() < CHECKSUM_SIZE {
202            return Err(Error::LengthInvalid);
203        }
204
205        let mut base32_data = Vec::with_capacity(encoded_data.len());
206
207        for encoded_byte in encoded_data.bytes() {
208            let decoded_byte = self
209                .charset_inverse
210                .get(encoded_byte as usize)
211                .and_then(|byte| *byte)
212                .ok_or_else(|| Error::EncodingInvalid)?;
213
214            base32_data.push(decoded_byte);
215        }
216
217        // TODO: use catch here?
218        if let Err(e) = Checksum::verify(hrp.as_bytes(), &base32_data) {
219            // Clear any secrets that might be in base32_data
220            base32_data.as_mut_slice().zeroize();
221            return Err(e);
222        }
223
224        let base32_len = base32_data.len() - CHECKSUM_SIZE;
225        let decode_result = base32::decode(&base32_data[..base32_len]);
226
227        // Clear any secrets that might be in data_bytes
228        base32_data.as_mut_slice().zeroize();
229        decode_result.map(|decoded| (hrp, decoded))
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// One known-answer case: an encoded string and the parts it decodes to
    struct TestVector {
        /// Bech32-encoded string
        encoded: &'static str,

        /// Human readable part
        hrp: &'static str,

        /// Binary data
        bytes: &'static [u8],

        /// Is the test vector upper case?
        upper_case: bool,
    }

    // BIP-173 test vectors
    // https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#Test_vectors
    const VALID_TEST_VECTORS: &[TestVector] = &[
        TestVector {
            encoded: "A12UEL5L",
            hrp: "a",
            bytes: &[],
            upper_case: true,
        },
        TestVector {
            encoded: "a12uel5l",
            hrp: "a",
            bytes: &[],
            upper_case: false,
        },
        TestVector {
            encoded: "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs",
            hrp: "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio",
            bytes: &[],
            upper_case: false,
        },
        TestVector {
            encoded: "abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw",
            hrp: "abcdef",
            bytes: &[
                0, 68, 50, 20, 199, 66, 84, 182, 53, 207, 132, 101, 58, 86, 215, 198, 117, 190,
                119, 223,
            ],
            upper_case: false,
        },
        TestVector {
            // "1" (HRP) + "1" (separator) + 82 x 'q' + checksum; the backslash
            // line continuations add nothing to the string value
            encoded: "11\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qqqqqqqqqq\
                      qq\
                      c8247j",
            hrp: "1",
            // 51 zero bytes
            bytes: &[
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
                0,
            ],
            upper_case: false,
        },
        TestVector {
            encoded: "split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w",
            hrp: "split",
            bytes: &[
                197, 243, 139, 112, 48, 95, 81, 155, 246, 109, 133, 251, 108, 240, 48, 88, 243,
                221, 228, 99, 236, 215, 145, 143, 45, 199, 67, 145, 143, 45,
            ],
            upper_case: false,
        },
        TestVector {
            encoded: "?1ezyfcl",
            hrp: "?",
            bytes: &[],
            upper_case: false,
        },
    ];

    /// Every vector, encoded with the lower-case codec, matches its lower-cased form
    #[test]
    fn encode_valid_test_vectors() {
        let encoder = Bech32::default();

        VALID_TEST_VECTORS.iter().for_each(|vector| {
            let expected = vector.encoded.to_lowercase();
            assert_eq!(encoder.encode(vector.hrp, vector.bytes), expected);
        });
    }

    /// Every vector decodes (with the codec matching its case) to its HRP and bytes
    #[test]
    fn decode_valid_test_vectors() {
        VALID_TEST_VECTORS.iter().for_each(|vector| {
            let decoder = match vector.upper_case {
                true => Bech32::upper_case(),
                false => Bech32::default(),
            };

            let expected = (vector.hrp.to_lowercase(), vector.bytes.to_vec());
            assert_eq!(decoder.decode(vector.encoded), Ok(expected));
        });
    }

    /// HRP characters below '!' or above '~' are rejected
    #[test]
    fn hrp_character_out_of_range() {
        let decoder = Bech32::default();

        for input in &["\x201nwldj5", "\x7F1axkwrx"] {
            assert_eq!(decoder.decode(input), Err(Error::EncodingInvalid));
        }
    }

    /// Input without any separator is rejected
    #[test]
    fn no_separator_character() {
        let outcome = Bech32::default().decode("pzry9x0s0muk");
        assert_eq!(outcome, Err(Error::EncodingInvalid));
    }

    /// Input whose separator is the very first character has no HRP
    #[test]
    fn empty_hrp() {
        ["1pzry9x0s0muk", "10a06t8", "1qzzfhee"]
            .iter()
            .for_each(|input| {
                let outcome = Bech32::default().decode(input);
                assert_eq!(outcome, Err(Error::EncodingInvalid));
            });
    }

    /// A data character outside the alphabet ('b') is rejected
    #[test]
    fn invalid_data_character() {
        let outcome = Bech32::default().decode("x1b4n0q5v");
        assert_eq!(outcome, Err(Error::EncodingInvalid));
    }

    /// A data part shorter than the checksum is a length error
    #[test]
    fn checksum_too_short() {
        let outcome = Bech32::default().decode("li1dgmt3");
        assert_eq!(outcome, Err(Error::LengthInvalid));
    }

    /// A non-alphabet character inside the checksum is rejected
    #[test]
    fn invalid_character_in_checksum() {
        let outcome = Bech32::default().decode("de1lg7wt\x7F");
        assert_eq!(outcome, Err(Error::EncodingInvalid));
    }

    /// A checksum computed over the upper-case HRP does not verify
    #[test]
    fn checksum_calculated_with_uppercase_hrp() {
        let outcome = Bech32::upper_case().decode("A1G7SGD8");
        assert_eq!(outcome, Err(Error::ChecksumInvalid));
    }

    // NOTE: not in test vectors but worth testing for anyway
    /// Upper-case data characters are rejected by the lower-case codec
    #[test]
    fn invalid_mixed_case() {
        let outcome = Bech32::default().decode("a12UEL5L");
        assert_eq!(outcome, Err(Error::EncodingInvalid));
    }
}