crypto/keys/
bip39.rs

1// Copyright 2024 IOTA Stiftung
2// SPDX-License-Identifier: Apache-2.0
3
4// https://en.bitcoin.it/wiki/BIP_0039
5
6// https://doc.rust-lang.org/std/primitive.str.html
7// "String slices are always valid UTF-8."
8
9use alloc::borrow::{Borrow, ToOwned};
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12use core::convert::TryFrom;
13use core::fmt;
14use core::ops::Deref;
15
16use unicode_normalization::{is_nfkd, UnicodeNormalization};
17use zeroize::{Zeroize, ZeroizeOnDrop, Zeroizing};
18
/// Errors reported by the BIP39 mnemonic, passphrase and word list routines of this module.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// Mnemonic entropy amount is invalid (should be 128 or 160 or 192 or 224 or 256 bits).
    /// The payload is the amount reported by the routine that rejected the input.
    InvalidEntropyCount(usize),
    /// Mnemonic contains a word not present in word list; the payload is the offending word.
    NoSuchWord(String),
    /// Mnemonic corrupted, checksum mismatch
    ChecksumMismatch,
    /// Mnemonic is not in NFKD form
    UnnormalizedMnemonic,
    /// Passphrase is not in NFKD form
    UnnormalizedPassphrase,
    /// Word list contains unnormalized word or word with a separator; the payload is the offending word.
    BadWordlistWord(String),
    /// Word list contains duplicate words (returned when the uniqueness check of a word list fails)
    UnsortedWordlist,
    /// Separator does not normalize (NFKD) to exactly one character
    BadSeparator,
}
39
/// Borrowed view of a mnemonic that is already in unicode NFKD form.
///
/// The wrapper is `repr(transparent)` over `str`, so it has exactly the layout of a string slice.
#[repr(transparent)]
pub struct MnemonicRef(str);

impl Deref for MnemonicRef {
    type Target = str;

    /// Expose the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let MnemonicRef(text) = self;
        text
    }
}
50
51impl ToOwned for MnemonicRef {
52    type Owned = Mnemonic;
53    fn to_owned(&self) -> Mnemonic {
54        Mnemonic(self.deref().to_owned())
55    }
56}
57
58impl<'a> TryFrom<&'a str> for &'a MnemonicRef {
59    type Error = Error;
60    fn try_from(mnemonic_str: &'a str) -> Result<Self, Error> {
61        if is_nfkd(mnemonic_str) {
62            // SAFETY: MnemonicRef is represented exactly as str due to repr(transparent)
63            Ok(unsafe { core::mem::transmute::<&str, Self>(mnemonic_str) })
64        } else {
65            Err(Error::UnnormalizedMnemonic)
66        }
67    }
68}
69
/// Owned normalized (unicode NFKD) mnemonic.
///
/// A mnemonic is the encoding of secret entropy using words from a given word list.
/// It is used to derive a seed which serves as a master key.
/// If the mnemonic is leaked then the seed is compromised (unless a strong passphrase is used).
/// A mnemonic should be kept secret on analog media.
/// A mnemonic should be verified against a given word list before deriving a seed from it.
///
/// The derives make the backing string zeroizable, both explicitly and on drop.
#[derive(Clone, Zeroize, ZeroizeOnDrop)]
pub struct Mnemonic(String);
79
80impl Deref for Mnemonic {
81    type Target = MnemonicRef;
82    fn deref(&self) -> &MnemonicRef {
83        // SAFETY: MnemonicRef is represented exactly as str due to repr(transparent)
84        unsafe { core::mem::transmute(self.0.as_str()) }
85    }
86}
87
88impl Borrow<MnemonicRef> for Mnemonic {
89    fn borrow(&self) -> &MnemonicRef {
90        self
91    }
92}
93
94/// Normalize the input string and use it as mnemonic.
95/// The resulting mnemonic should be verified against a given word list before deriving a seed from it.
96impl From<String> for Mnemonic {
97    fn from(mut unnormalized_mnemonic: String) -> Self {
98        let mnemonic = Self(unnormalized_mnemonic.chars().nfkd().collect());
99        unnormalized_mnemonic.zeroize();
100        mnemonic
101    }
102}
103
104/// Normalize the input string and use it as mnemonic.
105/// The resulting mnemonic should be verified against a given word list before deriving a seed from it.
106/// If the input is guaranteed to be normalized then consider using `MnemonicRef`.
107/// The input contains secret data and should be handled accordingly.
108impl From<&str> for Mnemonic {
109    fn from(unnormalized_mnemonic: &str) -> Self {
110        Self(unnormalized_mnemonic.chars().nfkd().collect())
111    }
112}
113
114/// Normalize the input string and use it as mnemonic.
115/// The resulting mnemonic should be verified against a given word list before deriving a seed from it.
116impl From<Zeroizing<String>> for Mnemonic {
117    fn from(unnormalized_mnemonic: Zeroizing<String>) -> Self {
118        Self(unnormalized_mnemonic.chars().nfkd().collect())
119    }
120}
121
122/// Join the input words with the space character (U+0020) and normalize into a mnemonic.
123/// The resulting mnemonic should be verified against a given word list before deriving a seed from it.
124///
125/// Note, the initial word list could have had a separator different from the space. An incorrect separator will result
126/// in a different mnemonic (and seed).
127impl From<Vec<String>> for Mnemonic {
128    fn from(mut words: Vec<String>) -> Self {
129        let mnemonic = words.join(" ").into();
130        words.zeroize();
131        mnemonic
132    }
133}
134
135macro_rules! impl_from_words {
136    ($n:literal) => {
137        /// Join the input words with the space character (U+0020) and normalize into a mnemonic.
138        /// The resulting mnemonic should be verified against a given word list before deriving a seed from it.
139        ///
140        /// Note, the initial word list could have had a separator different from the space. An incorrect separator will
141        /// result in a different mnemonic (and seed).
142        impl<'a> From<&'a [&'a str; $n]> for Mnemonic {
143            fn from(words: &'a [&'a str; $n]) -> Self {
144                words.join(" ").into()
145            }
146        }
147    };
148}
149
150impl_from_words!(12);
151impl_from_words!(15);
152impl_from_words!(18);
153impl_from_words!(21);
154impl_from_words!(24);
155
156impl AsRef<str> for Mnemonic {
157    fn as_ref(&self) -> &str {
158        &self.0
159    }
160}
161
// Debug output is a fixed placeholder: the secret mnemonic text (`self.0`) is never read here.
impl fmt::Debug for Mnemonic {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the formatting of the placeholder literal.
        "<bip39::Mnemonic>".fmt(f)
    }
}
168
/// Reference to a normalized (unicode NFKD) passphrase.
///
/// The wrapper is `repr(transparent)` over `str`, so it has exactly the layout of a string slice.
#[repr(transparent)]
pub struct PassphraseRef(str);

impl Deref for PassphraseRef {
    type Target = str;

    /// Borrow the wrapped string slice.
    ///
    /// Plain field access is sufficient, so no `unsafe` reinterpretation is needed; this matches the
    /// `Deref` implementation of `MnemonicRef`.
    fn deref(&self) -> &str {
        &self.0
    }
}
180
181impl ToOwned for PassphraseRef {
182    type Owned = Passphrase;
183    fn to_owned(&self) -> Passphrase {
184        Passphrase(self.deref().to_owned())
185    }
186}
187
188impl<'a> From<&'a Passphrase> for &'a PassphraseRef {
189    fn from(passphrase_ref: &'a Passphrase) -> Self {
190        passphrase_ref.borrow()
191    }
192}
193
194impl<'a> TryFrom<&'a str> for &'a PassphraseRef {
195    type Error = Error;
196    fn try_from(passphrase_str: &'a str) -> Result<Self, Error> {
197        if is_nfkd(passphrase_str) {
198            // SAFETY: PassphraseRef is represented exactly as str due to repr(transparent)
199            Ok(unsafe { core::mem::transmute::<&str, Self>(passphrase_str) })
200        } else {
201            Err(Error::UnnormalizedPassphrase)
202        }
203    }
204}
205
/// Owned normalized (unicode NFKD) passphrase.
///
/// A passphrase is a memorable secret that is used together with the mnemonic to derive the seed.
/// If passphrase and mnemonic are leaked then the seed is compromised.
///
/// The derives make the backing string zeroizable, both explicitly and on drop.
#[derive(Clone, Zeroize, ZeroizeOnDrop)]
pub struct Passphrase(String);
212
213impl Passphrase {
214    pub fn new() -> Self {
215        Self(String::new())
216    }
217}
218
219impl Default for Passphrase {
220    fn default() -> Self {
221        Self::new()
222    }
223}
224
225impl Deref for Passphrase {
226    type Target = PassphraseRef;
227    fn deref(&self) -> &PassphraseRef {
228        // SAFETY: PassphraseRef is represented exactly as str due to repr(transparent)
229        unsafe { core::mem::transmute(self.0.as_str()) }
230    }
231}
232
233impl Borrow<PassphraseRef> for Passphrase {
234    fn borrow(&self) -> &PassphraseRef {
235        self
236    }
237}
238
239impl From<String> for Passphrase {
240    fn from(mut unnormalized_passphrase: String) -> Self {
241        let passphrase = Self(unnormalized_passphrase.chars().nfkd().collect());
242        unnormalized_passphrase.zeroize();
243        passphrase
244    }
245}
246
247impl From<&str> for Passphrase {
248    fn from(unnormalized_passphrase: &str) -> Self {
249        Self(unnormalized_passphrase.chars().nfkd().collect())
250    }
251}
252
253impl From<Zeroizing<String>> for Passphrase {
254    fn from(unnormalized_passphrase: Zeroizing<String>) -> Self {
255        Self(unnormalized_passphrase.chars().nfkd().collect())
256    }
257}
258
259impl AsRef<str> for Passphrase {
260    fn as_ref(&self) -> &str {
261        &self.0
262    }
263}
264
// Debug output is a fixed placeholder: the secret passphrase text (`self.0`) is never read here.
impl fmt::Debug for Passphrase {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the formatting of the placeholder literal.
        "<bip39::Passphrase>".fmt(f)
    }
}
271
/// Seed is a secret used as master key (i.e. other keys are derived/computed from it).
///
/// Seed must either be securely stored (on a hardware token, for example) or it can be derived from mnemonic and
/// optional passphrase. If seed is leaked then all keys derived from it might be compromised.
///
/// The seed is 64 bytes long; the derives make it zeroizable, both explicitly and on drop.
#[derive(Clone, Zeroize, ZeroizeOnDrop)]
pub struct Seed([u8; 64]);
278
279impl Seed {
280    pub fn bytes(&self) -> &[u8; 64] {
281        &self.0
282    }
283}
284
285impl AsRef<[u8]> for Seed {
286    fn as_ref(&self) -> &[u8] {
287        &self.0
288    }
289}
290
// Debug output is a fixed placeholder: the secret seed bytes (`self.0`) are never read here.
impl fmt::Debug for Seed {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the formatting of the placeholder literal.
        "<bip39::Seed>".fmt(f)
    }
}
297
298/// Derive seed from mnemonic and optional (can be empty) passphrase.
299// Return seed via mutable reference to avoid potential leaks into stack memory.
300pub fn mnemonic_to_seed(m: &MnemonicRef, p: &PassphraseRef) -> Seed {
301    let mut salt = [b"mnemonic", p.0.as_bytes()].concat();
302    const ROUNDS: core::num::NonZeroU32 = unsafe { core::num::NonZeroU32::new_unchecked(2048) };
303    let mut seed = Seed([0_u8; 64]);
304    crate::keys::pbkdf::PBKDF2_HMAC_SHA512(m.as_bytes(), &salt, ROUNDS, &mut seed.0);
305    salt.zeroize();
306    seed
307}
308
309pub mod wordlist {
310    use alloc::vec::Vec;
311
312    use super::*;
313
    /// Word list complying with BIP39 rules.
    ///
    /// All words should be different and easily distinguishable from other words in the list.
    pub struct Wordlist<'a> {
        /// The 2048 words; the position of a word in this array is the index encoded into mnemonics.
        words: &'a [&'a str; 2048],
        /// Character placed between words when encoding and used to split mnemonics when decoding.
        separator: char,
    }
321
    // Contents of `bip39.en.rs`, compiled in only with the `bip39-en` feature
    // (presumably defines the `ENGLISH` word list used by the tests; verify against the included file).
    #[cfg(feature = "bip39-en")]
    #[cfg_attr(docsrs, doc(cfg(feature = "bip39-en")))]
    include!("bip39.en.rs");

    // Contents of `bip39.jp.rs`, compiled in only with the `bip39-jp` feature.
    #[cfg(feature = "bip39-jp")]
    #[cfg_attr(docsrs, doc(cfg(feature = "bip39-jp")))]
    include!("bip39.jp.rs");
329
330    impl<'a> Wordlist<'a> {
331        const fn new_unchecked(separator: char, words: &'a [&'a str; 2048]) -> Self {
332            Self { words, separator }
333        }
334
335        /// Verify and construct a word list from separator and set of words.
336        ///
337        /// Separator character must normalize to a single character.
338        /// Words must be in NFKD form, can't contain separator. All words must be unique.
339        pub fn new(separator: char, words: &'a [&'a str; 2048]) -> Result<Self, Error> {
340            // normalize separator char
341            let s = String::from(separator);
342            let mut s_chars = s.nfkd();
343
344            if let Some(separator) = s_chars.next() {
345                if s_chars.next().is_none() {
346                    // each word is normalized and without separator
347                    words.iter().try_for_each(|word| {
348                        if is_nfkd(word) && !word.contains(separator) {
349                            Ok(())
350                        } else {
351                            Err(Error::BadWordlistWord(word.to_string()))
352                        }
353                    })?;
354
355                    // all words are unique, but not necessarily sorted
356                    let mut words_set = words.to_vec();
357                    words_set.sort();
358                    if iterator_sorted::is_unique_sorted(words_set.into_iter()) {
359                        Ok(Self { words, separator })
360                    } else {
361                        Err(Error::UnsortedWordlist)
362                    }
363                } else {
364                    Err(Error::BadSeparator)
365                }
366            } else {
367                Err(Error::BadSeparator)
368            }
369        }
370
371        pub fn separator(&self) -> char {
372            self.separator
373        }
374
375        pub fn words(&self) -> &'a [&'a str; 2048] {
376            self.words
377        }
378
379        pub fn lookup(&self, word: &str) -> Option<usize> {
380            self.words.iter().position(|w| *w == word)
381        }
382    }
383
384    /// Encode the given secret entropy bytestring as a mnemonic sentence using the specified word list.
385    /// Only bytestrings of length 128, 160, 192, 224 and 256 bits are accepted, and this is the
386    /// only expected error case.
387    ///
388    /// Currently the Japanese language is not supported, or at least the implementation is not
389    /// generating the expected sentences compared to our test vectors. Use at your own risk!
390    #[allow(non_snake_case)]
391    #[allow(clippy::many_single_char_names)]
392    pub fn encode(secret_entropy: &[u8], wordlist: &Wordlist) -> Result<Mnemonic, Error> {
393        match secret_entropy.len() {
394            16 | 20 | 24 | 28 | 32 => {}
395            _ => return Err(Error::InvalidEntropyCount(secret_entropy.len() * 8)),
396        }
397
398        let mut checksum = [0; 32];
399        crate::hashes::sha::SHA256(secret_entropy, &mut checksum);
400
401        let (_, leftover_bits, mut capacity, words) = secret_entropy.iter().chain(Some(&checksum[0])).fold(
402            (0_u32, 0, 0_usize, Vec::new()),
403            |(mut acc, mut bits, mut mnemonic_capacity, mut mnemonic_words), entropy_byte| {
404                const MASK: u32 = (1_u32 << 11) - 1;
405                acc = (acc << 8) | (*entropy_byte as u32);
406                bits += 8;
407                if bits >= 11 {
408                    debug_assert!(bits <= 18);
409                    bits -= 11;
410                    let idx = (MASK & (acc >> bits)) as usize;
411                    let word = wordlist.words[idx];
412                    mnemonic_words.push(word);
413                    mnemonic_capacity += word.as_bytes().len();
414                }
415                debug_assert!(bits <= 10);
416                (acc, bits, mnemonic_capacity, mnemonic_words)
417            },
418        );
419        // leftover_bits here represent the number of left-over low bits in checksum byte
420        debug_assert_eq!(8, secret_entropy.len() / 4 + leftover_bits as usize);
421
422        if !words.is_empty() {
423            capacity += (words.len() - 1) * wordlist.separator.encode_utf8(&mut [0_u8; 4]).len();
424        }
425
426        // allocate the exact number of bytes required for secret mnemonic to avoid reallocations and potential secret
427        // leakage
428        let mut mnemonic = String::with_capacity(capacity);
429        words.into_iter().for_each(|word| {
430            if !mnemonic.is_empty() {
431                mnemonic.push(wordlist.separator);
432            }
433            mnemonic.push_str(word);
434        });
435        debug_assert_eq!(capacity, mnemonic.as_bytes().len());
436
437        Ok(Mnemonic(mnemonic))
438    }
439
440    /// Decode and compare the checksum given a mnemonic sentence and the wordlist used in the
441    /// generation process.
442    ///
443    /// Be aware that the error detection has a noticable rate of false positives. Given CS
444    /// checksum bits (CS := ENT / 32) the expected rate of false positives are one in 2^CS. For
445    /// example given 128 bit entropy that's 1 in 16.
446    pub fn decode(mnemonic: &MnemonicRef, wordlist: &Wordlist) -> Result<Zeroizing<Vec<u8>>, Error> {
447        // allocate maximal entropy capacity of 32 bytes to avoid reallocations
448        let mut entropy = Zeroizing::new(Vec::with_capacity(32));
449
450        let (checksum_acc, checksum_bits) =
451            mnemonic
452                .split(wordlist.separator)
453                .try_fold((0_u32, 0), |(mut acc, mut bits), word| {
454                    let idx = wordlist
455                        .lookup(word)
456                        .ok_or_else(|| Error::NoSuchWord(word.to_string()))? as u32;
457
458                    acc = (acc << 11) | idx;
459                    bits += 11;
460
461                    while bits > 8 {
462                        debug_assert!(bits <= 19);
463                        if entropy.len() == entropy.capacity() {
464                            return Err(Error::InvalidEntropyCount(32));
465                        }
466                        bits -= 8;
467                        entropy.push((acc >> bits) as u8);
468                    }
469
470                    debug_assert!(bits <= 8);
471                    Ok((acc, bits))
472                })?;
473        // checksum_bits here represent the number of high bits in checksum byte
474        match entropy.len() {
475            16 | 20 | 24 | 28 | 32 => {
476                debug_assert_eq!(entropy.len() / 4, checksum_bits as usize);
477            }
478            _ => {
479                return Err(Error::InvalidEntropyCount(entropy.len() * 8 + checksum_bits as usize));
480            }
481        }
482
483        let mut checksum = [0; 32];
484        crate::hashes::sha::SHA256(&entropy, &mut checksum);
485        if (checksum_acc & ((1 << checksum_bits) - 1)) as u8 != checksum[0] >> (8 - checksum_bits) {
486            return Err(Error::ChecksumMismatch);
487        }
488
489        Ok(entropy)
490    }
491
492    pub fn verify(mnemonic: &MnemonicRef, wordlist: &Wordlist) -> Result<(), Error> {
493        decode(mnemonic, wordlist).map(|_| ())
494    }
495}
496
// Round-trip test: entropy of every supported length must survive encode followed by decode.
#[cfg(feature = "bip39-en")]
#[test]
fn test_encode_decode() {
    // Cheap deterministic byte scrambler. Wrapping arithmetic keeps it from overflowing `usize` on 32-bit
    // targets; only the low byte is used, so the result is the same on every pointer width.
    fn inc(e: u8, i: usize) -> u8 {
        (e as usize)
            .wrapping_add(0x9b17f203)
            .wrapping_mul(i.wrapping_add(0x4792a0e2))
            .wrapping_add(7) as u8
    }

    let mut entropy = [0_u8; 32];
    for _ in 0..5 {
        // derive a fresh pseudo-random buffer from the previous one
        entropy
            .iter_mut()
            .enumerate()
            .for_each(|(i, e)| *e = e.wrapping_add(inc(*e, i)));

        // 16, 20, 24, 28 and 32 bytes of entropy
        for i in 4..9 {
            let n = 4 * i;

            let mnemonic = wordlist::encode(&entropy[..n], &wordlist::ENGLISH).unwrap();
            let decoded_entropy = wordlist::decode(&mnemonic, &wordlist::ENGLISH).unwrap();
            assert_eq!(&entropy[..n], &decoded_entropy[..]);
        }
    }
}