hedera/mnemonic/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use std::cmp::Ordering;
4use std::fmt;
5use std::str::FromStr;
6
7use fraction::{
8    Integer,
9    ToPrimitive,
10};
11use num_bigint::BigInt;
12use once_cell::sync::Lazy;
13use rand::{
14    thread_rng,
15    RngCore,
16};
17use sha2::Digest;
18
19use crate::error::{
20    MnemonicEntropyError,
21    MnemonicParseError,
22};
23use crate::{
24    Error,
25    PrivateKey,
26};
27
28const BIP39: &str = include_str!("bip39-english.txt");
29const LEGACY: &str = include_str!("legacy-english.txt");
30
31// `is_sorted` is unstable.
32// this is just lifted from the stdlib impl.
33fn is_sorted<T: PartialOrd>(vs: &[T]) -> bool {
34    vs.windows(2).all(|w| w[0].partial_cmp(&w[1]).map_or(false, |o| o != Ordering::Greater))
35}
36
37// sadly can't do this with a const.
38static BIP39_WORD_LIST: Lazy<Vec<&'static str>> = Lazy::new(|| {
39    let it: Vec<_> = BIP39.split_whitespace().collect();
40    // if the word list is sorted we can use the power of `binary_search` which turns the `O(n)` search into a `O(log n)`
41    // n is a constant here, but perf is perf.
42    assert!(is_sorted(&it));
43    it
44});
45
46// the legacy word list isn't sorted.
47static LEGACY_WORD_LIST: Lazy<Vec<&'static str>> =
48    Lazy::new(|| LEGACY.split_whitespace().collect());
49
50///  `BIP-39` 24-word mnemonic phrase compatible with the Android and iOS mobile wallets.
51pub struct Mnemonic(MnemonicData);
52
53// pretend to be the API we want to show
54impl fmt::Debug for Mnemonic {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        f.debug_struct("Mnemonic")
57            .field("words", &self.words())
58            .field("is_legacy", &self.is_legacy())
59            .finish()
60    }
61}
62
63impl Mnemonic {
64    // todo(sr): before release, try to find a better internal representation
65    // lets not expose this until we know what the final signature should be
66    pub(crate) fn words(&self) -> &[String] {
67        match &self.0 {
68            MnemonicData::V1(it) => it.words(),
69            MnemonicData::V2V3(it) => it.words(),
70        }
71    }
72
73    /// Returns `true` if `self` is a legacy mnemonic.
74    #[must_use]
75    pub fn is_legacy(&self) -> bool {
76        matches!(&self.0, MnemonicData::V1(_))
77    }
78
79    // todo(sr): Not too happy about requiring a `Vec<String>`
80    /// Constructs a `Mnemonic` from a 24-word list.
81    ///
82    /// # Errors
83    /// * if the mnemonic has an invalid length.
84    /// * if the mnemonic uses invalid words.
85    /// * if the mnemonic has an invalid checksum.
86    pub fn from_words(words: Vec<String>) -> crate::Result<Self> {
87        let words = match words.try_into() {
88            Ok(words) => return Ok(Self(MnemonicData::V1(MnemonicV1 { words: Box::new(words) }))),
89            Err(words) => words,
90        };
91
92        let mnemonic = Self(MnemonicV2V3 { words }.into());
93
94        if mnemonic.words().len() != 12 && mnemonic.words().len() != 24 {
95            return Err(Error::MnemonicParse {
96                reason: MnemonicParseError::BadLength(mnemonic.words().len()),
97                mnemonic,
98            });
99        }
100
101        let mut word_indecies = Vec::with_capacity(mnemonic.words().len());
102        let mut unknown_words = Vec::new();
103        for (word_index, word) in mnemonic.words().iter().enumerate() {
104            match BIP39_WORD_LIST.binary_search(&&**word) {
105                Ok(i) => {
106                    word_indecies.push(i as u16);
107                }
108                // error (word not in list)
109                Err(_) => {
110                    unknown_words.push(word_index);
111                }
112            }
113        }
114
115        if !unknown_words.is_empty() {
116            return Err(Error::MnemonicParse {
117                reason: MnemonicParseError::UnknownWords(unknown_words),
118                mnemonic,
119            });
120        }
121
122        let (entropy, actual_checksum) = incecies_to_entropy_and_checksum(&word_indecies);
123        let expected_checksum = checksum(&entropy);
124        let expected_checksum =
125            if mnemonic.words().len() == 12 { expected_checksum & 0xf0 } else { expected_checksum };
126
127        if expected_checksum != actual_checksum {
128            // error: checksum mismatch.
129            return Err(Error::MnemonicParse {
130                reason: MnemonicParseError::ChecksumMismatch {
131                    expected: expected_checksum,
132                    actual: actual_checksum,
133                },
134                mnemonic,
135            });
136        }
137
138        Ok(mnemonic)
139    }
140
141    /// Generate a new 12 word `Mnemonic` from the BIP-39 standard English word list.
142    #[must_use]
143    pub fn generate_12() -> Self {
144        Self(MnemonicV2V3::generate_12().into())
145    }
146
147    /// Generate a new 24 word `Mnemonic` from the BIP-39 standard English word list.
148    #[must_use]
149    pub fn generate_24() -> Self {
150        Self(MnemonicV2V3::generate_24().into())
151    }
152
153    /// Recover a [`PrivateKey`] from this `Mnemonic`.
154    ///
155    /// # Errors
156    /// Under certain circumstances, this function will return a [`Error::MnemonicEntropy`].
157    /// - [`MnemonicEntropyError::ChecksumMismatch`] if the computed checksum doesn't match the actual checksum.
158    /// - [`MnemonicEntropyError::BadLength`] if this is a v2 legacy mnemonic and doesn't have `24` words.
159    pub fn to_legacy_private_key(&self) -> crate::Result<PrivateKey> {
160        let entropy = match &self.0 {
161            MnemonicData::V1(it) => it.to_entropy()?,
162            MnemonicData::V2V3(it) => it.to_legacy_entropy()?,
163        };
164
165        PrivateKey::from_bytes(&entropy)
166    }
167
168    /// Recover a [`PrivateKey`] from this `Mnemonic`.
169    ///
170    /// # Errors
171    /// Under certain circumstances, this function will return a [`Error::MnemonicEntropy`].
172    /// - [`MnemonicEntropyError::LegacyWithPassphrase`] if this is a legacy private key, and the passphrase isn't empty.
173    /// - [`MnemonicEntropyError::ChecksumMismatch`] if this is a legacy private key,
174    ///   and the `Mnemonic`'s checksum doesn't match up with the computed one.
175    // the only panic here should be unreachable, if it isn't, that's a library bug.
176    #[allow(clippy::missing_panics_doc)]
177    pub fn to_private_key(&self, passphrase: &str) -> crate::Result<PrivateKey> {
178        match &self.0 {
179            MnemonicData::V1(_) if !passphrase.is_empty() => {
180                Err(Error::from(MnemonicEntropyError::LegacyWithPassphrase))
181            }
182            MnemonicData::V1(it) => Ok(PrivateKey::from_bytes(&it.to_entropy()?).expect(
183                "BUG: invariant broken - V1 mnemonic should always have exactly enough entropy",
184            )),
185            // known unfixable bug: `PrivateKey::from_mnemonic` can be called with a legacy private key.
186            MnemonicData::V2V3(_) => Ok(PrivateKey::from_mnemonic(self, passphrase)),
187        }
188    }
189
190    pub(crate) fn to_seed(&self, phrase: &str) -> [u8; 64] {
191        let mut salt = String::from("mnemonic");
192        salt.push_str(phrase);
193
194        pbkdf2::pbkdf2_hmac_array::<sha2::Sha512, 64>(
195            self.to_string().as_bytes(),
196            salt.as_bytes(),
197            2048,
198        )
199    }
200}
201
202impl fmt::Display for Mnemonic {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        if let Some((first, rest)) = self.words().split_first() {
205            f.write_str(first)?;
206
207            for word in rest {
208                write!(f, " {word}")?;
209            }
210        }
211
212        Ok(())
213    }
214}
215
216impl FromStr for Mnemonic {
217    type Err = crate::Error;
218
219    fn from_str(s: &str) -> Result<Self, Self::Err> {
220        Self::from_words(s.split_whitespace().map(str::to_owned).collect())
221    }
222}
223
224struct MnemonicV1 {
225    words: Box<[String; 22]>,
226}
227
228impl MnemonicV1 {
229    // clippy bug.
230    #[allow(clippy::explicit_auto_deref)]
231    fn words(&self) -> &[String] {
232        &*self.words
233    }
234
235    fn to_entropy(&self) -> crate::Result<Vec<u8>> {
236        let indecies = self.words.iter().map(|w| {
237            LEGACY_WORD_LIST
238                .iter()
239                .enumerate()
240                .find_map(|(idx, w2)| (w == w2).then_some(idx))
241                .map_or(-1, |it| it as i32)
242        });
243
244        let data = convert_radix(indecies, 4096, 256, 33);
245        let mut data: Vec<_> = data.into_iter().map(|it| it as u8).collect();
246
247        let (crc, data) = data.split_last_mut().unwrap();
248
249        for item in &mut *data {
250            *item ^= *crc;
251        }
252
253        let crc2 = crc8(data);
254        // checksum mismatch
255        if *crc != crc2 {
256            return Err(Error::from(MnemonicEntropyError::ChecksumMismatch {
257                expected: crc2,
258                actual: *crc,
259            }));
260        }
261
262        Ok(data.to_vec())
263    }
264}
265
266struct MnemonicV2V3 {
267    words: Vec<String>,
268}
269
270impl MnemonicV2V3 {
271    fn words(&self) -> &[String] {
272        &self.words
273    }
274
275    fn from_entropy(entropy: &[u8]) -> Self {
276        assert!(entropy.len() == 16 || entropy.len() == 32);
277
278        let entropy = {
279            let mut it = Vec::with_capacity(entropy.len() + 1);
280            it.extend_from_slice(entropy);
281            let checksum = checksum(entropy);
282            it.push(if entropy.len() == 16 { checksum & 0xf0 } else { checksum });
283            it
284        };
285
286        let mut buffer = 0_u32;
287        let mut offset: u8 = 0;
288
289        let mut words = Vec::with_capacity((entropy.len() * 8 + 1) / 11);
290
291        for byte in entropy {
292            buffer = (buffer << 8) | u32::from(byte);
293            offset += 8;
294            if offset >= 11 {
295                let index = (buffer >> (offset - 11) & 0x7ff) as usize;
296                words.push(BIP39_WORD_LIST[index].to_owned());
297                offset -= 11;
298            }
299        }
300
301        Self { words }
302    }
303
304    fn generate_12() -> Self {
305        let mut rng = thread_rng();
306        let mut entropy = [0; 16];
307        rng.fill_bytes(&mut entropy);
308
309        Self::from_entropy(&entropy)
310    }
311
312    fn generate_24() -> Self {
313        let mut rng = thread_rng();
314        let mut entropy = [0; 32];
315        rng.fill_bytes(&mut entropy);
316
317        Self::from_entropy(&entropy)
318    }
319
320    fn to_legacy_entropy(&self) -> crate::Result<Vec<u8>> {
321        // error here where we'll have more context than `PrivateKey::from_bytes`.
322        if self.words.len() != 24 {
323            return Err(Error::from(MnemonicEntropyError::BadLength {
324                expected: 24,
325                actual: self.words.len(),
326            }));
327        }
328
329        // technically, this code all works for 12 words, but I'm going to pretend it doesn't.
330        let (entropy, actual_checksum) = words_to_entropy_and_checksum(&self.words);
331
332        let expected_checksum = checksum(&entropy);
333        let expected_checksum =
334            if self.words.len() == 12 { expected_checksum & 0xf0 } else { expected_checksum };
335
336        if expected_checksum != actual_checksum {
337            return Err(Error::from(MnemonicEntropyError::ChecksumMismatch {
338                expected: expected_checksum,
339                actual: actual_checksum,
340            }));
341        }
342
343        Ok(entropy)
344    }
345}
346
347enum MnemonicData {
348    V1(MnemonicV1),
349    V2V3(MnemonicV2V3),
350}
351
352impl From<MnemonicV1> for MnemonicData {
353    fn from(v: MnemonicV1) -> Self {
354        Self::V1(v)
355    }
356}
357
358impl From<MnemonicV2V3> for MnemonicData {
359    fn from(v: MnemonicV2V3) -> Self {
360        Self::V2V3(v)
361    }
362}
363
364fn crc8(data: &[u8]) -> u8 {
365    let mut crc = 0xff;
366
367    for &it in &data[..(data.len() - 1)] {
368        crc ^= it;
369        for _ in 0..8 {
370            crc = (crc >> 1) ^ if (crc & 1) == 0 { 0 } else { 0xb2 };
371        }
372    }
373
374    crc ^ 0xff
375}
376
377// forgive me, universe, for the crimes I'm about to commit.
378//
379// todo: this is only done with one base pair and it's 4096->256, maybe there's a much nicer way to do this?
380fn convert_radix<I: IntoIterator<Item = i32>>(
381    nums: I,
382    from_radix: i32,
383    to_radix: i32,
384    to_length: usize,
385) -> Vec<i32> {
386    let mut buf = BigInt::from(0);
387    let from_radix = BigInt::from(i64::from(from_radix));
388
389    for num in nums {
390        buf *= &from_radix;
391        buf += num;
392    }
393
394    let mut out = vec![0; to_length];
395
396    let to_radix = BigInt::from(i64::from(to_radix));
397
398    for out in out.iter_mut().rev() {
399        let rem;
400        (buf, rem) = buf.div_rem(&to_radix);
401        *out = rem.to_i32().unwrap();
402    }
403
404    out
405}
406
407fn words_to_entropy_and_checksum<T: AsRef<str>>(words: &[T]) -> (Vec<u8>, u8) {
408    let indecies: Vec<_> = words
409        .iter()
410        .map(T::as_ref)
411        .map(|it| BIP39_WORD_LIST.binary_search(&it).unwrap() as u16)
412        .collect();
413
414    incecies_to_entropy_and_checksum(&indecies)
415}
416
417fn incecies_to_entropy_and_checksum(indecies: &[u16]) -> (Vec<u8>, u8) {
418    assert!(matches!(indecies.len(), 12 | 24));
419
420    let mut output = Vec::with_capacity(if indecies.len() == 12 { 17 } else { 33 });
421    let mut buf = 0_u32;
422    let mut offset: u8 = 0;
423
424    for index in indecies {
425        assert!(*index <= 0x7ff);
426
427        buf = (buf << 11) | u32::from(*index);
428        offset += 11;
429        while offset >= 8 {
430            // we want to truncate.
431            let byte = (buf >> (offset - 8)) as u8;
432            output.push(byte);
433            offset -= 8;
434        }
435    }
436
437    if offset != 0 {
438        output.push((buf << offset) as u8);
439    }
440
441    let checksum = output.pop().unwrap();
442    let checksum = if indecies.len() == 12 { checksum & 0xf0 } else { checksum };
443    (output, checksum)
444}
445
446fn checksum(bytes: &[u8]) -> u8 {
447    assert!(bytes.len() <= 32);
448    let checksum = sha2::Sha256::digest(bytes);
449    checksum[0]
450}
451
452#[cfg(test)]
453mod tests {
454    use std::str::FromStr;
455
456    use assert_matches::assert_matches;
457    use expect_test::expect;
458    use hex_literal::hex;
459
460    use super::Mnemonic;
461    use crate::error::MnemonicParseError;
462    use crate::Error;
463
464    const KNOWN_GOOD_MNEMONICS: &[&str] = &[
465        "inmate flip alley wear offer often piece magnet surge toddler submit right radio absent pear floor belt raven price stove replace reduce plate home",
466        "tiny denial casual grass skull spare awkward indoor ethics dash enough flavor good daughter early hard rug staff capable swallow raise flavor empty angle",
467        "ramp april job flavor surround pyramid fish sea good know blame gate village viable include mixed term draft among monitor swear swing novel track",
468        "evoke rich bicycle fire promote climb zero squeeze little spoil slight damage"
469    ];
470
471    #[test]
472    fn from_string() {
473        for s in KNOWN_GOOD_MNEMONICS {
474            assert_matches!(Mnemonic::from_str(s), Ok(_))
475        }
476    }
477
478    #[test]
479    fn error_invalid_length() {
480        // we can't test for up to `usize` length, but we can test several lengths to be modestly sure.
481        // it might seem that testing many lengths would be slow.
482        // we test:
483
484        // todo: this feels overengineered.
485        // every length up to (and including `DENSE_LIMIT`).
486        // arbitrarily chosen to be 48.
487        const DENSE_LIMIT: usize = 48;
488
489        let dense_lengths = 0..=DENSE_LIMIT;
490        let sparse_lengths = (0..=10).map(|it| it * 12).skip_while(|&it| it <= DENSE_LIMIT);
491
492        for length in dense_lengths.chain(sparse_lengths) {
493            if matches!(length, 12 | 22 | 24) {
494                continue;
495            }
496
497            // this is a word that's explicitly in the word list,
498            // to ensure we aren't accidentally testing that this error happens before "word(s) not in list"
499            let words = std::iter::repeat("apple".to_owned()).take(length).collect();
500
501            let reason = assert_matches!(Mnemonic::from_words(words), Err(Error::MnemonicParse { reason, .. }) => reason);
502            let reported_length = assert_matches!(reason, MnemonicParseError::BadLength(reported_length) => reported_length);
503
504            assert_eq!(reported_length, length);
505        }
506    }
507
508    #[test]
509    fn unknown_words_1() {
510        // probably fails on checksum, doesn't matter.
511        const MNEMONIC: &str = concat!(
512            "obvious favorite remain caution ",
513            "remove laptop base vacant ",
514            "alone fever slush dune"
515        );
516
517        // replace words in `MNEMONIC` one at a time.
518        for i in 0..12 {
519            let mut words: Vec<_> = MNEMONIC.split_whitespace().map(str::to_owned).collect();
520            words[i] = "lorum".to_owned();
521
522            let reason = assert_matches!(Mnemonic::from_words(words), Err(Error::MnemonicParse { reason, .. }) => reason);
523            let reported_words = assert_matches!(reason, MnemonicParseError::UnknownWords(reported_words) => reported_words);
524
525            assert_eq!(reported_words, vec![i]);
526        }
527    }
528
529    #[test]
530    fn unknown_words_2() {
531        // a 24 word mnemonic containing the following typos:
532        // absorb -> adsorb
533        // account -> acount
534        // acquire -> acquired
535        const MNEMONIC: &str = concat!(
536            "abandon ability able about above absent ",
537            "adsorb abstract absurd abuse access accident ",
538            "acount accuse achieve acid acoustic acquired ",
539            "across act action actor actress actual"
540        );
541
542        let reason = assert_matches!(Mnemonic::from_str(MNEMONIC), Err(Error::MnemonicParse { reason, .. }) => reason);
543        let reported_words = assert_matches!(reason, MnemonicParseError::UnknownWords(reported_words) => reported_words);
544
545        assert_eq!(reported_words, vec![6, 12, 17]);
546    }
547
548    #[test]
549    fn checksum_mismatch_1() {
550        const MNEMONIC: &str = concat!(
551            "abandon ability able about above absent ",
552            "absorb abstract absurd abuse access accident ",
553            "account accuse achieve acid acoustic acquire ",
554            "across act action actor actress actual"
555        );
556
557        let reason = assert_matches!(Mnemonic::from_str(MNEMONIC), Err(Error::MnemonicParse { reason, .. }) => reason);
558        let (expected, actual) = assert_matches!(reason, MnemonicParseError::ChecksumMismatch { expected, actual } => (expected, actual));
559
560        assert_eq!(expected, 0xba);
561        assert_eq!(actual, 0x17);
562    }
563
564    #[test]
565    fn checksum_mismatch_2() {
566        const MNEMONIC: &str =
567            "abandon ability able about above absent absorb abstract absurd abuse access accident";
568
569        let reason = assert_matches!(Mnemonic::from_str(MNEMONIC), Err(Error::MnemonicParse { reason, .. }) => reason);
570        let (expected, actual) = assert_matches!(reason, MnemonicParseError::ChecksumMismatch { expected, actual } => (expected, actual));
571
572        assert_eq!(expected, 0x10);
573        assert_eq!(actual, 0xb0);
574    }
575
576    // inverse of the `from_string` test.
577    #[test]
578    fn from_entropy() {
579        const ENTROPY: &[&[u8]] = &[
580            &hex!("744b201a7c399733691c2fda5c6f605ceb0c016882cb14f64ea9eb5b6d68298b"),
581            &hex!("e2674c8eb2fcada0c433984da6f52bac56466f914b49bd1a8087ed8b12b15248"),
582            &hex!("b1615de02c5da95e15ee0f646f7c5cb02f41e69c9c71df683c1fc78db9b825c7"),
583            &hex!("4e172857ab9ac2563fee9c829a4b2e9b"),
584        ];
585
586        for (entropy, s) in ENTROPY.iter().zip(KNOWN_GOOD_MNEMONICS) {
587            let mnemonic = Mnemonic(super::MnemonicV2V3::from_entropy(entropy).into());
588
589            assert_eq!(&mnemonic.to_string(), s);
590        }
591    }
592
593    #[test]
594    fn mnemonic_3() {
595        // rustfmt does *not* like long strings.
596        const MNEMONIC: &str = concat!(
597            "obvious favorite remain caution ",
598            "remove laptop base vacant ",
599            "increase video erase pass ",
600            "sniff sausage knock grid ",
601            "argue salt romance way ",
602            "alone fever slush dune"
603        );
604
605        let mnemonic = Mnemonic::from_str(MNEMONIC).unwrap();
606        let key = mnemonic.to_legacy_private_key().unwrap();
607
608        // skip the derives and just test the key.
609        // (bugs in `legacy_derive` shouldn't make this function fail.)
610        expect![[r#"
611            PrivateKeyData {
612                algorithm: Ed25519,
613                key: "98aa82d6125b5efa04bf8372be7931d05cd77f5ef3330b97d6ee7c006eaaf312",
614                chain_code: None,
615            }
616        "#]]
617        .assert_debug_eq(key.debug_pretty())
618    }
619
620    #[test]
621    fn legacy_mnemonic() {
622        const MNEMONIC: &str = concat!(
623            "jolly kidnap tom lawn drunk chick optic lust mutter mole bride ",
624            "galley dense member sage neural widow decide curb aboard margin manure"
625        );
626
627        let mnemonic = Mnemonic::from_str(MNEMONIC).unwrap();
628
629        let key = mnemonic.to_legacy_private_key().unwrap();
630
631        expect![[r#"
632            PrivateKeyData {
633                algorithm: Ed25519,
634                key: "00c2f59212cb3417f0ee0d38e7bd876810d04f2dd2cb5c2d8f26ff406573f2bd",
635                chain_code: None,
636            }
637        "#]]
638        .assert_debug_eq(key.debug_pretty());
639    }
640
641    #[test]
642    fn to_private_key() {
643        const MNEMONIC: &str = concat!(
644            "inmate flip alley wear offer often ",
645            "piece magnet surge toddler submit right ",
646            "radio absent pear floor belt raven ",
647            "price stove replace reduce plate home"
648        );
649
650        let mnemonic = Mnemonic::from_str(MNEMONIC).unwrap();
651
652        let key = mnemonic.to_private_key("").unwrap();
653
654        expect![[r#"
655            PrivateKeyData {
656                algorithm: Ed25519,
657                key: "853f15aecd22706b105da1d709b4ac05b4906170c2b9c7495dff9af49e1391da",
658                chain_code: Some(
659                    "eb001273d3d54073c42a32c17178d00677e8420631716cd57814cad9db0e64fc",
660                ),
661            }
662        "#]]
663        .assert_debug_eq(key.debug_pretty());
664    }
665}