Skip to main content

huddle_protocol/crypto/
mnemonic.rs

1//! huddle 2.0: BIP39 seed-phrase encoding of the 32-byte Ed25519 identity
2//! seed (F6).
3//!
4//! This is a deterministic 1:1 encoding, **not** a key-derivation step. The
5//! 256-bit Ed25519 seed maps to a checksummed 24-word English mnemonic and
6//! back, so the phrase *is* the crown-jewel root secret: written on paper it
7//! restores the whole identity — PeerId, the deterministically-derived
8//! ML-KEM-768 keypair, and every DM key — on a fresh install, without ever
9//! touching the database (the DB only ever stores the raw 32 bytes). There is
10//! no passphrase / PBKDF2 stretching and no RNG here: we use only bip39's
11//! entropy <-> word mapping. See `crate::identity::IdentityKeys::{seed, from_seed}`
12//! for the identity-level export/import that sits on top of this.
13
14use bip39::{Language, Mnemonic};
15use zeroize::Zeroizing;
16
17use crate::error::{ProtocolError, Result};
18
19/// Encode a 32-byte Ed25519 identity seed as a 24-word BIP39 English
20/// mnemonic. 256 bits of entropy → 24 words, where the last word folds in an
21/// 8-bit SHA-256 checksum, so a single mistyped or transposed word is caught
22/// on import. Deterministic: the same seed always yields the same phrase.
23pub fn seed_to_phrase(seed: &[u8; 32]) -> String {
24    // `from_entropy` only errors for entropy lengths BIP39 doesn't define
25    // (must be a multiple of 32 bits in [128, 256]). 256 bits is valid, so
26    // this is infallible for our fixed 32-byte seed — the `expect` documents
27    // that invariant rather than guarding a reachable failure.
28    Mnemonic::from_entropy(seed)
29        .expect("a 32-byte seed is a valid 256-bit BIP39 entropy length")
30        .to_string()
31}
32
33/// Decode a 24-word BIP39 mnemonic back to the 32-byte Ed25519 seed,
34/// validating the checksum. Input is trimmed and lower-cased first (BIP39
35/// English words are ASCII-lowercase, and `split_whitespace` inside the
36/// parser collapses any run of spaces/tabs/newlines), so a phrase pasted
37/// with odd casing or stray whitespace still imports cleanly.
38///
39/// Errors if any word is off the wordlist, the word count is wrong, or the
40/// checksum doesn't match — i.e. a corrupted or mistyped phrase. The returned
41/// seed is the sole input to `IdentityKeys::from_seed`, so a successful decode
42/// reproduces the original identity byte-for-byte. Handed back in `Zeroizing`
43/// (F6) so the crown-jewel seed never lands as an un-scrubbed `[u8; 32]` on the
44/// caller's stack — every recovery caller already wants it wrapped.
45pub fn phrase_to_seed(phrase: &str) -> Result<Zeroizing<[u8; 32]>> {
46    let normalized = phrase.trim().to_lowercase();
47    let mnemonic = Mnemonic::parse_in(Language::English, normalized)
48        .map_err(|e| ProtocolError::Identity(format!("invalid seed phrase: {e}")))?;
49
50    // `to_entropy_array` writes the entropy into a fixed 33-byte buffer and
51    // reports its real length; for a checksum-valid 24-word phrase that length
52    // is exactly 32. Anything else (e.g. a 12-word phrase) is a syntactically
53    // valid mnemonic that simply isn't a 256-bit Ed25519 seed. Keep the buffer
54    // in `Zeroizing` so the secret entropy is scrubbed when we return.
55    let (raw, len) = mnemonic.to_entropy_array();
56    let raw = Zeroizing::new(raw);
57    if len != 32 {
58        return Err(ProtocolError::Identity(format!(
59            "seed phrase decodes to {len} bytes; expected a 24-word (32-byte) phrase"
60        )));
61    }
62
63    // Copy straight into a `Zeroizing` target so the 32 seed bytes are never
64    // exposed as a bare array between here and the caller.
65    let mut seed = Zeroizing::new([0u8; 32]);
66    seed.copy_from_slice(&raw[..32]);
67    Ok(seed)
68}
69
#[cfg(test)]
mod tests {
    //! Unit tests for the seed <-> phrase mapping: the published all-zero
    //! vector, random round-trips, input normalization, and each rejection
    //! path of `phrase_to_seed`.

    use super::*;

    /// Canonical BIP39 256-bit all-zero entropy test vector (Trezor/BIP39
    /// spec): 23 × `abandon` + the checksum word `art`.
    fn zero_seed_phrase() -> String {
        let mut words = vec!["abandon"; 23];
        words.push("art");
        words.join(" ")
    }

    #[test]
    fn zero_seed_matches_bip39_vector() {
        // Encode the spec's all-zero seed and confirm we land on the exact
        // published 24-word phrase (covers the English wordlist + checksum).
        let phrase = seed_to_phrase(&[0u8; 32]);
        assert_eq!(phrase, zero_seed_phrase());
        assert_eq!(phrase.split_whitespace().count(), 24);
        // …and decode it back to the all-zero seed (deref the `Zeroizing`).
        assert_eq!(*phrase_to_seed(&zero_seed_phrase()).unwrap(), [0u8; 32]);
    }

    #[test]
    fn round_trips_random_seeds() {
        // Encoding then decoding must be the identity for arbitrary seeds.
        for _ in 0..32 {
            let seed: [u8; 32] = rand::random();
            let phrase = seed_to_phrase(&seed);
            assert_eq!(phrase.split_whitespace().count(), 24);
            assert_eq!(*phrase_to_seed(&phrase).unwrap(), seed);
        }
    }

    #[test]
    fn is_case_insensitive_and_trims_whitespace() {
        let seed = [7u8; 32];
        let phrase = seed_to_phrase(&seed);
        // Upper-case the whole phrase and bury it in tabs / newlines / double
        // spaces — it must still decode to the same seed.
        let messy = format!("  \t {} \n  ", phrase.to_uppercase().replace(' ', "  "));
        assert_eq!(*phrase_to_seed(&messy).unwrap(), seed);
    }

    #[test]
    fn rejects_bad_checksum() {
        // 24 × `abandon` is a syntactically valid word list but encodes the
        // wrong checksum for all-zero entropy (the correct word is `art`), so
        // the checksum guard must reject it.
        let bad_checksum = vec!["abandon"; 24].join(" ");
        assert!(phrase_to_seed(&bad_checksum).is_err());
    }

    #[test]
    fn rejects_off_wordlist_word() {
        let mut words = vec!["abandon"; 23];
        words.push("notabip39word");
        assert!(phrase_to_seed(&words.join(" ")).is_err());
    }

    #[test]
    fn decode_returns_zeroizing_seed() {
        // F6: `phrase_to_seed` hands the seed back already wrapped in
        // `Zeroizing`, so callers can move it straight into `IdentityKeys::from_seed`
        // without ever materializing a bare `[u8; 32]`. The wrapper derefs to the
        // expected bytes; this also pins the return type at compile time.
        let seed = [9u8; 32];
        let phrase = seed_to_phrase(&seed);
        let decoded: Zeroizing<[u8; 32]> = phrase_to_seed(&phrase).unwrap();
        assert_eq!(*decoded, seed);
    }

    #[test]
    fn rejects_wrong_word_count() {
        // 23 words is not a defined BIP39 length.
        let short = vec!["abandon"; 23].join(" ");
        assert!(phrase_to_seed(&short).is_err());
    }

    #[test]
    fn rejects_checksum_valid_12_word_phrase() {
        // Canonical BIP39 128-bit all-zero vector: 11 × `abandon` + `about`.
        // Unlike the 23-word case this is a well-formed, checksum-valid
        // mnemonic, so the parser accepts it; it carries only 16 bytes of
        // entropy, though, and must be refused by the 32-byte length guard
        // rather than zero-padded into a seed.
        let mut words = vec!["abandon"; 11];
        words.push("about");
        assert!(phrase_to_seed(&words.join(" ")).is_err());
    }
}