bip39/
language.rs

1use crate::util::{Bits, Bits11};
2use rustc_hash::FxHashMap;
3
4pub struct WordMap {
5    inner: FxHashMap<&'static str, Bits11>,
6}
7
/// Ordered collection of the words available for one language.
///
/// Position matters: `get_word` indexes into this list with a [`Bits11`]
/// value, and [`WordMap`] entries are derived from each word's index.
pub struct WordList {
    /// The words, in index order, as `'static` string slices.
    inner: Vec<&'static str>,
}
11
12impl WordMap {
13    pub fn get_bits(&self, word: &str) -> Option<Bits11> {
14        self.inner.get(word).cloned()
15    }
16}
17
18impl WordList {
19    pub fn get_word(&self, bits: Bits11) -> &'static str {
20        self.inner[bits.bits() as usize]
21    }
22
23    pub fn get_words_by_prefix(&self, prefix: &str) -> &[&'static str] {
24        let start = self.inner.binary_search(&prefix).unwrap_or_else(|idx| idx);
25        let count = self.inner[start..]
26            .iter()
27            .take_while(|word| word.starts_with(prefix))
28            .count();
29
30        &self.inner[start..start + count]
31    }
32}
33
34mod lazy {
35    use super::{Bits11, WordList, WordMap};
36    use once_cell::sync::Lazy;
37
38    /// lazy generation of the word list
39    fn gen_wordlist(lang_words: &'static str) -> WordList {
40        let inner: Vec<_> = lang_words.split_whitespace().collect();
41
42        debug_assert!(inner.len() == 2048, "Invalid wordlist length");
43
44        WordList { inner }
45    }
46
47    /// lazy generation of the word map
48    fn gen_wordmap(wordlist: &WordList) -> WordMap {
49        let inner = wordlist
50            .inner
51            .iter()
52            .enumerate()
53            .map(|(i, item)| (*item, Bits11::from(i as u16)))
54            .collect();
55
56        WordMap { inner }
57    }
58
59    pub static WORDLIST_ENGLISH: Lazy<WordList> =
60        Lazy::new(|| gen_wordlist(include_str!("langs/english.txt")));
61    #[cfg(feature = "chinese-simplified")]
62    pub static WORDLIST_CHINESE_SIMPLIFIED: Lazy<WordList> =
63        Lazy::new(|| gen_wordlist(include_str!("langs/chinese_simplified.txt")));
64    #[cfg(feature = "chinese-traditional")]
65    pub static WORDLIST_CHINESE_TRADITIONAL: Lazy<WordList> =
66        Lazy::new(|| gen_wordlist(include_str!("langs/chinese_traditional.txt")));
67    #[cfg(feature = "french")]
68    pub static WORDLIST_FRENCH: Lazy<WordList> =
69        Lazy::new(|| gen_wordlist(include_str!("langs/french.txt")));
70    #[cfg(feature = "italian")]
71    pub static WORDLIST_ITALIAN: Lazy<WordList> =
72        Lazy::new(|| gen_wordlist(include_str!("langs/italian.txt")));
73    #[cfg(feature = "japanese")]
74    pub static WORDLIST_JAPANESE: Lazy<WordList> =
75        Lazy::new(|| gen_wordlist(include_str!("langs/japanese.txt")));
76    #[cfg(feature = "korean")]
77    pub static WORDLIST_KOREAN: Lazy<WordList> =
78        Lazy::new(|| gen_wordlist(include_str!("langs/korean.txt")));
79    #[cfg(feature = "spanish")]
80    pub static WORDLIST_SPANISH: Lazy<WordList> =
81        Lazy::new(|| gen_wordlist(include_str!("langs/spanish.txt")));
82
83    pub static WORDMAP_ENGLISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ENGLISH));
84    #[cfg(feature = "chinese-simplified")]
85    pub static WORDMAP_CHINESE_SIMPLIFIED: Lazy<WordMap> =
86        Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_SIMPLIFIED));
87    #[cfg(feature = "chinese-traditional")]
88    pub static WORDMAP_CHINESE_TRADITIONAL: Lazy<WordMap> =
89        Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_TRADITIONAL));
90    #[cfg(feature = "french")]
91    pub static WORDMAP_FRENCH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_FRENCH));
92    #[cfg(feature = "italian")]
93    pub static WORDMAP_ITALIAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ITALIAN));
94    #[cfg(feature = "japanese")]
95    pub static WORDMAP_JAPANESE: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_JAPANESE));
96    #[cfg(feature = "korean")]
97    pub static WORDMAP_KOREAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_KOREAN));
98    #[cfg(feature = "spanish")]
99    pub static WORDMAP_SPANISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_SPANISH));
100}
101
/// The language determines which words will be used in a mnemonic phrase, but also indirectly
/// determines the binary value of each word when a [`Mnemonic`][Mnemonic] is turned into a [`Seed`][Seed].
///
/// These are not of much use right now, and may even be removed from the crate, as there is no
/// official language specified by the standard except English.
///
/// English is always available and is the [`Default`]; every other variant only exists when
/// the cargo feature of the same name is enabled.
///
/// The type is a plain field-less enum, so it is `Copy` and implements `Eq` and `Hash`, which
/// allows it to be used as a key in hash-based collections.
///
/// [Mnemonic]: ./mnemonic/struct.Mnemonic.html
/// [Seed]: ./seed/struct.Seed.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Language {
    /// English (always compiled in; the default language).
    #[default]
    English,
    /// Simplified Chinese (requires the `chinese-simplified` feature).
    #[cfg(feature = "chinese-simplified")]
    ChineseSimplified,
    /// Traditional Chinese (requires the `chinese-traditional` feature).
    #[cfg(feature = "chinese-traditional")]
    ChineseTraditional,
    /// French (requires the `french` feature).
    #[cfg(feature = "french")]
    French,
    /// Italian (requires the `italian` feature).
    #[cfg(feature = "italian")]
    Italian,
    /// Japanese (requires the `japanese` feature).
    #[cfg(feature = "japanese")]
    Japanese,
    /// Korean (requires the `korean` feature).
    #[cfg(feature = "korean")]
    Korean,
    /// Spanish (requires the `spanish` feature).
    #[cfg(feature = "spanish")]
    Spanish,
}
129
130impl Language {
131    /// Construct a word list from its language code. Returns None
132    /// if the language code is not valid or not supported.
133    pub fn from_language_code(language_code: &str) -> Option<Self> {
134        match &language_code.to_ascii_lowercase()[..] {
135            "en" => Some(Language::English),
136            #[cfg(feature = "chinese-simplified")]
137            "zh-hans" => Some(Language::ChineseSimplified),
138            #[cfg(feature = "chinese-traditional")]
139            "zh-hant" => Some(Language::ChineseTraditional),
140            #[cfg(feature = "french")]
141            "fr" => Some(Language::French),
142            #[cfg(feature = "italian")]
143            "it" => Some(Language::Italian),
144            #[cfg(feature = "japanese")]
145            "ja" => Some(Language::Japanese),
146            #[cfg(feature = "korean")]
147            "ko" => Some(Language::Korean),
148            #[cfg(feature = "spanish")]
149            "es" => Some(Language::Spanish),
150            _ => None,
151        }
152    }
153
154    /// Get the word list for this language
155    pub fn wordlist(&self) -> &'static WordList {
156        match *self {
157            Language::English => &lazy::WORDLIST_ENGLISH,
158            #[cfg(feature = "chinese-simplified")]
159            Language::ChineseSimplified => &lazy::WORDLIST_CHINESE_SIMPLIFIED,
160            #[cfg(feature = "chinese-traditional")]
161            Language::ChineseTraditional => &lazy::WORDLIST_CHINESE_TRADITIONAL,
162            #[cfg(feature = "french")]
163            Language::French => &lazy::WORDLIST_FRENCH,
164            #[cfg(feature = "italian")]
165            Language::Italian => &lazy::WORDLIST_ITALIAN,
166            #[cfg(feature = "japanese")]
167            Language::Japanese => &lazy::WORDLIST_JAPANESE,
168            #[cfg(feature = "korean")]
169            Language::Korean => &lazy::WORDLIST_KOREAN,
170            #[cfg(feature = "spanish")]
171            Language::Spanish => &lazy::WORDLIST_SPANISH,
172        }
173    }
174
175    /// Get a [`WordMap`][WordMap] that allows word -> index lookups in the word list
176    ///
177    /// The index of an individual word in the word list is used as the binary value of that word
178    /// when the phrase is turned into a [`Seed`][Seed].
179    pub fn wordmap(&self) -> &'static WordMap {
180        match *self {
181            Language::English => &lazy::WORDMAP_ENGLISH,
182            #[cfg(feature = "chinese-simplified")]
183            Language::ChineseSimplified => &lazy::WORDMAP_CHINESE_SIMPLIFIED,
184            #[cfg(feature = "chinese-traditional")]
185            Language::ChineseTraditional => &lazy::WORDMAP_CHINESE_TRADITIONAL,
186            #[cfg(feature = "french")]
187            Language::French => &lazy::WORDMAP_FRENCH,
188            #[cfg(feature = "italian")]
189            Language::Italian => &lazy::WORDMAP_ITALIAN,
190            #[cfg(feature = "japanese")]
191            Language::Japanese => &lazy::WORDMAP_JAPANESE,
192            #[cfg(feature = "korean")]
193            Language::Korean => &lazy::WORDMAP_KOREAN,
194            #[cfg(feature = "spanish")]
195            Language::Spanish => &lazy::WORDMAP_SPANISH,
196        }
197    }
198}
199
#[cfg(test)]
mod test {
    use super::lazy;
    use super::Language;
    use super::WordList;
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::*;

    // Every test is registered with `wasm_bindgen_test` on wasm32 and with the
    // regular `test` harness on all other targets.

    /// A prefix shared by several words returns exactly those words, in list order.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woo");
        assert_eq!(res, ["wood", "wool"]);
    }

    /// The empty prefix matches every word in the list.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn all_words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("");
        assert_eq!(res.len(), 2048);
    }

    /// A prefix that no word starts with yields an empty slice.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_invalid_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woof");
        assert!(res.is_empty());
    }

    /// Returns `true` when every word of `wl` is in Unicode NFKD form.
    ///
    /// All 2048 indices are visited. The range is exclusive, so its upper
    /// bound must be 2048 for the final word (index 2047) to be checked too.
    fn is_wordlist_nfkd(wl: &WordList) -> bool {
        (0u16..2048).all(|idx| unicode_normalization::is_nfkd(wl.get_word(idx.into())))
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn chinese_simplified_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_SIMPLIFIED));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn chinese_traditional_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_TRADITIONAL));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn french_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_FRENCH));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn italian_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_ITALIAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn japanese_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_JAPANESE));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn korean_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_KOREAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn spanish_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_SPANISH));
    }

    // The language-code tests deliberately use mixed-case input to exercise
    // the case-insensitive matching of `from_language_code`.

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_language_code_en() {
        assert_eq!(
            Language::from_language_code("En").expect("en is a valid language"),
            Language::English
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn from_language_code_cn_hans() {
        assert_eq!(
            Language::from_language_code("Zh-Hans").expect("zh-hans is a valid language"),
            Language::ChineseSimplified
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn from_language_code_cn_hant() {
        assert_eq!(
            Language::from_language_code("zh-hanT").expect("zh-hant is a valid language"),
            Language::ChineseTraditional
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn from_language_code_fr() {
        assert_eq!(
            Language::from_language_code("fr").expect("fr is a valid language"),
            Language::French
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn from_language_code_it() {
        assert_eq!(
            Language::from_language_code("It").expect("it is a valid language"),
            Language::Italian
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn from_language_code_ja() {
        assert_eq!(
            Language::from_language_code("Ja").expect("ja is a valid language"),
            Language::Japanese
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn from_language_code_ko() {
        assert_eq!(
            Language::from_language_code("kO").expect("ko is a valid language"),
            Language::Korean
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn from_language_code_es() {
        assert_eq!(
            Language::from_language_code("ES").expect("es is a valid language"),
            Language::Spanish
        );
    }

    /// An unknown code is rejected with `None` rather than a panic.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_invalid_language_code() {
        assert_eq!(Language::from_language_code("not a real language"), None);
    }
}