anychain_kms/bip39/
language.rs

1use super::util::{Bits, Bits11};
2use super::ErrorKind;
3use rustc_hash::FxHashMap;
4
/// Reverse lookup table from a mnemonic word to its 11-bit value.
///
/// Instances are built by `lazy::gen_wordmap`, which assigns every word the
/// index it has in the corresponding [`WordList`].
pub struct WordMap {
    // Keys are the `'static` word slices of the embedded wordlist.
    inner: FxHashMap<&'static str, Bits11>,
}
8
/// Ordered list of the mnemonic words of one language.
///
/// The position of a word in the list is the value it encodes; see
/// [`WordList::get_word`] for the index -> word direction.
pub struct WordList {
    // Words in file order, as produced by `lazy::gen_wordlist`.
    inner: Vec<&'static str>,
}
12
13impl WordMap {
14    pub fn get_bits(&self, word: &str) -> Result<Bits11, ErrorKind> {
15        match self.inner.get(word) {
16            Some(n) => Ok(*n),
17            None => Err(ErrorKind::InvalidWord)?,
18        }
19    }
20}
21
22impl WordList {
23    pub fn get_word(&self, bits: Bits11) -> &'static str {
24        self.inner[bits.bits() as usize]
25    }
26
27    pub fn get_words_by_prefix(&self, prefix: &str) -> &[&'static str] {
28        let start = self.inner.binary_search(&prefix).unwrap_or_else(|idx| idx);
29        let count = self.inner[start..]
30            .iter()
31            .take_while(|word| word.starts_with(prefix))
32            .count();
33
34        &self.inner[start..start + count]
35    }
36}
37
38mod lazy {
39    use super::{Bits11, WordList, WordMap};
40    use once_cell::sync::Lazy;
41
42    /// lazy generation of the word list
43    fn gen_wordlist(lang_words: &'static str) -> WordList {
44        let inner: Vec<_> = lang_words.split_whitespace().collect();
45
46        debug_assert!(inner.len() == 2048, "Invalid wordlist length");
47
48        WordList { inner }
49    }
50
51    /// lazy generation of the word map
52    fn gen_wordmap(wordlist: &WordList) -> WordMap {
53        let inner = wordlist
54            .inner
55            .iter()
56            .enumerate()
57            .map(|(i, item)| (*item, Bits11::from(i as u16)))
58            .collect();
59
60        WordMap { inner }
61    }
62
63    pub static WORDLIST_ENGLISH: Lazy<WordList> =
64        Lazy::new(|| gen_wordlist(include_str!("langs/english.txt")));
65    pub static WORDLIST_CHINESE_SIMPLIFIED: Lazy<WordList> =
66        Lazy::new(|| gen_wordlist(include_str!("langs/chinese_simplified.txt")));
67    #[cfg(feature = "chinese-traditional")]
68    pub static WORDLIST_CHINESE_TRADITIONAL: Lazy<WordList> =
69        Lazy::new(|| gen_wordlist(include_str!("langs/chinese_traditional.txt")));
70    #[cfg(feature = "french")]
71    pub static WORDLIST_FRENCH: Lazy<WordList> =
72        Lazy::new(|| gen_wordlist(include_str!("langs/french.txt")));
73    #[cfg(feature = "italian")]
74    pub static WORDLIST_ITALIAN: Lazy<WordList> =
75        Lazy::new(|| gen_wordlist(include_str!("langs/italian.txt")));
76    #[cfg(feature = "japanese")]
77    pub static WORDLIST_JAPANESE: Lazy<WordList> =
78        Lazy::new(|| gen_wordlist(include_str!("langs/japanese.txt")));
79    #[cfg(feature = "korean")]
80    pub static WORDLIST_KOREAN: Lazy<WordList> =
81        Lazy::new(|| gen_wordlist(include_str!("langs/korean.txt")));
82    #[cfg(feature = "spanish")]
83    pub static WORDLIST_SPANISH: Lazy<WordList> =
84        Lazy::new(|| gen_wordlist(include_str!("langs/spanish.txt")));
85
86    pub static WORDMAP_ENGLISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ENGLISH));
87    pub static WORDMAP_CHINESE_SIMPLIFIED: Lazy<WordMap> =
88        Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_SIMPLIFIED));
89    #[cfg(feature = "chinese-traditional")]
90    pub static WORDMAP_CHINESE_TRADITIONAL: Lazy<WordMap> =
91        Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_TRADITIONAL));
92    #[cfg(feature = "french")]
93    pub static WORDMAP_FRENCH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_FRENCH));
94    #[cfg(feature = "italian")]
95    pub static WORDMAP_ITALIAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ITALIAN));
96    #[cfg(feature = "japanese")]
97    pub static WORDMAP_JAPANESE: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_JAPANESE));
98    #[cfg(feature = "korean")]
99    pub static WORDMAP_KOREAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_KOREAN));
100    #[cfg(feature = "spanish")]
101    pub static WORDMAP_SPANISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_SPANISH));
102}
103
/// The language determines which words will be used in a mnemonic phrase, but also indirectly
/// determines the binary value of each word when a [`Mnemonic`][Mnemonic] is turned into a [`Seed`][Seed].
///
/// These are not of much use right now, and may even be removed from the crate, as there is no
/// official language specified by the standard except English.
///
/// English and Simplified Chinese are always compiled in; every other variant only exists
/// when the cargo feature of the same name is enabled.
///
/// [Mnemonic]: ./mnemonic/struct.Mnemonic.html
/// [Seed]: ./seed/struct.Seed.html
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum Language {
    /// English; this is the value returned by `Language::default()`.
    #[default]
    English,
    /// Simplified Chinese; always available (the variant carries no feature gate).
    ChineseSimplified,
    /// Traditional Chinese; requires the `chinese-traditional` feature.
    #[cfg(feature = "chinese-traditional")]
    ChineseTraditional,
    /// French; requires the `french` feature.
    #[cfg(feature = "french")]
    French,
    /// Italian; requires the `italian` feature.
    #[cfg(feature = "italian")]
    Italian,
    /// Japanese; requires the `japanese` feature.
    #[cfg(feature = "japanese")]
    Japanese,
    /// Korean; requires the `korean` feature.
    #[cfg(feature = "korean")]
    Korean,
    /// Spanish; requires the `spanish` feature.
    #[cfg(feature = "spanish")]
    Spanish,
}
130
131impl Language {
132    /// Construct a word list from its language code. Returns None
133    /// if the language code is not valid or not supported.
134    pub fn from_language_code(language_code: &str) -> Option<Self> {
135        match &language_code.to_ascii_lowercase()[..] {
136            "en" => Some(Language::English),
137            #[cfg(feature = "chinese-simplified")]
138            "zh-hans" => Some(Language::ChineseSimplified),
139            #[cfg(feature = "chinese-simplified")]
140            "zh-cn" => Some(Language::ChineseSimplified),
141            #[cfg(feature = "chinese-traditional")]
142            "zh-hant" => Some(Language::ChineseTraditional),
143            #[cfg(feature = "chinese-traditional")]
144            "zh-tw" => Some(Language::ChineseTraditional),
145            #[cfg(feature = "french")]
146            "fr" => Some(Language::French),
147            #[cfg(feature = "italian")]
148            "it" => Some(Language::Italian),
149            #[cfg(feature = "japanese")]
150            "ja" => Some(Language::Japanese),
151            #[cfg(feature = "korean")]
152            "ko" => Some(Language::Korean),
153            #[cfg(feature = "spanish")]
154            "es" => Some(Language::Spanish),
155            _ => None,
156        }
157    }
158
159    pub fn from_phrase(phrase: &str) -> Option<Self> {
160        let mut iter = phrase.split_whitespace();
161        iter.next(); // skip the first word, which might not be included in the bip32 chinese wordlist.
162        match iter.next() {
163            Some(word) => {
164                if lazy::WORDMAP_ENGLISH.get_bits(word).is_ok() {
165                    Some(Language::English)
166                } else if lazy::WORDMAP_CHINESE_SIMPLIFIED.get_bits(word).is_ok() {
167                    Some(Language::ChineseSimplified)
168                } else {
169                    None
170                }
171            }
172            _ => None,
173        }
174    }
175
176    /// Get the word list for this language
177    pub fn wordlist(&self) -> &'static WordList {
178        match *self {
179            Language::English => &lazy::WORDLIST_ENGLISH,
180            Language::ChineseSimplified => &lazy::WORDLIST_CHINESE_SIMPLIFIED,
181            #[cfg(feature = "chinese-traditional")]
182            Language::ChineseTraditional => &lazy::WORDLIST_CHINESE_TRADITIONAL,
183            #[cfg(feature = "french")]
184            Language::French => &lazy::WORDLIST_FRENCH,
185            #[cfg(feature = "italian")]
186            Language::Italian => &lazy::WORDLIST_ITALIAN,
187            #[cfg(feature = "japanese")]
188            Language::Japanese => &lazy::WORDLIST_JAPANESE,
189            #[cfg(feature = "korean")]
190            Language::Korean => &lazy::WORDLIST_KOREAN,
191            #[cfg(feature = "spanish")]
192            Language::Spanish => &lazy::WORDLIST_SPANISH,
193        }
194    }
195
196    /// Get a WordMap that allows word -> index lookups in the word list
197    ///
198    /// The index of an individual word in the word list is used as the binary value of that word
199    /// when the phrase is turned into a Seed
200    pub fn wordmap(&self) -> &'static WordMap {
201        match *self {
202            Language::English => &lazy::WORDMAP_ENGLISH,
203            Language::ChineseSimplified => &lazy::WORDMAP_CHINESE_SIMPLIFIED,
204            #[cfg(feature = "chinese-traditional")]
205            Language::ChineseTraditional => &lazy::WORDMAP_CHINESE_TRADITIONAL,
206            #[cfg(feature = "french")]
207            Language::French => &lazy::WORDMAP_FRENCH,
208            #[cfg(feature = "italian")]
209            Language::Italian => &lazy::WORDMAP_ITALIAN,
210            #[cfg(feature = "japanese")]
211            Language::Japanese => &lazy::WORDMAP_JAPANESE,
212            #[cfg(feature = "korean")]
213            Language::Korean => &lazy::WORDMAP_KOREAN,
214            #[cfg(feature = "spanish")]
215            Language::Spanish => &lazy::WORDMAP_SPANISH,
216        }
217    }
218}
219
#[cfg(test)]
mod test {
    use super::lazy;
    use super::Language;
    use super::WordList;
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::*;

    // A prefix shared by several words returns exactly those words, in order.
    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woo");
        assert_eq!(res, ["wood", "wool"]);
    }

    // The empty prefix matches every word of the list.
    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn all_words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("");
        assert_eq!(res.len(), 2048);
    }

    // A prefix that no word starts with yields an empty slice.
    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_invalid_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woof");
        assert!(res.is_empty());
    }

    /// Returns `true` when every word of `wl` is in Unicode NFKD form.
    fn is_wordlist_nfkd(wl: &WordList) -> bool {
        // The upper bound is exclusive: 0..2048 visits all 2048 indices,
        // including the last word at index 2047.
        for idx in 0..2048 {
            let word = wl.get_word(idx.into());
            if !unicode_normalization::is_nfkd(word) {
                return false;
            }
        }
        true
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn chinese_simplified_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_SIMPLIFIED));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn chinese_traditional_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_TRADITIONAL));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn french_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_FRENCH));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn italian_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_ITALIAN));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn japanese_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_JAPANESE));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn korean_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_KOREAN));
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn spanish_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_SPANISH));
    }

    // The language-code tests below use mixed-case input on purpose: the
    // lookup is expected to ignore ASCII case.
    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_language_code_en() {
        assert_eq!(
            Language::from_language_code("En").expect("en is a valid language"),
            Language::English
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn from_language_code_cn_hans() {
        assert_eq!(
            Language::from_language_code("Zh-Hans").expect("zh-hans is a valid language"),
            Language::ChineseSimplified
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn from_language_code_cn_hant() {
        assert_eq!(
            Language::from_language_code("zh-hanT").expect("zh-hant is a valid language"),
            Language::ChineseTraditional
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn from_language_code_fr() {
        assert_eq!(
            Language::from_language_code("fr").expect("fr is a valid language"),
            Language::French
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn from_language_code_it() {
        assert_eq!(
            Language::from_language_code("It").expect("it is a valid language"),
            Language::Italian
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn from_language_code_ja() {
        assert_eq!(
            Language::from_language_code("Ja").expect("ja is a valid language"),
            Language::Japanese
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn from_language_code_ko() {
        assert_eq!(
            Language::from_language_code("kO").expect("ko is a valid language"),
            Language::Korean
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn from_language_code_es() {
        assert_eq!(
            Language::from_language_code("ES").expect("es is a valid language"),
            Language::Spanish
        );
    }

    #[cfg_attr(all(target_arch = "wasm32"), wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_invalid_language_code() {
        assert_eq!(Language::from_language_code("not a real language"), None);
    }

    // An all-English phrase is detected as English.
    #[test]
    fn test_ffrom_phrase() {
        let language = Language::from_phrase(
            "heavy face learn track claw jaguar pigeon uncle seven enough glow where",
        );
        assert_eq!(Some(Language::English), language);
    }
}