bip39/language/
mod.rs

1use core::fmt;
2
3#[cfg(feature = "chinese-simplified")]
4mod chinese_simplified;
5#[cfg(feature = "chinese-traditional")]
6mod chinese_traditional;
7#[cfg(feature = "czech")]
8mod czech;
9mod english;
10#[cfg(feature = "french")]
11mod french;
12#[cfg(feature = "italian")]
13mod italian;
14#[cfg(feature = "japanese")]
15mod japanese;
16#[cfg(feature = "korean")]
17mod korean;
18#[cfg(feature = "portuguese")]
19mod portuguese;
20#[cfg(feature = "spanish")]
21mod spanish;
22
/// The maximum number of languages that can be enabled at the same time:
/// English plus the nine optional, feature-gated languages.
pub(crate) const MAX_NB_LANGUAGES: usize = 10;
25
/// The language whose word list a mnemonic phrase is written in.
///
/// `English` exists in every build. Each of the other variants is only
/// compiled in when the Cargo feature of the same name is enabled.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Language {
	/// English; always available.
	English,
	/// Simplified Chinese; requires the `chinese-simplified` feature.
	#[cfg(feature = "chinese-simplified")]
	SimplifiedChinese,
	/// Traditional Chinese; requires the `chinese-traditional` feature.
	#[cfg(feature = "chinese-traditional")]
	TraditionalChinese,
	/// Czech; requires the `czech` feature.
	#[cfg(feature = "czech")]
	Czech,
	/// French; requires the `french` feature.
	#[cfg(feature = "french")]
	French,
	/// Italian; requires the `italian` feature.
	#[cfg(feature = "italian")]
	Italian,
	/// Japanese; requires the `japanese` feature.
	#[cfg(feature = "japanese")]
	Japanese,
	/// Korean; requires the `korean` feature.
	#[cfg(feature = "korean")]
	Korean,
	/// Portuguese; requires the `portuguese` feature.
	#[cfg(feature = "portuguese")]
	Portuguese,
	/// Spanish; requires the `spanish` feature.
	#[cfg(feature = "spanish")]
	Spanish,
}
62
63impl Default for Language {
64	fn default() -> Self {
65		Language::English
66	}
67}
68
69impl Language {
70	/// The list of supported languages.
71	/// Language support is managed by compile features.
72	pub const ALL: &'static [Language] = &[
73		Language::English,
74		#[cfg(feature = "chinese-simplified")]
75		Language::SimplifiedChinese,
76		#[cfg(feature = "chinese-traditional")]
77		Language::TraditionalChinese,
78		#[cfg(feature = "czech")]
79		Language::Czech,
80		#[cfg(feature = "french")]
81		Language::French,
82		#[cfg(feature = "italian")]
83		Language::Italian,
84		#[cfg(feature = "japanese")]
85		Language::Japanese,
86		#[cfg(feature = "korean")]
87		Language::Korean,
88		#[cfg(feature = "portuguese")]
89		Language::Portuguese,
90		#[cfg(feature = "spanish")]
91		Language::Spanish,
92	];
93
94	/// The list of supported languages.
95	/// Language support is managed by compile features.
96	#[deprecated(since = "2.1.0", note = "use constant Language::ALL instead")]
97	pub fn all() -> &'static [Language] {
98		Language::ALL
99	}
100
101	/// The word list for this language.
102	#[inline]
103	pub fn word_list(self) -> &'static [&'static str; 2048] {
104		match self {
105			Language::English => &english::WORDS,
106			#[cfg(feature = "chinese-simplified")]
107			Language::SimplifiedChinese => &chinese_simplified::WORDS,
108			#[cfg(feature = "chinese-traditional")]
109			Language::TraditionalChinese => &chinese_traditional::WORDS,
110			#[cfg(feature = "czech")]
111			Language::Czech => &czech::WORDS,
112			#[cfg(feature = "french")]
113			Language::French => &french::WORDS,
114			#[cfg(feature = "italian")]
115			Language::Italian => &italian::WORDS,
116			#[cfg(feature = "japanese")]
117			Language::Japanese => &japanese::WORDS,
118			#[cfg(feature = "korean")]
119			Language::Korean => &korean::WORDS,
120			#[cfg(feature = "portuguese")]
121			Language::Portuguese => &portuguese::WORDS,
122			#[cfg(feature = "spanish")]
123			Language::Spanish => &spanish::WORDS,
124		}
125	}
126
127	/// Returns true if all words in the list are guaranteed to
128	/// only be in this list and not in any other.
129	#[inline]
130	pub(crate) fn unique_words(self) -> bool {
131		match self {
132			Language::English => false,
133			#[cfg(feature = "chinese-simplified")]
134			Language::SimplifiedChinese => false,
135			#[cfg(feature = "chinese-traditional")]
136			Language::TraditionalChinese => false,
137			#[cfg(feature = "czech")]
138			Language::Czech => true,
139			#[cfg(feature = "french")]
140			Language::French => false,
141			#[cfg(feature = "italian")]
142			Language::Italian => true,
143			#[cfg(feature = "japanese")]
144			Language::Japanese => true,
145			#[cfg(feature = "korean")]
146			Language::Korean => true,
147			#[cfg(feature = "portuguese")]
148			Language::Portuguese => true,
149			#[cfg(feature = "spanish")]
150			Language::Spanish => true,
151		}
152	}
153
154	/// Get words from the word list that start with the given prefix.
155	pub fn words_by_prefix(self, prefix: &str) -> &[&'static str] {
156		// The words in the word list are ordered lexicographically. This means
157		// that we cannot use `binary_search` to find words more efficiently,
158		// because the Rust ordering is based on the byte values. However, it
159		// does mean that words that share a prefix will follow each other.
160
161		let first = match self.word_list().iter().position(|w| w.starts_with(prefix)) {
162			Some(i) => i,
163			None => return &[],
164		};
165		let count = self.word_list()[first..].iter().take_while(|w| w.starts_with(prefix)).count();
166		&self.word_list()[first..first + count]
167	}
168
169	/// Get the index of the word in the word list.
170	#[inline]
171	pub fn find_word(self, word: &str) -> Option<u16> {
172		match self {
173			// English, Portuguese, Italian, and Korean wordlists are already lexicographically
174			// sorted, so they are candidates for optimization via binary_search
175			Self::English => self.word_list().binary_search(&word).map(|x| x as _).ok(),
176			#[cfg(feature = "portuguese")]
177			Self::Portuguese => self.word_list().binary_search(&word).map(|x| x as _).ok(),
178			#[cfg(feature = "italian")]
179			Self::Italian => self.word_list().binary_search(&word).map(|x| x as _).ok(),
180			#[cfg(feature = "korean")]
181			Self::Korean => self.word_list().binary_search(&word).map(|x| x as _).ok(),
182
183			// All other languages' wordlists are not lexicographically sorted, so we have to
184			// resort to linear search
185			#[cfg(any(
186				feature = "chinese-simplified",
187				feature = "chinese-traditional",
188				feature = "czech",
189				feature = "french",
190				feature = "japanese",
191				feature = "spanish",
192			))]
193			_ => self.word_list().iter().position(|w| *w == word).map(|i| i as u16),
194		}
195	}
196}
197
198impl fmt::Display for Language {
199	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200		fmt::Debug::fmt(self, f)
201	}
202}
203
#[cfg(test)]
mod tests {
	use super::*;

	/// Ensures that every word list is identical to the corresponding file in
	/// the bips repository by comparing SHA-256 checksums.
	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "portuguese",
		feature = "spanish"
	))]
	#[test]
	fn validate_word_list_checksums() {
		// The checksums are as follows in the bips repository:
		// 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726  chinese_simplified.txt
		// 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f  chinese_traditional.txt
		// 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc  czech.txt
		// 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda  english.txt
		// ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59  french.txt
		// d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2  italian.txt
		// 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd  japanese.txt
		// 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60  korean.txt
		// 2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f  portuguese.txt
		// 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b  spanish.txt

		use bitcoin_hashes::{sha256, Hash, HashEngine};

		let checksums = [
			(
				"5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726",
				Language::SimplifiedChinese,
			),
			(
				"417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f",
				Language::TraditionalChinese,
			),
			("7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc", Language::Czech),
			("2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda", Language::English),
			("ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59", Language::French),
			("d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2", Language::Italian),
			(
				"2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd",
				Language::Japanese,
			),
			("9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60", Language::Korean),
			(
				"2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f",
				Language::Portuguese,
			),
			("46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b", Language::Spanish),
		];
		// Every language must be covered by a checksum.
		assert_eq!(MAX_NB_LANGUAGES, checksums.len());

		// `_sum` is only read when the `std` feature is enabled.
		for &(_sum, lang) in &checksums {
			let mut digest = sha256::Hash::engine();
			// Hash the list the way the reference file stores it: one word per line.
			for word in lang.word_list().iter() {
				#[cfg(feature = "std")]
				assert!(::unicode_normalization::is_nfkd(word));
				digest.input(word.as_bytes());
				digest.input("\n".as_bytes());
			}
			#[cfg(feature = "std")]
			assert_eq!(
				sha256::Hash::from_engine(digest).to_string(),
				_sum,
				"word list for language {} failed checksum check",
				lang,
			);
		}
	}

	/// Prefix lookup returns the matching run, the whole list, or nothing.
	#[test]
	fn words_by_prefix() {
		let lang = Language::English;

		let res = lang.words_by_prefix("woo");
		assert_eq!(res, ["wood", "wool"]);

		let res = lang.words_by_prefix("");
		assert_eq!(res.len(), 2048);

		let res = lang.words_by_prefix("woof");
		assert!(res.is_empty());
	}

	/// Verifies the `unique_words` guarantee against the actual word lists.
	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "portuguese",
		feature = "spanish"
	))]
	#[test]
	fn words_overlaps() {
		use std::collections::HashMap;

		// We keep a map of all words and the languages they occur in.
		// Afterwards, we make sure that no word maps to multiple languages
		// if either of those is guaranteed to have unique words.
		let mut words: HashMap<&str, Vec<Language>> = HashMap::new();
		for lang in Language::ALL.iter() {
			for word in lang.word_list().iter() {
				words.entry(word).or_insert_with(Vec::new).push(*lang);
			}
		}

		// Report every offending word before failing, not just the first one.
		let mut ok = true;
		for (word, langs) in &words {
			if langs.len() > 1 && langs.iter().any(|l| l.unique_words()) {
				println!("Word {} is not unique: {:?}", word, langs);
				ok = false;
			}
		}
		assert!(ok);
	}

	/// The lists that `find_word` searches with `binary_search` must be sorted.
	#[test]
	fn test_ordered_lists() {
		let languages = [
			Language::English,
			#[cfg(feature = "portuguese")]
			Language::Portuguese,
			#[cfg(feature = "italian")]
			Language::Italian,
			#[cfg(feature = "korean")]
			Language::Korean,
		];

		for lang in languages.iter() {
			let mut sorted = lang.word_list().to_vec();
			sorted.sort();
			assert_eq!(&sorted[..], &lang.word_list()[..]);
		}
	}

	/// Test the full round trip from index -> word-string -> index for all languages
	mod round_trip {
		use super::*;

		/// Asserts that looking up each word of `lang` yields the index the
		/// word is stored at in the word list.
		fn assert_round_trip(lang: Language) {
			for (idx, word) in lang.word_list().iter().enumerate() {
				assert_eq!(lang.find_word(word), Some(idx as u16));
			}
		}

		#[test]
		fn english() {
			assert_round_trip(Language::English);
		}

		#[cfg(feature = "chinese-simplified")]
		#[test]
		fn simplified_chinese() {
			assert_round_trip(Language::SimplifiedChinese);
		}

		#[cfg(feature = "chinese-traditional")]
		#[test]
		fn traditional_chinese() {
			assert_round_trip(Language::TraditionalChinese);
		}

		#[cfg(feature = "czech")]
		#[test]
		fn czech() {
			assert_round_trip(Language::Czech);
		}

		#[cfg(feature = "french")]
		#[test]
		fn french() {
			assert_round_trip(Language::French);
		}

		#[cfg(feature = "italian")]
		#[test]
		fn italian() {
			assert_round_trip(Language::Italian);
		}

		#[cfg(feature = "japanese")]
		#[test]
		fn japanese() {
			assert_round_trip(Language::Japanese);
		}

		#[cfg(feature = "korean")]
		#[test]
		fn korean() {
			assert_round_trip(Language::Korean);
		}

		#[cfg(feature = "portuguese")]
		#[test]
		fn portuguese() {
			assert_round_trip(Language::Portuguese);
		}

		#[cfg(feature = "spanish")]
		#[test]
		fn spanish() {
			assert_round_trip(Language::Spanish);
		}
	}
}