bip39/language/
mod.rs

1use core::fmt;
2
3#[cfg(feature = "chinese-simplified")]
4mod chinese_simplified;
5#[cfg(feature = "chinese-traditional")]
6mod chinese_traditional;
7#[cfg(feature = "czech")]
8mod czech;
9mod english;
10#[cfg(feature = "french")]
11mod french;
12#[cfg(feature = "italian")]
13mod italian;
14#[cfg(feature = "japanese")]
15mod japanese;
16#[cfg(feature = "korean")]
17mod korean;
18#[cfg(feature = "spanish")]
19mod spanish;
20
/// Upper bound on how many languages can be enabled at the same time:
/// English plus the eight feature-gated word lists.
pub(crate) const MAX_NB_LANGUAGES: usize = 9;
23
/// The language whose word list is used for a mnemonic phrase.
///
/// English is always compiled in. Every other variant only exists when the
/// crate feature for that language is enabled.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Language {
	/// English; always available.
	English,
	/// Simplified Chinese; requires the `chinese-simplified` feature.
	#[cfg(feature = "chinese-simplified")]
	SimplifiedChinese,
	/// Traditional Chinese; requires the `chinese-traditional` feature.
	#[cfg(feature = "chinese-traditional")]
	TraditionalChinese,
	/// Czech; requires the `czech` feature.
	#[cfg(feature = "czech")]
	Czech,
	/// French; requires the `french` feature.
	#[cfg(feature = "french")]
	French,
	/// Italian; requires the `italian` feature.
	#[cfg(feature = "italian")]
	Italian,
	/// Japanese; requires the `japanese` feature.
	#[cfg(feature = "japanese")]
	Japanese,
	/// Korean; requires the `korean` feature.
	#[cfg(feature = "korean")]
	Korean,
	/// Spanish; requires the `spanish` feature.
	#[cfg(feature = "spanish")]
	Spanish,
}
57
58impl Default for Language {
59	fn default() -> Self {
60		Language::English
61	}
62}
63
64impl Language {
65	/// The list of supported languages.
66	/// Language support is managed by compile features.
67	pub fn all() -> &'static [Language] {
68		&[
69			Language::English,
70			#[cfg(feature = "chinese-simplified")]
71			Language::SimplifiedChinese,
72			#[cfg(feature = "chinese-traditional")]
73			Language::TraditionalChinese,
74			#[cfg(feature = "czech")]
75			Language::Czech,
76			#[cfg(feature = "french")]
77			Language::French,
78			#[cfg(feature = "italian")]
79			Language::Italian,
80			#[cfg(feature = "japanese")]
81			Language::Japanese,
82			#[cfg(feature = "korean")]
83			Language::Korean,
84			#[cfg(feature = "spanish")]
85			Language::Spanish,
86		]
87	}
88
89	/// The word list for this language.
90	#[inline]
91	pub fn word_list(self) -> &'static [&'static str; 2048] {
92		match self {
93			Language::English => &english::WORDS,
94			#[cfg(feature = "chinese-simplified")]
95			Language::SimplifiedChinese => &chinese_simplified::WORDS,
96			#[cfg(feature = "chinese-traditional")]
97			Language::TraditionalChinese => &chinese_traditional::WORDS,
98			#[cfg(feature = "czech")]
99			Language::Czech => &czech::WORDS,
100			#[cfg(feature = "french")]
101			Language::French => &french::WORDS,
102			#[cfg(feature = "italian")]
103			Language::Italian => &italian::WORDS,
104			#[cfg(feature = "japanese")]
105			Language::Japanese => &japanese::WORDS,
106			#[cfg(feature = "korean")]
107			Language::Korean => &korean::WORDS,
108			#[cfg(feature = "spanish")]
109			Language::Spanish => &spanish::WORDS,
110		}
111	}
112
113	/// Returns true if all words in the list are guaranteed to
114	/// only be in this list and not in any other.
115	#[inline]
116	pub(crate) fn unique_words(self) -> bool {
117		match self {
118			Language::English => false,
119			#[cfg(feature = "chinese-simplified")]
120			Language::SimplifiedChinese => false,
121			#[cfg(feature = "chinese-traditional")]
122			Language::TraditionalChinese => false,
123			#[cfg(feature = "czech")]
124			Language::Czech => true,
125			#[cfg(feature = "french")]
126			Language::French => false,
127			#[cfg(feature = "italian")]
128			Language::Italian => true,
129			#[cfg(feature = "japanese")]
130			Language::Japanese => true,
131			#[cfg(feature = "korean")]
132			Language::Korean => true,
133			#[cfg(feature = "spanish")]
134			Language::Spanish => true,
135		}
136	}
137
138	/// Get words from the word list that start with the given prefix.
139	pub fn words_by_prefix(self, prefix: &str) -> &[&'static str] {
140		// The words in the word list are ordered lexicographically. This means
141		// that we cannot use `binary_search` to find words more efficiently,
142		// because the Rust ordering is based on the byte values. However, it
143		// does mean that words that share a prefix will follow each other.
144
145		let first = match self.word_list().iter().position(|w| w.starts_with(prefix)) {
146			Some(i) => i,
147			None => return &[],
148		};
149		let count = self.word_list()[first..].iter().take_while(|w| w.starts_with(prefix)).count();
150		&self.word_list()[first..first + count]
151	}
152
153	/// Get the index of the word in the word list.
154	#[inline]
155	pub fn find_word(self, word: &str) -> Option<u16> {
156		self.word_list().iter().position(|w| *w == word).map(|i| i as u16)
157	}
158}
159
160impl fmt::Display for Language {
161	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
162		fmt::Debug::fmt(self, f)
163	}
164}
165
#[cfg(test)]
mod tests {
	use super::*;

	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "spanish"
	))]
	#[test]
	fn validate_word_list_checksums() {
		//! Hashes every bundled word list (each word followed by a newline)
		//! and compares the SHA-256 digest with the expected checksum.
		//!
		//! The checksums are as follows in the bips repository:
		//! 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726  chinese_simplified.txt
		//! 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f  chinese_traditional.txt
		//! 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc  czech.txt
		//! 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda  english.txt
		//! ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59  french.txt
		//! d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2  italian.txt
		//! 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd  japanese.txt
		//! 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60  korean.txt
		//! 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b  spanish.txt

		use bitcoin_hashes::{sha256, Hash, HashEngine};

		let checksums = [
			(
				"5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726",
				Language::SimplifiedChinese,
			),
			(
				"417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f",
				Language::TraditionalChinese,
			),
			("7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc", Language::Czech),
			("2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda", Language::English),
			("ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59", Language::French),
			("d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2", Language::Italian),
			(
				"2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd",
				Language::Japanese,
			),
			("9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60", Language::Korean),
			("46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b", Language::Spanish),
		];
		// Every language must have an entry in the table above.
		assert_eq!(MAX_NB_LANGUAGES, checksums.len());

		for entry in checksums.iter() {
			// The leading underscore avoids an unused-variable warning when
			// the `std` feature (and with it the comparison below) is off.
			let (_expected, lang) = *entry;

			let mut digest = sha256::Hash::engine();
			for word in lang.word_list().iter() {
				#[cfg(feature = "std")]
				assert!(::unicode_normalization::is_nfkd(word));
				digest.input(word.as_bytes());
				digest.input(b"\n");
			}
			#[cfg(feature = "std")]
			assert_eq!(
				sha256::Hash::from_engine(digest).to_string(),
				_expected,
				"word list for language {} failed checksum check",
				lang,
			);
		}
	}

	#[test]
	fn words_by_prefix() {
		let english = Language::English;

		// A prefix shared by two words yields exactly those two.
		assert_eq!(english.words_by_prefix("woo"), ["wood", "wool"]);

		// The empty prefix matches the whole list.
		assert_eq!(english.words_by_prefix("").len(), 2048);

		// A prefix no word starts with yields nothing.
		assert!(english.words_by_prefix("woof").is_empty());
	}

	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "spanish"
	))]
	#[test]
	fn words_overlaps() {
		use std::collections::HashMap;

		// Record, for every word, the languages whose list contains it.
		// A word found in several languages is only acceptable when none of
		// those languages claims to have unique words.
		let mut words: HashMap<&str, Vec<Language>> = HashMap::new();
		for &lang in Language::all() {
			for &word in lang.word_list().iter() {
				words.entry(word).or_default().push(lang);
			}
		}

		// Report every offending word before failing, not just the first.
		let mut ok = true;
		for (word, langs) in words {
			let shared = langs.len() != 1;
			if shared && langs.iter().any(|l| l.unique_words()) {
				println!("Word {} is not unique: {:?}", word, langs);
				ok = false;
			}
		}
		assert!(ok);
	}
}
287}