1use super::util::{Bits, Bits11};
2use super::ErrorKind;
3use rustc_hash::FxHashMap;
4
5pub struct WordMap {
6 inner: FxHashMap<&'static str, Bits11>,
7}
8
/// Ordered list of the words of one language.
///
/// Instances are built once per language inside the `lazy` module and handed out
/// through [`Language::wordlist`].
pub struct WordList {
    /// Words in file order; the position of a word is the index used by `get_word`.
    inner: Vec<&'static str>,
}
12
13impl WordMap {
14 pub fn get_bits(&self, word: &str) -> Result<Bits11, ErrorKind> {
15 match self.inner.get(word) {
16 Some(n) => Ok(*n),
17 None => Err(ErrorKind::InvalidWord)?,
18 }
19 }
20}
21
22impl WordList {
23 pub fn get_word(&self, bits: Bits11) -> &'static str {
24 self.inner[bits.bits() as usize]
25 }
26
27 pub fn get_words_by_prefix(&self, prefix: &str) -> &[&'static str] {
28 let start = self.inner.binary_search(&prefix).unwrap_or_else(|idx| idx);
29 let count = self.inner[start..]
30 .iter()
31 .take_while(|word| word.starts_with(prefix))
32 .count();
33
34 &self.inner[start..start + count]
35 }
36}
37
38mod lazy {
39 use super::{Bits11, WordList, WordMap};
40 use once_cell::sync::Lazy;
41
42 fn gen_wordlist(lang_words: &'static str) -> WordList {
44 let inner: Vec<_> = lang_words.split_whitespace().collect();
45
46 debug_assert!(inner.len() == 2048, "Invalid wordlist length");
47
48 WordList { inner }
49 }
50
51 fn gen_wordmap(wordlist: &WordList) -> WordMap {
53 let inner = wordlist
54 .inner
55 .iter()
56 .enumerate()
57 .map(|(i, item)| (*item, Bits11::from(i as u16)))
58 .collect();
59
60 WordMap { inner }
61 }
62
63 pub static WORDLIST_ENGLISH: Lazy<WordList> =
64 Lazy::new(|| gen_wordlist(include_str!("langs/english.txt")));
65 pub static WORDLIST_CHINESE_SIMPLIFIED: Lazy<WordList> =
66 Lazy::new(|| gen_wordlist(include_str!("langs/chinese_simplified.txt")));
67 #[cfg(feature = "chinese-traditional")]
68 pub static WORDLIST_CHINESE_TRADITIONAL: Lazy<WordList> =
69 Lazy::new(|| gen_wordlist(include_str!("langs/chinese_traditional.txt")));
70 #[cfg(feature = "french")]
71 pub static WORDLIST_FRENCH: Lazy<WordList> =
72 Lazy::new(|| gen_wordlist(include_str!("langs/french.txt")));
73 #[cfg(feature = "italian")]
74 pub static WORDLIST_ITALIAN: Lazy<WordList> =
75 Lazy::new(|| gen_wordlist(include_str!("langs/italian.txt")));
76 #[cfg(feature = "japanese")]
77 pub static WORDLIST_JAPANESE: Lazy<WordList> =
78 Lazy::new(|| gen_wordlist(include_str!("langs/japanese.txt")));
79 #[cfg(feature = "korean")]
80 pub static WORDLIST_KOREAN: Lazy<WordList> =
81 Lazy::new(|| gen_wordlist(include_str!("langs/korean.txt")));
82 #[cfg(feature = "spanish")]
83 pub static WORDLIST_SPANISH: Lazy<WordList> =
84 Lazy::new(|| gen_wordlist(include_str!("langs/spanish.txt")));
85
86 pub static WORDMAP_ENGLISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ENGLISH));
87 pub static WORDMAP_CHINESE_SIMPLIFIED: Lazy<WordMap> =
88 Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_SIMPLIFIED));
89 #[cfg(feature = "chinese-traditional")]
90 pub static WORDMAP_CHINESE_TRADITIONAL: Lazy<WordMap> =
91 Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_TRADITIONAL));
92 #[cfg(feature = "french")]
93 pub static WORDMAP_FRENCH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_FRENCH));
94 #[cfg(feature = "italian")]
95 pub static WORDMAP_ITALIAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ITALIAN));
96 #[cfg(feature = "japanese")]
97 pub static WORDMAP_JAPANESE: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_JAPANESE));
98 #[cfg(feature = "korean")]
99 pub static WORDMAP_KOREAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_KOREAN));
100 #[cfg(feature = "spanish")]
101 pub static WORDMAP_SPANISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_SPANISH));
102}
103
/// The language of a word list.
///
/// `English` and `ChineseSimplified` are always available; every other variant exists only
/// when the crate feature of the same name is enabled. `English` is the default.
///
/// `Eq` and `Hash` are derived so the value can be used as a `HashSet`/`HashMap` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Language {
    /// English (the default).
    #[default]
    English,
    /// Simplified Chinese.
    ChineseSimplified,
    /// Traditional Chinese.
    #[cfg(feature = "chinese-traditional")]
    ChineseTraditional,
    /// French.
    #[cfg(feature = "french")]
    French,
    /// Italian.
    #[cfg(feature = "italian")]
    Italian,
    /// Japanese.
    #[cfg(feature = "japanese")]
    Japanese,
    /// Korean.
    #[cfg(feature = "korean")]
    Korean,
    /// Spanish.
    #[cfg(feature = "spanish")]
    Spanish,
}
130
131impl Language {
132 pub fn from_language_code(language_code: &str) -> Option<Self> {
135 match &language_code.to_ascii_lowercase()[..] {
136 "en" => Some(Language::English),
137 #[cfg(feature = "chinese-simplified")]
138 "zh-hans" => Some(Language::ChineseSimplified),
139 #[cfg(feature = "chinese-simplified")]
140 "zh-cn" => Some(Language::ChineseSimplified),
141 #[cfg(feature = "chinese-traditional")]
142 "zh-hant" => Some(Language::ChineseTraditional),
143 #[cfg(feature = "chinese-traditional")]
144 "zh-tw" => Some(Language::ChineseTraditional),
145 #[cfg(feature = "french")]
146 "fr" => Some(Language::French),
147 #[cfg(feature = "italian")]
148 "it" => Some(Language::Italian),
149 #[cfg(feature = "japanese")]
150 "ja" => Some(Language::Japanese),
151 #[cfg(feature = "korean")]
152 "ko" => Some(Language::Korean),
153 #[cfg(feature = "spanish")]
154 "es" => Some(Language::Spanish),
155 _ => None,
156 }
157 }
158
159 pub fn from_phrase(phrase: &str) -> Option<Self> {
160 let mut iter = phrase.split_whitespace();
161 iter.next(); match iter.next() {
163 Some(word) => {
164 if lazy::WORDMAP_ENGLISH.get_bits(word).is_ok() {
165 Some(Language::English)
166 } else if lazy::WORDMAP_CHINESE_SIMPLIFIED.get_bits(word).is_ok() {
167 Some(Language::ChineseSimplified)
168 } else {
169 None
170 }
171 }
172 _ => None,
173 }
174 }
175
176 pub fn wordlist(&self) -> &'static WordList {
178 match *self {
179 Language::English => &lazy::WORDLIST_ENGLISH,
180 Language::ChineseSimplified => &lazy::WORDLIST_CHINESE_SIMPLIFIED,
181 #[cfg(feature = "chinese-traditional")]
182 Language::ChineseTraditional => &lazy::WORDLIST_CHINESE_TRADITIONAL,
183 #[cfg(feature = "french")]
184 Language::French => &lazy::WORDLIST_FRENCH,
185 #[cfg(feature = "italian")]
186 Language::Italian => &lazy::WORDLIST_ITALIAN,
187 #[cfg(feature = "japanese")]
188 Language::Japanese => &lazy::WORDLIST_JAPANESE,
189 #[cfg(feature = "korean")]
190 Language::Korean => &lazy::WORDLIST_KOREAN,
191 #[cfg(feature = "spanish")]
192 Language::Spanish => &lazy::WORDLIST_SPANISH,
193 }
194 }
195
196 pub fn wordmap(&self) -> &'static WordMap {
201 match *self {
202 Language::English => &lazy::WORDMAP_ENGLISH,
203 Language::ChineseSimplified => &lazy::WORDMAP_CHINESE_SIMPLIFIED,
204 #[cfg(feature = "chinese-traditional")]
205 Language::ChineseTraditional => &lazy::WORDMAP_CHINESE_TRADITIONAL,
206 #[cfg(feature = "french")]
207 Language::French => &lazy::WORDMAP_FRENCH,
208 #[cfg(feature = "italian")]
209 Language::Italian => &lazy::WORDMAP_ITALIAN,
210 #[cfg(feature = "japanese")]
211 Language::Japanese => &lazy::WORDMAP_JAPANESE,
212 #[cfg(feature = "korean")]
213 Language::Korean => &lazy::WORDMAP_KOREAN,
214 #[cfg(feature = "spanish")]
215 Language::Spanish => &lazy::WORDMAP_SPANISH,
216 }
217 }
218}
219
#[cfg(test)]
mod test {
    use super::lazy;
    use super::Language;
    use super::WordList;
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::*;

    // Every test carries the same attribute pair: `wasm_bindgen_test` on wasm32 and the
    // standard `test` harness everywhere else.

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woo");
        assert_eq!(res, ["wood", "wool"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn all_words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("");
        assert_eq!(res.len(), 2048);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_invalid_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woof");
        assert!(res.is_empty());
    }

    /// Returns `true` when every word of `wl` (indices 0 through 2047) is in NFKD form.
    fn is_wordlist_nfkd(wl: &WordList) -> bool {
        // The upper bound is exclusive, so 2048 is required to include the last word.
        for idx in 0..2048 {
            let word = wl.get_word(idx.into());
            if !unicode_normalization::is_nfkd(word) {
                return false;
            }
        }
        true
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn chinese_simplified_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_SIMPLIFIED));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn chinese_traditional_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_TRADITIONAL));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn french_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_FRENCH));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn italian_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_ITALIAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn japanese_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_JAPANESE));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn korean_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_KOREAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn spanish_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_SPANISH));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_language_code_en() {
        assert_eq!(
            Language::from_language_code("En").expect("en is a valid language"),
            Language::English
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn from_language_code_cn_hans() {
        assert_eq!(
            Language::from_language_code("Zh-Hans").expect("zh-hans is a valid language"),
            Language::ChineseSimplified
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn from_language_code_cn_hant() {
        assert_eq!(
            Language::from_language_code("zh-hanT").expect("zh-hant is a valid language"),
            Language::ChineseTraditional
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn from_language_code_fr() {
        assert_eq!(
            Language::from_language_code("fr").expect("fr is a valid language"),
            Language::French
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn from_language_code_it() {
        assert_eq!(
            Language::from_language_code("It").expect("it is a valid language"),
            Language::Italian
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn from_language_code_ja() {
        assert_eq!(
            Language::from_language_code("Ja").expect("ja is a valid language"),
            Language::Japanese
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn from_language_code_ko() {
        assert_eq!(
            Language::from_language_code("kO").expect("ko is a valid language"),
            Language::Korean
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn from_language_code_es() {
        assert_eq!(
            Language::from_language_code("ES").expect("es is a valid language"),
            Language::Spanish
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_invalid_language_code() {
        assert_eq!(Language::from_language_code("not a real language"), None);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn test_from_phrase() {
        let language = Language::from_phrase(
            "heavy face learn track claw jaguar pigeon uncle seven enough glow where",
        );
        assert_eq!(Some(Language::English), language);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_phrase_with_unknown_words() {
        assert_eq!(Language::from_phrase("notaword notaword notaword"), None);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_phrase_with_empty_input() {
        assert_eq!(Language::from_phrase(""), None);
    }
}