1use crate::util::{Bits, Bits11};
2use rustc_hash::FxHashMap;
3
/// Reverse lookup table: maps each word of a language's wordlist to the
/// `Bits11` value built from that word's position in the list.
pub struct WordMap {
    // Keys borrow from the `'static` wordlist text, so no word is copied.
    inner: FxHashMap<&'static str, Bits11>,
}
7
/// Ordered list of the words of one language; a word's position in the list
/// is the index used to look it up.
pub struct WordList {
    // Words are slices of the `'static` wordlist text, in file order.
    inner: Vec<&'static str>,
}
11
12impl WordMap {
13 pub fn get_bits(&self, word: &str) -> Option<Bits11> {
14 self.inner.get(word).cloned()
15 }
16}
17
18impl WordList {
19 pub fn get_word(&self, bits: Bits11) -> &'static str {
20 self.inner[bits.bits() as usize]
21 }
22
23 pub fn get_words_by_prefix(&self, prefix: &str) -> &[&'static str] {
24 let start = self.inner.binary_search(&prefix).unwrap_or_else(|idx| idx);
25 let count = self.inner[start..]
26 .iter()
27 .take_while(|word| word.starts_with(prefix))
28 .count();
29
30 &self.inner[start..start + count]
31 }
32}
33
34mod lazy {
35 use super::{Bits11, WordList, WordMap};
36 use once_cell::sync::Lazy;
37
38 fn gen_wordlist(lang_words: &'static str) -> WordList {
40 let inner: Vec<_> = lang_words.split_whitespace().collect();
41
42 debug_assert!(inner.len() == 2048, "Invalid wordlist length");
43
44 WordList { inner }
45 }
46
47 fn gen_wordmap(wordlist: &WordList) -> WordMap {
49 let inner = wordlist
50 .inner
51 .iter()
52 .enumerate()
53 .map(|(i, item)| (*item, Bits11::from(i as u16)))
54 .collect();
55
56 WordMap { inner }
57 }
58
59 pub static WORDLIST_ENGLISH: Lazy<WordList> =
60 Lazy::new(|| gen_wordlist(include_str!("langs/english.txt")));
61 #[cfg(feature = "chinese-simplified")]
62 pub static WORDLIST_CHINESE_SIMPLIFIED: Lazy<WordList> =
63 Lazy::new(|| gen_wordlist(include_str!("langs/chinese_simplified.txt")));
64 #[cfg(feature = "chinese-traditional")]
65 pub static WORDLIST_CHINESE_TRADITIONAL: Lazy<WordList> =
66 Lazy::new(|| gen_wordlist(include_str!("langs/chinese_traditional.txt")));
67 #[cfg(feature = "french")]
68 pub static WORDLIST_FRENCH: Lazy<WordList> =
69 Lazy::new(|| gen_wordlist(include_str!("langs/french.txt")));
70 #[cfg(feature = "italian")]
71 pub static WORDLIST_ITALIAN: Lazy<WordList> =
72 Lazy::new(|| gen_wordlist(include_str!("langs/italian.txt")));
73 #[cfg(feature = "japanese")]
74 pub static WORDLIST_JAPANESE: Lazy<WordList> =
75 Lazy::new(|| gen_wordlist(include_str!("langs/japanese.txt")));
76 #[cfg(feature = "korean")]
77 pub static WORDLIST_KOREAN: Lazy<WordList> =
78 Lazy::new(|| gen_wordlist(include_str!("langs/korean.txt")));
79 #[cfg(feature = "spanish")]
80 pub static WORDLIST_SPANISH: Lazy<WordList> =
81 Lazy::new(|| gen_wordlist(include_str!("langs/spanish.txt")));
82
83 pub static WORDMAP_ENGLISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ENGLISH));
84 #[cfg(feature = "chinese-simplified")]
85 pub static WORDMAP_CHINESE_SIMPLIFIED: Lazy<WordMap> =
86 Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_SIMPLIFIED));
87 #[cfg(feature = "chinese-traditional")]
88 pub static WORDMAP_CHINESE_TRADITIONAL: Lazy<WordMap> =
89 Lazy::new(|| gen_wordmap(&WORDLIST_CHINESE_TRADITIONAL));
90 #[cfg(feature = "french")]
91 pub static WORDMAP_FRENCH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_FRENCH));
92 #[cfg(feature = "italian")]
93 pub static WORDMAP_ITALIAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_ITALIAN));
94 #[cfg(feature = "japanese")]
95 pub static WORDMAP_JAPANESE: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_JAPANESE));
96 #[cfg(feature = "korean")]
97 pub static WORDMAP_KOREAN: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_KOREAN));
98 #[cfg(feature = "spanish")]
99 pub static WORDMAP_SPANISH: Lazy<WordMap> = Lazy::new(|| gen_wordmap(&WORDLIST_SPANISH));
100}
101
/// The languages for which a wordlist can be selected.
///
/// `English` is always available and is the default. Every other variant
/// exists only when the cargo feature of the same name is enabled.
///
/// The enum is a plain field-less `Copy` type, so it also derives `Eq` and
/// `Hash`; this lets it be used as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Language {
    #[default]
    English,
    #[cfg(feature = "chinese-simplified")]
    ChineseSimplified,
    #[cfg(feature = "chinese-traditional")]
    ChineseTraditional,
    #[cfg(feature = "french")]
    French,
    #[cfg(feature = "italian")]
    Italian,
    #[cfg(feature = "japanese")]
    Japanese,
    #[cfg(feature = "korean")]
    Korean,
    #[cfg(feature = "spanish")]
    Spanish,
}
129
130impl Language {
131 pub fn from_language_code(language_code: &str) -> Option<Self> {
134 match &language_code.to_ascii_lowercase()[..] {
135 "en" => Some(Language::English),
136 #[cfg(feature = "chinese-simplified")]
137 "zh-hans" => Some(Language::ChineseSimplified),
138 #[cfg(feature = "chinese-traditional")]
139 "zh-hant" => Some(Language::ChineseTraditional),
140 #[cfg(feature = "french")]
141 "fr" => Some(Language::French),
142 #[cfg(feature = "italian")]
143 "it" => Some(Language::Italian),
144 #[cfg(feature = "japanese")]
145 "ja" => Some(Language::Japanese),
146 #[cfg(feature = "korean")]
147 "ko" => Some(Language::Korean),
148 #[cfg(feature = "spanish")]
149 "es" => Some(Language::Spanish),
150 _ => None,
151 }
152 }
153
154 pub fn wordlist(&self) -> &'static WordList {
156 match *self {
157 Language::English => &lazy::WORDLIST_ENGLISH,
158 #[cfg(feature = "chinese-simplified")]
159 Language::ChineseSimplified => &lazy::WORDLIST_CHINESE_SIMPLIFIED,
160 #[cfg(feature = "chinese-traditional")]
161 Language::ChineseTraditional => &lazy::WORDLIST_CHINESE_TRADITIONAL,
162 #[cfg(feature = "french")]
163 Language::French => &lazy::WORDLIST_FRENCH,
164 #[cfg(feature = "italian")]
165 Language::Italian => &lazy::WORDLIST_ITALIAN,
166 #[cfg(feature = "japanese")]
167 Language::Japanese => &lazy::WORDLIST_JAPANESE,
168 #[cfg(feature = "korean")]
169 Language::Korean => &lazy::WORDLIST_KOREAN,
170 #[cfg(feature = "spanish")]
171 Language::Spanish => &lazy::WORDLIST_SPANISH,
172 }
173 }
174
175 pub fn wordmap(&self) -> &'static WordMap {
180 match *self {
181 Language::English => &lazy::WORDMAP_ENGLISH,
182 #[cfg(feature = "chinese-simplified")]
183 Language::ChineseSimplified => &lazy::WORDMAP_CHINESE_SIMPLIFIED,
184 #[cfg(feature = "chinese-traditional")]
185 Language::ChineseTraditional => &lazy::WORDMAP_CHINESE_TRADITIONAL,
186 #[cfg(feature = "french")]
187 Language::French => &lazy::WORDMAP_FRENCH,
188 #[cfg(feature = "italian")]
189 Language::Italian => &lazy::WORDMAP_ITALIAN,
190 #[cfg(feature = "japanese")]
191 Language::Japanese => &lazy::WORDMAP_JAPANESE,
192 #[cfg(feature = "korean")]
193 Language::Korean => &lazy::WORDMAP_KOREAN,
194 #[cfg(feature = "spanish")]
195 Language::Spanish => &lazy::WORDMAP_SPANISH,
196 }
197 }
198}
199
/// Unit tests for prefix lookup, wordlist normalisation and language-code
/// parsing. Each test runs under `wasm_bindgen_test` on `wasm32` and under
/// the regular test harness elsewhere.
#[cfg(test)]
mod test {
    use super::lazy;
    use super::Language;
    use super::WordList;
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woo");
        assert_eq!(res, ["wood", "wool"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn all_words_by_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("");
        assert_eq!(res.len(), 2048);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn words_by_invalid_prefix() {
        let wl = &lazy::WORDLIST_ENGLISH;
        let res = wl.get_words_by_prefix("woof");
        assert!(res.is_empty());
    }

    /// Returns `true` when every word of `wl` is in Unicode NFKD form.
    fn is_wordlist_nfkd(wl: &WordList) -> bool {
        // A wordlist holds 2048 words (indices 0..=2047). The upper bound is
        // exclusive, so it must be 2048 for the last word to be checked too.
        for idx in 0..2048 {
            let word = wl.get_word(idx.into());
            if !unicode_normalization::is_nfkd(word) {
                return false;
            }
        }
        true
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn chinese_simplified_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_SIMPLIFIED));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn chinese_traditional_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_CHINESE_TRADITIONAL));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn french_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_FRENCH));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn italian_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_ITALIAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn japanese_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_JAPANESE));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn korean_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_KOREAN));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn spanish_wordlist_is_nfkd() {
        assert!(is_wordlist_nfkd(&lazy::WORDLIST_SPANISH));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_language_code_en() {
        assert_eq!(
            Language::from_language_code("En").expect("en is a valid language"),
            Language::English
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-simplified")]
    fn from_language_code_cn_hans() {
        assert_eq!(
            Language::from_language_code("Zh-Hans").expect("zh-hans is a valid language"),
            Language::ChineseSimplified
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "chinese-traditional")]
    fn from_language_code_cn_hant() {
        assert_eq!(
            Language::from_language_code("zh-hanT").expect("zh-hant is a valid language"),
            Language::ChineseTraditional
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "french")]
    fn from_language_code_fr() {
        assert_eq!(
            Language::from_language_code("fr").expect("fr is a valid language"),
            Language::French
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "italian")]
    fn from_language_code_it() {
        assert_eq!(
            Language::from_language_code("It").expect("it is a valid language"),
            Language::Italian
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "japanese")]
    fn from_language_code_ja() {
        assert_eq!(
            Language::from_language_code("Ja").expect("ja is a valid language"),
            Language::Japanese
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "korean")]
    fn from_language_code_ko() {
        assert_eq!(
            Language::from_language_code("kO").expect("ko is a valid language"),
            Language::Korean
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg(feature = "spanish")]
    fn from_language_code_es() {
        assert_eq!(
            Language::from_language_code("ES").expect("es is a valid language"),
            Language::Spanish
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    #[cfg_attr(not(target_arch = "wasm32"), test)]
    fn from_invalid_language_code() {
        assert_eq!(Language::from_language_code("not a real language"), None);
    }
}