lingua/
constant.rs

1/*
2 * Copyright © 2020-present Peter M. Stahl pemistahl@gmail.com
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use std::collections::{HashMap, HashSet};
18use std::str::FromStr;
19use std::sync::LazyLock;
20
21use regex::Regex;
22
23use crate::alphabet::CharSet;
24use crate::language::Language;
25
26pub(crate) static JAPANESE_CHARACTER_SET: LazyLock<CharSet> =
27    LazyLock::new(|| CharSet::from_char_classes(&["Hiragana", "Katakana", "Han"]));
28pub(crate) static MULTIPLE_WHITESPACE: LazyLock<Regex> =
29    LazyLock::new(|| Regex::new("\\s+").unwrap());
30pub(crate) static NUMBERS: LazyLock<Regex> = LazyLock::new(|| Regex::new("\\p{N}").unwrap());
31pub(crate) static PUNCTUATION: LazyLock<Regex> = LazyLock::new(|| Regex::new("\\p{P}").unwrap());
32pub(crate) static TOKENS_WITHOUT_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| {
33    Regex::new(
34        "\\p{Bengali}+|\\p{Devanagari}+|\\p{Gujarati}+|\\p{Gurmukhi}+|\\p{Han}|\\p{Hangul}+|\\p{Hiragana}|\\p{Katakana}|\\p{Tamil}+|\\p{Telugu}+|\\p{Thai}+|\\p{L}+",
35    )
36    .unwrap()
37});
38pub(crate) static TOKENS_WITH_OPTIONAL_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| {
39    Regex::new(
40        "\\s*(?:\\p{Bengali}+|\\p{Devanagari}+|\\p{Gujarati}+|\\p{Gurmukhi}+|\\p{Han}|\\p{Hangul}+|\\p{Hiragana}|\\p{Katakana}|\\p{Tamil}+|\\p{Telugu}+|\\p{Thai}+|[\\p{L}'-]+)[\\p{N}\\p{P}]*\\s*",
41    )
42    .unwrap()
43});
44pub(crate) static CHARS_TO_LANGUAGES_MAPPING: LazyLock<HashMap<&'static str, HashSet<Language>>> =
45    LazyLock::new(|| {
46        let mut mapping = hashmap!();
47
48        if cfg!(feature = "portuguese") || cfg!(feature = "vietnamese") {
49            mapping.insert("Ãã", {
50                let mut languages = hashset!();
51                if cfg!(feature = "portuguese") {
52                    languages.insert(Language::from_str("Portuguese").unwrap());
53                }
54                if cfg!(feature = "vietnamese") {
55                    languages.insert(Language::from_str("Vietnamese").unwrap());
56                }
57                languages
58            });
59        }
60
61        if cfg!(feature = "lithuanian") || cfg!(feature = "polish") {
62            mapping.insert("ĄąĘę", {
63                let mut languages = hashset!();
64                if cfg!(feature = "lithuanian") {
65                    languages.insert(Language::from_str("Lithuanian").unwrap());
66                }
67                if cfg!(feature = "polish") {
68                    languages.insert(Language::from_str("Polish").unwrap());
69                }
70                languages
71            });
72        }
73
74        if cfg!(feature = "polish") || cfg!(feature = "romanian") {
75            mapping.insert("Żż", {
76                let mut languages = hashset!();
77                if cfg!(feature = "polish") {
78                    languages.insert(Language::from_str("Polish").unwrap());
79                }
80                if cfg!(feature = "romanian") {
81                    languages.insert(Language::from_str("Romanian").unwrap());
82                }
83                languages
84            });
85        }
86
87        if cfg!(feature = "french") || cfg!(feature = "romanian") {
88            mapping.insert("Îî", {
89                let mut languages = hashset!();
90                if cfg!(feature = "french") {
91                    languages.insert(Language::from_str("French").unwrap());
92                }
93                if cfg!(feature = "romanian") {
94                    languages.insert(Language::from_str("Romanian").unwrap());
95                }
96                languages
97            });
98        }
99
100        if cfg!(feature = "basque") || cfg!(feature = "spanish") {
101            mapping.insert("Ññ", {
102                let mut languages = hashset!();
103                if cfg!(feature = "basque") {
104                    languages.insert(Language::from_str("Basque").unwrap());
105                }
106                if cfg!(feature = "spanish") {
107                    languages.insert(Language::from_str("Spanish").unwrap());
108                }
109                languages
110            });
111        }
112
113        if cfg!(feature = "czech") || cfg!(feature = "slovak") {
114            mapping.insert("ŇňŤť", {
115                let mut languages = hashset!();
116                if cfg!(feature = "czech") {
117                    languages.insert(Language::from_str("Czech").unwrap());
118                }
119                if cfg!(feature = "slovak") {
120                    languages.insert(Language::from_str("Slovak").unwrap());
121                }
122                languages
123            });
124        }
125
126        if cfg!(feature = "romanian") || cfg!(feature = "vietnamese") {
127            mapping.insert("Ăă", {
128                let mut languages = hashset!();
129                if cfg!(feature = "romanian") {
130                    languages.insert(Language::from_str("Romanian").unwrap());
131                }
132                if cfg!(feature = "vietnamese") {
133                    languages.insert(Language::from_str("Vietnamese").unwrap());
134                }
135                languages
136            });
137        }
138
139        if cfg!(feature = "azerbaijani") || cfg!(feature = "turkish") {
140            mapping.insert("İıĞğ", {
141                let mut languages = hashset!();
142                if cfg!(feature = "azerbaijani") {
143                    languages.insert(Language::from_str("Azerbaijani").unwrap());
144                }
145                if cfg!(feature = "turkish") {
146                    languages.insert(Language::from_str("Turkish").unwrap());
147                }
148                languages
149            });
150        }
151
152        if cfg!(feature = "macedonian") || cfg!(feature = "serbian") {
153            mapping.insert("ЈјЉљЊњ", {
154                let mut languages = hashset!();
155                if cfg!(feature = "macedonian") {
156                    languages.insert(Language::from_str("Macedonian").unwrap());
157                }
158                if cfg!(feature = "serbian") {
159                    languages.insert(Language::from_str("Serbian").unwrap());
160                }
161                languages
162            });
163        }
164
165        if cfg!(feature = "vietnamese") || cfg!(feature = "yoruba") {
166            mapping.insert("ẸẹỌọ", {
167                let mut languages = hashset!();
168                if cfg!(feature = "vietnamese") {
169                    languages.insert(Language::from_str("Vietnamese").unwrap());
170                }
171                if cfg!(feature = "yoruba") {
172                    languages.insert(Language::from_str("Yoruba").unwrap());
173                }
174                languages
175            });
176        }
177
178        if cfg!(feature = "icelandic") || cfg!(feature = "turkish") {
179            mapping.insert("ÐðÞþ", {
180                let mut languages = hashset!();
181                if cfg!(feature = "icelandic") {
182                    languages.insert(Language::from_str("Icelandic").unwrap());
183                }
184                if cfg!(feature = "turkish") {
185                    languages.insert(Language::from_str("Turkish").unwrap());
186                }
187                languages
188            });
189        }
190
191        if cfg!(feature = "french") || cfg!(feature = "hungarian") {
192            mapping.insert("Ûû", {
193                let mut languages = hashset!();
194                if cfg!(feature = "french") {
195                    languages.insert(Language::from_str("French").unwrap());
196                }
197                if cfg!(feature = "hungarian") {
198                    languages.insert(Language::from_str("Hungarian").unwrap());
199                }
200                languages
201            });
202        }
203
204        if cfg!(feature = "maori") || cfg!(feature = "yoruba") {
205            mapping.insert("Ōō", {
206                let mut languages = hashset!();
207                if cfg!(feature = "maori") {
208                    languages.insert(Language::from_str("Maori").unwrap());
209                }
210                if cfg!(feature = "yoruba") {
211                    languages.insert(Language::from_str("Yoruba").unwrap());
212                }
213                languages
214            });
215        }
216
217        if cfg!(feature = "kazakh") || cfg!(feature = "mongolian") {
218            mapping.insert("ӨөҮү", {
219                let mut languages = hashset!();
220                if cfg!(feature = "kazakh") {
221                    languages.insert(Language::from_str("Kazakh").unwrap());
222                }
223                if cfg!(feature = "mongolian") {
224                    languages.insert(Language::from_str("Mongolian").unwrap());
225                }
226                languages
227            });
228        }
229
230        if cfg!(feature = "latvian") || cfg!(feature = "maori") || cfg!(feature = "yoruba") {
231            mapping.insert("ĀāĒēĪī", {
232                let mut languages = hashset!();
233                if cfg!(feature = "latvian") {
234                    languages.insert(Language::from_str("Latvian").unwrap());
235                }
236                if cfg!(feature = "maori") {
237                    languages.insert(Language::from_str("Maori").unwrap());
238                }
239                if cfg!(feature = "yoruba") {
240                    languages.insert(Language::from_str("Yoruba").unwrap());
241                }
242                languages
243            });
244        }
245
246        if cfg!(feature = "azerbaijani") || cfg!(feature = "romanian") || cfg!(feature = "turkish")
247        {
248            mapping.insert("Şş", {
249                let mut languages = hashset!();
250                if cfg!(feature = "azerbaijani") {
251                    languages.insert(Language::from_str("Azerbaijani").unwrap());
252                }
253                if cfg!(feature = "romanian") {
254                    languages.insert(Language::from_str("Romanian").unwrap());
255                }
256                if cfg!(feature = "turkish") {
257                    languages.insert(Language::from_str("Turkish").unwrap());
258                }
259                languages
260            });
261        }
262
263        if cfg!(feature = "czech") || cfg!(feature = "romanian") || cfg!(feature = "slovak") {
264            mapping.insert("Ďď", {
265                let mut languages = hashset!();
266                if cfg!(feature = "czech") {
267                    languages.insert(Language::from_str("Czech").unwrap());
268                }
269                if cfg!(feature = "romanian") {
270                    languages.insert(Language::from_str("Romanian").unwrap());
271                }
272                if cfg!(feature = "slovak") {
273                    languages.insert(Language::from_str("Slovak").unwrap());
274                }
275                languages
276            });
277        }
278
279        if cfg!(feature = "bosnian") || cfg!(feature = "croatian") || cfg!(feature = "polish") {
280            mapping.insert("Ćć", {
281                let mut languages = hashset!();
282                if cfg!(feature = "bosnian") {
283                    languages.insert(Language::from_str("Bosnian").unwrap());
284                }
285                if cfg!(feature = "croatian") {
286                    languages.insert(Language::from_str("Croatian").unwrap());
287                }
288                if cfg!(feature = "polish") {
289                    languages.insert(Language::from_str("Polish").unwrap());
290                }
291                languages
292            });
293        }
294
295        if cfg!(feature = "bosnian") || cfg!(feature = "croatian") || cfg!(feature = "vietnamese") {
296            mapping.insert("Đđ", {
297                let mut languages = hashset!();
298                if cfg!(feature = "bosnian") {
299                    languages.insert(Language::from_str("Bosnian").unwrap());
300                }
301                if cfg!(feature = "croatian") {
302                    languages.insert(Language::from_str("Croatian").unwrap());
303                }
304                if cfg!(feature = "vietnamese") {
305                    languages.insert(Language::from_str("Vietnamese").unwrap());
306                }
307                languages
308            });
309        }
310
311        if cfg!(feature = "belarusian") || cfg!(feature = "kazakh") || cfg!(feature = "ukrainian") {
312            mapping.insert("Іі", {
313                let mut languages = hashset!();
314                if cfg!(feature = "belarusian") {
315                    languages.insert(Language::from_str("Belarusian").unwrap());
316                }
317                if cfg!(feature = "kazakh") {
318                    languages.insert(Language::from_str("Kazakh").unwrap());
319                }
320                if cfg!(feature = "ukrainian") {
321                    languages.insert(Language::from_str("Ukrainian").unwrap());
322                }
323                languages
324            });
325        }
326
327        if cfg!(feature = "italian") || cfg!(feature = "vietnamese") || cfg!(feature = "yoruba") {
328            mapping.insert("Ìì", {
329                let mut languages = hashset!();
330                if cfg!(feature = "italian") {
331                    languages.insert(Language::from_str("Italian").unwrap());
332                }
333                if cfg!(feature = "vietnamese") {
334                    languages.insert(Language::from_str("Vietnamese").unwrap());
335                }
336                if cfg!(feature = "yoruba") {
337                    languages.insert(Language::from_str("Yoruba").unwrap());
338                }
339                languages
340            });
341        }
342
343        if cfg!(feature = "bokmal") || cfg!(feature = "danish") || cfg!(feature = "nynorsk") {
344            mapping.insert("Øø", {
345                let mut languages = hashset!();
346                if cfg!(feature = "bokmal") {
347                    languages.insert(Language::from_str("Bokmal").unwrap());
348                }
349                if cfg!(feature = "danish") {
350                    languages.insert(Language::from_str("Danish").unwrap());
351                }
352                if cfg!(feature = "nynorsk") {
353                    languages.insert(Language::from_str("Nynorsk").unwrap());
354                }
355                languages
356            });
357        }
358
359        if cfg!(feature = "latvian")
360            || cfg!(feature = "lithuanian")
361            || cfg!(feature = "maori")
362            || cfg!(feature = "yoruba")
363        {
364            mapping.insert("Ūū", {
365                let mut languages = hashset!();
366                if cfg!(feature = "latvian") {
367                    languages.insert(Language::from_str("Latvian").unwrap());
368                }
369                if cfg!(feature = "lithuanian") {
370                    languages.insert(Language::from_str("Lithuanian").unwrap());
371                }
372                if cfg!(feature = "maori") {
373                    languages.insert(Language::from_str("Maori").unwrap());
374                }
375                if cfg!(feature = "yoruba") {
376                    languages.insert(Language::from_str("Yoruba").unwrap());
377                }
378                languages
379            });
380        }
381
382        if cfg!(feature = "afrikaans")
383            || cfg!(feature = "albanian")
384            || cfg!(feature = "dutch")
385            || cfg!(feature = "french")
386        {
387            mapping.insert("Ëë", {
388                let mut languages = hashset!();
389                if cfg!(feature = "afrikaans") {
390                    languages.insert(Language::from_str("Afrikaans").unwrap());
391                }
392                if cfg!(feature = "albanian") {
393                    languages.insert(Language::from_str("Albanian").unwrap());
394                }
395                if cfg!(feature = "dutch") {
396                    languages.insert(Language::from_str("Dutch").unwrap());
397                }
398                if cfg!(feature = "french") {
399                    languages.insert(Language::from_str("French").unwrap());
400                }
401                languages
402            });
403        }
404
405        if cfg!(feature = "french")
406            || cfg!(feature = "italian")
407            || cfg!(feature = "vietnamese")
408            || cfg!(feature = "yoruba")
409        {
410            mapping.insert("ÈèÙù", {
411                let mut languages = hashset!();
412                if cfg!(feature = "french") {
413                    languages.insert(Language::from_str("French").unwrap());
414                }
415                if cfg!(feature = "italian") {
416                    languages.insert(Language::from_str("Italian").unwrap());
417                }
418                if cfg!(feature = "vietnamese") {
419                    languages.insert(Language::from_str("Vietnamese").unwrap());
420                }
421                if cfg!(feature = "yoruba") {
422                    languages.insert(Language::from_str("Yoruba").unwrap());
423                }
424                languages
425            });
426        }
427
428        if cfg!(feature = "afrikaans")
429            || cfg!(feature = "french")
430            || cfg!(feature = "portuguese")
431            || cfg!(feature = "vietnamese")
432        {
433            mapping.insert("Êê", {
434                let mut languages = hashset!();
435                if cfg!(feature = "afrikaans") {
436                    languages.insert(Language::from_str("Afrikaans").unwrap());
437                }
438                if cfg!(feature = "french") {
439                    languages.insert(Language::from_str("French").unwrap());
440                }
441                if cfg!(feature = "portuguese") {
442                    languages.insert(Language::from_str("Portuguese").unwrap());
443                }
444                if cfg!(feature = "vietnamese") {
445                    languages.insert(Language::from_str("Vietnamese").unwrap());
446                }
447                languages
448            });
449        }
450
451        if cfg!(feature = "estonian")
452            || cfg!(feature = "hungarian")
453            || cfg!(feature = "portuguese")
454            || cfg!(feature = "vietnamese")
455        {
456            mapping.insert("Õõ", {
457                let mut languages = hashset!();
458                if cfg!(feature = "estonian") {
459                    languages.insert(Language::from_str("Estonian").unwrap());
460                }
461                if cfg!(feature = "hungarian") {
462                    languages.insert(Language::from_str("Hungarian").unwrap());
463                }
464                if cfg!(feature = "portuguese") {
465                    languages.insert(Language::from_str("Portuguese").unwrap());
466                }
467                if cfg!(feature = "vietnamese") {
468                    languages.insert(Language::from_str("Vietnamese").unwrap());
469                }
470                languages
471            });
472
473            if cfg!(feature = "french")
474                || cfg!(feature = "portuguese")
475                || cfg!(feature = "slovak")
476                || cfg!(feature = "vietnamese")
477            {
478                mapping.insert("Ôô", {
479                    let mut languages = hashset!();
480                    if cfg!(feature = "french") {
481                        languages.insert(Language::from_str("French").unwrap());
482                    }
483                    if cfg!(feature = "portuguese") {
484                        languages.insert(Language::from_str("Portuguese").unwrap());
485                    }
486                    if cfg!(feature = "slovak") {
487                        languages.insert(Language::from_str("Slovak").unwrap());
488                    }
489                    if cfg!(feature = "vietnamese") {
490                        languages.insert(Language::from_str("Vietnamese").unwrap());
491                    }
492                    languages
493                });
494            }
495
496            if cfg!(feature = "belarusian")
497                || cfg!(feature = "kazakh")
498                || cfg!(feature = "mongolian")
499                || cfg!(feature = "russian")
500            {
501                mapping.insert("ЁёЫыЭэ", {
502                    let mut languages = hashset!();
503                    if cfg!(feature = "belarusian") {
504                        languages.insert(Language::from_str("Belarusian").unwrap());
505                    }
506                    if cfg!(feature = "kazakh") {
507                        languages.insert(Language::from_str("Kazakh").unwrap());
508                    }
509                    if cfg!(feature = "mongolian") {
510                        languages.insert(Language::from_str("Mongolian").unwrap());
511                    }
512                    if cfg!(feature = "russian") {
513                        languages.insert(Language::from_str("Russian").unwrap());
514                    }
515                    languages
516                });
517            }
518
519            if cfg!(feature = "bulgarian")
520                || cfg!(feature = "kazakh")
521                || cfg!(feature = "mongolian")
522                || cfg!(feature = "russian")
523            {
524                mapping.insert("ЩщЪъ", {
525                    let mut languages = hashset!();
526                    if cfg!(feature = "bulgarian") {
527                        languages.insert(Language::from_str("Bulgarian").unwrap());
528                    }
529                    if cfg!(feature = "kazakh") {
530                        languages.insert(Language::from_str("Kazakh").unwrap());
531                    }
532                    if cfg!(feature = "mongolian") {
533                        languages.insert(Language::from_str("Mongolian").unwrap());
534                    }
535                    if cfg!(feature = "russian") {
536                        languages.insert(Language::from_str("Russian").unwrap());
537                    }
538                    languages
539                });
540            }
541
542            if cfg!(feature = "catalan")
543                || cfg!(feature = "italian")
544                || cfg!(feature = "vietnamese")
545                || cfg!(feature = "yoruba")
546            {
547                mapping.insert("Òò", {
548                    let mut languages = hashset!();
549                    if cfg!(feature = "catalan") {
550                        languages.insert(Language::from_str("Catalan").unwrap());
551                    }
552                    if cfg!(feature = "italian") {
553                        languages.insert(Language::from_str("Italian").unwrap());
554                    }
555                    if cfg!(feature = "vietnamese") {
556                        languages.insert(Language::from_str("Vietnamese").unwrap());
557                    }
558                    if cfg!(feature = "yoruba") {
559                        languages.insert(Language::from_str("Yoruba").unwrap());
560                    }
561                    languages
562                });
563            }
564
565            if cfg!(feature = "french")
566                || cfg!(feature = "portuguese")
567                || cfg!(feature = "romanian")
568                || cfg!(feature = "turkish")
569                || cfg!(feature = "vietnamese")
570            {
571                mapping.insert("Ââ", {
572                    let mut languages = hashset!();
573                    if cfg!(feature = "french") {
574                        languages.insert(Language::from_str("French").unwrap());
575                    }
576                    if cfg!(feature = "portuguese") {
577                        languages.insert(Language::from_str("Portuguese").unwrap());
578                    }
579                    if cfg!(feature = "romanian") {
580                        languages.insert(Language::from_str("Romanian").unwrap());
581                    }
582                    if cfg!(feature = "turkish") {
583                        languages.insert(Language::from_str("Turkish").unwrap());
584                    }
585                    if cfg!(feature = "vietnamese") {
586                        languages.insert(Language::from_str("Vietnamese").unwrap());
587                    }
588                    languages
589                });
590            }
591
592            if cfg!(feature = "bokmal")
593                || cfg!(feature = "danish")
594                || cfg!(feature = "icelandic")
595                || cfg!(feature = "nynorsk")
596            {
597                mapping.insert("Ææ", {
598                    let mut languages = hashset!();
599                    if cfg!(feature = "bokmal") {
600                        languages.insert(Language::from_str("Bokmal").unwrap());
601                    }
602                    if cfg!(feature = "danish") {
603                        languages.insert(Language::from_str("Danish").unwrap());
604                    }
605                    if cfg!(feature = "icelandic") {
606                        languages.insert(Language::from_str("Icelandic").unwrap());
607                    }
608                    if cfg!(feature = "nynorsk") {
609                        languages.insert(Language::from_str("Nynorsk").unwrap());
610                    }
611                    languages
612                });
613            }
614
615            if cfg!(feature = "bokmal")
616                || cfg!(feature = "danish")
617                || cfg!(feature = "nynorsk")
618                || cfg!(feature = "swedish")
619            {
620                mapping.insert("Åå", {
621                    let mut languages = hashset!();
622                    if cfg!(feature = "bokmal") {
623                        languages.insert(Language::from_str("Bokmal").unwrap());
624                    }
625                    if cfg!(feature = "danish") {
626                        languages.insert(Language::from_str("Danish").unwrap());
627                    }
628                    if cfg!(feature = "nynorsk") {
629                        languages.insert(Language::from_str("Nynorsk").unwrap());
630                    }
631                    if cfg!(feature = "swedish") {
632                        languages.insert(Language::from_str("Swedish").unwrap());
633                    }
634                    languages
635                });
636            }
637
638            if cfg!(feature = "czech")
639                || cfg!(feature = "icelandic")
640                || cfg!(feature = "slovak")
641                || cfg!(feature = "turkish")
642                || cfg!(feature = "vietnamese")
643            {
644                mapping.insert("Ýý", {
645                    let mut languages = hashset!();
646                    if cfg!(feature = "czech") {
647                        languages.insert(Language::from_str("Czech").unwrap());
648                    }
649                    if cfg!(feature = "icelandic") {
650                        languages.insert(Language::from_str("Icelandic").unwrap());
651                    }
652                    if cfg!(feature = "slovak") {
653                        languages.insert(Language::from_str("Slovak").unwrap());
654                    }
655                    if cfg!(feature = "turkish") {
656                        languages.insert(Language::from_str("Turkish").unwrap());
657                    }
658                    if cfg!(feature = "vietnamese") {
659                        languages.insert(Language::from_str("Vietnamese").unwrap());
660                    }
661                    languages
662                });
663            }
664
665            if cfg!(feature = "estonian")
666                || cfg!(feature = "finnish")
667                || cfg!(feature = "german")
668                || cfg!(feature = "slovak")
669                || cfg!(feature = "swedish")
670            {
671                mapping.insert("Ää", {
672                    let mut languages = hashset!();
673                    if cfg!(feature = "estonian") {
674                        languages.insert(Language::from_str("Estonian").unwrap());
675                    }
676                    if cfg!(feature = "finnish") {
677                        languages.insert(Language::from_str("Finnish").unwrap());
678                    }
679                    if cfg!(feature = "german") {
680                        languages.insert(Language::from_str("German").unwrap());
681                    }
682                    if cfg!(feature = "slovak") {
683                        languages.insert(Language::from_str("Slovak").unwrap());
684                    }
685                    if cfg!(feature = "swedish") {
686                        languages.insert(Language::from_str("Swedish").unwrap());
687                    }
688                    languages
689                });
690            }
691
692            if cfg!(feature = "catalan")
693                || cfg!(feature = "french")
694                || cfg!(feature = "italian")
695                || cfg!(feature = "portuguese")
696                || cfg!(feature = "vietnamese")
697            {
698                mapping.insert("Àà", {
699                    let mut languages = hashset!();
700                    if cfg!(feature = "catalan") {
701                        languages.insert(Language::from_str("Catalan").unwrap());
702                    }
703                    if cfg!(feature = "french") {
704                        languages.insert(Language::from_str("French").unwrap());
705                    }
706                    if cfg!(feature = "italian") {
707                        languages.insert(Language::from_str("Italian").unwrap());
708                    }
709                    if cfg!(feature = "portuguese") {
710                        languages.insert(Language::from_str("Portuguese").unwrap());
711                    }
712                    if cfg!(feature = "vietnamese") {
713                        languages.insert(Language::from_str("Vietnamese").unwrap());
714                    }
715                    languages
716                });
717            }
718
719            if cfg!(feature = "azerbaijani")
720                || cfg!(feature = "catalan")
721                || cfg!(feature = "estonian")
722                || cfg!(feature = "german")
723                || cfg!(feature = "hungarian")
724                || cfg!(feature = "spanish")
725                || cfg!(feature = "turkish")
726            {
727                mapping.insert("Üü", {
728                    let mut languages = hashset!();
729                    if cfg!(feature = "azerbaijani") {
730                        languages.insert(Language::from_str("Azerbaijani").unwrap());
731                    }
732                    if cfg!(feature = "catalan") {
733                        languages.insert(Language::from_str("Catalan").unwrap());
734                    }
735                    if cfg!(feature = "estonian") {
736                        languages.insert(Language::from_str("Estonian").unwrap());
737                    }
738                    if cfg!(feature = "german") {
739                        languages.insert(Language::from_str("German").unwrap());
740                    }
741                    if cfg!(feature = "hungarian") {
742                        languages.insert(Language::from_str("Hungarian").unwrap());
743                    }
744                    if cfg!(feature = "spanish") {
745                        languages.insert(Language::from_str("Spanish").unwrap());
746                    }
747                    if cfg!(feature = "turkish") {
748                        languages.insert(Language::from_str("Turkish").unwrap());
749                    }
750                    languages
751                });
752            }
753
754            if cfg!(feature = "bosnian")
755                || cfg!(feature = "czech")
756                || cfg!(feature = "croatian")
757                || cfg!(feature = "latvian")
758                || cfg!(feature = "lithuanian")
759                || cfg!(feature = "slovak")
760                || cfg!(feature = "slovene")
761            {
762                mapping.insert("ČčŠšŽž", {
763                    let mut languages = hashset!();
764                    if cfg!(feature = "bosnian") {
765                        languages.insert(Language::from_str("Bosnian").unwrap());
766                    }
767                    if cfg!(feature = "czech") {
768                        languages.insert(Language::from_str("Czech").unwrap());
769                    }
770                    if cfg!(feature = "croatian") {
771                        languages.insert(Language::from_str("Croatian").unwrap());
772                    }
773                    if cfg!(feature = "latvian") {
774                        languages.insert(Language::from_str("Latvian").unwrap());
775                    }
776                    if cfg!(feature = "lithuanian") {
777                        languages.insert(Language::from_str("Lithuanian").unwrap());
778                    }
779                    if cfg!(feature = "slovak") {
780                        languages.insert(Language::from_str("Slovak").unwrap());
781                    }
782                    if cfg!(feature = "slovene") {
783                        languages.insert(Language::from_str("Slovene").unwrap());
784                    }
785                    languages
786                });
787            }
788
789            if cfg!(feature = "albanian")
790                || cfg!(feature = "azerbaijani")
791                || cfg!(feature = "basque")
792                || cfg!(feature = "catalan")
793                || cfg!(feature = "french")
794                || cfg!(feature = "portuguese")
795                || cfg!(feature = "turkish")
796            {
797                mapping.insert("Çç", {
798                    let mut languages = hashset!();
799                    if cfg!(feature = "albanian") {
800                        languages.insert(Language::from_str("Albanian").unwrap());
801                    }
802                    if cfg!(feature = "azerbaijani") {
803                        languages.insert(Language::from_str("Azerbaijani").unwrap());
804                    }
805                    if cfg!(feature = "basque") {
806                        languages.insert(Language::from_str("Basque").unwrap());
807                    }
808                    if cfg!(feature = "catalan") {
809                        languages.insert(Language::from_str("Catalan").unwrap());
810                    }
811                    if cfg!(feature = "french") {
812                        languages.insert(Language::from_str("French").unwrap());
813                    }
814                    if cfg!(feature = "portuguese") {
815                        languages.insert(Language::from_str("Portuguese").unwrap());
816                    }
817                    if cfg!(feature = "turkish") {
818                        languages.insert(Language::from_str("Turkish").unwrap());
819                    }
820                    languages
821                });
822            }
823
824            if cfg!(feature = "azerbaijani")
825                || cfg!(feature = "estonian")
826                || cfg!(feature = "finnish")
827                || cfg!(feature = "german")
828                || cfg!(feature = "hungarian")
829                || cfg!(feature = "icelandic")
830                || cfg!(feature = "swedish")
831                || cfg!(feature = "turkish")
832            {
833                mapping.insert("Öö", {
834                    let mut languages = hashset!();
835                    if cfg!(feature = "azerbaijani") {
836                        languages.insert(Language::from_str("Azerbaijani").unwrap());
837                    }
838                    if cfg!(feature = "estonian") {
839                        languages.insert(Language::from_str("Estonian").unwrap());
840                    }
841                    if cfg!(feature = "finnish") {
842                        languages.insert(Language::from_str("Finnish").unwrap());
843                    }
844                    if cfg!(feature = "german") {
845                        languages.insert(Language::from_str("German").unwrap());
846                    }
847                    if cfg!(feature = "hungarian") {
848                        languages.insert(Language::from_str("Hungarian").unwrap());
849                    }
850                    if cfg!(feature = "icelandic") {
851                        languages.insert(Language::from_str("Icelandic").unwrap());
852                    }
853                    if cfg!(feature = "swedish") {
854                        languages.insert(Language::from_str("Swedish").unwrap());
855                    }
856                    if cfg!(feature = "turkish") {
857                        languages.insert(Language::from_str("Turkish").unwrap());
858                    }
859                    languages
860                });
861            }
862
863            if cfg!(feature = "catalan")
864                || cfg!(feature = "hungarian")
865                || cfg!(feature = "icelandic")
866                || cfg!(feature = "irish")
867                || cfg!(feature = "polish")
868                || cfg!(feature = "portuguese")
869                || cfg!(feature = "slovak")
870                || cfg!(feature = "spanish")
871                || cfg!(feature = "vietnamese")
872                || cfg!(feature = "yoruba")
873            {
874                mapping.insert("Óó", {
875                    let mut languages = hashset!();
876                    if cfg!(feature = "catalan") {
877                        languages.insert(Language::from_str("Catalan").unwrap());
878                    }
879                    if cfg!(feature = "hungarian") {
880                        languages.insert(Language::from_str("Hungarian").unwrap());
881                    }
882                    if cfg!(feature = "icelandic") {
883                        languages.insert(Language::from_str("Icelandic").unwrap());
884                    }
885                    if cfg!(feature = "irish") {
886                        languages.insert(Language::from_str("Irish").unwrap());
887                    }
888                    if cfg!(feature = "polish") {
889                        languages.insert(Language::from_str("Polish").unwrap());
890                    }
891                    if cfg!(feature = "portuguese") {
892                        languages.insert(Language::from_str("Portuguese").unwrap());
893                    }
894                    if cfg!(feature = "slovak") {
895                        languages.insert(Language::from_str("Slovak").unwrap());
896                    }
897                    if cfg!(feature = "spanish") {
898                        languages.insert(Language::from_str("Spanish").unwrap());
899                    }
900                    if cfg!(feature = "vietnamese") {
901                        languages.insert(Language::from_str("Vietnamese").unwrap());
902                    }
903                    if cfg!(feature = "yoruba") {
904                        languages.insert(Language::from_str("Yoruba").unwrap());
905                    }
906                    languages
907                });
908            }
909
910            if cfg!(feature = "catalan")
911                || cfg!(feature = "czech")
912                || cfg!(feature = "icelandic")
913                || cfg!(feature = "irish")
914                || cfg!(feature = "hungarian")
915                || cfg!(feature = "portuguese")
916                || cfg!(feature = "slovak")
917                || cfg!(feature = "spanish")
918                || cfg!(feature = "vietnamese")
919                || cfg!(feature = "yoruba")
920            {
921                mapping.insert("ÁáÍíÚú", {
922                    let mut languages = hashset!();
923                    if cfg!(feature = "catalan") {
924                        languages.insert(Language::from_str("Catalan").unwrap());
925                    }
926                    if cfg!(feature = "czech") {
927                        languages.insert(Language::from_str("Czech").unwrap());
928                    }
929                    if cfg!(feature = "icelandic") {
930                        languages.insert(Language::from_str("Icelandic").unwrap());
931                    }
932                    if cfg!(feature = "irish") {
933                        languages.insert(Language::from_str("Irish").unwrap());
934                    }
935                    if cfg!(feature = "hungarian") {
936                        languages.insert(Language::from_str("Hungarian").unwrap());
937                    }
938                    if cfg!(feature = "portuguese") {
939                        languages.insert(Language::from_str("Portuguese").unwrap());
940                    }
941                    if cfg!(feature = "slovak") {
942                        languages.insert(Language::from_str("Slovak").unwrap());
943                    }
944                    if cfg!(feature = "spanish") {
945                        languages.insert(Language::from_str("Spanish").unwrap());
946                    }
947                    if cfg!(feature = "vietnamese") {
948                        languages.insert(Language::from_str("Vietnamese").unwrap());
949                    }
950                    if cfg!(feature = "yoruba") {
951                        languages.insert(Language::from_str("Yoruba").unwrap());
952                    }
953                    languages
954                });
955            }
956
957            if cfg!(feature = "catalan")
958                || cfg!(feature = "czech")
959                || cfg!(feature = "french")
960                || cfg!(feature = "hungarian")
961                || cfg!(feature = "icelandic")
962                || cfg!(feature = "irish")
963                || cfg!(feature = "italian")
964                || cfg!(feature = "portuguese")
965                || cfg!(feature = "slovak")
966                || cfg!(feature = "spanish")
967                || cfg!(feature = "vietnamese")
968                || cfg!(feature = "yoruba")
969            {
970                mapping.insert("Éé", {
971                    let mut languages = hashset!();
972                    if cfg!(feature = "catalan") {
973                        languages.insert(Language::from_str("Catalan").unwrap());
974                    }
975                    if cfg!(feature = "czech") {
976                        languages.insert(Language::from_str("Czech").unwrap());
977                    }
978                    if cfg!(feature = "french") {
979                        languages.insert(Language::from_str("French").unwrap());
980                    }
981                    if cfg!(feature = "hungarian") {
982                        languages.insert(Language::from_str("Hungarian").unwrap());
983                    }
984                    if cfg!(feature = "icelandic") {
985                        languages.insert(Language::from_str("Icelandic").unwrap());
986                    }
987                    if cfg!(feature = "irish") {
988                        languages.insert(Language::from_str("Irish").unwrap());
989                    }
990                    if cfg!(feature = "italian") {
991                        languages.insert(Language::from_str("Italian").unwrap());
992                    }
993                    if cfg!(feature = "portuguese") {
994                        languages.insert(Language::from_str("Portuguese").unwrap());
995                    }
996                    if cfg!(feature = "slovak") {
997                        languages.insert(Language::from_str("Slovak").unwrap());
998                    }
999                    if cfg!(feature = "spanish") {
1000                        languages.insert(Language::from_str("Spanish").unwrap());
1001                    }
1002                    if cfg!(feature = "vietnamese") {
1003                        languages.insert(Language::from_str("Vietnamese").unwrap());
1004                    }
1005                    if cfg!(feature = "yoruba") {
1006                        languages.insert(Language::from_str("Yoruba").unwrap());
1007                    }
1008                    languages
1009                });
1010            }
1011        }
1012
1013        mapping
1014    });