Skip to main content

stop_words/
language_names.rs

//! Defines the `LANGUAGE` enum of selectable languages, the lookup of each
//! variant's language code, and the string conversions and formatting built on it.
2
/// Enum containing the available language names, one unit variant per language.
///
/// Which variants exist depends on the enabled Cargo features (`nltk`, `iso`,
/// `constructed`, `unimplemented`); the `cfg` attribute on each variant states
/// the exact condition. The enum is `#[non_exhaustive]`, so code in other
/// crates that matches on it needs a wildcard arm.
///
/// All variants are fieldless, so the type derives `Copy`, `PartialEq`, `Eq`
/// and `Hash` in addition to `Clone` and `Debug`: values can be passed by
/// value, compared, and used as keys in hash-based collections.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LANGUAGE {
    /// Albanian (ISO 639-1 Code: sq)
    #[cfg(feature = "nltk")]
    Albanian,

    /// Arabic (ISO 639-1 Code: ar)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Arabic,

    /// Azerbaijani (ISO 639-1 Code: az)
    #[cfg(feature = "nltk")]
    Azerbaijani,

    /// Danish (ISO 639-1 Code: da)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Danish,

    /// Dutch (ISO 639-1 Code: nl)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Dutch,

    /// English (ISO 639-1 Code: en)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    English,

    /// Finnish (ISO 639-1 Code: fi)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Finnish,

    /// French (ISO 639-1 Code: fr)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    French,

    /// German (ISO 639-1 Code: de)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    German,

    /// Greek (ISO 639-1 Code: el)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Greek,

    /// Hungarian (ISO 639-1 Code: hu)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Hungarian,

    /// Indonesian (ISO 639-1 Code: id)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Indonesian,

    /// Italian (ISO 639-1 Code: it)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Italian,

    /// Kazakh (ISO 639-1 Code: kk)
    #[cfg(feature = "nltk")]
    Kazakh,

    /// Nepali (ISO 639-1 Code: ne)
    #[cfg(feature = "nltk")]
    Nepali,

    /// Norwegian (ISO 639-1 Code: no)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Norwegian,

    /// Portuguese (ISO 639-1 Code: pt)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Portuguese,

    /// Romanian (ISO 639-1 Code: ro)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Romanian,

    /// Russian (ISO 639-1 Code: ru)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Russian,

    /// Slovenian (ISO 639-1 Code: sl)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Slovenian,

    /// Spanish (ISO 639-1 Code: es)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Spanish,

    /// Swedish (ISO 639-1 Code: sv)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Swedish,

    /// Tajik (ISO 639-1 Code: tg)
    #[cfg(feature = "nltk")]
    Tajik,

    /// Tamil (ISO 639-1 Code: ta)
    #[cfg(feature = "nltk")]
    Tamil,

    /// Turkish (ISO 639-1 Code: tr)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Turkish,

    /// Uzbek (ISO 639-1 Code: uz)
    #[cfg(feature = "nltk")]
    Uzbek,

    /// Afrikaans (ISO 639-1 Code: af)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Afrikaans,

    /// Armenian (ISO 639-1 Code: hy)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Armenian,

    /// Basque (ISO 639-1 Code: eu)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Basque,

    /// Belarusian (ISO 639-1 Code: be)
    #[cfg(feature = "nltk")]
    Belarusian,

    /// Bengali (ISO 639-1 Code: bn)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Bengali,

    /// Breton (ISO 639-1 Code: br)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Breton,

    /// Bulgarian (ISO 639-1 Code: bg)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Bulgarian,

    /// Catalan (ISO 639-1 Code: ca)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Catalan,

    /// Czech (ISO 639-1 Code: cs)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Czech,

    /// Chinese (ISO 639-1 Code: zh)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Chinese,

    /// Esperanto (ISO 639-1 Code: eo)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Esperanto,

    /// Estonian (ISO 639-1 Code: et)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Estonian,

    /// Persian (ISO 639-1 Code: fa)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Persian,

    /// Irish (ISO 639-1 Code: ga)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Irish,

    /// Galician (ISO 639-1 Code: gl)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Galician,

    /// Gujarati (ISO 639-1 Code: gu)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Gujarati,

    /// Hausa (ISO 639-1 Code: ha)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Hausa,

    /// Hebrew (ISO 639-1 Code: he)
    #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
    Hebrew,

    /// Hinglish (NLTK-specific identifier: hinglish)
    #[cfg(feature = "nltk")]
    Hinglish,

    /// Hindi (ISO 639-1 Code: hi)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Hindi,

    /// Croatian (ISO 639-1 Code: hr)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Croatian,

    /// Japanese (ISO 639-1 Code: ja)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Japanese,

    /// Korean (ISO 639-1 Code: ko)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Korean,

    /// Kurdish (ISO 639-1 Code: ku)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Kurdish,

    /// Latin (ISO 639-1 Code: la)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Latin,

    /// Latvian (ISO 639-1 Code: lv)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Latvian,

    /// Lithuanian (ISO 639-1 Code: lt)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Lithuanian,

    /// Marathi (ISO 639-1 Code: mr)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Marathi,

    /// Malay (ISO 639-1 Code: ms)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Malay,

    /// Polish (ISO 639-1 Code: pl)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Polish,

    /// Slovak (ISO 639-1 Code: sk)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Slovak,

    /// Somali (ISO 639-1 Code: so)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Somali,

    /// Sotho (ISO 639-1 Code: st)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Sotho,

    /// Swahili (ISO 639-1 Code: sw)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Swahili,

    /// Tagalog (ISO 639-1 Code: tl)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Tagalog,

    /// Thai (ISO 639-1 Code: th)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Thai,

    /// Ukrainian (ISO 639-1 Code: uk)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Ukrainian,

    /// Urdu (ISO 639-1 Code: ur)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Urdu,

    /// Vietnamese (ISO 639-1 Code: vi)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Vietnamese,

    /// Yoruba (ISO 639-1 Code: yo)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Yoruba,

    /// Zulu (ISO 639-1 Code: zu)
    #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
    Zulu,

    /// Afar (ISO 639-1 Code: aa)
    #[cfg(feature = "unimplemented")]
    Afar,

    /// Quenya (ISO 639-3 Code: qya)
    #[cfg(feature = "constructed")]
    Quenya,

    /// Sindarin (ISO 639-3 Code: sjn)
    #[cfg(feature = "constructed")]
    Sindarin,

    /// Klingon (ISO 639-3 Code: tlh)
    #[cfg(feature = "constructed")]
    Klingon,

    /// Dothraki (ISO 639-3 Code: N/A, so _dot_ is used here)
    #[cfg(feature = "constructed")]
    Dothraki,

    /// Dovahzul (ISO 639-3 Code: N/A, so _dov_ is used here)
    #[cfg(feature = "constructed")]
    Dovahzul,

    /// Navi (ISO 639-3 Code: N/A, so _nav_ is used here)
    #[cfg(feature = "constructed")]
    Navi,

    /// High Valyrian (ISO 639-3 Code: N/A, so _val_ is used here)
    #[cfg(feature = "constructed")]
    HighValyrian,
}
307
308impl LANGUAGE {
309    /// Return the lookup language code for this variant.
310    #[must_use]
311    #[allow(clippy::too_many_lines)]
312    pub const fn as_str(&self) -> &'static str {
313        match self {
314            #[cfg(feature = "nltk")]
315            LANGUAGE::Albanian => "sq",
316            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
317            LANGUAGE::Arabic => "ar",
318            #[cfg(feature = "nltk")]
319            LANGUAGE::Azerbaijani => "az",
320            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
321            LANGUAGE::Danish => "da",
322            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
323            LANGUAGE::Dutch => "nl",
324            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
325            LANGUAGE::English => "en",
326            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
327            LANGUAGE::Finnish => "fi",
328            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
329            LANGUAGE::French => "fr",
330            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
331            LANGUAGE::German => "de",
332            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
333            LANGUAGE::Greek => "el",
334            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
335            LANGUAGE::Hungarian => "hu",
336            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
337            LANGUAGE::Indonesian => "id",
338            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
339            LANGUAGE::Italian => "it",
340            #[cfg(feature = "nltk")]
341            LANGUAGE::Kazakh => "kk",
342            #[cfg(feature = "nltk")]
343            LANGUAGE::Nepali => "ne",
344            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
345            LANGUAGE::Norwegian => "no",
346            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
347            LANGUAGE::Portuguese => "pt",
348            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
349            LANGUAGE::Romanian => "ro",
350            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
351            LANGUAGE::Russian => "ru",
352            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
353            LANGUAGE::Slovenian => "sl",
354            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
355            LANGUAGE::Spanish => "es",
356            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
357            LANGUAGE::Swedish => "sv",
358            #[cfg(feature = "nltk")]
359            LANGUAGE::Tajik => "tg",
360            #[cfg(feature = "nltk")]
361            LANGUAGE::Tamil => "ta",
362            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
363            LANGUAGE::Turkish => "tr",
364            #[cfg(feature = "nltk")]
365            LANGUAGE::Uzbek => "uz",
366            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
367            LANGUAGE::Afrikaans => "af",
368            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
369            LANGUAGE::Armenian => "hy",
370            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
371            LANGUAGE::Basque => "eu",
372            #[cfg(feature = "nltk")]
373            LANGUAGE::Belarusian => "be",
374            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
375            LANGUAGE::Bengali => "bn",
376            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
377            LANGUAGE::Breton => "br",
378            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
379            LANGUAGE::Bulgarian => "bg",
380            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
381            LANGUAGE::Catalan => "ca",
382            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
383            LANGUAGE::Czech => "cs",
384            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
385            LANGUAGE::Chinese => "zh",
386            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
387            LANGUAGE::Esperanto => "eo",
388            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
389            LANGUAGE::Estonian => "et",
390            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
391            LANGUAGE::Persian => "fa",
392            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
393            LANGUAGE::Irish => "ga",
394            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
395            LANGUAGE::Galician => "gl",
396            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
397            LANGUAGE::Gujarati => "gu",
398            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
399            LANGUAGE::Hausa => "ha",
400            #[cfg(all(any(feature = "nltk", feature = "iso"), not(feature = "constructed")))]
401            LANGUAGE::Hebrew => "he",
402            #[cfg(feature = "nltk")]
403            LANGUAGE::Hinglish => "hinglish",
404            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
405            LANGUAGE::Hindi => "hi",
406            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
407            LANGUAGE::Croatian => "hr",
408            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
409            LANGUAGE::Japanese => "ja",
410            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
411            LANGUAGE::Korean => "ko",
412            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
413            LANGUAGE::Kurdish => "ku",
414            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
415            LANGUAGE::Latin => "la",
416            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
417            LANGUAGE::Latvian => "lv",
418            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
419            LANGUAGE::Lithuanian => "lt",
420            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
421            LANGUAGE::Marathi => "mr",
422            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
423            LANGUAGE::Malay => "ms",
424            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
425            LANGUAGE::Polish => "pl",
426            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
427            LANGUAGE::Slovak => "sk",
428            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
429            LANGUAGE::Somali => "so",
430            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
431            LANGUAGE::Sotho => "st",
432            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
433            LANGUAGE::Swahili => "sw",
434            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
435            LANGUAGE::Tagalog => "tl",
436            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
437            LANGUAGE::Thai => "th",
438            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
439            LANGUAGE::Ukrainian => "uk",
440            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
441            LANGUAGE::Urdu => "ur",
442            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
443            LANGUAGE::Vietnamese => "vi",
444            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
445            LANGUAGE::Yoruba => "yo",
446            #[cfg(all(feature = "iso", not(feature = "nltk"), not(feature = "constructed")))]
447            LANGUAGE::Zulu => "zu",
448            #[cfg(feature = "unimplemented")]
449            LANGUAGE::Afar => "aa",
450            #[cfg(feature = "constructed")]
451            LANGUAGE::Quenya => "qya",
452            #[cfg(feature = "constructed")]
453            LANGUAGE::Sindarin => "sjn",
454            #[cfg(feature = "constructed")]
455            LANGUAGE::Klingon => "tlh",
456            #[cfg(feature = "constructed")]
457            LANGUAGE::Dothraki => "dot",
458            #[cfg(feature = "constructed")]
459            LANGUAGE::Dovahzul => "dov",
460            #[cfg(feature = "constructed")]
461            LANGUAGE::Navi => "nav",
462            #[cfg(feature = "constructed")]
463            LANGUAGE::HighValyrian => "val",
464        }
465    }
466}
467
468impl From<LANGUAGE> for String {
469    fn from(value: LANGUAGE) -> Self {
470        value.as_str().to_owned()
471    }
472}
473
474impl AsRef<str> for LANGUAGE {
475    fn as_ref(&self) -> &str {
476        self.as_str()
477    }
478}
479
480impl std::fmt::Display for LANGUAGE {
481    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
482        f.write_str(self.as_ref())
483    }
484}