1use serde::Serialize;
2use std::fmt;
3
/// Languages known to this module.
///
/// Each variant maps to a short language code through
/// [`Language::as_lang_code`], which is also what the `Display` impl prints.
///
/// When serialized with serde, variant names are rewritten to kebab-case
/// (for these single-word names that means lowercase, e.g. `English` becomes
/// `"english"`).
///
/// The enum is `#[repr(usize)]` without explicit discriminants, so the
/// numeric value of a variant is its position in this list starting at 0.
/// Inserting or reordering variants therefore changes those values; append
/// new variants at the end if anything relies on the numeric representation.
#[derive(Clone, Debug, Serialize, Hash, Eq, PartialEq)]
#[serde(rename_all = "kebab-case")]
#[repr(usize)]
pub enum Language {
    English,
    Chinese,
    German,
    Spanish,
    Russian,
    Korean,
    French,
    Japanese,
    Portuguese,
    Turkish,
    Polish,
    Catalan,
    Dutch,
    Arabic,
    Swedish,
    Italian,
    Indonesian,
    Hindi,
    Finnish,
    Hebrew,
    Ukrainian,
    Greek,
    Malay,
    Czech,
    Romanian,
    Danish,
    Hungarian,
    Norwegian,
    Thai,
    Urdu,
    Croatian,
    Bulgarian,
    Lithuanian,
    Latin,
    Malayalam,
    Welsh,
    Slovak,
    Persian,
    Latvian,
    Bengali,
    Serbian,
    Azerbaijani,
    Slovenian,
    Estonian,
    Macedonian,
    Nepali,
    Mongolian,
    Bosnian,
    Kazakh,
    Albanian,
    Swahili,
    Galician,
    Marathi,
    Punjabi,
    Sinhala,
    Khmer,
    Afrikaans,
    Belarusian,
    Gujarati,
    Amharic,
    Yiddish,
    Lao,
    Uzbek,
    Faroese,
    Pashto,
    Maltese,
    Sanskrit,
    Luxembourgish,
    // Named `Myanmar` here; `as_lang_code` maps it to "my".
    Myanmar,
    Tibetan,
    Tagalog,
    Assamese,
    Tatar,
    Hausa,
    Javanese,
}
84
85impl Language {
86 pub fn as_lang_code(&self) -> &'static str {
87 match self {
88 Language::English => "en",
89 Language::Chinese => "zh",
90 Language::German => "de",
91 Language::Spanish => "es",
92 Language::Russian => "ru",
93 Language::Korean => "ko",
94 Language::French => "fr",
95 Language::Japanese => "ja",
96 Language::Portuguese => "pt",
97 Language::Turkish => "tr",
98 Language::Polish => "pl",
99 Language::Catalan => "ca",
100 Language::Dutch => "nl",
101 Language::Arabic => "ar",
102 Language::Swedish => "sv",
103 Language::Italian => "it",
104 Language::Indonesian => "id",
105 Language::Hindi => "hi",
106 Language::Finnish => "fi",
107 Language::Hebrew => "he",
108 Language::Ukrainian => "uk",
109 Language::Greek => "el",
110 Language::Malay => "ms",
111 Language::Czech => "cs",
112 Language::Romanian => "ro",
113 Language::Danish => "da",
114 Language::Hungarian => "hu",
115 Language::Norwegian => "no",
116 Language::Thai => "th",
117 Language::Urdu => "ur",
118 Language::Croatian => "hr",
119 Language::Bulgarian => "bg",
120 Language::Lithuanian => "lt",
121 Language::Latin => "la",
122 Language::Malayalam => "ml",
123 Language::Welsh => "cy",
124 Language::Slovak => "sk",
125 Language::Persian => "fa",
126 Language::Latvian => "lv",
127 Language::Bengali => "bn",
128 Language::Serbian => "sr",
129 Language::Azerbaijani => "az",
130 Language::Slovenian => "sl",
131 Language::Estonian => "et",
132 Language::Macedonian => "mk",
133 Language::Nepali => "ne",
134 Language::Mongolian => "mn",
135 Language::Bosnian => "bs",
136 Language::Kazakh => "kk",
137 Language::Albanian => "sq",
138 Language::Swahili => "sw",
139 Language::Galician => "gl",
140 Language::Marathi => "mr",
141 Language::Punjabi => "pa",
142 Language::Sinhala => "si",
143 Language::Khmer => "km",
144 Language::Afrikaans => "af",
145 Language::Belarusian => "be",
146 Language::Gujarati => "gu",
147 Language::Amharic => "am",
148 Language::Yiddish => "yi",
149 Language::Lao => "lo",
150 Language::Uzbek => "uz",
151 Language::Faroese => "fo",
152 Language::Pashto => "ps",
153 Language::Maltese => "mt",
154 Language::Sanskrit => "sa",
155 Language::Luxembourgish => "lb",
156 Language::Myanmar => "my",
157 Language::Tibetan => "bo",
158 Language::Tagalog => "tl",
159 Language::Assamese => "as",
160 Language::Tatar => "tt",
161 Language::Hausa => "ha",
162 Language::Javanese => "jw",
163 }
164 }
165}
166
167impl fmt::Display for Language {
168 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169 write!(f, "{}", self.as_lang_code())
170 }
171}
172
173impl PartialEq<&str> for Language {
174 fn eq(&self, other: &&str) -> bool {
175 self.to_string().as_str() == *other
176 }
177}
178
/// Lookup table of `(tesseract_code, language_name)` pairs.
///
/// The first element is the Tesseract language identifier (the stem of the
/// corresponding `*.traineddata` file, e.g. `"eng"` or `"chi_sim"`); the
/// second is the lowercase English name of the language.
///
/// Faroese and Luxembourgish use the Tesseract identifiers `"fao"` and
/// `"ltz"`, not the two-letter codes `"fo"` / `"lb"`, so every entry in this
/// table is a Tesseract model name.
///
/// NOTE(review): the table contains `"vietnamese"`, which has no matching
/// variant in the `Language` enum visible in this file — confirm whether
/// that is intentional.
pub const TESSERACT_LANGUAGES: [(&str, &str); 76] = [
    ("eng", "english"),
    ("chi_sim", "chinese"),
    ("deu", "german"),
    ("spa", "spanish"),
    ("rus", "russian"),
    ("kor", "korean"),
    ("fra", "french"),
    ("jpn", "japanese"),
    ("por", "portuguese"),
    ("tur", "turkish"),
    ("pol", "polish"),
    ("cat", "catalan"),
    ("nld", "dutch"),
    ("ara", "arabic"),
    ("swe", "swedish"),
    ("ita", "italian"),
    ("ind", "indonesian"),
    ("hin", "hindi"),
    ("fin", "finnish"),
    ("vie", "vietnamese"),
    ("heb", "hebrew"),
    ("ukr", "ukrainian"),
    ("ell", "greek"),
    ("msa", "malay"),
    ("ces", "czech"),
    ("ron", "romanian"),
    ("dan", "danish"),
    ("hun", "hungarian"),
    ("nor", "norwegian"),
    ("tha", "thai"),
    ("urd", "urdu"),
    ("hrv", "croatian"),
    ("bul", "bulgarian"),
    ("lit", "lithuanian"),
    ("lat", "latin"),
    ("mal", "malayalam"),
    ("cym", "welsh"),
    ("slk", "slovak"),
    ("fas", "persian"),
    ("lav", "latvian"),
    ("ben", "bengali"),
    ("srp", "serbian"),
    ("aze", "azerbaijani"),
    ("slv", "slovenian"),
    ("est", "estonian"),
    ("mkd", "macedonian"),
    ("nep", "nepali"),
    ("mon", "mongolian"),
    ("bos", "bosnian"),
    ("kaz", "kazakh"),
    ("sqi", "albanian"),
    ("swa", "swahili"),
    ("glg", "galician"),
    ("mar", "marathi"),
    ("pan", "punjabi"),
    ("sin", "sinhala"),
    ("khm", "khmer"),
    ("afr", "afrikaans"),
    ("bel", "belarusian"),
    ("guj", "gujarati"),
    ("amh", "amharic"),
    ("yid", "yiddish"),
    ("lao", "lao"),
    ("uzb", "uzbek"),
    // Tesseract ships Faroese as `fao.traineddata`.
    ("fao", "faroese"),
    ("pus", "pashto"),
    ("mlt", "maltese"),
    ("san", "sanskrit"),
    // Tesseract ships Luxembourgish as `ltz.traineddata`.
    ("ltz", "luxembourgish"),
    ("mya", "myanmar"),
    ("bod", "tibetan"),
    ("tgl", "tagalog"),
    ("asm", "assamese"),
    ("tat", "tatar"),
    ("hau", "hausa"),
    ("jav", "javanese"),
];