typst_library/text/
lang.rs

1use std::str::FromStr;
2
3use ecow::{EcoString, eco_format};
4use rustc_hash::FxHashMap;
5
6use crate::diag::Hint;
7use crate::foundations::{StyleChain, cast};
8use crate::layout::Dir;
9use crate::text::TextElem;
10
/// Expands a language identifier literal into an `(identifier, contents)`
/// tuple, where the contents are embedded at compile time from
/// `../../translations/<identifier>.txt` via `include_str!`.
macro_rules! translation {
    ($lang:literal) => {
        ($lang, include_str!(concat!("../../translations/", $lang, ".txt")))
    };
}
16
/// Translation tables embedded at compile time, one entry per translation
/// file.
///
/// Each entry pairs a locale identifier (a language code, optionally followed
/// by `-` and a region code, e.g. `"pt-PT"`) with the text of the matching
/// `.txt` file. The tests in this file check that the entries are sorted by
/// identifier and that every file in the translations directory is listed.
const TRANSLATIONS: &[(&str, &str)] = &[
    translation!("af"),
    translation!("alt"),
    translation!("am"),
    translation!("ar"),
    translation!("as"),
    translation!("ast"),
    translation!("az"),
    translation!("be"),
    translation!("bg"),
    translation!("bn"),
    translation!("bo"),
    translation!("br"),
    translation!("bs"),
    translation!("bua"),
    translation!("ca"),
    translation!("ckb"),
    translation!("cs"),
    translation!("cu"),
    translation!("cy"),
    translation!("da"),
    translation!("de"),
    translation!("dsb"),
    translation!("el"),
    translation!("en"),
    translation!("eo"),
    translation!("es"),
    translation!("et"),
    translation!("eu"),
    translation!("fa"),
    translation!("fi"),
    translation!("fil"),
    translation!("fr"),
    translation!("fr-CA"),
    translation!("fur"),
    translation!("ga"),
    translation!("gd"),
    translation!("gl"),
    translation!("grc"),
    translation!("gu"),
    translation!("ha"),
    translation!("he"),
    translation!("hi"),
    translation!("hr"),
    translation!("hsb"),
    translation!("hu"),
    translation!("hy"),
    translation!("ia"),
    translation!("id"),
    translation!("is"),
    translation!("isv"),
    translation!("it"),
    translation!("ja"),
    translation!("ka"),
    translation!("km"),
    translation!("kmr"),
    translation!("kn"),
    translation!("ko"),
    translation!("ku"),
    translation!("la"),
    translation!("lb"),
    translation!("lo"),
    translation!("lt"),
    translation!("lv"),
    translation!("mk"),
    translation!("ml"),
    translation!("mr"),
    translation!("ms"),
    translation!("nb"),
    translation!("nl"),
    translation!("nn"),
    translation!("no"),
    translation!("oc"),
    translation!("or"),
    translation!("pa"),
    translation!("pl"),
    translation!("pms"),
    translation!("pt"),
    translation!("pt-PT"),
    translation!("rm"),
    translation!("ro"),
    translation!("ru"),
    translation!("se"),
    translation!("si"),
    translation!("sk"),
    translation!("sl"),
    translation!("sq"),
    translation!("sr"),
    translation!("sv"),
    translation!("ta"),
    translation!("te"),
    translation!("th"),
    translation!("tk"),
    translation!("tl"),
    translation!("tr"),
    translation!("ug"),
    translation!("uk"),
    translation!("ur"),
    translation!("vi"),
    translation!("zh"),
    translation!("zh-TW"),
];
119
/// A locale consisting of a language and an optional region.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Locale {
    /// The language of the locale.
    pub lang: Lang,
    /// The region of the locale, if one is specified.
    pub region: Option<Region>,
}
126
127impl Default for Locale {
128    fn default() -> Self {
129        Self::DEFAULT
130    }
131}
132
133impl Locale {
134    pub const DEFAULT: Self = Self::new(Lang::ENGLISH, None);
135
136    pub const fn new(lang: Lang, region: Option<Region>) -> Self {
137        Locale { lang, region }
138    }
139
140    pub fn get_in(styles: StyleChain) -> Self {
141        Locale::new(styles.get(TextElem::lang), styles.get(TextElem::region))
142    }
143
144    pub fn rfc_3066(self) -> EcoString {
145        let mut buf = EcoString::from(self.lang.as_str());
146        if let Some(region) = self.region {
147            buf.push('-');
148            buf.push_str(region.as_str());
149        }
150        buf
151    }
152}
153
/// An identifier for a natural language.
///
/// The first field holds the bytes of the language code, padded with trailing
/// ASCII spaces to three bytes. The second field is the number of leading
/// bytes that belong to the code (see `as_str` and the `FromStr`
/// implementation).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Lang([u8; 3], u8);
157
impl Lang {
    // Predefined languages. Two-letter codes are stored with one trailing
    // space of padding and a length of 2; three-letter codes use all three
    // bytes and a length of 3.
    pub const ABKHAZIAN: Self = Self(*b"ab ", 2);
    pub const AFAR: Self = Self(*b"aa ", 2);
    pub const AFRIKAANS: Self = Self(*b"af ", 2);
    pub const AGHEM: Self = Self(*b"agq", 3);
    pub const AKAN: Self = Self(*b"ak ", 2);
    pub const AKKADIAN: Self = Self(*b"akk", 3);
    pub const ALBANIAN: Self = Self(*b"sq ", 2);
    pub const ALGERIAN_ARABIC: Self = Self(*b"arq", 3);
    pub const AMHARIC: Self = Self(*b"am ", 2);
    pub const ANCIENT_EGYPTIAN: Self = Self(*b"egy", 3);
    pub const ANCIENT_GREEK: Self = Self(*b"grc", 3);
    pub const ANCIENT_HEBREW: Self = Self(*b"hbo", 3);
    pub const ARABIC: Self = Self(*b"ar ", 2);
    pub const ARAMAIC: Self = Self(*b"arc", 3);
    pub const ARMENIAN: Self = Self(*b"hy ", 2);
    pub const ASSAMESE: Self = Self(*b"as ", 2);
    pub const ASTURIAN: Self = Self(*b"ast", 3);
    pub const ASU: Self = Self(*b"asa", 3);
    pub const ATSAM: Self = Self(*b"cch", 3);
    pub const AVESTAN: Self = Self(*b"ae ", 2);
    pub const AWADHI: Self = Self(*b"awa", 3);
    pub const AYMARA: Self = Self(*b"ay ", 2);
    pub const AZERBAIJANI: Self = Self(*b"az ", 2);
    pub const BAFIA: Self = Self(*b"ksf", 3);
    pub const BALINESE: Self = Self(*b"ban", 3);
    pub const BALUCHI: Self = Self(*b"bal", 3);
    pub const BAMBARA: Self = Self(*b"bm ", 2);
    pub const BANGLA: Self = Self(*b"bn ", 2);
    pub const BASAA: Self = Self(*b"bas", 3);
    pub const BASHKIR: Self = Self(*b"ba ", 2);
    pub const BASQUE: Self = Self(*b"eu ", 2);
    pub const BATAK_TOBA: Self = Self(*b"bbc", 3);
    pub const BAVARIAN: Self = Self(*b"bar", 3);
    pub const BELARUSIAN: Self = Self(*b"be ", 2);
    pub const BEMBA: Self = Self(*b"bem", 3);
    pub const BENA: Self = Self(*b"bez", 3);
    pub const BETAWI: Self = Self(*b"bew", 3);
    pub const BHOJPURI: Self = Self(*b"bho", 3);
    pub const BLIN: Self = Self(*b"byn", 3);
    pub const BODO: Self = Self(*b"brx", 3);
    pub const BOSNIAN: Self = Self(*b"bs ", 2);
    pub const BRETON: Self = Self(*b"br ", 2);
    pub const BULGARIAN: Self = Self(*b"bg ", 2);
    pub const BURIAT: Self = Self(*b"bua", 3);
    pub const BURMESE: Self = Self(*b"my ", 2);
    pub const CANTONESE: Self = Self(*b"yue", 3);
    pub const CARIAN: Self = Self(*b"xcr", 3);
    pub const CATALAN: Self = Self(*b"ca ", 2);
    pub const CEBUANO: Self = Self(*b"ceb", 3);
    pub const CENTRAL_ATLAS_TAMAZIGHT: Self = Self(*b"tzm", 3);
    pub const CENTRAL_KURDISH: Self = Self(*b"ckb", 3);
    pub const CHAKMA: Self = Self(*b"ccp", 3);
    pub const CHECHEN: Self = Self(*b"ce ", 2);
    pub const CHEROKEE: Self = Self(*b"chr", 3);
    pub const CHIGA: Self = Self(*b"cgg", 3);
    pub const CHINESE: Self = Self(*b"zh ", 2);
    pub const CHURCH_SLAVIC: Self = Self(*b"cu ", 2);
    pub const CHUVASH: Self = Self(*b"cv ", 2);
    pub const CLASSICAL_MANDAIC: Self = Self(*b"myz", 3);
    pub const COLOGNIAN: Self = Self(*b"ksh", 3);
    pub const COPTIC: Self = Self(*b"cop", 3);
    pub const CORNISH: Self = Self(*b"kw ", 2);
    pub const CORSICAN: Self = Self(*b"co ", 2);
    pub const CROATIAN: Self = Self(*b"hr ", 2);
    pub const CZECH: Self = Self(*b"cs ", 2);
    pub const DANISH: Self = Self(*b"da ", 2);
    pub const DIVEHI: Self = Self(*b"dv ", 2);
    pub const DOGRI: Self = Self(*b"doi", 3);
    pub const DUALA: Self = Self(*b"dua", 3);
    pub const DUTCH: Self = Self(*b"nl ", 2);
    pub const DZONGKHA: Self = Self(*b"dz ", 2);
    pub const EGYPTIAN_ARABIC: Self = Self(*b"arz", 3);
    pub const EMBU: Self = Self(*b"ebu", 3);
    pub const ENGLISH: Self = Self(*b"en ", 2);
    pub const ERZYA: Self = Self(*b"myv", 3);
    pub const ESPERANTO: Self = Self(*b"eo ", 2);
    pub const ESTONIAN: Self = Self(*b"et ", 2);
    pub const ETRUSCAN: Self = Self(*b"ett", 3);
    pub const EWE: Self = Self(*b"ee ", 2);
    pub const EWONDO: Self = Self(*b"ewo", 3);
    pub const FAROESE: Self = Self(*b"fo ", 2);
    pub const FILIPINO: Self = Self(*b"fil", 3);
    pub const FINNISH: Self = Self(*b"fi ", 2);
    pub const FRENCH: Self = Self(*b"fr ", 2);
    pub const FRIULIAN: Self = Self(*b"fur", 3);
    pub const FULAH: Self = Self(*b"ff ", 2);
    pub const GA: Self = Self(*b"gaa", 3);
    pub const GALICIAN: Self = Self(*b"gl ", 2);
    pub const GANDA: Self = Self(*b"lg ", 2);
    pub const GEEZ: Self = Self(*b"gez", 3);
    pub const GEORGIAN: Self = Self(*b"ka ", 2);
    pub const GERMAN: Self = Self(*b"de ", 2);
    pub const GOTHIC: Self = Self(*b"got", 3);
    pub const GREEK: Self = Self(*b"el ", 2);
    pub const GUARANI: Self = Self(*b"gn ", 2);
    pub const GUJARATI: Self = Self(*b"gu ", 2);
    pub const GUSII: Self = Self(*b"guz", 3);
    pub const HARYANVI: Self = Self(*b"bgc", 3);
    pub const HAUSA: Self = Self(*b"ha ", 2);
    pub const HAWAIIAN: Self = Self(*b"haw", 3);
    pub const HEBREW: Self = Self(*b"he ", 2);
    pub const HINDI: Self = Self(*b"hi ", 2);
    pub const HMONG_NJUA: Self = Self(*b"hnj", 3);
    pub const HUNGARIAN: Self = Self(*b"hu ", 2);
    pub const ICELANDIC: Self = Self(*b"is ", 2);
    pub const IGBO: Self = Self(*b"ig ", 2);
    pub const INARI_SAMI: Self = Self(*b"smn", 3);
    pub const INDONESIAN: Self = Self(*b"id ", 2);
    pub const INGUSH: Self = Self(*b"inh", 3);
    pub const INTERLINGUA: Self = Self(*b"ia ", 2);
    pub const INTERSLAVIC: Self = Self(*b"isv", 3);
    pub const INUKTITUT: Self = Self(*b"iu ", 2);
    pub const IRISH: Self = Self(*b"ga ", 2);
    pub const ITALIAN: Self = Self(*b"it ", 2);
    pub const JAPANESE: Self = Self(*b"ja ", 2);
    pub const JAVANESE: Self = Self(*b"jv ", 2);
    pub const JJU: Self = Self(*b"kaj", 3);
    pub const JOLA_FONYI: Self = Self(*b"dyo", 3);
    pub const KABUVERDIANU: Self = Self(*b"kea", 3);
    pub const KABYLE: Self = Self(*b"kab", 3);
    pub const KAINGANG: Self = Self(*b"kgp", 3);
    pub const KAKO: Self = Self(*b"kkj", 3);
    pub const KALAALLISUT: Self = Self(*b"kl ", 2);
    pub const KALENJIN: Self = Self(*b"kln", 3);
    pub const KAMBA: Self = Self(*b"kam", 3);
    pub const KANGRI: Self = Self(*b"xnr", 3);
    pub const KANNADA: Self = Self(*b"kn ", 2);
    pub const KASHMIRI: Self = Self(*b"ks ", 2);
    pub const KAZAKH: Self = Self(*b"kk ", 2);
    pub const KHMER: Self = Self(*b"km ", 2);
    pub const KIKUYU: Self = Self(*b"ki ", 2);
    pub const KINYARWANDA: Self = Self(*b"rw ", 2);
    pub const KOMI: Self = Self(*b"kv ", 2);
    pub const KONKANI: Self = Self(*b"kok", 3);
    pub const KOREAN: Self = Self(*b"ko ", 2);
    pub const KOYRABORO_SENNI: Self = Self(*b"ses", 3);
    pub const KOYRA_CHIINI: Self = Self(*b"khq", 3);
    pub const KURDISH: Self = Self(*b"ku ", 2);
    pub const KWASIO: Self = Self(*b"nmg", 3);
    pub const KYRGYZ: Self = Self(*b"ky ", 2);
    pub const LADINO: Self = Self(*b"lad", 3);
    pub const LAKOTA: Self = Self(*b"lkt", 3);
    pub const LANGI: Self = Self(*b"lag", 3);
    pub const LAO: Self = Self(*b"lo ", 2);
    pub const LATIN: Self = Self(*b"la ", 2);
    pub const LATVIAN: Self = Self(*b"lv ", 2);
    pub const LEPCHA: Self = Self(*b"lep", 3);
    pub const LIGURIAN: Self = Self(*b"lij", 3);
    pub const LIMBU: Self = Self(*b"lif", 3);
    pub const LINEAR_A: Self = Self(*b"lab", 3);
    pub const LINGALA: Self = Self(*b"ln ", 2);
    pub const LITHUANIAN: Self = Self(*b"lt ", 2);
    pub const LOMBARD: Self = Self(*b"lmo", 3);
    pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3);
    pub const LOW_GERMAN: Self = Self(*b"nds", 3);
    pub const LUBA_KATANGA: Self = Self(*b"lu ", 2);
    pub const LUO: Self = Self(*b"luo", 3);
    pub const LUXEMBOURGISH: Self = Self(*b"lb ", 2);
    pub const LUYIA: Self = Self(*b"luy", 3);
    pub const LYCIAN: Self = Self(*b"xlc", 3);
    pub const LYDIAN: Self = Self(*b"xld", 3);
    pub const LU: Self = Self(*b"khb", 3);
    pub const MACEDONIAN: Self = Self(*b"mk ", 2);
    pub const MACHAME: Self = Self(*b"jmc", 3);
    pub const MAITHILI: Self = Self(*b"mai", 3);
    pub const MAKASAR: Self = Self(*b"mak", 3);
    pub const MAKHUWA_MEETTO: Self = Self(*b"mgh", 3);
    pub const MAKHUWA: Self = Self(*b"vmw", 3);
    pub const MAKONDE: Self = Self(*b"kde", 3);
    pub const MALAGASY: Self = Self(*b"mg ", 2);
    pub const MALAY: Self = Self(*b"ms ", 2);
    pub const MALAYALAM: Self = Self(*b"ml ", 2);
    pub const MALTESE: Self = Self(*b"mt ", 2);
    pub const MANIPURI: Self = Self(*b"mni", 3);
    pub const MANX: Self = Self(*b"gv ", 2);
    pub const MARATHI: Self = Self(*b"mr ", 2);
    pub const MASAI: Self = Self(*b"mas", 3);
    pub const MAZANDERANI: Self = Self(*b"mzn", 3);
    pub const MERU: Self = Self(*b"mer", 3);
    pub const METAʼ: Self = Self(*b"mgo", 3);
    pub const MONGOLIAN: Self = Self(*b"mn ", 2);
    pub const MORISYEN: Self = Self(*b"mfe", 3);
    pub const MUNDANG: Self = Self(*b"mua", 3);
    pub const MUSCOGEE: Self = Self(*b"mus", 3);
    pub const MAORI: Self = Self(*b"mi ", 2);
    pub const NAMA: Self = Self(*b"naq", 3);
    pub const NAVAJO: Self = Self(*b"nv ", 2);
    pub const NEPALI: Self = Self(*b"ne ", 2);
    pub const NEWARI: Self = Self(*b"new", 3);
    pub const NGIEMBOON: Self = Self(*b"nnh", 3);
    pub const NGOMBA: Self = Self(*b"jgo", 3);
    pub const NHEENGATU: Self = Self(*b"yrl", 3);
    pub const NIGERIAN_PIDGIN: Self = Self(*b"pcm", 3);
    pub const NORTHERN_FRISIAN: Self = Self(*b"frr", 3);
    pub const NORTHERN_KURDISH: Self = Self(*b"kmr", 3);
    pub const NORTHERN_LURI: Self = Self(*b"lrc", 3);
    pub const NORTHERN_SAMI: Self = Self(*b"se ", 2);
    pub const NORTHERN_SOTHO: Self = Self(*b"nso", 3);
    pub const NORTH_NDEBELE: Self = Self(*b"nd ", 2);
    pub const NORWEGIAN: Self = Self(*b"no ", 2);
    pub const NORWEGIAN_BOKMAL: Self = Self(*b"nb ", 2);
    pub const NORWEGIAN_NYNORSK: Self = Self(*b"nn ", 2);
    pub const NUER: Self = Self(*b"nus", 3);
    pub const NYANJA: Self = Self(*b"ny ", 2);
    pub const NYANKOLE: Self = Self(*b"nyn", 3);
    pub const NKO: Self = Self(*b"nqo", 3);
    pub const OCCITAN: Self = Self(*b"oc ", 2);
    pub const ODIA: Self = Self(*b"or ", 2);
    pub const OLD_IRISH: Self = Self(*b"sga", 3);
    pub const OLD_NORSE: Self = Self(*b"non", 3);
    pub const OLD_PERSIAN: Self = Self(*b"peo", 3);
    pub const OLD_UIGHUR: Self = Self(*b"oui", 3);
    pub const OROMO: Self = Self(*b"om ", 2);
    pub const OSAGE: Self = Self(*b"osa", 3);
    pub const OSSETIC: Self = Self(*b"os ", 2);
    pub const PAPIAMENTO: Self = Self(*b"pap", 3);
    pub const PASHTO: Self = Self(*b"ps ", 2);
    pub const PERSIAN: Self = Self(*b"fa ", 2);
    pub const PHOENICIAN: Self = Self(*b"phn", 3);
    pub const PIEDMONTESE: Self = Self(*b"pms", 3);
    pub const POLISH: Self = Self(*b"pl ", 2);
    pub const PORTUGUESE: Self = Self(*b"pt ", 2);
    pub const PRUSSIAN: Self = Self(*b"prg", 3);
    pub const PUNJABI: Self = Self(*b"pa ", 2);
    pub const QUECHUA: Self = Self(*b"qu ", 2);
    pub const RAJASTHANI: Self = Self(*b"raj", 3);
    pub const ROMANIAN: Self = Self(*b"ro ", 2);
    pub const ROMANSH: Self = Self(*b"rm ", 2);
    pub const ROMBO: Self = Self(*b"rof", 3);
    pub const RUNDI: Self = Self(*b"rn ", 2);
    pub const RUSSIAN: Self = Self(*b"ru ", 2);
    pub const RWA: Self = Self(*b"rwk", 3);
    pub const SABAEAN: Self = Self(*b"xsa", 3);
    pub const SAHO: Self = Self(*b"ssy", 3);
    pub const SAKHA: Self = Self(*b"sah", 3);
    pub const SAMARITAN: Self = Self(*b"smp", 3);
    pub const SAMBURU: Self = Self(*b"saq", 3);
    pub const SANGO: Self = Self(*b"sg ", 2);
    pub const SANGU: Self = Self(*b"sbp", 3);
    pub const SANSKRIT: Self = Self(*b"sa ", 2);
    pub const SANTALI: Self = Self(*b"sat", 3);
    pub const SARAIKI: Self = Self(*b"skr", 3);
    pub const SARDINIAN: Self = Self(*b"sc ", 2);
    pub const SCOTTISH_GAELIC: Self = Self(*b"gd ", 2);
    pub const SENA: Self = Self(*b"seh", 3);
    pub const SERBIAN: Self = Self(*b"sr ", 2);
    pub const SHAMBALA: Self = Self(*b"ksb", 3);
    pub const SHONA: Self = Self(*b"sn ", 2);
    pub const SICHUAN_YI: Self = Self(*b"ii ", 2);
    pub const SICILIAN: Self = Self(*b"scn", 3);
    pub const SILESIAN: Self = Self(*b"szl", 3);
    pub const SINDHI: Self = Self(*b"sd ", 2);
    pub const SINHALA: Self = Self(*b"si ", 2);
    pub const SINTE_ROMANI: Self = Self(*b"rmo", 3);
    pub const SLOVAK: Self = Self(*b"sk ", 2);
    pub const SLOVENIAN: Self = Self(*b"sl ", 2);
    pub const SOGA: Self = Self(*b"xog", 3);
    pub const SOMALI: Self = Self(*b"so ", 2);
    pub const SOUTHERN_ALTAI: Self = Self(*b"alt", 3);
    pub const SOUTHERN_SOTHO: Self = Self(*b"st ", 2);
    pub const SOUTH_NDEBELE: Self = Self(*b"nr ", 2);
    pub const SPANISH: Self = Self(*b"es ", 2);
    pub const STANDARD_MOROCCAN_TAMAZIGHT: Self = Self(*b"zgh", 3);
    pub const SUNDANESE: Self = Self(*b"su ", 2);
    pub const SWAHILI: Self = Self(*b"sw ", 2);
    pub const SWATI: Self = Self(*b"ss ", 2);
    pub const SWEDISH: Self = Self(*b"sv ", 2);
    pub const SWISS_GERMAN: Self = Self(*b"gsw", 3);
    pub const SYRIAC: Self = Self(*b"syr", 3);
    pub const TACHELHIT: Self = Self(*b"shi", 3);
    pub const TAITA: Self = Self(*b"dav", 3);
    pub const TAI_NUA: Self = Self(*b"tdd", 3);
    pub const TAJIK: Self = Self(*b"tg ", 2);
    pub const TAMIL: Self = Self(*b"ta ", 2);
    pub const TANGUT: Self = Self(*b"txg", 3);
    pub const TAROKO: Self = Self(*b"trv", 3);
    pub const TASAWAQ: Self = Self(*b"twq", 3);
    pub const TATAR: Self = Self(*b"tt ", 2);
    pub const TELUGU: Self = Self(*b"te ", 2);
    pub const TESO: Self = Self(*b"teo", 3);
    pub const THAI: Self = Self(*b"th ", 2);
    pub const TIBETAN: Self = Self(*b"bo ", 2);
    pub const TIGRE: Self = Self(*b"tig", 3);
    pub const TIGRINYA: Self = Self(*b"ti ", 2);
    pub const TOK_PISIN: Self = Self(*b"tpi", 3);
    pub const TONGAN: Self = Self(*b"to ", 2);
    pub const TSONGA: Self = Self(*b"ts ", 2);
    pub const TSWANA: Self = Self(*b"tn ", 2);
    pub const TURKISH: Self = Self(*b"tr ", 2);
    pub const TURKMEN: Self = Self(*b"tk ", 2);
    pub const TYAP: Self = Self(*b"kcg", 3);
    pub const UGARITIC: Self = Self(*b"uga", 3);
    pub const UKRAINIAN: Self = Self(*b"uk ", 2);
    pub const UNKNOWN_LANGUAGE: Self = Self(*b"und", 3);
    pub const UPPER_SORBIAN: Self = Self(*b"hsb", 3);
    pub const URDU: Self = Self(*b"ur ", 2);
    pub const UYGHUR: Self = Self(*b"ug ", 2);
    pub const UZBEK: Self = Self(*b"uz ", 2);
    pub const VAI: Self = Self(*b"vai", 3);
    pub const VENDA: Self = Self(*b"ve ", 2);
    pub const VENETIAN: Self = Self(*b"vec", 3);
    pub const VIETNAMESE: Self = Self(*b"vi ", 2);
    pub const VOLAPUK: Self = Self(*b"vo ", 2);
    pub const VUNJO: Self = Self(*b"vun", 3);
    pub const WALSER: Self = Self(*b"wae", 3);
    pub const WARAY: Self = Self(*b"war", 3);
    pub const WELSH: Self = Self(*b"cy ", 2);
    pub const WESTERN_FRISIAN: Self = Self(*b"fy ", 2);
    pub const WOLAYTTA: Self = Self(*b"wal", 3);
    pub const WOLOF: Self = Self(*b"wo ", 2);
    pub const XHOSA: Self = Self(*b"xh ", 2);
    pub const YANGBEN: Self = Self(*b"yav", 3);
    pub const YIDDISH: Self = Self(*b"yi ", 2);
    pub const YORUBA: Self = Self(*b"yo ", 2);
    pub const ZARMA: Self = Self(*b"dje", 3);
    pub const ZHUANG: Self = Self(*b"za ", 2);
    pub const ZULU: Self = Self(*b"zu ", 2);

    /// Return the language code as an all lowercase string slice.
    ///
    /// Only the first `self.1` bytes belong to the code; the padding after
    /// them is not included. If those bytes are not valid UTF-8, an empty
    /// string is returned instead.
    pub fn as_str(&self) -> &str {
        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
    }

    /// The default direction for the language.
    ///
    /// The codes listed in the match below are right-to-left; every other
    /// language is treated as left-to-right.
    pub fn dir(self) -> Dir {
        match self.as_str() {
            "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
            | "yi" => Dir::RTL,
            _ => Dir::LTR,
        }
    }
}
491
492impl FromStr for Lang {
493    type Err = &'static str;
494
495    /// Construct a language from a two- or three-byte ISO 639-1/2/3 code.
496    fn from_str(iso: &str) -> Result<Self, Self::Err> {
497        let len = iso.len();
498        if matches!(len, 2..=3) && iso.is_ascii() {
499            let mut bytes = [b' '; 3];
500            bytes[..len].copy_from_slice(iso.as_bytes());
501            bytes.make_ascii_lowercase();
502            Ok(Self(bytes, len as u8))
503        } else {
504            Err("expected two or three letter language code (ISO 639-1/2/3)")
505        }
506    }
507}
508
// Registers casts for `Lang` through the `cast!` macro: the `self` arm yields
// the language code string via `into_value()`, and the `EcoString` arm parses
// a string with `FromStr`.
cast! {
    Lang,
    self => self.as_str().into_value(),
    string: EcoString => {
        let result = Self::from_str(&string);
        // If parsing failed but the input splits at its first '-' into a
        // valid language code and a valid region code, return the error with
        // a hint that points to the separate `region` parameter.
        if result.is_err()
            && let Some((lang, region)) = string.split_once('-')
                && Lang::from_str(lang).is_ok() && Region::from_str(region).is_ok() {
                    return result
                        .hint(eco_format!(
                            "you should leave only \"{}\" in the `lang` parameter and specify \"{}\" in the `region` parameter",
                            lang, region,
                        ));
                }

        result?
    }
}
527
/// An identifier for a region somewhere in the world.
///
/// Holds the two bytes of the region code; the `FromStr` implementation
/// stores them uppercased.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Region([u8; 2]);
531
532impl Region {
533    /// Return the region code as an all uppercase string slice.
534    pub fn as_str(&self) -> &str {
535        std::str::from_utf8(&self.0).unwrap_or_default()
536    }
537}
538
539impl PartialEq<&str> for Region {
540    fn eq(&self, other: &&str) -> bool {
541        self.as_str() == *other
542    }
543}
544
545impl FromStr for Region {
546    type Err = &'static str;
547
548    /// Construct a region from its two-byte ISO 3166-1 alpha-2 code.
549    fn from_str(iso: &str) -> Result<Self, Self::Err> {
550        if iso.len() == 2 && iso.is_ascii() {
551            let mut bytes: [u8; 2] = iso.as_bytes().try_into().unwrap();
552            bytes.make_ascii_uppercase();
553            Ok(Self(bytes))
554        } else {
555            Err("expected two letter region code (ISO 3166-1 alpha-2)")
556        }
557    }
558}
559
// Registers casts for `Region` through the `cast!` macro: the `self` arm
// yields the region code string via `into_value()`, and the `EcoString` arm
// parses a string with `FromStr`, propagating the parse error with `?`.
cast! {
    Region,
    self => self.as_str().into_value(),
    string: EcoString => Self::from_str(&string)?,
}
565
/// An ISO 15924-type script identifier.
///
/// The first field holds the bytes of the script code, padded with trailing
/// ASCII spaces to four bytes. The second field is the number of leading
/// bytes that belong to the code (see `as_str` and the `FromStr`
/// implementation).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct WritingScript([u8; 4], u8);
569
570impl WritingScript {
571    /// Return the script as an all lowercase string slice.
572    pub fn as_str(&self) -> &str {
573        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
574    }
575
576    /// Return the description of the script as raw bytes.
577    pub fn as_bytes(&self) -> &[u8; 4] {
578        &self.0
579    }
580}
581
582impl FromStr for WritingScript {
583    type Err = &'static str;
584
585    /// Construct a region from its ISO 15924 code.
586    fn from_str(iso: &str) -> Result<Self, Self::Err> {
587        let len = iso.len();
588        if matches!(len, 3..=4) && iso.is_ascii() {
589            let mut bytes = [b' '; 4];
590            bytes[..len].copy_from_slice(iso.as_bytes());
591            bytes.make_ascii_lowercase();
592            Ok(Self(bytes, len as u8))
593        } else {
594            Err("expected three or four letter script code (ISO 15924 or 'math')")
595        }
596    }
597}
598
// Registers casts for `WritingScript` through the `cast!` macro: the `self`
// arm yields the script code string via `into_value()`, and the `EcoString`
// arm parses a string with `FromStr`, propagating the parse error with `?`.
cast! {
    WritingScript,
    self => self.as_str().into_value(),
    string: EcoString => Self::from_str(&string)?,
}
604
605/// The name with which an element is referenced.
606pub trait LocalName {
607    /// The key of an element in order to get its localized name.
608    const KEY: &'static str;
609
610    /// Get the name in the given language and (optionally) region.
611    fn local_name(lang: Lang, region: Option<Region>) -> &'static str {
612        localized_str(lang, region, Self::KEY)
613    }
614
615    /// Gets the local name from the style chain.
616    fn local_name_in(styles: StyleChain) -> &'static str
617    where
618        Self: Sized,
619    {
620        Self::local_name(styles.get(TextElem::lang), styles.get(TextElem::region))
621    }
622}
623
624/// Retrieves the localized string for a given language and region.
625/// Silently falls back to English if no fitting string exists for
626/// the given language + region. Panics if no fitting string exists
627/// in both given language + region and English.
628#[comemo::memoize]
629pub fn localized_str(lang: Lang, region: Option<Region>, key: &str) -> &'static str {
630    let lang_region_bundle = parse_language_bundle(lang, region).unwrap();
631    if let Some(str) = lang_region_bundle.get(key) {
632        return str;
633    }
634    let lang_bundle = parse_language_bundle(lang, None).unwrap();
635    if let Some(str) = lang_bundle.get(key) {
636        return str;
637    }
638    let english_bundle = parse_language_bundle(Lang::ENGLISH, None).unwrap();
639    english_bundle.get(key).unwrap()
640}
641
642/// Parses the translation file for a given language and region.
643/// Only returns an error if the language file is malformed.
644#[comemo::memoize]
645fn parse_language_bundle(
646    lang: Lang,
647    region: Option<Region>,
648) -> Result<FxHashMap<&'static str, &'static str>, &'static str> {
649    let language_tuple = TRANSLATIONS.iter().find(|it| it.0 == lang_str(lang, region));
650    let Some((_lang_name, language_file)) = language_tuple else {
651        return Ok(FxHashMap::default());
652    };
653
654    let mut bundle = FxHashMap::default();
655    let lines = language_file.trim().lines();
656    for line in lines {
657        if line.trim().starts_with('#') {
658            continue;
659        }
660        let (key, val) = line
661            .split_once('=')
662            .ok_or("malformed translation file: line without \"=\"")?;
663        let (key, val) = (key.trim(), val.trim());
664        if val.is_empty() {
665            return Err("malformed translation file: empty translation value");
666        }
667        let duplicate = bundle.insert(key.trim(), val.trim());
668        if duplicate.is_some() {
669            return Err("malformed translation file: duplicate key");
670        }
671    }
672    Ok(bundle)
673}
674
675/// Convert language + region to a string to be able to get a file name.
676fn lang_str(lang: Lang, region: Option<Region>) -> EcoString {
677    EcoString::from(lang.as_str())
678        + region.map_or_else(EcoString::new, |r| EcoString::from("-") + r.as_str())
679}
680
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use rustc_hash::FxHashSet;
    use typst_utils::option_eq;

    use super::*;

    /// Iterates over the `.txt` files in the `translations` directory.
    fn translation_files_iter() -> impl Iterator<Item = PathBuf> {
        let entries = std::fs::read_dir("translations").unwrap();
        entries.map(|entry| entry.unwrap().path()).filter(|path| {
            path.is_file() && path.extension().is_some_and(|ext| ext == "txt")
        })
    }

    /// An optional region compares against a string through `option_eq`.
    #[test]
    fn test_region_option_eq() {
        let region = Some(Region(*b"US"));
        assert!(option_eq(region, "US"));
        assert!(!option_eq(region, "AB"));
    }

    /// Every translation file on disk has an entry in `TRANSLATIONS`, and
    /// the number of files equals the number of entries.
    #[test]
    fn test_all_translations_included() {
        let defined_keys: FxHashSet<&str> =
            TRANSLATIONS.iter().map(|&(lang, _)| lang).collect();
        let mut checked = 0;
        for file in translation_files_iter() {
            let stem = file
                .file_stem()
                .expect("translation file should have basename")
                .to_str()
                .expect("translation file name should be utf-8 encoded");
            assert!(
                defined_keys.contains(stem),
                "translation from {:?} should be registered in TRANSLATIONS in {}",
                file.file_name().unwrap(),
                file!(),
            );
            checked += 1;
        }
        assert_eq!(TRANSLATIONS.len(), checked);
    }

    /// Translation files end with a linebreak and have no leading or
    /// trailing whitespace on any line.
    #[test]
    fn test_all_translation_files_formatted() {
        for file in translation_files_iter() {
            let content = std::fs::read_to_string(&file)
                .expect("translation file should be in utf-8 encoding");
            let filename = file.file_name().unwrap();
            assert!(
                content.ends_with('\n'),
                "translation file {filename:?} should end with linebreak",
            );
            for line in content.lines() {
                let trimmed = line.trim();
                assert_eq!(
                    trimmed, line,
                    "line {line:?} in {filename:?} should not have extra whitespaces"
                );
            }
        }
    }

    /// `TRANSLATIONS` is sorted by locale identifier.
    #[test]
    fn test_translations_sorted() {
        let sorted = TRANSLATIONS.is_sorted_by_key(|(lang, _)| lang);
        assert!(sorted, "TRANSLATIONS should be sorted");
    }
}