typst_library/text/
lang.rs

1use std::str::FromStr;
2
3use ecow::{EcoString, eco_format};
4use rustc_hash::FxHashMap;
5
6use crate::diag::Hint;
7use crate::foundations::{StyleChain, cast};
8use crate::layout::Dir;
9use crate::text::TextElem;
10
11macro_rules! translation {
12    ($lang:literal) => {
13        ($lang, include_str!(concat!("../../translations/", $lang, ".txt")))
14    };
15}
16
17const TRANSLATIONS: &[(&str, &str)] = &[
18    translation!("af"),
19    translation!("alt"),
20    translation!("am"),
21    translation!("ar"),
22    translation!("as"),
23    translation!("ast"),
24    translation!("az"),
25    translation!("be"),
26    translation!("bg"),
27    translation!("bn"),
28    translation!("bo"),
29    translation!("br"),
30    translation!("bs"),
31    translation!("bua"),
32    translation!("ca"),
33    translation!("ckb"),
34    translation!("cs"),
35    translation!("cu"),
36    translation!("cy"),
37    translation!("da"),
38    translation!("de"),
39    translation!("dsb"),
40    translation!("el"),
41    translation!("en"),
42    translation!("eo"),
43    translation!("es"),
44    translation!("et"),
45    translation!("eu"),
46    translation!("fa"),
47    translation!("fi"),
48    translation!("fil"),
49    translation!("fr"),
50    translation!("fur"),
51    translation!("ga"),
52    translation!("gd"),
53    translation!("gl"),
54    translation!("grc"),
55    translation!("gu"),
56    translation!("ha"),
57    translation!("he"),
58    translation!("hi"),
59    translation!("hr"),
60    translation!("hsb"),
61    translation!("hu"),
62    translation!("hy"),
63    translation!("ia"),
64    translation!("id"),
65    translation!("is"),
66    translation!("isv"),
67    translation!("it"),
68    translation!("ja"),
69    translation!("ka"),
70    translation!("km"),
71    translation!("kmr"),
72    translation!("kn"),
73    translation!("ko"),
74    translation!("ku"),
75    translation!("la"),
76    translation!("lb"),
77    translation!("lo"),
78    translation!("lt"),
79    translation!("lv"),
80    translation!("mk"),
81    translation!("ml"),
82    translation!("mr"),
83    translation!("ms"),
84    translation!("nb"),
85    translation!("nl"),
86    translation!("nn"),
87    translation!("no"),
88    translation!("oc"),
89    translation!("or"),
90    translation!("pa"),
91    translation!("pl"),
92    translation!("pms"),
93    translation!("pt"),
94    translation!("pt-PT"),
95    translation!("rm"),
96    translation!("ro"),
97    translation!("ru"),
98    translation!("se"),
99    translation!("si"),
100    translation!("sk"),
101    translation!("sl"),
102    translation!("sq"),
103    translation!("sr"),
104    translation!("sv"),
105    translation!("ta"),
106    translation!("te"),
107    translation!("th"),
108    translation!("tk"),
109    translation!("tl"),
110    translation!("tr"),
111    translation!("ug"),
112    translation!("uk"),
113    translation!("ur"),
114    translation!("vi"),
115    translation!("zh"),
116    translation!("zh-TW"),
117];
118
119/// A locale consisting of a language and an optional region.
120#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
121pub struct Locale {
122    pub lang: Lang,
123    pub region: Option<Region>,
124}
125
126impl Default for Locale {
127    fn default() -> Self {
128        Self::DEFAULT
129    }
130}
131
132impl Locale {
133    pub const DEFAULT: Self = Self::new(Lang::ENGLISH, None);
134
135    pub const fn new(lang: Lang, region: Option<Region>) -> Self {
136        Locale { lang, region }
137    }
138
139    pub fn get_in(styles: StyleChain) -> Self {
140        Locale::new(styles.get(TextElem::lang), styles.get(TextElem::region))
141    }
142
143    pub fn rfc_3066(self) -> EcoString {
144        let mut buf = EcoString::from(self.lang.as_str());
145        if let Some(region) = self.region {
146            buf.push('-');
147            buf.push_str(region.as_str());
148        }
149        buf
150    }
151}
152
153/// An identifier for a natural language.
154#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
155pub struct Lang([u8; 3], u8);
156
157impl Lang {
158    pub const ABKHAZIAN: Self = Self(*b"ab ", 2);
159    pub const AFAR: Self = Self(*b"aa ", 2);
160    pub const AFRIKAANS: Self = Self(*b"af ", 2);
161    pub const AGHEM: Self = Self(*b"agq", 3);
162    pub const AKAN: Self = Self(*b"ak ", 2);
163    pub const AKKADIAN: Self = Self(*b"akk", 3);
164    pub const ALBANIAN: Self = Self(*b"sq ", 2);
165    pub const ALGERIAN_ARABIC: Self = Self(*b"arq", 3);
166    pub const AMHARIC: Self = Self(*b"am ", 2);
167    pub const ANCIENT_EGYPTIAN: Self = Self(*b"egy", 3);
168    pub const ANCIENT_GREEK: Self = Self(*b"grc", 3);
169    pub const ANCIENT_HEBREW: Self = Self(*b"hbo", 3);
170    pub const ARABIC: Self = Self(*b"ar ", 2);
171    pub const ARAMAIC: Self = Self(*b"arc", 3);
172    pub const ARMENIAN: Self = Self(*b"hy ", 2);
173    pub const ASSAMESE: Self = Self(*b"as ", 2);
174    pub const ASTURIAN: Self = Self(*b"ast", 3);
175    pub const ASU: Self = Self(*b"asa", 3);
176    pub const ATSAM: Self = Self(*b"cch", 3);
177    pub const AVESTAN: Self = Self(*b"ae ", 2);
178    pub const AWADHI: Self = Self(*b"awa", 3);
179    pub const AYMARA: Self = Self(*b"ay ", 2);
180    pub const AZERBAIJANI: Self = Self(*b"az ", 2);
181    pub const BAFIA: Self = Self(*b"ksf", 3);
182    pub const BALINESE: Self = Self(*b"ban", 3);
183    pub const BALUCHI: Self = Self(*b"bal", 3);
184    pub const BAMBARA: Self = Self(*b"bm ", 2);
185    pub const BANGLA: Self = Self(*b"bn ", 2);
186    pub const BASAA: Self = Self(*b"bas", 3);
187    pub const BASHKIR: Self = Self(*b"ba ", 2);
188    pub const BASQUE: Self = Self(*b"eu ", 2);
189    pub const BATAK_TOBA: Self = Self(*b"bbc", 3);
190    pub const BAVARIAN: Self = Self(*b"bar", 3);
191    pub const BELARUSIAN: Self = Self(*b"be ", 2);
192    pub const BEMBA: Self = Self(*b"bem", 3);
193    pub const BENA: Self = Self(*b"bez", 3);
194    pub const BETAWI: Self = Self(*b"bew", 3);
195    pub const BHOJPURI: Self = Self(*b"bho", 3);
196    pub const BLIN: Self = Self(*b"byn", 3);
197    pub const BODO: Self = Self(*b"brx", 3);
198    pub const BOSNIAN: Self = Self(*b"bs ", 2);
199    pub const BRETON: Self = Self(*b"br ", 2);
200    pub const BULGARIAN: Self = Self(*b"bg ", 2);
201    pub const BURIAT: Self = Self(*b"bua", 3);
202    pub const BURMESE: Self = Self(*b"my ", 2);
203    pub const CANTONESE: Self = Self(*b"yue", 3);
204    pub const CARIAN: Self = Self(*b"xcr", 3);
205    pub const CATALAN: Self = Self(*b"ca ", 2);
206    pub const CEBUANO: Self = Self(*b"ceb", 3);
207    pub const CENTRAL_ATLAS_TAMAZIGHT: Self = Self(*b"tzm", 3);
208    pub const CENTRAL_KURDISH: Self = Self(*b"ckb", 3);
209    pub const CHAKMA: Self = Self(*b"ccp", 3);
210    pub const CHECHEN: Self = Self(*b"ce ", 2);
211    pub const CHEROKEE: Self = Self(*b"chr", 3);
212    pub const CHIGA: Self = Self(*b"cgg", 3);
213    pub const CHINESE: Self = Self(*b"zh ", 2);
214    pub const CHURCH_SLAVIC: Self = Self(*b"cu ", 2);
215    pub const CHUVASH: Self = Self(*b"cv ", 2);
216    pub const CLASSICAL_MANDAIC: Self = Self(*b"myz", 3);
217    pub const COLOGNIAN: Self = Self(*b"ksh", 3);
218    pub const COPTIC: Self = Self(*b"cop", 3);
219    pub const CORNISH: Self = Self(*b"kw ", 2);
220    pub const CORSICAN: Self = Self(*b"co ", 2);
221    pub const CROATIAN: Self = Self(*b"hr ", 2);
222    pub const CZECH: Self = Self(*b"cs ", 2);
223    pub const DANISH: Self = Self(*b"da ", 2);
224    pub const DIVEHI: Self = Self(*b"dv ", 2);
225    pub const DOGRI: Self = Self(*b"doi", 3);
226    pub const DUALA: Self = Self(*b"dua", 3);
227    pub const DUTCH: Self = Self(*b"nl ", 2);
228    pub const DZONGKHA: Self = Self(*b"dz ", 2);
229    pub const EGYPTIAN_ARABIC: Self = Self(*b"arz", 3);
230    pub const EMBU: Self = Self(*b"ebu", 3);
231    pub const ENGLISH: Self = Self(*b"en ", 2);
232    pub const ERZYA: Self = Self(*b"myv", 3);
233    pub const ESPERANTO: Self = Self(*b"eo ", 2);
234    pub const ESTONIAN: Self = Self(*b"et ", 2);
235    pub const ETRUSCAN: Self = Self(*b"ett", 3);
236    pub const EWE: Self = Self(*b"ee ", 2);
237    pub const EWONDO: Self = Self(*b"ewo", 3);
238    pub const FAROESE: Self = Self(*b"fo ", 2);
239    pub const FILIPINO: Self = Self(*b"fil", 3);
240    pub const FINNISH: Self = Self(*b"fi ", 2);
241    pub const FRENCH: Self = Self(*b"fr ", 2);
242    pub const FRIULIAN: Self = Self(*b"fur", 3);
243    pub const FULAH: Self = Self(*b"ff ", 2);
244    pub const GA: Self = Self(*b"gaa", 3);
245    pub const GALICIAN: Self = Self(*b"gl ", 2);
246    pub const GANDA: Self = Self(*b"lg ", 2);
247    pub const GEEZ: Self = Self(*b"gez", 3);
248    pub const GEORGIAN: Self = Self(*b"ka ", 2);
249    pub const GERMAN: Self = Self(*b"de ", 2);
250    pub const GOTHIC: Self = Self(*b"got", 3);
251    pub const GREEK: Self = Self(*b"el ", 2);
252    pub const GUARANI: Self = Self(*b"gn ", 2);
253    pub const GUJARATI: Self = Self(*b"gu ", 2);
254    pub const GUSII: Self = Self(*b"guz", 3);
255    pub const HARYANVI: Self = Self(*b"bgc", 3);
256    pub const HAUSA: Self = Self(*b"ha ", 2);
257    pub const HAWAIIAN: Self = Self(*b"haw", 3);
258    pub const HEBREW: Self = Self(*b"he ", 2);
259    pub const HINDI: Self = Self(*b"hi ", 2);
260    pub const HMONG_NJUA: Self = Self(*b"hnj", 3);
261    pub const HUNGARIAN: Self = Self(*b"hu ", 2);
262    pub const ICELANDIC: Self = Self(*b"is ", 2);
263    pub const IGBO: Self = Self(*b"ig ", 2);
264    pub const INARI_SAMI: Self = Self(*b"smn", 3);
265    pub const INDONESIAN: Self = Self(*b"id ", 2);
266    pub const INGUSH: Self = Self(*b"inh", 3);
267    pub const INTERLINGUA: Self = Self(*b"ia ", 2);
268    pub const INTERSLAVIC: Self = Self(*b"isv", 3);
269    pub const INUKTITUT: Self = Self(*b"iu ", 2);
270    pub const IRISH: Self = Self(*b"ga ", 2);
271    pub const ITALIAN: Self = Self(*b"it ", 2);
272    pub const JAPANESE: Self = Self(*b"ja ", 2);
273    pub const JAVANESE: Self = Self(*b"jv ", 2);
274    pub const JJU: Self = Self(*b"kaj", 3);
275    pub const JOLA_FONYI: Self = Self(*b"dyo", 3);
276    pub const KABUVERDIANU: Self = Self(*b"kea", 3);
277    pub const KABYLE: Self = Self(*b"kab", 3);
278    pub const KAINGANG: Self = Self(*b"kgp", 3);
279    pub const KAKO: Self = Self(*b"kkj", 3);
280    pub const KALAALLISUT: Self = Self(*b"kl ", 2);
281    pub const KALENJIN: Self = Self(*b"kln", 3);
282    pub const KAMBA: Self = Self(*b"kam", 3);
283    pub const KANGRI: Self = Self(*b"xnr", 3);
284    pub const KANNADA: Self = Self(*b"kn ", 2);
285    pub const KASHMIRI: Self = Self(*b"ks ", 2);
286    pub const KAZAKH: Self = Self(*b"kk ", 2);
287    pub const KHMER: Self = Self(*b"km ", 2);
288    pub const KIKUYU: Self = Self(*b"ki ", 2);
289    pub const KINYARWANDA: Self = Self(*b"rw ", 2);
290    pub const KOMI: Self = Self(*b"kv ", 2);
291    pub const KONKANI: Self = Self(*b"kok", 3);
292    pub const KOREAN: Self = Self(*b"ko ", 2);
293    pub const KOYRABORO_SENNI: Self = Self(*b"ses", 3);
294    pub const KOYRA_CHIINI: Self = Self(*b"khq", 3);
295    pub const KURDISH: Self = Self(*b"ku ", 2);
296    pub const KWASIO: Self = Self(*b"nmg", 3);
297    pub const KYRGYZ: Self = Self(*b"ky ", 2);
298    pub const LADINO: Self = Self(*b"lad", 3);
299    pub const LAKOTA: Self = Self(*b"lkt", 3);
300    pub const LANGI: Self = Self(*b"lag", 3);
301    pub const LAO: Self = Self(*b"lo ", 2);
302    pub const LATIN: Self = Self(*b"la ", 2);
303    pub const LATVIAN: Self = Self(*b"lv ", 2);
304    pub const LEPCHA: Self = Self(*b"lep", 3);
305    pub const LIGURIAN: Self = Self(*b"lij", 3);
306    pub const LIMBU: Self = Self(*b"lif", 3);
307    pub const LINEAR_A: Self = Self(*b"lab", 3);
308    pub const LINGALA: Self = Self(*b"ln ", 2);
309    pub const LITHUANIAN: Self = Self(*b"lt ", 2);
310    pub const LOMBARD: Self = Self(*b"lmo", 3);
311    pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3);
312    pub const LOW_GERMAN: Self = Self(*b"nds", 3);
313    pub const LUBA_KATANGA: Self = Self(*b"lu ", 2);
314    pub const LUO: Self = Self(*b"luo", 3);
315    pub const LUXEMBOURGISH: Self = Self(*b"lb ", 2);
316    pub const LUYIA: Self = Self(*b"luy", 3);
317    pub const LYCIAN: Self = Self(*b"xlc", 3);
318    pub const LYDIAN: Self = Self(*b"xld", 3);
319    pub const LU: Self = Self(*b"khb", 3);
320    pub const MACEDONIAN: Self = Self(*b"mk ", 2);
321    pub const MACHAME: Self = Self(*b"jmc", 3);
322    pub const MAITHILI: Self = Self(*b"mai", 3);
323    pub const MAKASAR: Self = Self(*b"mak", 3);
324    pub const MAKHUWA_MEETTO: Self = Self(*b"mgh", 3);
325    pub const MAKHUWA: Self = Self(*b"vmw", 3);
326    pub const MAKONDE: Self = Self(*b"kde", 3);
327    pub const MALAGASY: Self = Self(*b"mg ", 2);
328    pub const MALAY: Self = Self(*b"ms ", 2);
329    pub const MALAYALAM: Self = Self(*b"ml ", 2);
330    pub const MALTESE: Self = Self(*b"mt ", 2);
331    pub const MANIPURI: Self = Self(*b"mni", 3);
332    pub const MANX: Self = Self(*b"gv ", 2);
333    pub const MARATHI: Self = Self(*b"mr ", 2);
334    pub const MASAI: Self = Self(*b"mas", 3);
335    pub const MAZANDERANI: Self = Self(*b"mzn", 3);
336    pub const MERU: Self = Self(*b"mer", 3);
337    pub const METAʼ: Self = Self(*b"mgo", 3);
338    pub const MONGOLIAN: Self = Self(*b"mn ", 2);
339    pub const MORISYEN: Self = Self(*b"mfe", 3);
340    pub const MUNDANG: Self = Self(*b"mua", 3);
341    pub const MUSCOGEE: Self = Self(*b"mus", 3);
342    pub const MAORI: Self = Self(*b"mi ", 2);
343    pub const NAMA: Self = Self(*b"naq", 3);
344    pub const NAVAJO: Self = Self(*b"nv ", 2);
345    pub const NEPALI: Self = Self(*b"ne ", 2);
346    pub const NEWARI: Self = Self(*b"new", 3);
347    pub const NGIEMBOON: Self = Self(*b"nnh", 3);
348    pub const NGOMBA: Self = Self(*b"jgo", 3);
349    pub const NHEENGATU: Self = Self(*b"yrl", 3);
350    pub const NIGERIAN_PIDGIN: Self = Self(*b"pcm", 3);
351    pub const NORTHERN_FRISIAN: Self = Self(*b"frr", 3);
352    pub const NORTHERN_KURDISH: Self = Self(*b"kmr", 3);
353    pub const NORTHERN_LURI: Self = Self(*b"lrc", 3);
354    pub const NORTHERN_SAMI: Self = Self(*b"se ", 2);
355    pub const NORTHERN_SOTHO: Self = Self(*b"nso", 3);
356    pub const NORTH_NDEBELE: Self = Self(*b"nd ", 2);
357    pub const NORWEGIAN: Self = Self(*b"no ", 2);
358    pub const NORWEGIAN_BOKMAL: Self = Self(*b"nb ", 2);
359    pub const NORWEGIAN_NYNORSK: Self = Self(*b"nn ", 2);
360    pub const NUER: Self = Self(*b"nus", 3);
361    pub const NYANJA: Self = Self(*b"ny ", 2);
362    pub const NYANKOLE: Self = Self(*b"nyn", 3);
363    pub const NKO: Self = Self(*b"nqo", 3);
364    pub const OCCITAN: Self = Self(*b"oc ", 2);
365    pub const ODIA: Self = Self(*b"or ", 2);
366    pub const OLD_IRISH: Self = Self(*b"sga", 3);
367    pub const OLD_NORSE: Self = Self(*b"non", 3);
368    pub const OLD_PERSIAN: Self = Self(*b"peo", 3);
369    pub const OLD_UIGHUR: Self = Self(*b"oui", 3);
370    pub const OROMO: Self = Self(*b"om ", 2);
371    pub const OSAGE: Self = Self(*b"osa", 3);
372    pub const OSSETIC: Self = Self(*b"os ", 2);
373    pub const PAPIAMENTO: Self = Self(*b"pap", 3);
374    pub const PASHTO: Self = Self(*b"ps ", 2);
375    pub const PERSIAN: Self = Self(*b"fa ", 2);
376    pub const PHOENICIAN: Self = Self(*b"phn", 3);
377    pub const PIEDMONTESE: Self = Self(*b"pms", 3);
378    pub const POLISH: Self = Self(*b"pl ", 2);
379    pub const PORTUGUESE: Self = Self(*b"pt ", 2);
380    pub const PRUSSIAN: Self = Self(*b"prg", 3);
381    pub const PUNJABI: Self = Self(*b"pa ", 2);
382    pub const QUECHUA: Self = Self(*b"qu ", 2);
383    pub const RAJASTHANI: Self = Self(*b"raj", 3);
384    pub const ROMANIAN: Self = Self(*b"ro ", 2);
385    pub const ROMANSH: Self = Self(*b"rm ", 2);
386    pub const ROMBO: Self = Self(*b"rof", 3);
387    pub const RUNDI: Self = Self(*b"rn ", 2);
388    pub const RUSSIAN: Self = Self(*b"ru ", 2);
389    pub const RWA: Self = Self(*b"rwk", 3);
390    pub const SABAEAN: Self = Self(*b"xsa", 3);
391    pub const SAHO: Self = Self(*b"ssy", 3);
392    pub const SAKHA: Self = Self(*b"sah", 3);
393    pub const SAMARITAN: Self = Self(*b"smp", 3);
394    pub const SAMBURU: Self = Self(*b"saq", 3);
395    pub const SANGO: Self = Self(*b"sg ", 2);
396    pub const SANGU: Self = Self(*b"sbp", 3);
397    pub const SANSKRIT: Self = Self(*b"sa ", 2);
398    pub const SANTALI: Self = Self(*b"sat", 3);
399    pub const SARAIKI: Self = Self(*b"skr", 3);
400    pub const SARDINIAN: Self = Self(*b"sc ", 2);
401    pub const SCOTTISH_GAELIC: Self = Self(*b"gd ", 2);
402    pub const SENA: Self = Self(*b"seh", 3);
403    pub const SERBIAN: Self = Self(*b"sr ", 2);
404    pub const SHAMBALA: Self = Self(*b"ksb", 3);
405    pub const SHONA: Self = Self(*b"sn ", 2);
406    pub const SICHUAN_YI: Self = Self(*b"ii ", 2);
407    pub const SICILIAN: Self = Self(*b"scn", 3);
408    pub const SILESIAN: Self = Self(*b"szl", 3);
409    pub const SINDHI: Self = Self(*b"sd ", 2);
410    pub const SINHALA: Self = Self(*b"si ", 2);
411    pub const SINTE_ROMANI: Self = Self(*b"rmo", 3);
412    pub const SLOVAK: Self = Self(*b"sk ", 2);
413    pub const SLOVENIAN: Self = Self(*b"sl ", 2);
414    pub const SOGA: Self = Self(*b"xog", 3);
415    pub const SOMALI: Self = Self(*b"so ", 2);
416    pub const SOUTHERN_ALTAI: Self = Self(*b"alt", 3);
417    pub const SOUTHERN_SOTHO: Self = Self(*b"st ", 2);
418    pub const SOUTH_NDEBELE: Self = Self(*b"nr ", 2);
419    pub const SPANISH: Self = Self(*b"es ", 2);
420    pub const STANDARD_MOROCCAN_TAMAZIGHT: Self = Self(*b"zgh", 3);
421    pub const SUNDANESE: Self = Self(*b"su ", 2);
422    pub const SWAHILI: Self = Self(*b"sw ", 2);
423    pub const SWATI: Self = Self(*b"ss ", 2);
424    pub const SWEDISH: Self = Self(*b"sv ", 2);
425    pub const SWISS_GERMAN: Self = Self(*b"gsw", 3);
426    pub const SYRIAC: Self = Self(*b"syr", 3);
427    pub const TACHELHIT: Self = Self(*b"shi", 3);
428    pub const TAITA: Self = Self(*b"dav", 3);
429    pub const TAI_NUA: Self = Self(*b"tdd", 3);
430    pub const TAJIK: Self = Self(*b"tg ", 2);
431    pub const TAMIL: Self = Self(*b"ta ", 2);
432    pub const TANGUT: Self = Self(*b"txg", 3);
433    pub const TAROKO: Self = Self(*b"trv", 3);
434    pub const TASAWAQ: Self = Self(*b"twq", 3);
435    pub const TATAR: Self = Self(*b"tt ", 2);
436    pub const TELUGU: Self = Self(*b"te ", 2);
437    pub const TESO: Self = Self(*b"teo", 3);
438    pub const THAI: Self = Self(*b"th ", 2);
439    pub const TIBETAN: Self = Self(*b"bo ", 2);
440    pub const TIGRE: Self = Self(*b"tig", 3);
441    pub const TIGRINYA: Self = Self(*b"ti ", 2);
442    pub const TOK_PISIN: Self = Self(*b"tpi", 3);
443    pub const TONGAN: Self = Self(*b"to ", 2);
444    pub const TSONGA: Self = Self(*b"ts ", 2);
445    pub const TSWANA: Self = Self(*b"tn ", 2);
446    pub const TURKISH: Self = Self(*b"tr ", 2);
447    pub const TURKMEN: Self = Self(*b"tk ", 2);
448    pub const TYAP: Self = Self(*b"kcg", 3);
449    pub const UGARITIC: Self = Self(*b"uga", 3);
450    pub const UKRAINIAN: Self = Self(*b"uk ", 2);
451    pub const UNKNOWN_LANGUAGE: Self = Self(*b"und", 3);
452    pub const UPPER_SORBIAN: Self = Self(*b"hsb", 3);
453    pub const URDU: Self = Self(*b"ur ", 2);
454    pub const UYGHUR: Self = Self(*b"ug ", 2);
455    pub const UZBEK: Self = Self(*b"uz ", 2);
456    pub const VAI: Self = Self(*b"vai", 3);
457    pub const VENDA: Self = Self(*b"ve ", 2);
458    pub const VENETIAN: Self = Self(*b"vec", 3);
459    pub const VIETNAMESE: Self = Self(*b"vi ", 2);
460    pub const VOLAPUK: Self = Self(*b"vo ", 2);
461    pub const VUNJO: Self = Self(*b"vun", 3);
462    pub const WALSER: Self = Self(*b"wae", 3);
463    pub const WARAY: Self = Self(*b"war", 3);
464    pub const WELSH: Self = Self(*b"cy ", 2);
465    pub const WESTERN_FRISIAN: Self = Self(*b"fy ", 2);
466    pub const WOLAYTTA: Self = Self(*b"wal", 3);
467    pub const WOLOF: Self = Self(*b"wo ", 2);
468    pub const XHOSA: Self = Self(*b"xh ", 2);
469    pub const YANGBEN: Self = Self(*b"yav", 3);
470    pub const YIDDISH: Self = Self(*b"yi ", 2);
471    pub const YORUBA: Self = Self(*b"yo ", 2);
472    pub const ZARMA: Self = Self(*b"dje", 3);
473    pub const ZHUANG: Self = Self(*b"za ", 2);
474    pub const ZULU: Self = Self(*b"zu ", 2);
475
476    /// Return the language code as an all lowercase string slice.
477    pub fn as_str(&self) -> &str {
478        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
479    }
480
481    /// The default direction for the language.
482    pub fn dir(self) -> Dir {
483        match self.as_str() {
484            "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
485            | "yi" => Dir::RTL,
486            _ => Dir::LTR,
487        }
488    }
489}
490
491impl FromStr for Lang {
492    type Err = &'static str;
493
494    /// Construct a language from a two- or three-byte ISO 639-1/2/3 code.
495    fn from_str(iso: &str) -> Result<Self, Self::Err> {
496        let len = iso.len();
497        if matches!(len, 2..=3) && iso.is_ascii() {
498            let mut bytes = [b' '; 3];
499            bytes[..len].copy_from_slice(iso.as_bytes());
500            bytes.make_ascii_lowercase();
501            Ok(Self(bytes, len as u8))
502        } else {
503            Err("expected two or three letter language code (ISO 639-1/2/3)")
504        }
505    }
506}
507
508cast! {
509    Lang,
510    self => self.as_str().into_value(),
511    string: EcoString => {
512        let result = Self::from_str(&string);
513        if result.is_err()
514            && let Some((lang, region)) = string.split_once('-')
515                && Lang::from_str(lang).is_ok() && Region::from_str(region).is_ok() {
516                    return result
517                        .hint(eco_format!(
518                            "you should leave only \"{}\" in the `lang` parameter and specify \"{}\" in the `region` parameter",
519                            lang, region,
520                        ));
521                }
522
523        result?
524    }
525}
526
/// An identifier for a region somewhere in the world.
///
/// Holds the two bytes of the region code.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Region([u8; 2]);

impl Region {
    /// Return the region code as an all uppercase string slice.
    pub fn as_str(&self) -> &str {
        match std::str::from_utf8(&self.0) {
            Ok(code) => code,
            Err(_) => "",
        }
    }
}

impl PartialEq<&str> for Region {
    /// Compare the region code with a string slice (case-sensitive).
    fn eq(&self, other: &&str) -> bool {
        *other == self.as_str()
    }
}

impl FromStr for Region {
    type Err = &'static str;

    /// Construct a region from its two-byte ISO 3166-1 alpha-2 code.
    ///
    /// The code must be ASCII; it is stored in uppercase.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        match *iso.as_bytes() {
            [first, second] if iso.is_ascii() => {
                Ok(Self([first.to_ascii_uppercase(), second.to_ascii_uppercase()]))
            }
            _ => Err("expected two letter region code (ISO 3166-1 alpha-2)"),
        }
    }
}
558
559cast! {
560    Region,
561    self => self.as_str().into_value(),
562    string: EcoString => Self::from_str(&string)?,
563}
564
/// An ISO 15924-type script identifier.
///
/// The first field holds the lowercase code bytes, padded with a trailing
/// space when the code has only three letters; the second field is the
/// number of meaningful bytes (3 or 4).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct WritingScript([u8; 4], u8);

impl WritingScript {
    /// Return the script as an all lowercase string slice.
    ///
    /// Only the meaningful bytes are returned; the padding space of a
    /// three-letter code is cut off.
    pub fn as_str(&self) -> &str {
        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
    }

    /// Return the description of the script as raw bytes.
    ///
    /// Unlike [`as_str`](Self::as_str), this always yields all four stored
    /// bytes, so a three-letter code parsed by `from_str` ends with a space
    /// (`b"dev "`).
    pub fn as_bytes(&self) -> &[u8; 4] {
        &self.0
    }
}

impl FromStr for WritingScript {
    type Err = &'static str;

    /// Construct a script from its three- or four-byte ISO 15924 code.
    ///
    /// The code must be ASCII; it is stored in lowercase.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        let len = iso.len();
        if !matches!(len, 3..=4) || !iso.is_ascii() {
            return Err("expected three or four letter script code (ISO 15924 or 'math')");
        }

        // Unused trailing slots keep the space padding.
        let mut bytes = [b' '; 4];
        bytes[..len].copy_from_slice(iso.as_bytes());
        bytes.make_ascii_lowercase();
        Ok(Self(bytes, len as u8))
    }
}
597
598cast! {
599    WritingScript,
600    self => self.as_str().into_value(),
601    string: EcoString => Self::from_str(&string)?,
602}
603
604/// The name with which an element is referenced.
605pub trait LocalName {
606    /// The key of an element in order to get its localized name.
607    const KEY: &'static str;
608
609    /// Get the name in the given language and (optionally) region.
610    fn local_name(lang: Lang, region: Option<Region>) -> &'static str {
611        localized_str(lang, region, Self::KEY)
612    }
613
614    /// Gets the local name from the style chain.
615    fn local_name_in(styles: StyleChain) -> &'static str
616    where
617        Self: Sized,
618    {
619        Self::local_name(styles.get(TextElem::lang), styles.get(TextElem::region))
620    }
621}
622
623/// Retrieves the localized string for a given language and region.
624/// Silently falls back to English if no fitting string exists for
625/// the given language + region. Panics if no fitting string exists
626/// in both given language + region and English.
627#[comemo::memoize]
628pub fn localized_str(lang: Lang, region: Option<Region>, key: &str) -> &'static str {
629    let lang_region_bundle = parse_language_bundle(lang, region).unwrap();
630    if let Some(str) = lang_region_bundle.get(key) {
631        return str;
632    }
633    let lang_bundle = parse_language_bundle(lang, None).unwrap();
634    if let Some(str) = lang_bundle.get(key) {
635        return str;
636    }
637    let english_bundle = parse_language_bundle(Lang::ENGLISH, None).unwrap();
638    english_bundle.get(key).unwrap()
639}
640
641/// Parses the translation file for a given language and region.
642/// Only returns an error if the language file is malformed.
643#[comemo::memoize]
644fn parse_language_bundle(
645    lang: Lang,
646    region: Option<Region>,
647) -> Result<FxHashMap<&'static str, &'static str>, &'static str> {
648    let language_tuple = TRANSLATIONS.iter().find(|it| it.0 == lang_str(lang, region));
649    let Some((_lang_name, language_file)) = language_tuple else {
650        return Ok(FxHashMap::default());
651    };
652
653    let mut bundle = FxHashMap::default();
654    let lines = language_file.trim().lines();
655    for line in lines {
656        if line.trim().starts_with('#') {
657            continue;
658        }
659        let (key, val) = line
660            .split_once('=')
661            .ok_or("malformed translation file: line without \"=\"")?;
662        let (key, val) = (key.trim(), val.trim());
663        if val.is_empty() {
664            return Err("malformed translation file: empty translation value");
665        }
666        let duplicate = bundle.insert(key.trim(), val.trim());
667        if duplicate.is_some() {
668            return Err("malformed translation file: duplicate key");
669        }
670    }
671    Ok(bundle)
672}
673
674/// Convert language + region to a string to be able to get a file name.
675fn lang_str(lang: Lang, region: Option<Region>) -> EcoString {
676    EcoString::from(lang.as_str())
677        + region.map_or_else(EcoString::new, |r| EcoString::from("-") + r.as_str())
678}
679
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use rustc_hash::FxHashSet;
    use typst_utils::option_eq;

    use super::*;

    /// Yields the path of every `.txt` file directly inside the
    /// `translations` directory (relative to the working directory).
    fn translation_files_iter() -> impl Iterator<Item = PathBuf> {
        std::fs::read_dir("translations")
            .unwrap()
            .map(|entry| entry.unwrap().path())
            .filter(|path| {
                path.is_file() && path.extension().is_some_and(|ext| ext == "txt")
            })
    }

    /// An optional region can be compared against a plain string.
    #[test]
    fn test_region_option_eq() {
        let region = Some(Region(*b"US"));
        assert!(option_eq(region, "US"));
        assert!(!option_eq(region, "AB"));
    }

    /// Every translation file on disk is registered in `TRANSLATIONS`, and
    /// the table holds no entries beyond those files.
    #[test]
    fn test_all_translations_included() {
        let registered: FxHashSet<&str> =
            TRANSLATIONS.iter().map(|&(lang, _)| lang).collect();

        let mut checked = 0;
        for file in translation_files_iter() {
            let stem = file
                .file_stem()
                .expect("translation file should have basename")
                .to_str()
                .expect("translation file name should be utf-8 encoded");
            assert!(
                registered.contains(stem),
                "translation from {:?} should be registered in TRANSLATIONS in {}",
                file.file_name().unwrap(),
                file!(),
            );
            checked += 1;
        }
        assert_eq!(TRANSLATIONS.len(), checked);
    }

    /// Translation files end with a linebreak and have no stray whitespace
    /// around any line.
    #[test]
    fn test_all_translation_files_formatted() {
        for path in translation_files_iter() {
            let content = std::fs::read_to_string(&path)
                .expect("translation file should be in utf-8 encoding");
            let filename = path.file_name().unwrap();
            assert!(
                content.ends_with('\n'),
                "translation file {filename:?} should end with linebreak",
            );
            content.lines().for_each(|line| {
                assert_eq!(
                    line.trim(),
                    line,
                    "line {line:?} in {filename:?} should not have extra whitespaces"
                );
            });
        }
    }

    /// The `TRANSLATIONS` table is ordered by locale code.
    #[test]
    fn test_translations_sorted() {
        assert!(
            TRANSLATIONS.is_sorted_by_key(|&(lang, _)| lang),
            "TRANSLATIONS should be sorted"
        );
    }
}