typst_library/text/
lang.rs

1use std::collections::HashMap;
2use std::str::FromStr;
3
4use ecow::{eco_format, EcoString};
5
6use crate::diag::Hint;
7use crate::foundations::{cast, StyleChain};
8use crate::layout::Dir;
9use crate::text::TextElem;
10
/// Expands a language tag literal into a `(tag, contents)` tuple, where
/// `contents` is the text of `../../translations/<tag>.txt` embedded at
/// compile time through `include_str!`.
macro_rules! translation {
    ($tag:literal) => {
        ($tag, include_str!(concat!("../../translations/", $tag, ".txt")))
    };
}
16
17const TRANSLATIONS: [(&str, &str); 38] = [
18    translation!("ar"),
19    translation!("bg"),
20    translation!("ca"),
21    translation!("cs"),
22    translation!("da"),
23    translation!("de"),
24    translation!("en"),
25    translation!("es"),
26    translation!("et"),
27    translation!("eu"),
28    translation!("fi"),
29    translation!("fr"),
30    translation!("gl"),
31    translation!("el"),
32    translation!("he"),
33    translation!("hu"),
34    translation!("is"),
35    translation!("it"),
36    translation!("ja"),
37    translation!("la"),
38    translation!("nb"),
39    translation!("nl"),
40    translation!("nn"),
41    translation!("pl"),
42    translation!("pt-PT"),
43    translation!("pt"),
44    translation!("ro"),
45    translation!("ru"),
46    translation!("sl"),
47    translation!("sq"),
48    translation!("sr"),
49    translation!("sv"),
50    translation!("tl"),
51    translation!("tr"),
52    translation!("uk"),
53    translation!("vi"),
54    translation!("zh-TW"),
55    translation!("zh"),
56];
57
/// An identifier for a natural language.
///
/// The first field stores the code bytes, right-padded with ASCII spaces to
/// three bytes; the second field is the number of bytes that belong to the
/// code (two or three).
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Lang([u8; 3], u8);
61
62impl Lang {
63    pub const ALBANIAN: Self = Self(*b"sq ", 2);
64    pub const ARABIC: Self = Self(*b"ar ", 2);
65    pub const BASQUE: Self = Self(*b"eu ", 2);
66    pub const BOKMÃ…L: Self = Self(*b"nb ", 2);
67    pub const BULGARIAN: Self = Self(*b"bg ", 2);
68    pub const CATALAN: Self = Self(*b"ca ", 2);
69    pub const CHINESE: Self = Self(*b"zh ", 2);
70    pub const CROATIAN: Self = Self(*b"hr ", 2);
71    pub const CZECH: Self = Self(*b"cs ", 2);
72    pub const DANISH: Self = Self(*b"da ", 2);
73    pub const DUTCH: Self = Self(*b"nl ", 2);
74    pub const ENGLISH: Self = Self(*b"en ", 2);
75    pub const ESTONIAN: Self = Self(*b"et ", 2);
76    pub const FILIPINO: Self = Self(*b"tl ", 2);
77    pub const FINNISH: Self = Self(*b"fi ", 2);
78    pub const FRENCH: Self = Self(*b"fr ", 2);
79    pub const GALICIAN: Self = Self(*b"gl ", 2);
80    pub const GERMAN: Self = Self(*b"de ", 2);
81    pub const GREEK: Self = Self(*b"el ", 2);
82    pub const HEBREW: Self = Self(*b"he ", 2);
83    pub const HUNGARIAN: Self = Self(*b"hu ", 2);
84    pub const ICELANDIC: Self = Self(*b"is ", 2);
85    pub const ITALIAN: Self = Self(*b"it ", 2);
86    pub const JAPANESE: Self = Self(*b"ja ", 2);
87    pub const LATIN: Self = Self(*b"la ", 2);
88    pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3);
89    pub const NYNORSK: Self = Self(*b"nn ", 2);
90    pub const POLISH: Self = Self(*b"pl ", 2);
91    pub const PORTUGUESE: Self = Self(*b"pt ", 2);
92    pub const ROMANIAN: Self = Self(*b"ro ", 2);
93    pub const RUSSIAN: Self = Self(*b"ru ", 2);
94    pub const SERBIAN: Self = Self(*b"sr ", 2);
95    pub const SLOVAK: Self = Self(*b"sk ", 2);
96    pub const SLOVENIAN: Self = Self(*b"sl ", 2);
97    pub const SPANISH: Self = Self(*b"es ", 2);
98    pub const SWEDISH: Self = Self(*b"sv ", 2);
99    pub const TURKISH: Self = Self(*b"tr ", 2);
100    pub const UKRAINIAN: Self = Self(*b"uk ", 2);
101    pub const VIETNAMESE: Self = Self(*b"vi ", 2);
102
103    /// Return the language code as an all lowercase string slice.
104    pub fn as_str(&self) -> &str {
105        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
106    }
107
108    /// The default direction for the language.
109    pub fn dir(self) -> Dir {
110        match self.as_str() {
111            "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
112            | "yi" => Dir::RTL,
113            _ => Dir::LTR,
114        }
115    }
116}
117
118impl FromStr for Lang {
119    type Err = &'static str;
120
121    /// Construct a language from a two- or three-byte ISO 639-1/2/3 code.
122    fn from_str(iso: &str) -> Result<Self, Self::Err> {
123        let len = iso.len();
124        if matches!(len, 2..=3) && iso.is_ascii() {
125            let mut bytes = [b' '; 3];
126            bytes[..len].copy_from_slice(iso.as_bytes());
127            bytes.make_ascii_lowercase();
128            Ok(Self(bytes, len as u8))
129        } else {
130            Err("expected two or three letter language code (ISO 639-1/2/3)")
131        }
132    }
133}
134
135cast! {
136    Lang,
137    self => self.as_str().into_value(),
138    string: EcoString => {
139        let result = Self::from_str(&string);
140        if result.is_err() {
141            if let Some((lang, region)) = string.split_once('-') {
142                if Lang::from_str(lang).is_ok() && Region::from_str(region).is_ok() {
143                    return result
144                        .hint(eco_format!(
145                            "you should leave only \"{}\" in the `lang` parameter and specify \"{}\" in the `region` parameter",
146                            lang, region,
147                        ));
148                }
149            }
150        }
151
152        result?
153    }
154}
155
/// An identifier for a region somewhere in the world.
///
/// Wraps the two bytes of the region code.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Region([u8; 2]);
159
160impl Region {
161    /// Return the region code as an all uppercase string slice.
162    pub fn as_str(&self) -> &str {
163        std::str::from_utf8(&self.0).unwrap_or_default()
164    }
165}
166
167impl PartialEq<&str> for Region {
168    fn eq(&self, other: &&str) -> bool {
169        self.as_str() == *other
170    }
171}
172
173impl FromStr for Region {
174    type Err = &'static str;
175
176    /// Construct a region from its two-byte ISO 3166-1 alpha-2 code.
177    fn from_str(iso: &str) -> Result<Self, Self::Err> {
178        if iso.len() == 2 && iso.is_ascii() {
179            let mut bytes: [u8; 2] = iso.as_bytes().try_into().unwrap();
180            bytes.make_ascii_uppercase();
181            Ok(Self(bytes))
182        } else {
183            Err("expected two letter region code (ISO 3166-1 alpha-2)")
184        }
185    }
186}
187
188cast! {
189    Region,
190    self => self.as_str().into_value(),
191    string: EcoString => Self::from_str(&string)?,
192}
193
/// An ISO 15924-type script identifier.
///
/// The first field stores the code bytes, right-padded with ASCII spaces to
/// four bytes; the second field is the number of bytes that belong to the
/// code (three or four).
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct WritingScript([u8; 4], u8);
197
198impl WritingScript {
199    /// Return the script as an all lowercase string slice.
200    pub fn as_str(&self) -> &str {
201        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
202    }
203
204    /// Return the description of the script as raw bytes.
205    pub fn as_bytes(&self) -> &[u8; 4] {
206        &self.0
207    }
208}
209
210impl FromStr for WritingScript {
211    type Err = &'static str;
212
213    /// Construct a region from its ISO 15924 code.
214    fn from_str(iso: &str) -> Result<Self, Self::Err> {
215        let len = iso.len();
216        if matches!(len, 3..=4) && iso.is_ascii() {
217            let mut bytes = [b' '; 4];
218            bytes[..len].copy_from_slice(iso.as_bytes());
219            bytes.make_ascii_lowercase();
220            Ok(Self(bytes, len as u8))
221        } else {
222            Err("expected three or four letter script code (ISO 15924 or 'math')")
223        }
224    }
225}
226
227cast! {
228    WritingScript,
229    self => self.as_str().into_value(),
230    string: EcoString => Self::from_str(&string)?,
231}
232
233/// The name with which an element is referenced.
234pub trait LocalName {
235    /// The key of an element in order to get its localized name.
236    const KEY: &'static str;
237
238    /// Get the name in the given language and (optionally) region.
239    fn local_name(lang: Lang, region: Option<Region>) -> &'static str {
240        localized_str(lang, region, Self::KEY)
241    }
242
243    /// Gets the local name from the style chain.
244    fn local_name_in(styles: StyleChain) -> &'static str
245    where
246        Self: Sized,
247    {
248        Self::local_name(TextElem::lang_in(styles), TextElem::region_in(styles))
249    }
250}
251
252/// Retrieves the localized string for a given language and region.
253/// Silently falls back to English if no fitting string exists for
254/// the given language + region. Panics if no fitting string exists
255/// in both given language + region and English.
256#[comemo::memoize]
257pub fn localized_str(lang: Lang, region: Option<Region>, key: &str) -> &'static str {
258    let lang_region_bundle = parse_language_bundle(lang, region).unwrap();
259    if let Some(str) = lang_region_bundle.get(key) {
260        return str;
261    }
262    let lang_bundle = parse_language_bundle(lang, None).unwrap();
263    if let Some(str) = lang_bundle.get(key) {
264        return str;
265    }
266    let english_bundle = parse_language_bundle(Lang::ENGLISH, None).unwrap();
267    english_bundle.get(key).unwrap()
268}
269
270/// Parses the translation file for a given language and region.
271/// Only returns an error if the language file is malformed.
272#[comemo::memoize]
273fn parse_language_bundle(
274    lang: Lang,
275    region: Option<Region>,
276) -> Result<HashMap<&'static str, &'static str>, &'static str> {
277    let language_tuple = TRANSLATIONS.iter().find(|it| it.0 == lang_str(lang, region));
278    let Some((_lang_name, language_file)) = language_tuple else {
279        return Ok(HashMap::new());
280    };
281
282    let mut bundle = HashMap::new();
283    let lines = language_file.trim().lines();
284    for line in lines {
285        if line.trim().starts_with('#') {
286            continue;
287        }
288        let (key, val) = line
289            .split_once('=')
290            .ok_or("malformed translation file: line without \"=\"")?;
291        let (key, val) = (key.trim(), val.trim());
292        if val.is_empty() {
293            return Err("malformed translation file: empty translation value");
294        }
295        let duplicate = bundle.insert(key.trim(), val.trim());
296        if duplicate.is_some() {
297            return Err("malformed translation file: duplicate key");
298        }
299    }
300    Ok(bundle)
301}
302
303/// Convert language + region to a string to be able to get a file name.
304fn lang_str(lang: Lang, region: Option<Region>) -> EcoString {
305    EcoString::from(lang.as_str())
306        + region.map_or_else(EcoString::new, |r| EcoString::from("-") + r.as_str())
307}
308
#[cfg(test)]
mod tests {
    use typst_utils::option_eq;

    use super::*;

    // Exercises `Region`'s `PartialEq<&str>` impl through `option_eq`.
    #[test]
    fn test_region_option_eq() {
        let us = Some(Region(*b"US"));
        assert!(option_eq(us, "US"));
        assert!(!option_eq(us, "AB"));
    }
}