typst_library/text/
lang.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
use std::collections::HashMap;
use std::str::FromStr;

use ecow::{eco_format, EcoString};

use crate::diag::Hint;
use crate::foundations::{cast, StyleChain};
use crate::layout::Dir;
use crate::text::TextElem;

macro_rules! translation {
    ($lang:literal) => {
        ($lang, include_str!(concat!("../../translations/", $lang, ".txt")))
    };
}

const TRANSLATIONS: [(&str, &str); 38] = [
    translation!("ar"),
    translation!("bg"),
    translation!("ca"),
    translation!("cs"),
    translation!("da"),
    translation!("de"),
    translation!("en"),
    translation!("es"),
    translation!("et"),
    translation!("eu"),
    translation!("fi"),
    translation!("fr"),
    translation!("gl"),
    translation!("el"),
    translation!("he"),
    translation!("hu"),
    translation!("is"),
    translation!("it"),
    translation!("ja"),
    translation!("la"),
    translation!("nb"),
    translation!("nl"),
    translation!("nn"),
    translation!("pl"),
    translation!("pt-PT"),
    translation!("pt"),
    translation!("ro"),
    translation!("ru"),
    translation!("sl"),
    translation!("sq"),
    translation!("sr"),
    translation!("sv"),
    translation!("tl"),
    translation!("tr"),
    translation!("uk"),
    translation!("vi"),
    translation!("zh-TW"),
    translation!("zh"),
];

/// An identifier for a natural language.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Lang([u8; 3], u8);

impl Lang {
    pub const ALBANIAN: Self = Self(*b"sq ", 2);
    pub const ARABIC: Self = Self(*b"ar ", 2);
    pub const BASQUE: Self = Self(*b"eu ", 2);
    pub const BOKMÃ…L: Self = Self(*b"nb ", 2);
    pub const BULGARIAN: Self = Self(*b"bg ", 2);
    pub const CATALAN: Self = Self(*b"ca ", 2);
    pub const CHINESE: Self = Self(*b"zh ", 2);
    pub const CROATIAN: Self = Self(*b"hr ", 2);
    pub const CZECH: Self = Self(*b"cs ", 2);
    pub const DANISH: Self = Self(*b"da ", 2);
    pub const DUTCH: Self = Self(*b"nl ", 2);
    pub const ENGLISH: Self = Self(*b"en ", 2);
    pub const ESTONIAN: Self = Self(*b"et ", 2);
    pub const FILIPINO: Self = Self(*b"tl ", 2);
    pub const FINNISH: Self = Self(*b"fi ", 2);
    pub const FRENCH: Self = Self(*b"fr ", 2);
    pub const GALICIAN: Self = Self(*b"gl ", 2);
    pub const GERMAN: Self = Self(*b"de ", 2);
    pub const GREEK: Self = Self(*b"el ", 2);
    pub const HEBREW: Self = Self(*b"he ", 2);
    pub const HUNGARIAN: Self = Self(*b"hu ", 2);
    pub const ICELANDIC: Self = Self(*b"is ", 2);
    pub const ITALIAN: Self = Self(*b"it ", 2);
    pub const JAPANESE: Self = Self(*b"ja ", 2);
    pub const LATIN: Self = Self(*b"la ", 2);
    pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3);
    pub const NYNORSK: Self = Self(*b"nn ", 2);
    pub const POLISH: Self = Self(*b"pl ", 2);
    pub const PORTUGUESE: Self = Self(*b"pt ", 2);
    pub const ROMANIAN: Self = Self(*b"ro ", 2);
    pub const RUSSIAN: Self = Self(*b"ru ", 2);
    pub const SERBIAN: Self = Self(*b"sr ", 2);
    pub const SLOVAK: Self = Self(*b"sk ", 2);
    pub const SLOVENIAN: Self = Self(*b"sl ", 2);
    pub const SPANISH: Self = Self(*b"es ", 2);
    pub const SWEDISH: Self = Self(*b"sv ", 2);
    pub const TURKISH: Self = Self(*b"tr ", 2);
    pub const UKRAINIAN: Self = Self(*b"uk ", 2);
    pub const VIETNAMESE: Self = Self(*b"vi ", 2);

    /// Return the language code as an all lowercase string slice.
    pub fn as_str(&self) -> &str {
        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
    }

    /// The default direction for the language.
    pub fn dir(self) -> Dir {
        match self.as_str() {
            "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
            | "yi" => Dir::RTL,
            _ => Dir::LTR,
        }
    }
}

impl FromStr for Lang {
    type Err = &'static str;

    /// Construct a language from a two- or three-byte ISO 639-1/2/3 code.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        let len = iso.len();
        if matches!(len, 2..=3) && iso.is_ascii() {
            let mut bytes = [b' '; 3];
            bytes[..len].copy_from_slice(iso.as_bytes());
            bytes.make_ascii_lowercase();
            Ok(Self(bytes, len as u8))
        } else {
            Err("expected two or three letter language code (ISO 639-1/2/3)")
        }
    }
}

cast! {
    Lang,
    self => self.as_str().into_value(),
    string: EcoString => {
        let result = Self::from_str(&string);
        if result.is_err() {
            if let Some((lang, region)) = string.split_once('-') {
                if Lang::from_str(lang).is_ok() && Region::from_str(region).is_ok() {
                    return result
                        .hint(eco_format!(
                            "you should leave only \"{}\" in the `lang` parameter and specify \"{}\" in the `region` parameter",
                            lang, region,
                        ));
                }
            }
        }

        result?
    }
}

/// An identifier for a region somewhere in the world.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Region([u8; 2]);

impl Region {
    /// Return the region code as an all uppercase string slice.
    pub fn as_str(&self) -> &str {
        std::str::from_utf8(&self.0).unwrap_or_default()
    }
}

impl PartialEq<&str> for Region {
    fn eq(&self, other: &&str) -> bool {
        self.as_str() == *other
    }
}

impl FromStr for Region {
    type Err = &'static str;

    /// Construct a region from its two-byte ISO 3166-1 alpha-2 code.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        if iso.len() == 2 && iso.is_ascii() {
            let mut bytes: [u8; 2] = iso.as_bytes().try_into().unwrap();
            bytes.make_ascii_uppercase();
            Ok(Self(bytes))
        } else {
            Err("expected two letter region code (ISO 3166-1 alpha-2)")
        }
    }
}

cast! {
    Region,
    self => self.as_str().into_value(),
    string: EcoString => Self::from_str(&string)?,
}

/// An ISO 15924-type script identifier.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct WritingScript([u8; 4], u8);

impl WritingScript {
    /// Return the script as an all lowercase string slice.
    pub fn as_str(&self) -> &str {
        std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default()
    }

    /// Return the description of the script as raw bytes.
    pub fn as_bytes(&self) -> &[u8; 4] {
        &self.0
    }
}

impl FromStr for WritingScript {
    type Err = &'static str;

    /// Construct a region from its ISO 15924 code.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        let len = iso.len();
        if matches!(len, 3..=4) && iso.is_ascii() {
            let mut bytes = [b' '; 4];
            bytes[..len].copy_from_slice(iso.as_bytes());
            bytes.make_ascii_lowercase();
            Ok(Self(bytes, len as u8))
        } else {
            Err("expected three or four letter script code (ISO 15924 or 'math')")
        }
    }
}

cast! {
    WritingScript,
    self => self.as_str().into_value(),
    string: EcoString => Self::from_str(&string)?,
}

/// The name with which an element is referenced.
pub trait LocalName {
    /// The key of an element in order to get its localized name.
    const KEY: &'static str;

    /// Get the name in the given language and (optionally) region.
    fn local_name(lang: Lang, region: Option<Region>) -> &'static str {
        localized_str(lang, region, Self::KEY)
    }

    /// Gets the local name from the style chain.
    fn local_name_in(styles: StyleChain) -> &'static str
    where
        Self: Sized,
    {
        Self::local_name(TextElem::lang_in(styles), TextElem::region_in(styles))
    }
}

/// Retrieves the localized string for a given language and region.
/// Silently falls back to English if no fitting string exists for
/// the given language + region. Panics if no fitting string exists
/// in both given language + region and English.
#[comemo::memoize]
pub fn localized_str(lang: Lang, region: Option<Region>, key: &str) -> &'static str {
    let lang_region_bundle = parse_language_bundle(lang, region).unwrap();
    if let Some(str) = lang_region_bundle.get(key) {
        return str;
    }
    let lang_bundle = parse_language_bundle(lang, None).unwrap();
    if let Some(str) = lang_bundle.get(key) {
        return str;
    }
    let english_bundle = parse_language_bundle(Lang::ENGLISH, None).unwrap();
    english_bundle.get(key).unwrap()
}

/// Parses the translation file for a given language and region.
/// Only returns an error if the language file is malformed.
#[comemo::memoize]
fn parse_language_bundle(
    lang: Lang,
    region: Option<Region>,
) -> Result<HashMap<&'static str, &'static str>, &'static str> {
    let language_tuple = TRANSLATIONS.iter().find(|it| it.0 == lang_str(lang, region));
    let Some((_lang_name, language_file)) = language_tuple else {
        return Ok(HashMap::new());
    };

    let mut bundle = HashMap::new();
    let lines = language_file.trim().lines();
    for line in lines {
        if line.trim().starts_with('#') {
            continue;
        }
        let (key, val) = line
            .split_once('=')
            .ok_or("malformed translation file: line without \"=\"")?;
        let (key, val) = (key.trim(), val.trim());
        if val.is_empty() {
            return Err("malformed translation file: empty translation value");
        }
        let duplicate = bundle.insert(key.trim(), val.trim());
        if duplicate.is_some() {
            return Err("malformed translation file: duplicate key");
        }
    }
    Ok(bundle)
}

/// Convert language + region to a string to be able to get a file name.
fn lang_str(lang: Lang, region: Option<Region>) -> EcoString {
    EcoString::from(lang.as_str())
        + region.map_or_else(EcoString::new, |r| EcoString::from("-") + r.as_str())
}

#[cfg(test)]
mod tests {
    use typst_utils::option_eq;

    use super::*;

    #[test]
    fn test_region_option_eq() {
        let region = Some(Region([b'U', b'S']));
        assert!(option_eq(region, "US"));
        assert!(!option_eq(region, "AB"));
    }
}