twine/
lib.rs

1#![allow(clippy::needless_doctest_main)]
2//! ![Rust](https://github.com/IMI-eRnD-Be/twine/workflows/Rust/badge.svg)
3//! [![Latest Version](https://img.shields.io/crates/v/twine.svg)](https://crates.io/crates/twine)
4//! [![Docs.rs](https://docs.rs/twine/badge.svg)](https://docs.rs/twine)
5//! [![LOC](https://tokei.rs/b1/github/IMI-eRnD-Be/twine)](https://github.com/IMI-eRnD-Be/twine)
6//! [![Dependency Status](https://deps.rs/repo/github/IMI-eRnD-Be/twine/status.svg)](https://deps.rs/repo/github/IMI-eRnD-Be/twine)
7//!
8//! Library for internationalization using the [Twine](https://github.com/scelis/twine) file
9//! format.
10//!
11//! # Usage
12//!
13//! 1.  You need to add twine to your `[build-dependencies]` in `Cargo.toml`.
14//!
15//! 2.  Create (or edit) your `build.rs` file:
16//!
17//! ```no_run
18//! fn main() {
19//!     println!("cargo:rerun-if-changed=build.rs");
20//!     twine::build_translations(&["translations.ini"], "i18n.rs");
21//! }
22//! ```
23//!
24//! 3.  You need an INI file with your translations. Example with `translations.ini`:
25//!
26//! ```text
27//! [app_ruin_the_band]
28//!     en = Ruin a band name by translating it in French
29//!     fr = Ruiner le nom d'un groupe en le traduisant en français
30//! [band_tool]
31//!     en = Tool
32//!     fr = Outil
33//! [band_the_doors]
34//!     en = The Doors
35//!     fr = Les portes
36//! [band_rage_against_the_machine]
37//!     en = Rage Against the Machine
38//!     en-gb = Wrath Against the Machine
39//!     fr = Colère contre la machine
40//! [band_the_jackson_5]
41//!     en = The Jackson 5
42//!     fr = Les 5 fils de Jack
43//! [format_string]
44//!     en = %s, %@!
45//!     fr = %s, %@ !
46//! [format_percentage]
47//!     en = %.0f%
48//!     fr = %.0f %
49//! [format_hexadecimal]
50//!     en = %x
51//!     fr = %#X
52//! ```
53//!
54//! 4.  Now in your project you can use the macro `t!` to translate anything:
55//!
56//! ```ignore
57//! # enum Lang { Fr(&'static str) }
58//! # macro_rules! t {
59//! # ($($tokens:tt)+) => {{
60//! # }};
61//! # }
62//! // you need to include the generated file somewhere
63//! include!(concat!(env!("OUT_DIR"), "/i18n.rs"));
64//!
65//! fn main() {
66//!     // use "" if there is no localization
67//!     let lang = Lang::Fr("be");
68//!
69//!     // will output "Ruiner le nom d'un groupe en le traduisant en français"
70//!     t!(app_ruin_the_band => lang);
71//!
72//!     // using formatted arguments, this will output "73 %"
73//!     t!(format_percentage, 73.02f32 => lang);
74//! }
75//! ```
76//!
77//! 5.  Disable incorrect compiler lint `macro_expanded_macro_exports_accessed_by_absolute_paths`:
78//!
79//! This rustc lint does not work properly and often gives a false positive. You can disable it at
80//! the crate level by adding this at the beginning of your `lib.rs` or `main.rs`:
81//!
82//! ```
83//! #![allow(macro_expanded_macro_exports_accessed_by_absolute_paths)]
84//! ```
85//!
86//! # Implementation Notes
87//!
88//! All translation keys must have all the languages of all the keys. For example, if all your keys
89//! have translations for `en` and `fr`, if one key has only `en`, it will fail to compile.
90//!
//! Localized translation can be provided and will be used if available. Otherwise it will
//! fall back to the default translation for that language.
93//!
94//! Any typo in the key will make the compilation fail. Missing format arguments will also make
95//! the compilation fail.
96//!
97//! # Features
98//!
99//!  *  `serde`: when this feature is activated you will need to add `serde` to your dependencies
100//!     and the `Lang` enum generated implements `Serialize` and `Deserialize`.
101//!
102//! # License
103//!
104//! This work is dual-licensed under Apache 2.0 and MIT.
105//! You can choose between one of them if you use this work.
106
107use heck::{ToSnakeCase, ToUpperCamelCase};
108use indenter::CodeFormatter;
109use once_cell::sync::Lazy;
110use regex::Regex;
111use std::collections::{HashMap, HashSet};
112use std::fmt;
113use std::fmt::Write as FmtWrite;
114use std::fs;
115use std::io;
116use std::io::{Read, Write};
117use std::path::Path;
118
// regex that tries to parse printf's format placeholders
// see: https://docs.microsoft.com/en-us/cpp/c-runtime-library/format-specification-syntax-printf-and-wprintf-functions?view=msvc-160
//
// alternatives, in the order they are tried:
//   * a placeholder: `%`, an optional `N$` parameter index, an optional flag (`-`, `+` or `#`),
//     an optional width, an optional `.precision` and a type among `d i s @ x X f`
//   * a `\uXXXX` escape with exactly four hexadecimal digits (captured as `unicode`)
//   * any other backslash escape
//   * `%%`
//   * a `%` at the very end of the text
//   * a double quote
//   * a run of characters containing no `%`, `"` or `\`
static RE_PRINTF: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"%((?P<parameter>\d+)\$)?(?P<flags>[-+#])?(?P<width>\d+)?(?P<precision>\.\d+)?(?P<type>[dis@xXf])|\\u(?P<unicode>[0-9a-fA-F]{4})|\\.|%%|%$|"|[^%"\\]+"#)
        .unwrap()
});
// a language code (group 1) optionally followed by `-` and a region (group 3)
static RE_LANG: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\w+)(-(\w+))?").unwrap());
// a `[name]` header, possibly preceded by whitespace; group 1 is the text between the brackets
static RE_SECTION: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\s*\[([^\]]+)\]").unwrap());
// a `key = value` line: the key (group 1) contains no whitespace, `=`, `;` or `#`, and the
// value (group 2) is captured without its surrounding whitespace
static RE_KEY_VALUE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^\s*([^\s=;#]+)\s*=\s*(.+?)\s*$").unwrap());

// maps each section name to the `(key, value)` pairs found below it, in file order; the
// code generator reads each pair as `(language, text)`
type TwineData = HashMap<String, Vec<(String, String)>>;
131
132/// Generate the `t!()` macro based on the provided list of paths to Twine INI translation files.
133pub fn build_translations<P: AsRef<Path>, O: AsRef<Path>>(
134    ini_files: &[P],
135    output_file: O,
136) -> io::Result<()> {
137    let mut readers = ini_files
138        .iter()
139        .map(|file_path| {
140            let file_path = file_path.as_ref();
141            println!("cargo:rerun-if-changed={}", file_path.display());
142            fs::File::open(file_path)
143        })
144        .collect::<io::Result<Vec<_>>>()?;
145
146    build_translations_from_readers(readers.as_mut_slice(), output_file)
147}
148
149/// Generate the `t!()` macro based on the provided list of `&str` containing Twine INI
150/// translations.
151pub fn build_translations_from_str<P: AsRef<Path>>(
152    strs: &[&str],
153    output_file: P,
154) -> io::Result<()> {
155    let mut readers = strs.iter().map(io::Cursor::new).collect::<Vec<_>>();
156
157    build_translations_from_readers(readers.as_mut_slice(), output_file)
158}
159
160/// Generate the `t!()` macro based on the provided list of readers containing Twine INI
161/// translations.
162pub fn build_translations_from_readers<R: Read, P: AsRef<Path>>(
163    readers: &mut [R],
164    output_file: P,
165) -> io::Result<()> {
166    let mut map = HashMap::new();
167
168    // read all the INI files (might override existing keys)
169    for reader in readers {
170        match read_twine_ini(reader) {
171            Err(err) => panic!("could not read Twine INI file: {}", err),
172            Ok(other_map) => map.extend(other_map),
173        }
174    }
175
176    let out_dir = std::env::var_os("OUT_DIR").unwrap();
177    let dest_path = Path::new(&out_dir).join(output_file);
178    let _ = fs::create_dir_all(dest_path.parent().unwrap());
179    let mut f = io::BufWriter::new(
180        fs::OpenOptions::new()
181            .create(true)
182            .write(true)
183            .truncate(true)
184            .open(dest_path)?,
185    );
186    write!(f, "{}", TwineFormatter { map })?;
187
188    Ok(())
189}
190
191fn read_twine_ini<R: Read>(reader: &mut R) -> io::Result<TwineData> {
192    use std::io::BufRead;
193
194    let mut map: TwineData = HashMap::new();
195    let mut section = None;
196
197    let reader = io::BufReader::new(reader);
198    for (i, line) in reader.lines().enumerate() {
199        let line = line?;
200        if let Some(caps) = RE_SECTION.captures(line.as_str()) {
201            section = Some(
202                map.entry(caps.get(1).unwrap().as_str().to_owned())
203                    .or_default(),
204            );
205        }
206        if let Some(caps) = RE_KEY_VALUE.captures(line.as_str()) {
207            if let Some(section) = section.as_mut() {
208                section.push((
209                    caps.get(1).unwrap().as_str().to_owned(),
210                    caps.get(2).unwrap().as_str().to_owned(),
211                ));
212            } else {
213                panic!("key-value outside section at line {}", i + 1);
214            }
215        }
216    }
217
218    Ok(map)
219}
220
/// Wraps the parsed translations so that the generated Rust code can be produced through
/// its `Display` implementation.
struct TwineFormatter {
    /// Section name mapped to the `(language, text)` pairs declared under it.
    map: TwineData,
}
224
225impl fmt::Display for TwineFormatter {
226    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
227        let mut f = CodeFormatter::new(f, "    ");
228        let mut all_languages = HashSet::new();
229
230        write!(
231            f,
232            r#"
233            #[macro_export]
234            macro_rules! t {{
235            "#,
236        )?;
237        f.indent(1);
238
239        let mut sorted: Vec<_> = self.map.iter().collect();
240        sorted.sort_unstable_by(|(a_key, _), (b_key, _)| a_key.cmp(b_key));
241
242        for (key, translations) in sorted {
243            let key = Self::normalize_key(key.as_str());
244            write!(
245                f,
246                r#"
247                ({key} $(, $fmt_args:expr)* => $lang:expr) => {{{{
248                    #[allow(unreachable_patterns)]
249                    match $lang {{
250                "#,
251            )?;
252            f.indent(2);
253
254            self.generate_match_arms(&mut f, translations, &mut all_languages)?;
255
256            f.dedent(2);
257            write!(
258                f,
259                r#"
260                    }}
261                }}}};
262                "#,
263            )?;
264        }
265        f.dedent(1);
266
267        write!(
268            f,
269            r#"
270            }}
271            "#,
272        )?;
273
274        // generate the `Lang` enum and its variants
275        write!(
276            f,
277            r#"
278            #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
279            #[allow(dead_code)]
280            pub enum Lang {{
281            "#,
282        )?;
283        f.indent(1);
284
285        let lang_variants: HashSet<_> = all_languages
286            .iter()
287            .map(|(lang, _)| lang.as_str())
288            .collect();
289        let mut lang_variants: Vec<_> = lang_variants.into_iter().collect();
290        lang_variants.sort_unstable();
291
292        for lang in lang_variants.iter() {
293            write!(
294                f,
295                r#"
296                {lang}(&'static str),
297                "#,
298            )?;
299        }
300
301        f.dedent(1);
302        write!(
303            f,
304            r#"
305            }}
306
307            impl Lang {{
308                pub fn all_languages() -> &'static [&'static Lang] {{
309                    &[
310            "#,
311        )?;
312        f.indent(3);
313
314        let mut sorted_languages: Vec<_> = all_languages.iter().collect();
315        sorted_languages.sort_unstable();
316
317        for (lang, region) in &sorted_languages {
318            write!(
319                f,
320                r#"
321                &Lang::{}({:?}),
322                "#,
323                lang,
324                region.as_deref().unwrap_or(""),
325            )?;
326        }
327
328        f.dedent(3);
329        write!(
330            f,
331            r#"
332                    ]
333                }}
334            }}
335            "#,
336        )?;
337
338        f.dedent(3);
339        write!(
340            f,
341            r#"
342            impl std::fmt::Display for Lang {{
343                fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {{
344                    write!(f, "{{}}", self.language())?;
345                    let region = self.region();
346                    if !region.is_empty() {{
347                        write!(f, "_{{}}", region)?;
348                    }}
349                    Ok(())
350                }}
351            }}
352            "#,
353        )?;
354
355        f.dedent(3);
356        write!(
357            f,
358            r#"
359            impl Lang {{
360                pub fn language(&self) -> &'static str {{
361                    match self {{
362            "#,
363        )?;
364
365        f.indent(3);
366        for lang in &lang_variants {
367            write!(
368                f,
369                r#"
370                Lang::{}(_) => {:?},
371                "#,
372                lang,
373                lang.to_snake_case(),
374            )?;
375        }
376
377        f.dedent(2);
378        write!(
379            f,
380            r#"
381                    }}
382                }}
383
384                pub fn region(&self) -> &str {{
385                    match self {{
386            "#,
387        )?;
388
389        f.indent(2);
390        for lang in &lang_variants {
391            write!(
392                f,
393                r#"
394                Lang::{lang}(region) => region,
395                "#,
396            )?;
397        }
398
399        f.dedent(3);
400        write!(
401            f,
402            r#"
403                    }}
404                }}
405            }}
406            "#,
407        )?;
408
409        #[cfg(feature = "serde")]
410        {
411            let mut all_regions: Vec<_> = all_languages
412                .iter()
413                .filter_map(|(_, region)| region.as_deref())
414                .collect();
415            all_regions.sort_unstable_by(|a, b| a.cmp(b).reverse());
416            Self::generate_serde(&mut f, &lang_variants, &all_regions)?;
417        }
418
419        Ok(())
420    }
421}
422
423impl TwineFormatter {
424    #[allow(clippy::single_char_add_str)]
425    fn generate_match_arms(
426        &self,
427        f: &mut CodeFormatter<fmt::Formatter>,
428        translations: &[(String, String)],
429        all_languages: &mut HashSet<(String, Option<String>)>,
430    ) -> fmt::Result {
431        let mut match_arms = Vec::new();
432        let mut default_out = None;
433        for (lang, text) in translations {
434            // transform all printf's format placeholder to Rust's format
435            let mut out = String::new();
436            for caps in RE_PRINTF.captures_iter(text.as_str()) {
437                if let Some(type_) = caps.name("type") {
438                    out.push_str("{");
439                    if let Some(parameter) = caps.name("parameter") {
440                        let parameter: usize = parameter
441                            .as_str()
442                            .parse()
443                            .expect("could not parse parameter index");
444                        write!(out, "{}", parameter.saturating_sub(1))?;
445                    }
446                    out.push_str(":");
447                    if let Some(flag) = caps.name("flags") {
448                        out.push_str(flag.as_str());
449                    }
450                    if let Some(width) = caps.name("width") {
451                        out.push_str(width.as_str());
452                    }
453                    if let Some(precision) = caps.name("precision") {
454                        out.push_str(precision.as_str());
455                    }
456                    match type_.as_str() {
457                        x @ "x" | x @ "X" => out.push_str(x),
458                        _ => {}
459                    }
460                    out.push_str("}");
461                } else if &caps[0] == "%%" {
462                    out.push_str("%");
463                } else if &caps[0] == "\"" {
464                    out.push_str("\\\"");
465                } else if let Some(unicode) = caps.name("unicode") {
466                    out.push_str(r"\u{");
467                    out.push_str(unicode.as_str());
468                    out.push_str(r"}");
469                } else {
470                    out.push_str(&caps[0]);
471                }
472            }
473
474            if default_out.is_none() {
475                default_out = Some(out.clone());
476            }
477
478            // parse the language and region, then push the match arm
479            let caps = RE_LANG.captures(lang.as_str()).expect("lang can be parsed");
480            let lang = caps
481                .get(1)
482                .expect("the language is always there")
483                .as_str()
484                .to_upper_camel_case();
485            let region = caps.get(3);
486            all_languages.insert((lang.clone(), region.map(|x| x.as_str().to_string())));
487            match_arms.push((lang, region.map(|x| format!("{:?}", x.as_str())), out));
488        }
489        match_arms.sort_unstable_by(|(a_lang, a_region, _), (b_lang, b_region, _)| {
490            a_lang
491                .cmp(b_lang)
492                .then(a_region.is_none().cmp(&b_region.is_none()))
493        });
494
495        for (lang, region, format) in match_arms {
496            write!(
497                f,
498                r#"
499                $crate::Lang::{}({}) => format!("{}" $(, $fmt_args)*),
500                "#,
501                lang,
502                region.as_deref().unwrap_or("_"),
503                format,
504            )?;
505        }
506
507        if let Some(default_out) = default_out {
508            write!(
509                f,
510                r#"
511                _ => format!("{default_out}" $(, $fmt_args)*),
512                "#,
513            )?;
514        }
515
516        Ok(())
517    }
518
519    // turns all the keys into snake case automatically
520    fn normalize_key(key: &str) -> String {
521        key.to_snake_case().replace('.', "__")
522    }
523
524    #[cfg(feature = "serde")]
525    fn generate_serde(
526        f: &mut CodeFormatter<fmt::Formatter>,
527        all_languages: &[&str],
528        all_regions: &[&str],
529    ) -> fmt::Result {
530        write!(
531            f,
532            r#"
533
534            impl<'de> serde::Deserialize<'de> for Lang {{
535                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
536                where
537                    D: serde::Deserializer<'de>,
538                {{
539                    use serde::de;
540                    use std::fmt;
541
542                    struct LangVisitor;
543
544                    #[allow(clippy::needless_lifetimes)]
545                    impl<'de> de::Visitor<'de> for LangVisitor {{
546                        type Value = Lang;
547
548                        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {{
549                            formatter.write_str("expected string")
550                        }}
551
552                        fn visit_str<E>(self, value: &str) -> Result<Lang, E>
553                        where
554                            E: de::Error,
555                        {{
556                            let mut it = value.splitn(2, '_');
557                            let lang = it.next().unwrap();
558                            let region = it.next().unwrap_or("");
559
560                            let region = match region.to_lowercase().as_str() {{
561            "#,
562        )?;
563        f.indent(5);
564
565        for region in all_regions {
566            write!(
567                f,
568                r#"
569                {region:?} => {region:?},
570                "#,
571                region = region,
572            )?;
573        }
574
575        f.dedent(1);
576        write!(
577            f,
578            r#"
579                "" => "",
580                _ => {{
581                    return Err(de::Error::invalid_value(
582                        de::Unexpected::Str(region),
583                        &"existing region",
584                    ));
585                }}
586            }};
587
588            match lang {{
589            "#,
590        )?;
591        f.indent(1);
592
593        for lang in all_languages {
594            write!(
595                f,
596                r#"
597                {:?} => Ok(Lang::{}(region)),
598                "#,
599                lang.to_snake_case(),
600                lang,
601            )?;
602        }
603
604        f.dedent(5);
605        write!(
606            f,
607            r#"
608                                _ => Err(de::Error::invalid_value(
609                                    de::Unexpected::Str(region),
610                                    &"existing language",
611                                )),
612                            }}
613                        }}
614                    }}
615
616                    deserializer.deserialize_str(LangVisitor)
617                }}
618            }}
619
620            impl serde::Serialize for Lang {{
621                fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
622                where
623                    S: serde::ser::Serializer,
624                {{
625                    match self {{
626            "#,
627        )?;
628        f.indent(3);
629
630        for lang in all_languages {
631            write!(
632                f,
633                r#"
634                Lang::{variant}("") => serializer.serialize_str({lang:?}),
635                Lang::{variant}(region) => serializer.serialize_str(
636                    &format!("{{}}_{{}}", {lang:?}, region),
637                ),
638                "#,
639                variant = lang,
640                lang = lang.to_snake_case(),
641            )?;
642        }
643
644        f.dedent(3);
645        write!(
646            f,
647            r#"
648                    }}
649                }}
650            }}
651            "#,
652        )?;
653
654        Ok(())
655    }
656}