make_pluralrules/
lib.rs

1//! make_pluralrules generates a Rust code representation of CLDR plural rules in compliance with [Unicode](http://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules).
2//!
3//! Representations of plural rules are generated from [Unicode's plural rules](https://github.com/unicode-cldr/cldr-core/blob/master/supplemental/plurals.json) and use the cldr_pluralrules_parser AST to build the representation.
4//!
5//! The output is a Rust file, specified by the user in the command:
6//! ```text
7//! cargo run -- -i <./path/to/cldr.json>... -o <./path/to/output.rs>
8//! ```
9
10mod parser;
11
12use crate::parser::plural_category::PluralCategory;
13use crate::parser::resource::*;
14use proc_macro2::TokenStream;
15use std::collections::BTreeMap;
16use unic_langid::LanguageIdentifier;
17
18/// Takes a string representation of a CLDR JSON file and produces a string representation of the generated Rust code for the plural rules.
19///
20/// The string representation of the Rust code is written to a specified Rust file and can be used to get the plural category for numerical input.
21pub fn generate_rs(cldr_jsons: &[String]) -> String {
22    let mut cldr_version = None;
23    let mut tokens = BTreeMap::new();
24
25    for cldr_json in cldr_jsons {
26        // resource_items is a struct representation of the raw CLDR rules.
27        let resource_items = parse_plurals_resource_from_string(cldr_json).unwrap();
28
29        let res_cldr_version = resource_items.supplemental.version.cldr_version;
30
31        if cldr_version.is_none() {
32            cldr_version = Some(res_cldr_version);
33        } else if cldr_version != Some(res_cldr_version) {
34            panic!("All input resources must use the same CLDR version!");
35        }
36
37        if let Some(data) = resource_items.supplemental.plurals_type_cardinal {
38            let rule_tokens = gen_type_rs(data);
39            if tokens.contains_key("cardinal") {
40                panic!("Cannot provide two inputs with the same data!");
41            }
42            tokens.insert("cardinal".to_owned(), rule_tokens);
43        }
44
45        if let Some(data) = resource_items.supplemental.plurals_type_ordinal {
46            let rule_tokens = gen_type_rs(data);
47            if tokens.contains_key("ordinal") {
48                panic!("Cannot provide two inputs with the same data!");
49            }
50            tokens.insert("ordinal".to_owned(), rule_tokens);
51        }
52    }
53
54    if cldr_version.is_none() || tokens.is_empty() {
55        panic!("None of the input files provided core data!");
56    }
57
58    // Call gen_rs to get Rust code. Convert TokenStream to string for file out.
59    parser::gen_rs::gen_fn(tokens, &cldr_version.unwrap()).to_string()
60}
61
62fn gen_type_rs(rules: BTreeMap<String, BTreeMap<String, String>>) -> (Vec<TokenStream>) {
63    // rule_tokens is a vector of TokenStreams that represent the CLDR plural rules as Rust expressions.
64    let mut rule_tokens = Vec::<TokenStream>::new();
65
66    let mut rules: Vec<(LanguageIdentifier, BTreeMap<String, String>)> = rules
67        .into_iter()
68        .filter_map(|(key, value)| {
69            if key == "root" {
70                None
71            } else {
72                let langid = key.parse().expect(&format!("Parsing {} failed", key));
73                Some((langid, value))
74            }
75        })
76        .collect();
77
78    // We rely on sorted list for binary search in the consumer.
79    rules.sort_unstable_by(|(langid1, _), (langid2, _)| langid1.cmp(langid2));
80
81    for (lang, r) in rules {
82        // this_lang_rules is a vector of plural rules saved as a PluralCategory and a TokenStream
83        let mut this_lang_rules = Vec::<(PluralCategory, TokenStream)>::new();
84
85        for (rule_name, rule_line) in r {
86            // cat_name is the simplified category name from the CLDR source file
87            let cat_name = rule_name.split('-').collect::<Vec<_>>()[2];
88
89            // representation is the
90            let representation = cldr_pluralrules_parser::parse_plural_condition(rule_line)
91                .expect("Parsing of a condition succeeded");
92
93            let cat = if cat_name == "zero" {
94                PluralCategory::ZERO
95            } else if cat_name == "one" {
96                PluralCategory::ONE
97            } else if cat_name == "two" {
98                PluralCategory::TWO
99            } else if cat_name == "few" {
100                PluralCategory::FEW
101            } else if cat_name == "many" {
102                PluralCategory::MANY
103            } else {
104                PluralCategory::OTHER
105            };
106
107            // Only allow rules that are not `OTHER` to be added. `OTHER` can have no rules and is added outside of the loop.
108            if cat != PluralCategory::OTHER {
109                let tokens = parser::gen_pr::gen_pr(representation);
110                this_lang_rules.push((cat, tokens));
111            }
112        }
113        // convert language rules to TokenStream and add them to all the rules
114        rule_tokens.push(parser::gen_rs::gen_mid(&lang, &this_lang_rules));
115    }
116    rule_tokens
117}