simple_locale/
string.rs

1/*!
The `LocaleString` type provides a structure for locale identifier strings.
3
4## Standards
5
6> On POSIX platforms such as Unix, Linux and others, locale identifiers are defined by
7> ISO/IEC 15897, which is similar to the BCP 47 definition of language tags, but the
8> locale variant modifier is defined differently, and the character set is included as
9> a part of the identifier.
10
11Locale identifiers are defined in this format: `[language[_territory][.codeset][@modifier]]`.
12For example, Australian English using the UTF-8 encoding is `en_AU.UTF-8`.
13
14* `language` = [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) 2-character language
15  codes.
16* `territory` = [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) 2-character
17  country codes.
18* `codeset` = an undefined string value, `[a-zA-Z0-9_\-]+`.
  * For example, [ISO/IEC 8859](https://en.wikipedia.org/wiki/ISO/IEC_8859) parts 1 to 16
    are usually specified as `ISO8859-1` and so on.
  * Values should be taken from the IANA
    [character sets](https://www.iana.org/assignments/character-sets/character-sets.xhtml)
    list.
24* `modifier` = a semi-colon separated list of _identifiers_, or _name '=' value_ pairs.
25  * Sometimes this is used to indicate the language script in use, as such values from
26    [ISO 15924](http://unicode.org/iso15924/iso15924-codes.html) should be used.
27
28See also:
29
30* [Wikipedia _Locale_](https://en.wikipedia.org/wiki/Locale_(computer_software))
31* [GNU C Library - _Locale-Names_](https://www.gnu.org/software/libc/manual/html_node/Locale-Names.html)
32* [Apple - _NSLocale_](https://developer.apple.com/documentation/foundation/nslocale) and
33  [_localeIdentifier_](https://developer.apple.com/documentation/foundation/nslocale/1416263-localeidentifier)
34* [Microsoft C Runtime - _Locale names, Languages, and Country/Region strings_](https://docs.microsoft.com/en-us/cpp/c-runtime-library/locale-names-languages-and-country-region-strings?view=vs-2019)
35* [Microsoft Windows - _Locale Names_](https://docs.microsoft.com/en-us/windows/win32/intl/locale-names)
36* [IETF _Tags for Identifying Languages_](https://tools.ietf.org/html/bcp47)
37* [W3C _Language Tags and Locale Identifiers for the World Wide Web_](https://www.w3.org/TR/ltli/)
38* [ISO _Procedures for the registration of cultural elements_](https://www.iso.org/standard/50707.html)
39
40*/
41use std::collections::HashMap;
42use std::fmt;
43use std::fmt::Display;
44use std::str::FromStr;
45
46use regex::Regex;
47
48use crate::codes::{country, language};
49
50// ------------------------------------------------------------------------------------------------
51// Public Types
52// ------------------------------------------------------------------------------------------------
53
/// A locale identifier of the form `language[_territory][.codeset][@modifier]`.
///
/// Values are immutable: each `with_*` method returns a new `LocaleString`
/// rather than modifying the receiver. `Clone` and `Eq` are derived so that
/// locales can be copied and compared freely.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocaleString {
    /// When `true`, language and territory codes are additionally looked up in
    /// the known code tables as they are set.
    strict: bool,
    /// The two-letter, lower-case language code (always present).
    language_code: String,
    /// The optional two-letter, upper-case territory code.
    territory: Option<String>,
    /// The optional code set (character encoding) name, stored as given.
    code_set: Option<String>,
    /// The optional modifier text, stored as given.
    modifier: Option<String>,
}
62
/// Reasons a string can be rejected when it is parsed as a locale identifier.
///
/// Implements `Display` and `std::error::Error` so that it can be propagated
/// with `?` into `Box<dyn Error>` and similar error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The input string was empty.
    EmptyString,
    /// The input was one of the special POSIX locale names, `C` or `POSIX`.
    PosixUnsupported,
    /// The input did not match the locale identifier pattern.
    RegexFailure,
    /// The language code component is not valid.
    InvalidLanguageCode,
    /// The country (territory) code component is not valid.
    InvalidCountryCode,
    /// The code set component is not valid.
    InvalidCodeSet,
    /// The modifier component is not valid.
    InvalidModifier,
    /// A path is not valid.
    // NOTE(review): nothing in this file produces this variant; confirm its
    // intended meaning against the callers that do.
    InvalidPath,
}

impl Display for ParseError {
    /// Writes a short, lower-case, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            ParseError::EmptyString => "locale string is empty",
            ParseError::PosixUnsupported => "the POSIX locales `C` and `POSIX` are not supported",
            ParseError::RegexFailure => "locale string does not match the expected format",
            ParseError::InvalidLanguageCode => "invalid language code",
            ParseError::InvalidCountryCode => "invalid country code",
            ParseError::InvalidCodeSet => "invalid code set",
            ParseError::InvalidModifier => "invalid modifier",
            ParseError::InvalidPath => "invalid path",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ParseError {}
74
75// ------------------------------------------------------------------------------------------------
76// Implementations - LocaleString
77// ------------------------------------------------------------------------------------------------
78
/// Character written immediately before the territory component.
const SEP_TERRITORY: char = '_';
/// Character written immediately before the code set component.
const SEP_CODE_SET: char = '.';
/// Character written immediately before the modifier component.
const SEP_MODIFIER: char = '@';
82
83impl LocaleString {
84    pub fn new(language_code: String) -> Self {
85        LocaleString::common_new(language_code, false)
86    }
87
88    pub fn new_strict(language_code: String) -> Self {
89        LocaleString::common_new(language_code, true)
90    }
91
92    fn common_new(language_code: String, strict: bool) -> Self {
93        assert_eq!(
94            language_code.len(),
95            2,
96            "language codes are two character only"
97        );
98        assert_eq!(
99            language_code.chars().all(|c| c.is_lowercase()),
100            true,
101            "language codes are lower case only"
102        );
103        if strict {
104            LocaleString::test_known_language(&language_code);
105        }
106        LocaleString {
107            strict,
108            language_code,
109            territory: None,
110            code_set: None,
111            modifier: None,
112        }
113    }
114
115    pub fn with_language(&self, language_code: String) -> Self {
116        assert_eq!(
117            language_code.len(),
118            2,
119            "language codes are two character only"
120        );
121        assert_eq!(
122            language_code.chars().all(|c| c.is_lowercase()),
123            true,
124            "language codes are lower case only"
125        );
126        if self.strict {
127            LocaleString::test_known_language(&language_code);
128        }
129        LocaleString {
130            strict: false,
131            language_code,
132            territory: self.territory.clone(),
133            code_set: self.code_set.clone(),
134            modifier: self.modifier.clone(),
135        }
136    }
137
138    pub fn with_territory(&self, territory: String) -> Self {
139        assert_eq!(territory.len(), 2, "territory codes are two character only");
140        assert_eq!(
141            territory.chars().all(|c| c.is_uppercase()),
142            true,
143            "territory codes are upper case only"
144        );
145        if self.strict {
146            LocaleString::test_known_territory(&territory);
147        }
148        LocaleString {
149            strict: self.strict,
150            language_code: self.language_code.clone(),
151            territory: Some(territory),
152            code_set: self.code_set.clone(),
153            modifier: self.modifier.clone(),
154        }
155    }
156
157    pub fn with_code_set(&self, code_set: String) -> Self {
158        LocaleString {
159            strict: self.strict,
160            language_code: self.language_code.clone(),
161            territory: self.territory.clone(),
162            code_set: Some(code_set),
163            modifier: self.modifier.clone(),
164        }
165    }
166
167    pub fn with_modifier(&self, modifier: String) -> Self {
168        LocaleString {
169            strict: self.strict,
170            language_code: self.language_code.clone(),
171            territory: self.territory.clone(),
172            code_set: self.code_set.clone(),
173            modifier: Some(modifier),
174        }
175    }
176
177    pub fn with_modifiers<K, V>(&self, modifiers: HashMap<K, V>) -> Self
178    where
179        K: Display,
180        V: Display,
181    {
182        let modifier_strings: Vec<String> = modifiers
183            .iter()
184            .map(|(key, value)| format!("{}={}", key, value))
185            .collect();
186
187        LocaleString {
188            strict: self.strict,
189            language_code: self.language_code.clone(),
190            territory: self.territory.clone(),
191            code_set: self.code_set.clone(),
192            modifier: Some(modifier_strings.join(";")),
193        }
194    }
195
196    pub fn get_language_code(&self) -> String {
197        self.language_code.clone()
198    }
199
200    pub fn get_territory(&self) -> Option<String> {
201        self.territory.clone()
202    }
203
204    pub fn get_code_set(&self) -> Option<String> {
205        self.code_set.clone()
206    }
207
208    pub fn get_modifier(&self) -> Option<String> {
209        self.modifier.clone()
210    }
211
212    fn test_known_language(language_code: &String) {
213        let lang_key = language_code.clone();
214        let result = &language::lookup(&lang_key);
215        assert!(result.is_some(), "language code does not exist");
216    }
217
218    fn test_known_territory(territory: &String) {
219        let country_key = territory.clone();
220        let result = &country::lookup(&country_key);
221        assert!(result.is_some(), "territory code does not exist");
222    }
223}
224
225impl Display for LocaleString {
226    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
227        write!(
228            f,
229            "{}",
230            [
231                self.language_code.clone(),
232                match &self.territory {
233                    Some(v) => format!("{}{}", SEP_TERRITORY, v),
234                    None => "".to_string(),
235                },
236                match &self.code_set {
237                    Some(v) => format!("{}{}", SEP_CODE_SET, v),
238                    None => "".to_string(),
239                },
240                match &self.modifier {
241                    Some(v) => format!("{}{}", SEP_MODIFIER, v),
242                    None => "".to_string(),
243                },
244            ]
245            .join("")
246        )
247    }
248}
249
250impl FromStr for LocaleString {
251    type Err = ParseError;
252
253    fn from_str(s: &str) -> Result<Self, Self::Err> {
254        lazy_static! {
255            static ref RE: Regex =
256                Regex::new(r"^([a-z][a-z]+)(_[A-Z][A-Z]+)?(\.[A-Z][a-zA-Z0-9\-_]+)?(@\w+)?$")
257                    .unwrap();
258        }
259
260        if s.is_empty() {
261            return Err(ParseError::EmptyString);
262        }
263
264        if s == "C" || s == "POSIX" {
265            return Err(ParseError::PosixUnsupported);
266        }
267
268        match RE.captures(s) {
269            None => Err(ParseError::RegexFailure),
270            Some(groups) => {
271                let mut locale = LocaleString::new(groups.get(1).unwrap().as_str().to_string());
272                if let Some(group_str) = groups.get(2) {
273                    locale = locale.with_territory(group_str.as_str()[1..].to_string());
274                }
275                if let Some(group_str) = groups.get(3) {
276                    locale = locale.with_code_set(group_str.as_str()[1..].to_string());
277                }
278                if let Some(group_str) = groups.get(4) {
279                    locale = locale.with_modifier(group_str.as_str()[1..].to_string());
280                }
281                Ok(locale)
282            }
283        }
284    }
285}
286
287// ------------------------------------------------------------------------------------------------
288// Unit Tests
289// ------------------------------------------------------------------------------------------------
290
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::str::FromStr;

    use super::LocaleString;

    // --------------------------------------------------------------------------------------------
    // Constructors and builders must panic on malformed codes.

    #[test]
    #[should_panic(expected = "language codes are two character only")]
    fn test_bad_constructor_length() {
        LocaleString::new("english".to_string());
    }

    #[test]
    #[should_panic(expected = "language codes are lower case only")]
    fn test_bad_constructor_case() {
        LocaleString::new("EN".to_string());
    }

    #[test]
    #[should_panic(expected = "territory codes are two character only")]
    fn test_bad_country_length() {
        LocaleString::new("en".to_string()).with_territory("USA".to_string());
    }

    #[test]
    #[should_panic(expected = "territory codes are upper case only")]
    fn test_bad_country_case() {
        LocaleString::new("en".to_string()).with_territory("us".to_string());
    }

    // --------------------------------------------------------------------------------------------
    // Builders store the value they were given, and getters return it.

    #[test]
    fn test_constructor() {
        let locale = LocaleString::new("en".to_string());
        assert_eq!(locale.get_language_code(), "en".to_string());
        assert_eq!(locale.get_territory(), None);
        assert_eq!(locale.get_code_set(), None);
        assert_eq!(locale.get_modifier(), None);
    }

    #[test]
    fn test_with_language() {
        let locale = LocaleString::new("en".to_string());
        assert_eq!(
            locale.with_language("fr".to_string()).get_language_code(),
            "fr".to_string()
        );
    }

    #[test]
    fn test_with_country() {
        let locale = LocaleString::new("en".to_string());
        assert_eq!(
            locale.with_territory("UK".to_string()).get_territory(),
            Some("UK".to_string())
        );
    }

    #[test]
    fn test_with_code_set() {
        let locale = LocaleString::new("en".to_string());
        assert_eq!(
            locale.with_code_set("UTF-8".to_string()).get_code_set(),
            Some("UTF-8".to_string())
        );
    }

    #[test]
    fn test_with_modifier() {
        let locale = LocaleString::new("en".to_string());
        assert_eq!(
            locale
                .with_modifier("collation=pinyin;currency=CNY".to_string())
                .get_modifier(),
            Some("collation=pinyin;currency=CNY".to_string())
        );
    }

    #[test]
    fn test_with_modifiers() {
        let locale = LocaleString::new("en".to_string());
        let modifiers: HashMap<&str, &str> = [("collation", "pinyin"), ("currency", "CNY")]
            .iter()
            .cloned()
            .collect();
        // `with_modifiers` consumes the map, so build the modifier once and
        // check both pairs against that single result. `contains` is used
        // because the order of the pairs is not asserted here.
        let modifier = locale.with_modifiers(modifiers).get_modifier().unwrap();
        assert!(modifier.contains("collation=pinyin"));
        assert!(modifier.contains("currency=CNY"));
    }

    // --------------------------------------------------------------------------------------------
    // Strict mode additionally rejects codes the lookups do not find.

    #[test]
    #[should_panic(expected = "language code does not exist")]
    fn test_strict_bad_language() {
        LocaleString::new_strict("xx".to_string());
    }

    #[test]
    #[should_panic(expected = "territory code does not exist")]
    fn test_strict_bad_territory() {
        let locale = LocaleString::new_strict("aa".to_string());
        locale.with_territory("XX".to_string());
    }

    #[test]
    fn test_strict_constructor() {
        let locale = LocaleString::new_strict("aa".to_string());
        assert_eq!(locale.get_language_code(), "aa".to_string());
    }

    // --------------------------------------------------------------------------------------------
    // Formatting.

    #[test]
    fn test_to_string() {
        let locale = LocaleString::new("en".to_string())
            .with_territory("US".to_string())
            .with_code_set("UTF-8".to_string())
            .with_modifier("collation=pinyin;currency=CNY".to_string());
        assert_eq!(
            locale.to_string(),
            "en_US.UTF-8@collation=pinyin;currency=CNY".to_string()
        );
    }

    // --------------------------------------------------------------------------------------------
    // Parsing, adding one component per test.

    #[test]
    fn test_from_str_1() {
        let locale = LocaleString::from_str("en").expect("LocaleString::from_str failure");
        assert_eq!(locale.get_language_code(), "en");
    }

    #[test]
    fn test_from_str_2() {
        let locale = LocaleString::from_str("en_US").expect("LocaleString::from_str failure");
        assert_eq!(locale.get_language_code(), "en");
        assert_eq!(locale.get_territory(), Some("US".to_string()));
    }

    #[test]
    fn test_from_str_3() {
        let locale =
            LocaleString::from_str("en_US.UTF-8").expect("LocaleString::from_str failure");
        assert_eq!(locale.get_language_code(), "en");
        assert_eq!(locale.get_territory(), Some("US".to_string()));
        assert_eq!(locale.get_code_set(), Some("UTF-8".to_string()));
    }

    #[test]
    fn test_from_str_4() {
        let locale =
            LocaleString::from_str("en_US.UTF-8@Latn").expect("LocaleString::from_str failure");
        assert_eq!(locale.get_language_code(), "en");
        assert_eq!(locale.get_territory(), Some("US".to_string()));
        assert_eq!(locale.get_code_set(), Some("UTF-8".to_string()));
        assert_eq!(locale.get_modifier(), Some("Latn".to_string()));
    }
}
464}