unicode_locale_parser/extensions/
unicode_locale.rs

1use crate::constants::SEP;
2use crate::errors::ParserError;
3use crate::extensions::ExtensionKind;
4
5use std::collections::BTreeMap;
6use std::fmt::{self, Debug, Write};
7use std::iter::Peekable;
8
/// Parsed contents of a Unicode locale (`u`) extension.
///
/// Formatting a value with [`fmt::Display`] writes the extension kind
/// followed by every attribute and then every key with its values, each
/// preceded by the separator.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct UnicodeLocaleExtensions {
    /// Attribute subtags, in the order they were encountered (before the
    /// first key).
    pub attribute: Vec<String>,
    /// Keyword map: each key is associated with the value subtags that
    /// followed it. Being a `BTreeMap`, iteration (and therefore display
    /// output) is ordered by key rather than by input position.
    pub ufield: BTreeMap<String, Vec<String>>,
}
14
15impl fmt::Display for UnicodeLocaleExtensions {
16    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
17        write!(f, "{}", ExtensionKind::UnicodeLocale)?;
18        for attribute in &self.attribute {
19            f.write_char(SEP)?;
20            f.write_str(attribute)?;
21        }
22        for (key, values) in &self.ufield {
23            f.write_char(SEP)?;
24            f.write_str(key)?;
25            for value in values {
26                f.write_char(SEP)?;
27                f.write_str(value)?;
28            }
29        }
30        Ok(())
31    }
32}
33
34pub fn parse_unicode_locale_extensions<'a>(
35    iter: &mut Peekable<impl Iterator<Item = &'a str>>,
36) -> Result<UnicodeLocaleExtensions, ParserError> {
37    // unicode_locale_extensions
38    // https://unicode.org/reports/tr35/#unicode_locale_extensions
39
40    let mut attribute = vec![];
41    let mut ufield = BTreeMap::new();
42    let mut ukey: Option<String> = None;
43    let mut uvalue: Vec<String> = vec![];
44
45    while let Some(subtag) = iter.peek() {
46        let subtag_bytes = subtag.as_bytes();
47        let len = subtag_bytes.len();
48        if len == 1 {
49            break;
50        } else if len == 2
51            && subtag_bytes[0].is_ascii_alphanumeric()
52            && subtag_bytes[1].is_ascii_alphabetic()
53        {
54            // for ukey
55            if let Some(ukey) = ukey {
56                if !ufield.contains_key(&ukey) {
57                    ufield.insert(ukey.clone(), vec![]);
58                }
59                let values = ufield.get_mut(&ukey).unwrap();
60                for value in uvalue {
61                    values.push(value);
62                }
63                uvalue = vec![];
64            }
65            ukey = Some(subtag.to_string());
66            iter.next();
67        } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) {
68            if ukey.is_some() {
69                // for uvalue
70                uvalue.push(subtag.to_string());
71            } else {
72                // for attribute
73                attribute.push(subtag.to_string());
74            }
75            iter.next();
76        } else {
77            return Err(ParserError::InvalidSubtag);
78        }
79    }
80
81    if let Some(ukey) = ukey {
82        if !ufield.contains_key(&ukey) {
83            ufield.insert(ukey.clone(), vec![]);
84        }
85        let values = ufield.get_mut(&ukey).unwrap();
86        for value in uvalue {
87            values.push(value);
88        }
89    }
90
91    Ok(UnicodeLocaleExtensions { attribute, ufield })
92}
93
94/*
95 * Unit tests
96 */
97
98#[allow(unused_imports)] // for unit tests
99use crate::shared::split_str;
100
#[test]
fn success_unicode_locale_extensions() {
    // (input subtags, expected rendering of the parsed extension)
    let cases = [
        // basic case
        ("attr1-ky-value1", "u-attr1-ky-value1"),
        // no attribute
        ("ky-value1", "u-ky-value1"),
        // attribute multiple
        ("attr1-attr2-ky-value1", "u-attr1-attr2-ky-value1"),
        // uvalue multiple
        ("ky-value1-value2", "u-ky-value1-value2"),
        // no uvalue
        ("ky", "u-ky"),
    ];

    for &(input, expected) in &cases {
        let mut iter = split_str(input).peekable();
        let parsed = parse_unicode_locale_extensions(&mut iter).unwrap();
        assert_eq!(expected, parsed.to_string());
    }
}
136
#[test]
fn fail_unicode_locale_extensions() {
    let cases = [
        // invalid ukey
        "k1",
        // invalid uvalue
        "ky-{}",
        // invalid subtag following a valid uvalue
        "ky-value1-{?}",
    ];

    for &input in &cases {
        let mut iter = split_str(input).peekable();
        let error = parse_unicode_locale_extensions(&mut iter).unwrap_err();
        assert_eq!(ParserError::InvalidSubtag, error);
    }
}