unicode_locale_parser/extensions/
transformed.rs

1use crate::constants::SEP;
2use crate::errors::ParserError;
3use crate::extensions::ExtensionKind;
4use crate::lang::{parse_unicode_language_id_from_iter, UnicodeLanguageIdentifier};
5use crate::subtags::is_language_subtag;
6
7use std::collections::BTreeMap;
8use std::fmt::{self, Debug, Write};
9use std::iter::Peekable;
10
/// Parsed contents of a transformed extension
/// (<https://unicode.org/reports/tr35/#transformed_extensions>).
///
/// The `Display` implementation writes the extension kind, then `tlang` (when
/// present), then every `tfield` entry, all joined by the crate's separator.
#[derive(Debug)]
pub struct TransformedExtensions {
    /// Language identifier found in the extension, or `None` when the
    /// extension consists of fields only.
    pub tlang: Option<UnicodeLanguageIdentifier>,
    /// Field keys mapped to the values that followed them, in input order.
    /// Being a `BTreeMap`, iteration (and therefore formatting) is in sorted
    /// key order rather than in input order.
    pub tfield: BTreeMap<String, Vec<String>>,
}
16
17impl fmt::Display for TransformedExtensions {
18    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
19        write!(f, "{}", ExtensionKind::Transformed)?;
20        if let Some(tlang) = &self.tlang {
21            f.write_char(SEP)?;
22            write!(f, "{}", tlang)?;
23        }
24        for (key, values) in &self.tfield {
25            f.write_char(SEP)?;
26            f.write_str(key)?;
27            for value in values {
28                f.write_char(SEP)?;
29                f.write_str(value)?;
30            }
31        }
32        Ok(())
33    }
34}
35
36pub fn parse_transformed_extensions<'a>(
37    iter: &mut Peekable<impl Iterator<Item = &'a str>>,
38) -> Result<TransformedExtensions, ParserError> {
39    // transformed_extensions
40    // https://unicode.org/reports/tr35/#transformed_extensions
41    let mut tlang = None;
42    let mut tfield = BTreeMap::new();
43    let mut tkey: Option<String> = None;
44    let mut tvalue: Vec<String> = vec![];
45
46    while let Some(subtag) = iter.peek() {
47        let subtag_bytes = subtag.as_bytes();
48        let len = subtag_bytes.len();
49        if len == 1 {
50            break;
51        } else if len == 2
52            && subtag_bytes[0].is_ascii_alphabetic()
53            && subtag_bytes[1].is_ascii_digit()
54        {
55            // for tkey
56            if let Some(tkey) = tkey {
57                if !tfield.contains_key(&tkey) {
58                    tfield.insert(tkey.clone(), vec![]);
59                }
60                let values = tfield.get_mut(&tkey).unwrap();
61                for value in tvalue {
62                    values.push(value);
63                }
64                tvalue = vec![];
65            }
66            tkey = Some(subtag.to_string());
67            iter.next();
68        } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) {
69            // for tvalue
70            if tkey.is_none() {
71                return Err(ParserError::InvalidSubtag);
72            }
73            tvalue.push(subtag.to_string());
74            iter.next();
75        } else if is_language_subtag(subtag_bytes) {
76            tlang = Some(parse_unicode_language_id_from_iter(iter)?);
77        } else {
78            return Err(ParserError::InvalidSubtag);
79        }
80    }
81
82    if let Some(tkey) = tkey {
83        if tvalue.is_empty() {
84            return Err(ParserError::InvalidSubtag);
85        }
86        if !tfield.contains_key(&tkey) {
87            tfield.insert(tkey.clone(), vec![]);
88        }
89        let values = tfield.get_mut(&tkey).unwrap();
90        for value in tvalue {
91            values.push(value);
92        }
93    }
94
95    Ok(TransformedExtensions { tlang, tfield })
96}
97
98/*
99 * Unit tests
100 */
101
102#[allow(unused_imports)] // for unit tests
103use crate::shared::split_str;
104
#[test]
fn success_transformed_extensions() {
    // Each entry is (input subtags after the `t` singleton, expected formatted output).
    let cases: [(&str, &str); 4] = [
        // basic case
        ("en-US-a1-foo", "t-en-US-a1-foo"),
        // no tlang
        ("a1-foo", "t-a1-foo"),
        // tvalue multiple
        ("en-a1-foo-b1-bar", "t-en-a1-foo-b1-bar"),
        // tlang only
        ("en-Latn-US-macos", "t-en-Latn-US-macos"),
    ];

    for &(input, expected) in cases.iter() {
        let mut iter = split_str(input).peekable();
        let parsed = parse_transformed_extensions(&mut iter).unwrap();
        assert_eq!(expected, format!("{}", parsed));
    }
}
135
#[test]
fn fail_transformed_extensions() {
    // Every input below must be rejected with `InvalidSubtag`.
    let invalid_inputs: [&str; 3] = [
        // invalid tkey
        "1a-foo",
        // missing tkey
        "foo",
        // missing tvalue
        "a1-foo-b1",
    ];

    for &input in invalid_inputs.iter() {
        let mut iter = split_str(input).peekable();
        let outcome = parse_transformed_extensions(&mut iter);
        assert_eq!(ParserError::InvalidSubtag, outcome.unwrap_err());
    }
}