unic_locale_impl/extensions/
transform.rs

1use crate::errors::LocaleError;
2use crate::parser::ParserError;
3
4use unic_langid_impl::LanguageIdentifier;
5
6use std::collections::BTreeMap;
7use std::iter::Peekable;
8
9use tinystr::{TinyStr4, TinyStr8};
10
11/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
12/// Identifier`] specification.
13///
14/// Transform extension carries information about source language or script of
15/// transformed content, including content that has been transliterated, transcribed,
16/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
17///
18/// # Examples
19///
20/// ```
21/// use unic_locale_impl::{Locale, LanguageIdentifier};
22///
23/// let mut loc: Locale = "de-t-en-US-h0-hybrid".parse()
24///     .expect("Parsing failed.");
25///
26/// let en_us: LanguageIdentifier = "en-US".parse()
27///     .expect("Parsing failed.");
28///
29/// assert_eq!(loc.extensions.transform.tlang(), Some(&en_us));
30/// assert_eq!(
31///     loc.extensions.transform.tfield("h0")
32///                             .expect("Getting tfield failed.")
33///                             .collect::<Vec<_>>(),
34///     &["hybrid"]
35/// );
36/// ```
37/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
38/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
39/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
40#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
41pub struct TransformExtensionList {
42    tlang: Option<LanguageIdentifier>,
43
44    // Canonical: sort by key (BTreeMap is already) / remove value 'true'
45    tfields: BTreeMap<TinyStr4, Vec<TinyStr8>>,
46}
47
48fn parse_tkey(key: &[u8]) -> Result<TinyStr4, ParserError> {
49    if key.len() != 2 || !key[0].is_ascii_alphabetic() || !key[1].is_ascii_digit() {
50        return Err(ParserError::InvalidSubtag);
51    }
52    let tkey = TinyStr4::try_from_utf8(key).map_err(|_| ParserError::InvalidSubtag)?;
53    Ok(tkey.to_ascii_lowercase())
54}
55
56const TRUE_TVALUE: TinyStr8 = tinystr::tinystr!(8, "true"); // "true"
57
58fn parse_tvalue(t: &[u8]) -> Result<Option<TinyStr8>, ParserError> {
59    let s = TinyStr8::try_from_utf8(t).map_err(|_| ParserError::InvalidSubtag)?;
60    if t.len() < 3 || t.len() > 8 || !s.is_ascii_alphanumeric() {
61        return Err(ParserError::InvalidSubtag);
62    }
63
64    let s = s.to_ascii_lowercase();
65
66    if s == TRUE_TVALUE {
67        Ok(None)
68    } else {
69        Ok(Some(s))
70    }
71}
72
/// Cheap pre-check for whether `t` could start a language identifier:
/// 2 to 8 bytes, all of them ASCII letters.
///
/// NOTE(review): a four-letter subtag passes this check; whether it is then
/// rejected is up to the `LanguageIdentifier` parser - confirm that this is
/// the intended division of labour.
fn is_language_subtag(t: &[u8]) -> bool {
    matches!(t.len(), 2..=8) && t.iter().all(u8::is_ascii_alphabetic)
}
77
78impl TransformExtensionList {
79    /// Returns `true` if there are no tfields and no tlang in
80    /// the `TransformExtensionList`.
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// use unic_locale_impl::Locale;
86    ///
87    /// let mut loc: Locale = "en-US-t-es-AR".parse()
88    ///     .expect("Parsing failed.");
89    ///
90    /// assert_eq!(loc.extensions.transform.is_empty(), false);
91    /// ```
92    pub fn is_empty(&self) -> bool {
93        self.tlang.is_none() && self.tfields.is_empty()
94    }
95
96    /// Gets tlang from the `TransformExtensionList`.
97    ///
98    /// # Examples
99    ///
100    /// ```
101    /// use unic_locale_impl::Locale;
102    /// use unic_langid_impl::LanguageIdentifier;
103    ///
104    /// let mut loc: Locale = "en-US-t-es-AR".parse()
105    ///     .expect("Parsing failed.");
106    ///
107    /// let tlang: LanguageIdentifier = "es-AR".parse()
108    ///     .expect("Parsing failed on tlang.");
109    ///
110    /// assert_eq!(loc.extensions.transform.tlang(), Some(&tlang));
111    /// ```
112    pub fn tlang(&self) -> Option<&LanguageIdentifier> {
113        self.tlang.as_ref()
114    }
115
116    /// Sets tlang on the `TransformExtensionList`.
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use unic_locale_impl::Locale;
122    /// use unic_langid_impl::LanguageIdentifier;
123    ///
124    /// let mut loc: Locale = "en-US".parse()
125    ///     .expect("Parsing failed.");
126    ///
127    /// let tlang: LanguageIdentifier = "es-AR".parse()
128    ///     .expect("Parsing failed on tlang.");
129    ///
130    /// loc.extensions.transform.set_tlang(tlang)
131    ///     .expect("Setting tlang failed.");
132    ///
133    /// assert_eq!(loc.to_string(), "en-US-t-es-AR");
134    /// ```
135    pub fn set_tlang(&mut self, tlang: LanguageIdentifier) -> Result<(), LocaleError> {
136        self.tlang = Some(tlang);
137        Ok(())
138    }
139
140    /// Clears tlang on the `TransformExtensionList`.
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// use unic_locale_impl::Locale;
146    /// use unic_langid_impl::LanguageIdentifier;
147    ///
148    /// let mut loc: Locale = "en-US-t-es-AR".parse()
149    ///     .expect("Parsing failed.");
150    ///
151    /// loc.extensions.transform.clear_tlang();
152    ///
153    /// assert_eq!(loc.to_string(), "en-US");
154    /// ```
155    pub fn clear_tlang(&mut self) {
156        self.tlang = None;
157    }
158
159    /// Returns the tvalue of tfield in the `TransformExtensionList`.
160    ///
161    /// # Examples
162    ///
163    /// ```
164    /// use unic_locale_impl::Locale;
165    ///
166    /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
167    ///     .expect("Parsing failed.");
168    ///
169    /// assert_eq!(loc.extensions.transform.tfield("k0")
170    ///                .expect("Getting tfield failed.")
171    ///                .collect::<Vec<_>>(),
172    ///            &["dvorak"]);
173    ///
174    /// // Here tfield with tkey "m0" is not available
175    /// assert_eq!(loc.extensions.transform.tfield("m0")
176    ///                .expect("Getting tfield failed.")
177    ///                .collect::<Vec<_>>()
178    ///                .is_empty(),
179    ///            true);
180    /// ```
181    pub fn tfield<S: AsRef<[u8]>>(
182        &self,
183        tkey: S,
184    ) -> Result<impl ExactSizeIterator<Item = &str>, LocaleError> {
185        let tfields: &[_] = match self.tfields.get(&parse_tkey(tkey.as_ref())?) {
186            Some(v) => v,
187            None => &[],
188        };
189
190        Ok(tfields.iter().map(|s| s.as_ref()))
191    }
192
193    /// Returns an iterator over all tkeys in the `TransformExtensionList`.
194    ///
195    /// # Examples
196    ///
197    /// ```
198    /// use unic_locale_impl::Locale;
199    ///
200    /// let mut loc: Locale = "en-US-t-k0-dvorak-h0-hybrid".parse()
201    ///     .expect("Parsing failed.");
202    ///
203    /// assert_eq!(loc.extensions.transform.tfield_keys().collect::<Vec<_>>(),
204    ///            &["h0", "k0"]);
205    /// ```
206    pub fn tfield_keys(&self) -> impl ExactSizeIterator<Item = &str> {
207        self.tfields.keys().map(|s| s.as_ref())
208    }
209
210    /// Adds a tfield to the `TransformExtensionList` or sets tvalue for tkey if
211    /// tfield is already included in the `TransformExtensionList`.
212    ///
213    /// # Examples
214    ///
215    /// ```
216    /// use unic_locale_impl::Locale;
217    ///
218    /// let mut loc: Locale = "en-US".parse()
219    ///     .expect("Parsing failed.");
220    ///
221    /// loc.extensions.transform.set_tfield("k0", &["dvorak"])
222    ///     .expect("Setting tfield failed.");
223    ///
224    /// assert_eq!(loc.to_string(), "en-US-t-k0-dvorak");
225    ///
226    /// loc.extensions.transform.set_tfield("k0", &["colemak"])
227    ///     .expect("Setting tfield failed.");
228    ///
229    /// assert_eq!(loc.to_string(), "en-US-t-k0-colemak");
230    /// ```
231    pub fn set_tfield<S: AsRef<[u8]>>(&mut self, tkey: S, tvalue: &[S]) -> Result<(), LocaleError> {
232        let tkey = parse_tkey(tkey.as_ref())?;
233
234        let t = tvalue
235            .iter()
236            .filter_map(|val| parse_tvalue(val.as_ref()).transpose())
237            .collect::<Result<Vec<_>, _>>()?;
238
239        self.tfields.insert(tkey, t);
240        Ok(())
241    }
242
243    /// Removes a tfield from the `TransformExtensionList`.
244    ///
245    /// Returns `true` if tfield was included in the `TransformExtensionList`
246    /// before removal.
247    ///
248    /// # Examples
249    ///
250    /// ```
251    /// use unic_locale_impl::Locale;
252    ///
253    /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
254    ///     .expect("Parsing failed.");
255    ///
256    /// assert_eq!(loc.extensions.transform.remove_tfield("k0")
257    ///                .expect("Removing tfield failed."),
258    ///            true);
259    ///
260    /// assert_eq!(loc.to_string(), "en-US");
261    /// ```
262    pub fn remove_tfield<S: AsRef<[u8]>>(&mut self, tkey: S) -> Result<bool, LocaleError> {
263        Ok(self.tfields.remove(&parse_tkey(tkey.as_ref())?).is_some())
264    }
265
266    /// Clears all tfields from the `TransformExtensionList`.
267    ///
268    /// # Examples
269    ///
270    /// ```
271    /// use unic_locale_impl::Locale;
272    ///
273    /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
274    ///     .expect("Parsing failed.");
275    ///
276    /// loc.extensions.transform.clear_tfields();
277    /// assert_eq!(loc.to_string(), "en-US");
278    /// ```
279    pub fn clear_tfields(&mut self) {
280        self.tfields.clear();
281    }
282
283    pub(crate) fn try_from_iter<'a>(
284        iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
285    ) -> Result<Self, ParserError> {
286        let mut text = Self::default();
287
288        let mut st_peek = iter.peek();
289
290        let mut current_tkey = None;
291        let mut current_tvalue = vec![];
292
293        while let Some(subtag) = st_peek {
294            let slen = subtag.len();
295            if slen == 2 && subtag[0].is_ascii_alphabetic() && subtag[1].is_ascii_digit() {
296                if let Some(current_tkey) = current_tkey {
297                    text.tfields.insert(current_tkey, current_tvalue);
298                    current_tvalue = vec![];
299                }
300                current_tkey = Some(parse_tkey(subtag)?);
301                iter.next();
302            } else if current_tkey.is_some() {
303                if let Some(tval) = parse_tvalue(subtag)? {
304                    current_tvalue.push(tval);
305                }
306                iter.next();
307            } else if is_language_subtag(subtag) {
308                text.tlang = Some(
309                    LanguageIdentifier::try_from_iter(iter, true)
310                        .map_err(|_| ParserError::InvalidLanguage)?,
311                );
312            } else {
313                break;
314            }
315            st_peek = iter.peek();
316        }
317
318        if let Some(current_keyword) = current_tkey {
319            text.tfields.insert(current_keyword, current_tvalue);
320        }
321
322        Ok(text)
323    }
324}
325
326impl std::fmt::Display for TransformExtensionList {
327    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
328        if self.is_empty() {
329            return Ok(());
330        }
331
332        f.write_str("-t")?;
333
334        if let Some(tlang) = &self.tlang {
335            write!(f, "-{}", tlang)?;
336        }
337
338        for (k, t) in &self.tfields {
339            write!(f, "-{}", k)?;
340            for v in t {
341                write!(f, "-{}", v)?;
342            }
343        }
344        Ok(())
345    }
346}