unic_locale_impl/extensions/transform.rs
1use crate::errors::LocaleError;
2use crate::parser::ParserError;
3
4use unic_langid_impl::LanguageIdentifier;
5
6use std::collections::BTreeMap;
7use std::iter::Peekable;
8
9use tinystr::{TinyStr4, TinyStr8};
10
11/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
12/// Identifier`] specification.
13///
14/// Transform extension carries information about source language or script of
15/// transformed content, including content that has been transliterated, transcribed,
16/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
17///
18/// # Examples
19///
20/// ```
21/// use unic_locale_impl::{Locale, LanguageIdentifier};
22///
23/// let mut loc: Locale = "de-t-en-US-h0-hybrid".parse()
24/// .expect("Parsing failed.");
25///
26/// let en_us: LanguageIdentifier = "en-US".parse()
27/// .expect("Parsing failed.");
28///
29/// assert_eq!(loc.extensions.transform.tlang(), Some(&en_us));
30/// assert_eq!(
31/// loc.extensions.transform.tfield("h0")
32/// .expect("Getting tfield failed.")
33/// .collect::<Vec<_>>(),
34/// &["hybrid"]
35/// );
36/// ```
37/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
38/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
39/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
40#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
41pub struct TransformExtensionList {
42 tlang: Option<LanguageIdentifier>,
43
44 // Canonical: sort by key (BTreeMap is already) / remove value 'true'
45 tfields: BTreeMap<TinyStr4, Vec<TinyStr8>>,
46}
47
48fn parse_tkey(key: &[u8]) -> Result<TinyStr4, ParserError> {
49 if key.len() != 2 || !key[0].is_ascii_alphabetic() || !key[1].is_ascii_digit() {
50 return Err(ParserError::InvalidSubtag);
51 }
52 let tkey = TinyStr4::try_from_utf8(key).map_err(|_| ParserError::InvalidSubtag)?;
53 Ok(tkey.to_ascii_lowercase())
54}
55
56const TRUE_TVALUE: TinyStr8 = tinystr::tinystr!(8, "true"); // "true"
57
58fn parse_tvalue(t: &[u8]) -> Result<Option<TinyStr8>, ParserError> {
59 let s = TinyStr8::try_from_utf8(t).map_err(|_| ParserError::InvalidSubtag)?;
60 if t.len() < 3 || t.len() > 8 || !s.is_ascii_alphanumeric() {
61 return Err(ParserError::InvalidSubtag);
62 }
63
64 let s = s.to_ascii_lowercase();
65
66 if s == TRUE_TVALUE {
67 Ok(None)
68 } else {
69 Ok(Some(s))
70 }
71}
72
/// Reports whether `t` has the shape of a subtag that may start a tlang:
/// between 2 and 8 bytes long, all of them ASCII letters.
///
/// This is only a shape check. Four-letter subtags pass it as well, because
/// 4 lies inside the accepted length range.
fn is_language_subtag(t: &[u8]) -> bool {
    (2..=8).contains(&t.len()) && t.iter().all(u8::is_ascii_alphabetic)
}
77
78impl TransformExtensionList {
79 /// Returns `true` if there are no tfields and no tlang in
80 /// the `TransformExtensionList`.
81 ///
82 /// # Examples
83 ///
84 /// ```
85 /// use unic_locale_impl::Locale;
86 ///
87 /// let mut loc: Locale = "en-US-t-es-AR".parse()
88 /// .expect("Parsing failed.");
89 ///
90 /// assert_eq!(loc.extensions.transform.is_empty(), false);
91 /// ```
92 pub fn is_empty(&self) -> bool {
93 self.tlang.is_none() && self.tfields.is_empty()
94 }
95
96 /// Gets tlang from the `TransformExtensionList`.
97 ///
98 /// # Examples
99 ///
100 /// ```
101 /// use unic_locale_impl::Locale;
102 /// use unic_langid_impl::LanguageIdentifier;
103 ///
104 /// let mut loc: Locale = "en-US-t-es-AR".parse()
105 /// .expect("Parsing failed.");
106 ///
107 /// let tlang: LanguageIdentifier = "es-AR".parse()
108 /// .expect("Parsing failed on tlang.");
109 ///
110 /// assert_eq!(loc.extensions.transform.tlang(), Some(&tlang));
111 /// ```
112 pub fn tlang(&self) -> Option<&LanguageIdentifier> {
113 self.tlang.as_ref()
114 }
115
116 /// Sets tlang on the `TransformExtensionList`.
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use unic_locale_impl::Locale;
122 /// use unic_langid_impl::LanguageIdentifier;
123 ///
124 /// let mut loc: Locale = "en-US".parse()
125 /// .expect("Parsing failed.");
126 ///
127 /// let tlang: LanguageIdentifier = "es-AR".parse()
128 /// .expect("Parsing failed on tlang.");
129 ///
130 /// loc.extensions.transform.set_tlang(tlang)
131 /// .expect("Setting tlang failed.");
132 ///
133 /// assert_eq!(loc.to_string(), "en-US-t-es-AR");
134 /// ```
135 pub fn set_tlang(&mut self, tlang: LanguageIdentifier) -> Result<(), LocaleError> {
136 self.tlang = Some(tlang);
137 Ok(())
138 }
139
140 /// Clears tlang on the `TransformExtensionList`.
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// use unic_locale_impl::Locale;
146 /// use unic_langid_impl::LanguageIdentifier;
147 ///
148 /// let mut loc: Locale = "en-US-t-es-AR".parse()
149 /// .expect("Parsing failed.");
150 ///
151 /// loc.extensions.transform.clear_tlang();
152 ///
153 /// assert_eq!(loc.to_string(), "en-US");
154 /// ```
155 pub fn clear_tlang(&mut self) {
156 self.tlang = None;
157 }
158
159 /// Returns the tvalue of tfield in the `TransformExtensionList`.
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// use unic_locale_impl::Locale;
165 ///
166 /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
167 /// .expect("Parsing failed.");
168 ///
169 /// assert_eq!(loc.extensions.transform.tfield("k0")
170 /// .expect("Getting tfield failed.")
171 /// .collect::<Vec<_>>(),
172 /// &["dvorak"]);
173 ///
174 /// // Here tfield with tkey "m0" is not available
175 /// assert_eq!(loc.extensions.transform.tfield("m0")
176 /// .expect("Getting tfield failed.")
177 /// .collect::<Vec<_>>()
178 /// .is_empty(),
179 /// true);
180 /// ```
181 pub fn tfield<S: AsRef<[u8]>>(
182 &self,
183 tkey: S,
184 ) -> Result<impl ExactSizeIterator<Item = &str>, LocaleError> {
185 let tfields: &[_] = match self.tfields.get(&parse_tkey(tkey.as_ref())?) {
186 Some(v) => v,
187 None => &[],
188 };
189
190 Ok(tfields.iter().map(|s| s.as_ref()))
191 }
192
193 /// Returns an iterator over all tkeys in the `TransformExtensionList`.
194 ///
195 /// # Examples
196 ///
197 /// ```
198 /// use unic_locale_impl::Locale;
199 ///
200 /// let mut loc: Locale = "en-US-t-k0-dvorak-h0-hybrid".parse()
201 /// .expect("Parsing failed.");
202 ///
203 /// assert_eq!(loc.extensions.transform.tfield_keys().collect::<Vec<_>>(),
204 /// &["h0", "k0"]);
205 /// ```
206 pub fn tfield_keys(&self) -> impl ExactSizeIterator<Item = &str> {
207 self.tfields.keys().map(|s| s.as_ref())
208 }
209
210 /// Adds a tfield to the `TransformExtensionList` or sets tvalue for tkey if
211 /// tfield is already included in the `TransformExtensionList`.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// use unic_locale_impl::Locale;
217 ///
218 /// let mut loc: Locale = "en-US".parse()
219 /// .expect("Parsing failed.");
220 ///
221 /// loc.extensions.transform.set_tfield("k0", &["dvorak"])
222 /// .expect("Setting tfield failed.");
223 ///
224 /// assert_eq!(loc.to_string(), "en-US-t-k0-dvorak");
225 ///
226 /// loc.extensions.transform.set_tfield("k0", &["colemak"])
227 /// .expect("Setting tfield failed.");
228 ///
229 /// assert_eq!(loc.to_string(), "en-US-t-k0-colemak");
230 /// ```
231 pub fn set_tfield<S: AsRef<[u8]>>(&mut self, tkey: S, tvalue: &[S]) -> Result<(), LocaleError> {
232 let tkey = parse_tkey(tkey.as_ref())?;
233
234 let t = tvalue
235 .iter()
236 .filter_map(|val| parse_tvalue(val.as_ref()).transpose())
237 .collect::<Result<Vec<_>, _>>()?;
238
239 self.tfields.insert(tkey, t);
240 Ok(())
241 }
242
243 /// Removes a tfield from the `TransformExtensionList`.
244 ///
245 /// Returns `true` if tfield was included in the `TransformExtensionList`
246 /// before removal.
247 ///
248 /// # Examples
249 ///
250 /// ```
251 /// use unic_locale_impl::Locale;
252 ///
253 /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
254 /// .expect("Parsing failed.");
255 ///
256 /// assert_eq!(loc.extensions.transform.remove_tfield("k0")
257 /// .expect("Removing tfield failed."),
258 /// true);
259 ///
260 /// assert_eq!(loc.to_string(), "en-US");
261 /// ```
262 pub fn remove_tfield<S: AsRef<[u8]>>(&mut self, tkey: S) -> Result<bool, LocaleError> {
263 Ok(self.tfields.remove(&parse_tkey(tkey.as_ref())?).is_some())
264 }
265
266 /// Clears all tfields from the `TransformExtensionList`.
267 ///
268 /// # Examples
269 ///
270 /// ```
271 /// use unic_locale_impl::Locale;
272 ///
273 /// let mut loc: Locale = "en-US-t-k0-dvorak".parse()
274 /// .expect("Parsing failed.");
275 ///
276 /// loc.extensions.transform.clear_tfields();
277 /// assert_eq!(loc.to_string(), "en-US");
278 /// ```
279 pub fn clear_tfields(&mut self) {
280 self.tfields.clear();
281 }
282
283 pub(crate) fn try_from_iter<'a>(
284 iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
285 ) -> Result<Self, ParserError> {
286 let mut text = Self::default();
287
288 let mut st_peek = iter.peek();
289
290 let mut current_tkey = None;
291 let mut current_tvalue = vec![];
292
293 while let Some(subtag) = st_peek {
294 let slen = subtag.len();
295 if slen == 2 && subtag[0].is_ascii_alphabetic() && subtag[1].is_ascii_digit() {
296 if let Some(current_tkey) = current_tkey {
297 text.tfields.insert(current_tkey, current_tvalue);
298 current_tvalue = vec![];
299 }
300 current_tkey = Some(parse_tkey(subtag)?);
301 iter.next();
302 } else if current_tkey.is_some() {
303 if let Some(tval) = parse_tvalue(subtag)? {
304 current_tvalue.push(tval);
305 }
306 iter.next();
307 } else if is_language_subtag(subtag) {
308 text.tlang = Some(
309 LanguageIdentifier::try_from_iter(iter, true)
310 .map_err(|_| ParserError::InvalidLanguage)?,
311 );
312 } else {
313 break;
314 }
315 st_peek = iter.peek();
316 }
317
318 if let Some(current_keyword) = current_tkey {
319 text.tfields.insert(current_keyword, current_tvalue);
320 }
321
322 Ok(text)
323 }
324}
325
326impl std::fmt::Display for TransformExtensionList {
327 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
328 if self.is_empty() {
329 return Ok(());
330 }
331
332 f.write_str("-t")?;
333
334 if let Some(tlang) = &self.tlang {
335 write!(f, "-{}", tlang)?;
336 }
337
338 for (k, t) in &self.tfields {
339 write!(f, "-{}", k)?;
340 for v in t {
341 write!(f, "-{}", v)?;
342 }
343 }
344 Ok(())
345 }
346}