unic_locale_impl/
lib.rs

1pub(crate) mod errors;
2pub mod extensions;
3pub mod parser;
4
5use errors::LocaleError;
6pub use extensions::{ExtensionType, ExtensionsMap};
7use std::str::FromStr;
8pub use unic_langid_impl::CharacterDirection;
9pub use unic_langid_impl::{subtags, LanguageIdentifier};
10
11/// `Locale` is a core struct representing a Unicode Locale Identifier.
12///
13/// A locale is made of two parts:
14///  * `id` - Unicode Language Identifier
15///  * `extensions` - A set of Unicode Extensions
16///
17/// `Locale` exposes all of the same methods as `LanguageIdentifier`, and
18/// on top of that is able to parse, manipulate and serialize unicode extension
19/// fields.
20///
21/// # Examples
22///
23/// ```
24/// use unic_locale_impl::Locale;
25///
26/// let loc: Locale = "en-US-u-ca-buddhist".parse()
27///     .expect("Failed to parse.");
28///
29/// assert_eq!(loc.id.language, "en");
30/// assert_eq!(loc.id.script, None);
31/// assert_eq!(loc.id.region, Some("US".parse().unwrap()));
32/// assert_eq!(loc.id.variants().len(), 0);
33/// assert_eq!(loc.extensions.unicode.keyword("ca")
34///     .expect("Getting keyword failed.")
35///     .collect::<Vec<_>>(),
36///     &["buddhist"]);
37/// ```
38///
39/// # Parsing
40///
41/// Unicode recognizes three levels of standard conformance for a locale:
42///
43///  * *well-formed* - syntactically correct
44///  * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
45///  * *canonical* - valid and no deprecated codes or structure.
46///
47/// At the moment parsing normalizes a well-formed language identifier converting
48/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
49///
50/// Any bogus subtags will cause the parsing to fail with an error.
51/// No subtag validation is performed.
52///
53/// # Examples:
54///
55/// ```
56/// use unic_locale_impl::Locale;
57///
58/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12".parse()
59///     .expect("Failed to parse.");
60///
61/// assert_eq!(loc.id.language, "en");
62/// assert_eq!(loc.id.script, Some("Latn".parse().unwrap()));
63/// assert_eq!(loc.id.region, Some("US".parse().unwrap()));
64/// assert_eq!(loc.id.variants().collect::<Vec<_>>(), &["valencia"]);
65/// ```
66#[derive(Debug, Default, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
67pub struct Locale {
68    pub id: LanguageIdentifier,
69    pub extensions: extensions::ExtensionsMap,
70}
71
72type PartsTuple = (
73    subtags::Language,
74    Option<subtags::Script>,
75    Option<subtags::Region>,
76    Vec<subtags::Variant>,
77    String,
78);
79
80impl Locale {
81    /// A constructor which takes a utf8 slice, parses it and
82    /// produces a well-formed `Locale`.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// use unic_locale_impl::Locale;
88    ///
89    /// let loc = Locale::from_bytes("en-US-u-hc-h12".as_bytes())
90    ///     .expect("Parsing failed.");
91    ///
92    /// assert_eq!(loc.to_string(), "en-US-u-hc-h12");
93    /// ```
94    pub fn from_bytes(v: &[u8]) -> Result<Self, LocaleError> {
95        Ok(parser::parse_locale(v)?)
96    }
97
98    /// A constructor which takes optional subtags as `AsRef<[u8]>`, parses them and
99    /// produces a well-formed `Locale`.
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// use unic_locale_impl::Locale;
105    ///
106    /// let loc = Locale::from_parts("fr".parse().unwrap(), None, Some("CA".parse().unwrap()), &[], None);
107    ///
108    /// assert_eq!(loc.to_string(), "fr-CA");
109    /// ```
110    pub fn from_parts(
111        language: subtags::Language,
112        script: Option<subtags::Script>,
113        region: Option<subtags::Region>,
114        variants: &[subtags::Variant],
115        extensions: Option<extensions::ExtensionsMap>,
116    ) -> Self {
117        let id = LanguageIdentifier::from_parts(language, script, region, variants);
118        Locale {
119            id,
120            extensions: extensions.unwrap_or_default(),
121        }
122    }
123
124    /// # Safety
125    ///
126    /// This function accepts subtags expecting variants
127    /// to be deduplicated and ordered.
128    pub const unsafe fn from_raw_parts_unchecked(
129        language: subtags::Language,
130        script: Option<subtags::Script>,
131        region: Option<subtags::Region>,
132        variants: Option<Box<[subtags::Variant]>>,
133        extensions: extensions::ExtensionsMap,
134    ) -> Self {
135        let id = LanguageIdentifier::from_raw_parts_unchecked(language, script, region, variants);
136        Self { id, extensions }
137    }
138
139    /// Consumes `Locale` and produces raw internal representations
140    /// of all subtags in form of `u64`/`u32`.
141    ///
142    /// Primarily used for storing internal representation and restoring via
143    /// `from_raw_parts_unchecked`.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use unic_locale_impl::Locale;
149    /// use tinystr::{TinyStr8, TinyStr4};
150    ///
151    /// let loc: Locale = "en-US".parse()
152    ///     .expect("Parsing failed.");
153    ///
154    /// let (lang, script, region, variants, extensions) = loc.into_parts();
155    ///
156    /// let loc2 = Locale::from_parts(
157    ///     lang,
158    ///     script,
159    ///     region,
160    ///     &variants,
161    ///     Some(extensions.parse().unwrap())
162    /// );
163    ///
164    /// assert_eq!(loc2.to_string(), "en-US");
165    /// ```
166    pub fn into_parts(self) -> PartsTuple {
167        let (lang, region, script, variants) = self.id.into_parts();
168        (lang, region, script, variants, self.extensions.to_string())
169    }
170
171    /// Compares a `Locale` to another `AsRef<Locale`
172    /// allowing for either side to use the missing fields as wildcards.
173    ///
174    /// This allows for matching between `en` (treated as `en-*-*-*`) and `en-US`.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// use unic_locale_impl::Locale;
180    ///
181    /// let loc1: Locale = "en".parse()
182    ///     .expect("Parsing failed.");
183    ///
184    /// let loc2: Locale = "en-US".parse()
185    ///     .expect("Parsing failed.");
186    ///
187    /// assert_ne!(loc1, loc2); // "en" != "en-US"
188    /// assert_ne!(loc1.to_string(), loc2.to_string()); // "en" != "en-US"
189    ///
190    /// assert_eq!(loc1.matches(&loc2, false, false), false); // "en" != "en-US"
191    /// assert_eq!(loc1.matches(&loc2, true, false), true); // "en-*-*-*" == "en-US"
192    /// assert_eq!(loc1.matches(&loc2, false, true), false); // "en" != "en-*-US-*"
193    /// assert_eq!(loc1.matches(&loc2, true, true), true); // "en-*-*-*" == "en-*-US-*"
194    /// ```
195    pub fn matches<O: AsRef<Self>>(
196        &self,
197        other: &O,
198        self_as_range: bool,
199        other_as_range: bool,
200    ) -> bool {
201        let other = other.as_ref();
202        if !self.extensions.private.is_empty() || !other.extensions.private.is_empty() {
203            return false;
204        }
205        self.id.matches(&other.id, self_as_range, other_as_range)
206    }
207}
208
209impl FromStr for Locale {
210    type Err = LocaleError;
211
212    fn from_str(source: &str) -> Result<Self, Self::Err> {
213        Ok(parser::parse_locale(source)?)
214    }
215}
216
217impl From<LanguageIdentifier> for Locale {
218    fn from(id: LanguageIdentifier) -> Self {
219        Locale {
220            id,
221            extensions: ExtensionsMap::default(),
222        }
223    }
224}
225
226impl From<Locale> for LanguageIdentifier {
227    fn from(value: Locale) -> Self {
228        value.id
229    }
230}
231
232impl AsRef<LanguageIdentifier> for Locale {
233    fn as_ref(&self) -> &LanguageIdentifier {
234        &self.id
235    }
236}
237
238impl AsRef<Locale> for Locale {
239    #[inline(always)]
240    fn as_ref(&self) -> &Locale {
241        self
242    }
243}
244
245impl std::fmt::Display for Locale {
246    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
247        write!(f, "{}{}", self.id, self.extensions)
248    }
249}
250
251/// This is a best-effort operation that performs all available levels of canonicalization.
252///
253/// At the moment the operation will normalize casing and the separator, but in the future
254/// it may also validate and update from deprecated subtags to canonical ones.
255///
256/// # Examples
257///
258/// ```
259/// use unic_locale_impl::canonicalize;
260///
261/// assert_eq!(canonicalize("pL_latn_pl-U-HC-H12"), Ok("pl-Latn-PL-u-hc-h12".to_string()));
262/// ```
263pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, LocaleError> {
264    let locale = Locale::from_bytes(input.as_ref())?;
265    Ok(locale.to_string())
266}