icu_locale_core/subtags/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Language Identifier and Locale contains a set of subtags
6//! which represent different fields of the structure.
7//!
//! * [`Language`] is the only mandatory field; when empty,
//!   it takes the value `und`.
10//! * [`Script`] is an optional field representing the written script used by the locale.
//! * [`Region`] is an optional field representing the region used by the locale.
12//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the
13//!   variant adjustments used by the locale.
14//!
15//! Subtags can be used in isolation, and all basic operations such as parsing, syntax normalization
16//! and serialization are supported on each individual subtag, but most commonly
17//! they are used to construct a [`LanguageIdentifier`] instance.
18//!
//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags.
//! It wraps the list so that variants can be sorted and deduplicated, which
//! is one of the required steps of language identifier and locale syntax normalization.
22//!
23//! # Examples
24//!
25//! ```
26//! use icu::locale::subtags::{Language, Region, Script, Variant};
27//!
28//! let language: Language =
29//!     "en".parse().expect("Failed to parse a language subtag.");
30//! let script: Script =
31//!     "arab".parse().expect("Failed to parse a script subtag.");
32//! let region: Region =
33//!     "cn".parse().expect("Failed to parse a region subtag.");
34//! let variant: Variant =
35//!     "MacOS".parse().expect("Failed to parse a variant subtag.");
36//!
37//! assert_eq!(language.as_str(), "en");
38//! assert_eq!(script.as_str(), "Arab");
39//! assert_eq!(region.as_str(), "CN");
40//! assert_eq!(variant.as_str(), "macos");
41//! ```
42//!
//! **Note**: The subtags are normalized on parsing. That means
//! that all operations work on a normalized version of the subtag,
//! and serialization is very cheap.
46//!
47//! [`LanguageIdentifier`]: super::LanguageIdentifier
48mod language;
49mod region;
50mod script;
51mod variant;
52mod variants;
53
54#[doc(inline)]
55pub use language::{language, Language};
56#[doc(inline)]
57pub use region::{region, Region};
58#[doc(inline)]
59pub use script::{script, Script};
60#[doc(inline)]
61pub use variant::{variant, Variant};
62pub use variants::Variants;
63
64impl_tinystr_subtag!(
65    /// A generic subtag.
66    ///
67    /// The subtag has to be an ASCII alphanumerical string no shorter than
68    /// two characters and no longer than eight.
    ///
    /// Parsing folds the input to ASCII lowercase, so `"Foo"` is stored as `"foo"`.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use icu::locale::subtags::Subtag;
74    ///
75    /// let subtag1: Subtag = "Foo".parse()
76    ///     .expect("Failed to parse a Subtag.");
77    ///
78    /// assert_eq!(subtag1.as_str(), "foo");
79    /// ```
80    Subtag,
81    subtags,
82    subtag,
83    subtags_subtag,
    // Length window; matches the "two to eight characters" rule documented above.
84    2..=8,
    // NOTE(review): the macro definition is not visible here. The three expressions
    // over `s` presumably are, in order: the validity check, the normalization
    // applied on parsing, and the "already normalized" predicate — confirm against
    // the macro before relying on this.
85    s,
86    s.is_ascii_alphanumeric(),
87    s.to_ascii_lowercase(),
88    s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
89    InvalidSubtag,
    // NOTE(review): these look like sample inputs that should parse (first list)
    // and fail to parse (second list: too short / too long) — confirm against the macro.
90    ["foo12"],
91    ["f", "toolooong"],
92);
93
94#[expect(clippy::len_without_is_empty)]
95impl Subtag {
96    #[allow(dead_code)]
97    pub(crate) const fn valid_key(v: &[u8]) -> bool {
98        2 <= v.len() && v.len() <= 8
99    }
100
101    /// Returns the length of `self`.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use icu::locale::subtags::subtag;
107    /// let s = subtag!("foo");
108    /// assert_eq!(s.len(), 3);
109    /// ```
110    pub fn len(&self) -> usize {
111        self.0.len()
112    }
113
114    #[doc(hidden)]
115    pub fn from_tinystr_unvalidated(input: tinystr::TinyAsciiStr<8>) -> Self {
116        Self(input)
117    }
118
119    #[doc(hidden)]
120    pub fn as_tinystr(&self) -> tinystr::TinyAsciiStr<8> {
121        self.0
122    }
123
124    #[allow(dead_code)]
125    pub(crate) fn to_ascii_lowercase(self) -> Self {
126        Self(self.0.to_ascii_lowercase())
127    }
128}
129
130impl<const N: usize> TryFrom<tinystr::TinyAsciiStr<N>> for Subtag {
131    type Error = crate::parser::errors::ParseError;
132
133    fn try_from(value: tinystr::TinyAsciiStr<N>) -> Result<Self, Self::Error> {
134        Self::try_from_str(&value)
135    }
136}
137
138impl PartialEq<str> for Subtag {
139    fn eq(&self, other: &str) -> bool {
140        self.0 == other
141    }
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use tinystr::tinystr;

    /// The `subtag!` macro yields a subtag whose string form is the given literal.
    #[test]
    fn test_subtag() {
        let built = subtag!("foo");
        assert_eq!(built.as_str(), "foo");
    }

    /// Conversion from `TinyAsciiStr` succeeds or fails depending on the input.
    #[test]
    fn test_subtag_from_tinystr() {
        // Three characters: expected to convert.
        assert!(Subtag::try_from(tinystr!(3, "foo")).is_ok());

        // One character: expected to be rejected.
        assert!(Subtag::try_from(tinystr!(1, "f")).is_err());
    }
}