icu_locale_core/subtags/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! Language Identifier and Locale contain a set of subtags
//! which represent different fields of the structure.
7//!
8//! * [`Language`] is the only mandatory field, which when empty,
9//! takes the value `und`.
10//! * [`Script`] is an optional field representing the written script used by the locale.
11//! * [`Region`] is the region used by the locale.
12//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the
13//! variant adjustments used by the locale.
14//!
15//! Subtags can be used in isolation, and all basic operations such as parsing, syntax normalization
16//! and serialization are supported on each individual subtag, but most commonly
17//! they are used to construct a [`LanguageIdentifier`] instance.
18//!
//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags.
//! It exists as a dedicated wrapper so that variants can be sorted and deduplicated,
//! which is one of the required steps of language identifier and locale syntax normalization.
22//!
23//! # Examples
24//!
25//! ```
26//! use icu::locale::subtags::{Language, Region, Script, Variant};
27//!
28//! let language: Language =
29//! "en".parse().expect("Failed to parse a language subtag.");
30//! let script: Script =
31//! "arab".parse().expect("Failed to parse a script subtag.");
32//! let region: Region =
33//! "cn".parse().expect("Failed to parse a region subtag.");
34//! let variant: Variant =
35//! "MacOS".parse().expect("Failed to parse a variant subtag.");
36//!
37//! assert_eq!(language.as_str(), "en");
38//! assert_eq!(script.as_str(), "Arab");
39//! assert_eq!(region.as_str(), "CN");
40//! assert_eq!(variant.as_str(), "macos");
41//! ```
42//!
43//! `Notice`: The subtags are normalized on parsing. That means
44//! that all operations work on a normalized version of the subtag
45//! and serialization is very cheap.
46//!
47//! [`LanguageIdentifier`]: super::LanguageIdentifier
48mod language;
49mod region;
50mod script;
51mod variant;
52mod variants;
53
54#[doc(inline)]
55pub use language::{language, Language};
56#[doc(inline)]
57pub use region::{region, Region};
58#[doc(inline)]
59pub use script::{script, Script};
60#[doc(inline)]
61pub use variant::{variant, Variant};
62pub use variants::Variants;
63
64impl_tinystr_subtag!(
65 /// A generic subtag.
66 ///
67 /// The subtag has to be an ASCII alphanumerical string no shorter than
68 /// two characters and no longer than eight.
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// use icu::locale::subtags::Subtag;
74 ///
75 /// let subtag1: Subtag = "Foo".parse()
76 /// .expect("Failed to parse a Subtag.");
77 ///
78 /// assert_eq!(subtag1.as_str(), "foo");
79 /// ```
80 Subtag,
81 subtags,
82 subtag,
83 subtags_subtag,
84 2..=8,
85 s,
86 s.is_ascii_alphanumeric(),
87 s.to_ascii_lowercase(),
88 s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
89 InvalidSubtag,
90 ["foo12"],
91 ["f", "toolooong"],
92);
93
94#[expect(clippy::len_without_is_empty)]
95impl Subtag {
96 #[allow(dead_code)]
97 pub(crate) const fn valid_key(v: &[u8]) -> bool {
98 2 <= v.len() && v.len() <= 8
99 }
100
101 /// Returns the length of `self`.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// use icu::locale::subtags::subtag;
107 /// let s = subtag!("foo");
108 /// assert_eq!(s.len(), 3);
109 /// ```
110 pub fn len(&self) -> usize {
111 self.0.len()
112 }
113
114 #[doc(hidden)]
115 pub fn from_tinystr_unvalidated(input: tinystr::TinyAsciiStr<8>) -> Self {
116 Self(input)
117 }
118
119 #[doc(hidden)]
120 pub fn as_tinystr(&self) -> tinystr::TinyAsciiStr<8> {
121 self.0
122 }
123
124 #[allow(dead_code)]
125 pub(crate) fn to_ascii_lowercase(self) -> Self {
126 Self(self.0.to_ascii_lowercase())
127 }
128}
129
130impl<const N: usize> TryFrom<tinystr::TinyAsciiStr<N>> for Subtag {
131 type Error = crate::parser::errors::ParseError;
132
133 fn try_from(value: tinystr::TinyAsciiStr<N>) -> Result<Self, Self::Error> {
134 Self::try_from_str(&value)
135 }
136}
137
138impl PartialEq<str> for Subtag {
139 fn eq(&self, other: &str) -> bool {
140 self.0 == other
141 }
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use tinystr::tinystr;

    /// The `subtag!` macro yields a value whose string form matches the literal.
    #[test]
    fn test_subtag() {
        let parsed = subtag!("foo");
        assert_eq!(parsed.as_str(), "foo");
    }

    /// Conversion from `TinyAsciiStr` succeeds or fails depending on the input.
    #[test]
    fn test_subtag_from_tinystr() {
        // A three-character alphabetic string is accepted.
        assert!(Subtag::try_from(tinystr!(3, "foo")).is_ok());

        // A single-character string is rejected.
        assert!(Subtag::try_from(tinystr!(1, "f")).is_err());
    }
}
163}