1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::fmt;
5
6use use_language::{LanguageCode, parse_language_code};
7use use_region::{RegionCode, parse_region_code};
8use use_script::{ScriptCode, parse_script_code};
9
10#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
12pub struct LocaleTagParts {
13 pub language: LanguageCode,
14 pub script: Option<ScriptCode>,
15 pub region: Option<RegionCode>,
16 pub variants: Vec<String>,
17 pub extensions: Vec<String>,
18 pub private_use: Option<String>,
19}
20
21impl LocaleTagParts {
22 #[must_use]
24 pub fn to_tag_string(&self) -> String {
25 let mut subtags = vec![self.language.as_str().to_string()];
26
27 if let Some(script) = &self.script {
28 subtags.push(script.as_str().to_string());
29 }
30
31 if let Some(region) = &self.region {
32 subtags.push(region.as_str().to_string());
33 }
34
35 subtags.extend(self.variants.iter().cloned());
36 subtags.extend(self.extensions.iter().cloned());
37
38 if let Some(private_use) = &self.private_use {
39 subtags.push(private_use.clone());
40 }
41
42 subtags.join("-")
43 }
44}
45
46#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
48pub struct LocaleTag {
49 value: String,
50 parts: LocaleTagParts,
51}
52
53impl LocaleTag {
54 #[must_use]
56 pub fn new(input: &str) -> Option<Self> {
57 parse_locale_tag(input)
58 }
59
60 #[must_use]
62 pub fn as_str(&self) -> &str {
63 &self.value
64 }
65
66 #[must_use]
68 pub const fn parts(&self) -> &LocaleTagParts {
69 &self.parts
70 }
71
72 #[must_use]
74 pub fn into_string(self) -> String {
75 self.value
76 }
77}
78
79impl AsRef<str> for LocaleTag {
80 fn as_ref(&self) -> &str {
81 self.as_str()
82 }
83}
84
85impl fmt::Display for LocaleTag {
86 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
87 formatter.write_str(self.as_str())
88 }
89}
90
91#[must_use]
93pub fn parse_locale_tag(input: &str) -> Option<LocaleTag> {
94 let parts = parse_locale_tag_parts(input)?;
95 let value = parts.to_tag_string();
96
97 Some(LocaleTag { value, parts })
98}
99
100#[must_use]
102pub fn parse_locale_tag_parts(input: &str) -> Option<LocaleTagParts> {
103 let trimmed = input.trim();
104 if trimmed.is_empty() || trimmed.contains('_') {
105 return None;
106 }
107
108 let subtags = trimmed.split('-').collect::<Vec<_>>();
109 if subtags.iter().any(|subtag| subtag.is_empty()) {
110 return None;
111 }
112
113 let language = parse_language_code(subtags.first().copied()?)?;
114 let mut index = 1;
115
116 let script = subtags
117 .get(index)
118 .and_then(|subtag| parse_script_code(subtag))
119 .inspect(|_| index += 1);
120
121 let region = subtags
122 .get(index)
123 .and_then(|subtag| parse_region_code(subtag))
124 .inspect(|_| index += 1);
125
126 let mut variants = Vec::new();
127 let mut extensions = Vec::new();
128 let mut private_use = None;
129 let mut extension_singletons = Vec::new();
130
131 while index < subtags.len() {
132 let subtag = subtags[index];
133
134 if is_private_use_singleton(subtag) {
135 let tail = &subtags[index..];
136 if tail.len() < 2 || !tail[1..].iter().all(|value| is_private_use_subtag(value)) {
137 return None;
138 }
139
140 private_use = Some(tail.join("-"));
141 index = subtags.len();
142 } else if is_extension_singleton(subtag) {
143 let singleton = subtag.to_ascii_lowercase();
144 if extension_singletons.contains(&singleton) {
145 return None;
146 }
147 extension_singletons.push(singleton);
148
149 let start = index;
150 index += 1;
151 let payload_start = index;
152
153 while index < subtags.len() && !is_singleton(subtags[index]) {
154 if !is_extension_subtag(subtags[index]) {
155 return None;
156 }
157 index += 1;
158 }
159
160 if index == payload_start {
161 return None;
162 }
163
164 extensions.push(subtags[start..index].join("-"));
165 } else if is_variant_subtag(subtag) {
166 variants.push(subtag.to_string());
167 index += 1;
168 } else {
169 return None;
170 }
171 }
172
173 Some(LocaleTagParts {
174 language,
175 script,
176 region,
177 variants,
178 extensions,
179 private_use,
180 })
181}
182
183#[must_use]
185pub fn normalize_locale_tag(input: &str) -> Option<String> {
186 parse_locale_tag(input).map(LocaleTag::into_string)
187}
188
189#[must_use]
191pub fn is_locale_tag(input: &str) -> bool {
192 parse_locale_tag(input).is_some()
193}
194
/// Returns `true` when `subtag` is exactly one ASCII letter or digit.
fn is_singleton(subtag: &str) -> bool {
    matches!(subtag.as_bytes(), [byte] if byte.is_ascii_alphanumeric())
}
198
199fn is_extension_singleton(subtag: &str) -> bool {
200 is_singleton(subtag) && !is_private_use_singleton(subtag)
201}
202
/// Returns `true` when `subtag` is the private-use singleton, i.e. exactly `x` or `X`.
///
/// The check is a byte-pattern match rather than `str::eq_ignore_ascii_case`, so the function
/// stays `const` without relying on that method being callable in const context (which, as far
/// as I know, is only the case on recent toolchains).
const fn is_private_use_singleton(subtag: &str) -> bool {
    matches!(subtag.as_bytes(), [b'x' | b'X'])
}
206
/// Returns `true` when `subtag` has the shape of a variant: 5 to 8 ASCII alphanumerics, or
/// exactly 4 ASCII alphanumerics starting with a digit.
fn is_variant_subtag(subtag: &str) -> bool {
    let bytes = subtag.as_bytes();

    let length_ok = match bytes {
        [first, _, _, _] => first.is_ascii_digit(),
        _ => matches!(bytes.len(), 5..=8),
    };

    length_ok && bytes.iter().all(u8::is_ascii_alphanumeric)
}
217
/// Returns `true` when `subtag` can be part of an extension payload: 2 to 8 ASCII
/// alphanumerics.
fn is_extension_subtag(subtag: &str) -> bool {
    let bytes = subtag.as_bytes();
    matches!(bytes.len(), 2..=8) && bytes.iter().all(u8::is_ascii_alphanumeric)
}
221
/// Returns `true` when `subtag` can follow the private-use singleton: 1 to 8 ASCII
/// alphanumerics.
fn is_private_use_subtag(subtag: &str) -> bool {
    let bytes = subtag.as_bytes();
    matches!(bytes.len(), 1..=8) && bytes.iter().all(u8::is_ascii_alphanumeric)
}
225
#[cfg(test)]
mod tests {
    use super::{
        LocaleTag, is_locale_tag, normalize_locale_tag, parse_locale_tag, parse_locale_tag_parts,
    };

    // Tags that are already in rendered form must validate and round-trip unchanged.
    #[test]
    fn parses_common_locale_tags() {
        const CANONICAL: [&str; 5] = ["en", "en-US", "en-Latn-US", "zh-Hant-TW", "sr-Cyrl-RS"];

        for tag in CANONICAL {
            assert!(is_locale_tag(tag));

            let parsed = parse_locale_tag(tag).unwrap();
            assert_eq!(parsed.as_str(), tag);
        }
    }

    // Language, script and region come back in their normalized casing.
    #[test]
    fn normalizes_core_subtag_casing() {
        assert_eq!(normalize_locale_tag("en-us"), Some("en-US".to_string()));
        assert_eq!(
            normalize_locale_tag("zh-hant-tw"),
            Some("zh-Hant-TW".to_string())
        );

        let mixed_case = LocaleTag::new("SR-cYRL-rs").unwrap();
        assert_eq!(mixed_case.as_str(), "sr-Cyrl-RS");
    }

    // The structured view exposes the same normalized core subtags.
    #[test]
    fn exposes_normalized_parts() {
        let parts = parse_locale_tag_parts("zh-hant-tw").unwrap();

        assert_eq!(parts.language.as_str(), "zh");
        assert_eq!(parts.script.unwrap().as_str(), "Hant");
        assert_eq!(parts.region.unwrap().as_str(), "TW");
    }

    // Variants, extensions and private use are carried through to the rendered tag.
    #[test]
    fn preserves_supported_suffixes() {
        let tag = parse_locale_tag("en-us-oxendict-u-ca-gregory-x-app").unwrap();
        assert_eq!(tag.as_str(), "en-US-oxendict-u-ca-gregory-x-app");

        let parts = tag.parts();
        assert_eq!(parts.variants, vec!["oxendict"]);
        assert_eq!(parts.extensions, vec!["u-ca-gregory"]);
        assert_eq!(parts.private_use.as_deref(), Some("x-app"));
    }

    // Malformed inputs are rejected by both the predicate and the parser.
    #[test]
    fn rejects_invalid_locale_tag_shapes() {
        let malformed = [
            "",
            "en_ US",
            "en_US",
            "en--US",
            "e-US",
            "en-Lat-US",
            "en-Latn-USA",
            "en-u",
            "en-u-ca-u-nu",
            "en-x",
            "en-@",
        ];

        for tag in malformed {
            assert!(!is_locale_tag(tag));
            assert!(parse_locale_tag(tag).is_none());
        }
    }
}