Skip to main content

use_locale_tag/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::fmt;
5
6use use_language::{LanguageCode, parse_language_code};
7use use_region::{RegionCode, parse_region_code};
8use use_script::{ScriptCode, parse_script_code};
9
10/// Parsed locale tag components.
11#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
12pub struct LocaleTagParts {
13    pub language: LanguageCode,
14    pub script: Option<ScriptCode>,
15    pub region: Option<RegionCode>,
16    pub variants: Vec<String>,
17    pub extensions: Vec<String>,
18    pub private_use: Option<String>,
19}
20
21impl LocaleTagParts {
22    /// Builds the normalized tag string represented by these parts.
23    #[must_use]
24    pub fn to_tag_string(&self) -> String {
25        let mut subtags = vec![self.language.as_str().to_string()];
26
27        if let Some(script) = &self.script {
28            subtags.push(script.as_str().to_string());
29        }
30
31        if let Some(region) = &self.region {
32            subtags.push(region.as_str().to_string());
33        }
34
35        subtags.extend(self.variants.iter().cloned());
36        subtags.extend(self.extensions.iter().cloned());
37
38        if let Some(private_use) = &self.private_use {
39            subtags.push(private_use.clone());
40        }
41
42        subtags.join("-")
43    }
44}
45
46/// A normalized locale tag.
47#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
48pub struct LocaleTag {
49    value: String,
50    parts: LocaleTagParts,
51}
52
53impl LocaleTag {
54    /// Parses and normalizes a locale tag.
55    #[must_use]
56    pub fn new(input: &str) -> Option<Self> {
57        parse_locale_tag(input)
58    }
59
60    /// Returns the normalized locale tag.
61    #[must_use]
62    pub fn as_str(&self) -> &str {
63        &self.value
64    }
65
66    /// Returns the parsed locale tag parts.
67    #[must_use]
68    pub const fn parts(&self) -> &LocaleTagParts {
69        &self.parts
70    }
71
72    /// Consumes the locale tag and returns the normalized string.
73    #[must_use]
74    pub fn into_string(self) -> String {
75        self.value
76    }
77}
78
79impl AsRef<str> for LocaleTag {
80    fn as_ref(&self) -> &str {
81        self.as_str()
82    }
83}
84
85impl fmt::Display for LocaleTag {
86    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
87        formatter.write_str(self.as_str())
88    }
89}
90
91/// Parses a locale tag and normalizes its core language, script, and region subtags.
92#[must_use]
93pub fn parse_locale_tag(input: &str) -> Option<LocaleTag> {
94    let parts = parse_locale_tag_parts(input)?;
95    let value = parts.to_tag_string();
96
97    Some(LocaleTag { value, parts })
98}
99
100/// Parses a locale tag into normalized parts.
101#[must_use]
102pub fn parse_locale_tag_parts(input: &str) -> Option<LocaleTagParts> {
103    let trimmed = input.trim();
104    if trimmed.is_empty() || trimmed.contains('_') {
105        return None;
106    }
107
108    let subtags = trimmed.split('-').collect::<Vec<_>>();
109    if subtags.iter().any(|subtag| subtag.is_empty()) {
110        return None;
111    }
112
113    let language = parse_language_code(subtags.first().copied()?)?;
114    let mut index = 1;
115
116    let script = subtags
117        .get(index)
118        .and_then(|subtag| parse_script_code(subtag))
119        .inspect(|_| index += 1);
120
121    let region = subtags
122        .get(index)
123        .and_then(|subtag| parse_region_code(subtag))
124        .inspect(|_| index += 1);
125
126    let mut variants = Vec::new();
127    let mut extensions = Vec::new();
128    let mut private_use = None;
129    let mut extension_singletons = Vec::new();
130
131    while index < subtags.len() {
132        let subtag = subtags[index];
133
134        if is_private_use_singleton(subtag) {
135            let tail = &subtags[index..];
136            if tail.len() < 2 || !tail[1..].iter().all(|value| is_private_use_subtag(value)) {
137                return None;
138            }
139
140            private_use = Some(tail.join("-"));
141            index = subtags.len();
142        } else if is_extension_singleton(subtag) {
143            let singleton = subtag.to_ascii_lowercase();
144            if extension_singletons.contains(&singleton) {
145                return None;
146            }
147            extension_singletons.push(singleton);
148
149            let start = index;
150            index += 1;
151            let payload_start = index;
152
153            while index < subtags.len() && !is_singleton(subtags[index]) {
154                if !is_extension_subtag(subtags[index]) {
155                    return None;
156                }
157                index += 1;
158            }
159
160            if index == payload_start {
161                return None;
162            }
163
164            extensions.push(subtags[start..index].join("-"));
165        } else if is_variant_subtag(subtag) {
166            variants.push(subtag.to_string());
167            index += 1;
168        } else {
169            return None;
170        }
171    }
172
173    Some(LocaleTagParts {
174        language,
175        script,
176        region,
177        variants,
178        extensions,
179        private_use,
180    })
181}
182
183/// Normalizes a locale tag when it is syntactically valid for this crate's subset.
184#[must_use]
185pub fn normalize_locale_tag(input: &str) -> Option<String> {
186    parse_locale_tag(input).map(LocaleTag::into_string)
187}
188
189/// Returns `true` when the input is a supported locale tag shape.
190#[must_use]
191pub fn is_locale_tag(input: &str) -> bool {
192    parse_locale_tag(input).is_some()
193}
194
/// Returns `true` when `subtag` is exactly one ASCII letter or digit.
fn is_singleton(subtag: &str) -> bool {
    matches!(subtag.as_bytes(), [byte] if byte.is_ascii_alphanumeric())
}
198
199fn is_extension_singleton(subtag: &str) -> bool {
200    is_singleton(subtag) && !is_private_use_singleton(subtag)
201}
202
/// Returns `true` when `subtag` is the private-use marker: the single letter
/// `x` in either case.
const fn is_private_use_singleton(subtag: &str) -> bool {
    matches!(subtag.as_bytes(), [b'x' | b'X'])
}
206
/// Returns `true` when `subtag` has the shape of a variant: all ASCII
/// alphanumerics, and either 5–8 characters long or exactly 4 characters
/// beginning with a digit.
fn is_variant_subtag(subtag: &str) -> bool {
    let bytes = subtag.as_bytes();
    if !bytes.iter().all(u8::is_ascii_alphanumeric) {
        return false;
    }

    match bytes {
        // Four-character variants must start with a digit.
        [first, _, _, _] => first.is_ascii_digit(),
        _ => (5..=8).contains(&bytes.len()),
    }
}
217
/// Returns `true` when `subtag` can appear inside an extension: 2–8 ASCII
/// alphanumeric characters.
fn is_extension_subtag(subtag: &str) -> bool {
    let bytes = subtag.as_bytes();
    matches!(bytes.len(), 2..=8) && bytes.iter().all(u8::is_ascii_alphanumeric)
}
221
/// Returns `true` when `subtag` can follow the private-use marker: 1–8 ASCII
/// alphanumeric characters.
fn is_private_use_subtag(subtag: &str) -> bool {
    if subtag.is_empty() || subtag.len() > 8 {
        return false;
    }
    subtag.bytes().all(|byte| byte.is_ascii_alphanumeric())
}
225
#[cfg(test)]
mod tests {
    use super::{
        LocaleTag, is_locale_tag, normalize_locale_tag, parse_locale_tag, parse_locale_tag_parts,
    };

    /// Tags that are already in normalized form are accepted and round-trip
    /// unchanged.
    #[test]
    fn parses_common_locale_tags() {
        let canonical = ["en", "en-US", "en-Latn-US", "zh-Hant-TW", "sr-Cyrl-RS"];

        for expected in canonical {
            assert!(is_locale_tag(expected));

            let parsed = parse_locale_tag(expected).unwrap();
            assert_eq!(parsed.as_str(), expected);
        }
    }

    /// Language, script, and region casing is normalized regardless of how
    /// the input was written.
    #[test]
    fn normalizes_core_subtag_casing() {
        let lower_region = normalize_locale_tag("en-us");
        assert_eq!(lower_region, Some("en-US".to_string()));

        let lower_everything = normalize_locale_tag("zh-hant-tw");
        assert_eq!(lower_everything, Some("zh-Hant-TW".to_string()));

        let mixed = LocaleTag::new("SR-cYRL-rs").unwrap();
        assert_eq!(mixed.as_str(), "sr-Cyrl-RS");
    }

    /// The parts view exposes each core subtag in normalized form.
    #[test]
    fn exposes_normalized_parts() {
        let parts = parse_locale_tag_parts("zh-hant-tw").unwrap();

        assert_eq!(parts.language.as_str(), "zh");

        let script = parts.script.unwrap();
        assert_eq!(script.as_str(), "Hant");

        let region = parts.region.unwrap();
        assert_eq!(region.as_str(), "TW");
    }

    /// Variants, extensions, and private use are carried through and split
    /// into their own fields.
    #[test]
    fn preserves_supported_suffixes() {
        let tag = parse_locale_tag("en-us-oxendict-u-ca-gregory-x-app").unwrap();
        assert_eq!(tag.as_str(), "en-US-oxendict-u-ca-gregory-x-app");

        let parts = tag.parts();
        assert_eq!(parts.variants, vec!["oxendict"]);
        assert_eq!(parts.extensions, vec!["u-ca-gregory"]);
        assert_eq!(parts.private_use.as_deref(), Some("x-app"));
    }

    /// Malformed inputs are rejected by both the predicate and the parser.
    #[test]
    fn rejects_invalid_locale_tag_shapes() {
        let malformed = [
            "",
            "en_ US",
            "en_US",
            "en--US",
            "e-US",
            "en-Lat-US",
            "en-Latn-USA",
            "en-u",
            "en-u-ca-u-nu",
            "en-x",
            "en-@",
        ];

        for tag in malformed {
            assert!(!is_locale_tag(tag));
            assert!(parse_locale_tag(tag).is_none());
        }
    }
}