Skip to main content

es_fluent_shared/
language.rs

1//! Shared language-identifier parsing helpers.
2
3use icu::locale::{Locale, LocaleCanonicalizer};
4use thiserror::Error;
5use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
6
7/// Errors returned when parsing a language identifier that must already be canonicalized.
8#[derive(Debug, Error)]
9pub enum CanonicalLanguageIdentifierError {
10    /// The identifier could not be parsed as a BCP-47 language identifier.
11    #[error("Invalid language identifier '{name}'")]
12    Invalid {
13        /// The invalid identifier.
14        name: String,
15        /// The parsing error produced by `unic-langid`.
16        #[source]
17        source: LanguageIdentifierError,
18    },
19    /// The identifier parsed as a `unic-langid` identifier but could not be converted to ICU.
20    #[error("Language identifier '{name}' could not be parsed as an ICU locale: {details}")]
21    IcuInvalid {
22        /// The invalid identifier.
23        name: String,
24        /// The ICU parsing error.
25        details: String,
26    },
27    /// The identifier parsed successfully but was not written in canonical form.
28    #[error("Locale directory '{name}' must use canonical BCP-47 form '{canonical}'")]
29    NonCanonical {
30        /// The original identifier.
31        name: String,
32        /// The canonical identifier.
33        canonical: String,
34    },
35}
36
37/// Parses a language identifier and rejects non-canonical locale forms.
38pub fn parse_canonical_language_identifier(
39    name: &str,
40) -> Result<LanguageIdentifier, CanonicalLanguageIdentifierError> {
41    let lang = name.parse::<LanguageIdentifier>().map_err(|source| {
42        CanonicalLanguageIdentifierError::Invalid {
43            name: name.to_string(),
44            source,
45        }
46    })?;
47    let mut locale =
48        name.parse::<Locale>()
49            .map_err(|source| CanonicalLanguageIdentifierError::IcuInvalid {
50                name: name.to_string(),
51                details: source.to_string(),
52            })?;
53    LocaleCanonicalizer::new_extended().canonicalize(&mut locale);
54
55    let canonical = locale.to_string();
56    if canonical != name {
57        return Err(CanonicalLanguageIdentifierError::NonCanonical {
58            name: name.to_string(),
59            canonical,
60        });
61    }
62
63    Ok(lang)
64}
65
#[cfg(test)]
mod tests {
    use super::*;

    /// An identifier that is already canonical is returned unchanged.
    #[test]
    fn parses_canonical_language_identifier() {
        let outcome = parse_canonical_language_identifier("de-DE-1901");
        let lang = outcome.expect("canonical locale should parse");
        assert_eq!(lang.to_string(), "de-DE-1901");
    }

    /// Input that `unic-langid` cannot parse is reported as `Invalid`.
    #[test]
    fn rejects_invalid_identifier() {
        let outcome = parse_canonical_language_identifier("not-a-lang!");
        match outcome.expect_err("invalid locale should fail") {
            CanonicalLanguageIdentifierError::Invalid { name, .. } => {
                assert_eq!(name, "not-a-lang!");
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
    }

    /// Wrong casing is rejected and the canonical casing is reported back.
    #[test]
    fn rejects_noncanonical_identifier() {
        let outcome = parse_canonical_language_identifier("en-us");
        match outcome.expect_err("noncanonical locale should fail") {
            CanonicalLanguageIdentifierError::NonCanonical { name, canonical } => {
                assert_eq!(name, "en-us");
                assert_eq!(canonical, "en-US");
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
    }

    /// A language alias is rejected and its canonical replacement is reported back.
    #[test]
    fn rejects_noncanonical_language_identifiers() {
        let outcome = parse_canonical_language_identifier("iw");
        match outcome.expect_err("noncanonical identifiers should fail") {
            CanonicalLanguageIdentifierError::NonCanonical { name, canonical } => {
                assert_eq!(name, "iw");
                assert_eq!(canonical, "he");
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
    }
}