Skip to main content

es_fluent_shared/
language.rs

1//! Shared language-identifier parsing helpers.
2
3use icu::locale::{Locale, LocaleCanonicalizer};
4use thiserror::Error;
5use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
6
7/// Errors returned when parsing a language identifier that must already be canonicalized.
8#[derive(Debug, Error)]
9pub enum CanonicalLanguageIdentifierError {
10    /// The identifier could not be parsed as a BCP-47 language identifier.
11    #[error("Invalid language identifier '{name}'")]
12    Invalid {
13        /// The invalid identifier.
14        name: String,
15        /// The parsing error produced by `unic-langid`.
16        #[source]
17        source: LanguageIdentifierError,
18    },
19    /// The identifier parsed successfully but was not written in canonical form.
20    #[error("Locale directory '{name}' must use canonical BCP-47 form '{canonical}'")]
21    NonCanonical {
22        /// The original identifier.
23        name: String,
24        /// The canonical identifier.
25        canonical: String,
26    },
27}
28
29/// Parses a language identifier and rejects non-canonical locale forms.
30pub fn parse_canonical_language_identifier(
31    name: &str,
32) -> Result<LanguageIdentifier, CanonicalLanguageIdentifierError> {
33    let lang = name.parse::<LanguageIdentifier>().map_err(|source| {
34        CanonicalLanguageIdentifierError::Invalid {
35            name: name.to_string(),
36            source,
37        }
38    })?;
39    let mut locale = name
40        .parse::<Locale>()
41        .expect("valid unic-langid value should also be valid ICU locale");
42    LocaleCanonicalizer::new_extended().canonicalize(&mut locale);
43
44    let canonical = locale.to_string();
45    if canonical != name {
46        return Err(CanonicalLanguageIdentifierError::NonCanonical {
47            name: name.to_string(),
48            canonical,
49        });
50    }
51
52    Ok(lang)
53}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// An identifier that is already canonical round-trips unchanged.
    #[test]
    fn parses_canonical_language_identifier() {
        let lang = parse_canonical_language_identifier("de-DE-1901")
            .expect("canonical locale should parse");
        assert_eq!(lang.to_string(), "de-DE-1901");
    }

    /// Input that is not a language identifier at all yields `Invalid`.
    #[test]
    fn rejects_invalid_identifier() {
        let err = parse_canonical_language_identifier("not-a-lang!")
            .expect_err("invalid locale should fail");
        assert!(matches!(
            err,
            CanonicalLanguageIdentifierError::Invalid { name, .. } if name == "not-a-lang!"
        ));
    }

    /// Wrong casing is reported as `NonCanonical` along with the expected spelling.
    #[test]
    fn rejects_noncanonical_identifier() {
        let err = parse_canonical_language_identifier("en-us")
            .expect_err("noncanonical locale should fail");
        assert!(matches!(
            err,
            CanonicalLanguageIdentifierError::NonCanonical { name, canonical }
                if name == "en-us" && canonical == "en-US"
        ));
    }

    /// Deprecated language aliases are reported with their replacement.
    #[test]
    fn rejects_aliases_that_are_not_canonicalized() {
        let err =
            parse_canonical_language_identifier("iw").expect_err("deprecated aliases should fail");
        assert!(matches!(
            err,
            CanonicalLanguageIdentifierError::NonCanonical { name, canonical }
                if name == "iw" && canonical == "he"
        ));
    }
}