use icu::locale::{Locale, LocaleCanonicalizer};
use thiserror::Error;
use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
/// Errors produced when a name is not an acceptable canonical language identifier.
#[derive(Debug, Error)]
pub enum CanonicalLanguageIdentifierError {
    /// The name could not be parsed as a language identifier at all.
    #[error("Invalid language identifier '{name}'")]
    Invalid {
        /// The name exactly as it was supplied by the caller.
        name: String,
        /// The underlying parse failure.
        #[source]
        source: LanguageIdentifierError,
    },

    /// The name parsed successfully but is not spelled in its canonical form.
    #[error("Locale directory '{name}' must use canonical BCP-47 form '{canonical}'")]
    NonCanonical {
        /// The name exactly as it was supplied by the caller.
        name: String,
        /// The canonical spelling that should have been used instead.
        canonical: String,
    },
}
pub fn parse_canonical_language_identifier(
name: &str,
) -> Result<LanguageIdentifier, CanonicalLanguageIdentifierError> {
let lang = name.parse::<LanguageIdentifier>().map_err(|source| {
CanonicalLanguageIdentifierError::Invalid {
name: name.to_string(),
source,
}
})?;
let mut locale = name
.parse::<Locale>()
.expect("valid unic-langid value should also be valid ICU locale");
LocaleCanonicalizer::new_extended().canonicalize(&mut locale);
let canonical = locale.to_string();
if canonical != name {
return Err(CanonicalLanguageIdentifierError::NonCanonical {
name: name.to_string(),
canonical,
});
}
Ok(lang)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts `(name, canonical)` from a `NonCanonical` error, or `None` for
    /// any other variant, so tests can compare both fields in one assertion.
    fn noncanonical_parts(err: CanonicalLanguageIdentifierError) -> Option<(String, String)> {
        match err {
            CanonicalLanguageIdentifierError::NonCanonical { name, canonical } => {
                Some((name, canonical))
            }
            _ => None,
        }
    }

    /// An identifier that is already canonical round-trips unchanged.
    #[test]
    fn parses_canonical_language_identifier() {
        let input = "de-DE-1901";
        let parsed =
            parse_canonical_language_identifier(input).expect("canonical locale should parse");
        assert_eq!(parsed.to_string(), input);
    }

    /// Input that is not a language identifier is reported as `Invalid` and
    /// echoes the offending name.
    #[test]
    fn rejects_invalid_identifier() {
        let input = "not-a-lang!";
        let err =
            parse_canonical_language_identifier(input).expect_err("invalid locale should fail");
        let is_invalid_with_name = matches!(
            err,
            CanonicalLanguageIdentifierError::Invalid { name, .. } if name == input
        );
        assert!(is_invalid_with_name);
    }

    /// Wrong casing is rejected and the canonical spelling is reported.
    #[test]
    fn rejects_noncanonical_identifier() {
        let err = parse_canonical_language_identifier("en-us")
            .expect_err("noncanonical locale should fail");
        assert_eq!(
            noncanonical_parts(err),
            Some(("en-us".to_string(), "en-US".to_string()))
        );
    }

    /// Deprecated language aliases are rejected in favor of their replacement.
    #[test]
    fn rejects_aliases_that_are_not_canonicalized() {
        let err =
            parse_canonical_language_identifier("iw").expect_err("deprecated aliases should fail");
        assert_eq!(
            noncanonical_parts(err),
            Some(("iw".to_string(), "he".to_string()))
        );
    }
}