use smol_str::SmolStr;
/// A language code, modelled as one named variant per known code plus a
/// catch-all [`Lang::Other`].
///
/// Each named variant corresponds to the lowercase code returned by
/// `as_str` (for example `Lang::En` <-> `"en"`, `Lang::Yue` <-> `"yue"`).
/// Most codes are two letters; `Haw` and `Yue` are three letters.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a
/// wildcard arm.
#[non_exhaustive]
#[allow(missing_docs)] #[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum Lang {
En,
Zh,
De,
Es,
Ru,
Ko,
Fr,
Ja,
Pt,
Tr,
Pl,
Ca,
Nl,
Ar,
Sv,
It,
Id,
Hi,
Fi,
Vi,
He,
Uk,
El,
Ms,
Cs,
Ro,
Da,
Hu,
Ta,
No,
Th,
Ur,
Hr,
Bg,
Lt,
La,
Mi,
Ml,
Cy,
Sk,
Te,
Fa,
Lv,
Bn,
Sr,
Az,
Sl,
Kn,
Et,
Mk,
Br,
Eu,
Is,
Hy,
Ne,
Mn,
Bs,
Kk,
Sq,
Sw,
Gl,
Mr,
Pa,
Si,
Km,
Sn,
Yo,
So,
Af,
Oc,
Ka,
Be,
Tg,
Sd,
Gu,
Am,
Yi,
Lo,
Uz,
Fo,
Ht,
Ps,
Tk,
Nn,
Mt,
Sa,
Lb,
My,
Bo,
Tl,
Mg,
As,
Tt,
Haw,
Ln,
Ha,
Ba,
Jw,
Su,
Yue,
/// Any code that has no named variant; the wrapped string is the code
/// itself and is what `as_str` returns for this variant.
Other(SmolStr),
}
impl Lang {
#[inline]
pub fn as_str(&self) -> &str {
match self {
Self::En => "en",
Self::Zh => "zh",
Self::De => "de",
Self::Es => "es",
Self::Ru => "ru",
Self::Ko => "ko",
Self::Fr => "fr",
Self::Ja => "ja",
Self::Pt => "pt",
Self::Tr => "tr",
Self::Pl => "pl",
Self::Ca => "ca",
Self::Nl => "nl",
Self::Ar => "ar",
Self::Sv => "sv",
Self::It => "it",
Self::Id => "id",
Self::Hi => "hi",
Self::Fi => "fi",
Self::Vi => "vi",
Self::He => "he",
Self::Uk => "uk",
Self::El => "el",
Self::Ms => "ms",
Self::Cs => "cs",
Self::Ro => "ro",
Self::Da => "da",
Self::Hu => "hu",
Self::Ta => "ta",
Self::No => "no",
Self::Th => "th",
Self::Ur => "ur",
Self::Hr => "hr",
Self::Bg => "bg",
Self::Lt => "lt",
Self::La => "la",
Self::Mi => "mi",
Self::Ml => "ml",
Self::Cy => "cy",
Self::Sk => "sk",
Self::Te => "te",
Self::Fa => "fa",
Self::Lv => "lv",
Self::Bn => "bn",
Self::Sr => "sr",
Self::Az => "az",
Self::Sl => "sl",
Self::Kn => "kn",
Self::Et => "et",
Self::Mk => "mk",
Self::Br => "br",
Self::Eu => "eu",
Self::Is => "is",
Self::Hy => "hy",
Self::Ne => "ne",
Self::Mn => "mn",
Self::Bs => "bs",
Self::Kk => "kk",
Self::Sq => "sq",
Self::Sw => "sw",
Self::Gl => "gl",
Self::Mr => "mr",
Self::Pa => "pa",
Self::Si => "si",
Self::Km => "km",
Self::Sn => "sn",
Self::Yo => "yo",
Self::So => "so",
Self::Af => "af",
Self::Oc => "oc",
Self::Ka => "ka",
Self::Be => "be",
Self::Tg => "tg",
Self::Sd => "sd",
Self::Gu => "gu",
Self::Am => "am",
Self::Yi => "yi",
Self::Lo => "lo",
Self::Uz => "uz",
Self::Fo => "fo",
Self::Ht => "ht",
Self::Ps => "ps",
Self::Tk => "tk",
Self::Nn => "nn",
Self::Mt => "mt",
Self::Sa => "sa",
Self::Lb => "lb",
Self::My => "my",
Self::Bo => "bo",
Self::Tl => "tl",
Self::Mg => "mg",
Self::As => "as",
Self::Tt => "tt",
Self::Haw => "haw",
Self::Ln => "ln",
Self::Ha => "ha",
Self::Ba => "ba",
Self::Jw => "jw",
Self::Su => "su",
Self::Yue => "yue",
Self::Other(s) => s.as_str(),
}
}
}
impl Lang {
    /// Shared lookup behind [`Lang::from_iso639_1`] and
    /// [`Lang::try_from_iso639_1`]: resolves `s` to a named variant, ignoring
    /// ASCII case.
    ///
    /// Returns `None` when `s` is not one of the known two- or three-letter
    /// codes. Never allocates.
    fn named_variant_for_code(s: &str) -> Option<Self> {
        let raw = s.as_bytes();
        // Every named code is two or three bytes long; rejecting other lengths
        // up front also guarantees the stack buffer below is large enough.
        if !matches!(raw.len(), 2 | 3) {
            return None;
        }
        let mut buf = [0u8; 3];
        for (dst, src) in buf.iter_mut().zip(raw) {
            // Non-ASCII bytes are left untouched, so multi-byte characters can
            // never be folded onto an ASCII letter.
            *dst = src.to_ascii_lowercase();
        }
        // Only ASCII bytes were altered, so the buffer is still valid UTF-8;
        // `.ok()?` just keeps this free of a panic path.
        let lowered = core::str::from_utf8(&buf[..raw.len()]).ok()?;
        Some(match lowered {
            "en" => Self::En,
            "zh" => Self::Zh,
            "de" => Self::De,
            "es" => Self::Es,
            "ru" => Self::Ru,
            "ko" => Self::Ko,
            "fr" => Self::Fr,
            "ja" => Self::Ja,
            "pt" => Self::Pt,
            "tr" => Self::Tr,
            "pl" => Self::Pl,
            "ca" => Self::Ca,
            "nl" => Self::Nl,
            "ar" => Self::Ar,
            "sv" => Self::Sv,
            "it" => Self::It,
            "id" => Self::Id,
            "hi" => Self::Hi,
            "fi" => Self::Fi,
            "vi" => Self::Vi,
            "he" => Self::He,
            "uk" => Self::Uk,
            "el" => Self::El,
            "ms" => Self::Ms,
            "cs" => Self::Cs,
            "ro" => Self::Ro,
            "da" => Self::Da,
            "hu" => Self::Hu,
            "ta" => Self::Ta,
            "no" => Self::No,
            "th" => Self::Th,
            "ur" => Self::Ur,
            "hr" => Self::Hr,
            "bg" => Self::Bg,
            "lt" => Self::Lt,
            "la" => Self::La,
            "mi" => Self::Mi,
            "ml" => Self::Ml,
            "cy" => Self::Cy,
            "sk" => Self::Sk,
            "te" => Self::Te,
            "fa" => Self::Fa,
            "lv" => Self::Lv,
            "bn" => Self::Bn,
            "sr" => Self::Sr,
            "az" => Self::Az,
            "sl" => Self::Sl,
            "kn" => Self::Kn,
            "et" => Self::Et,
            "mk" => Self::Mk,
            "br" => Self::Br,
            "eu" => Self::Eu,
            "is" => Self::Is,
            "hy" => Self::Hy,
            "ne" => Self::Ne,
            "mn" => Self::Mn,
            "bs" => Self::Bs,
            "kk" => Self::Kk,
            "sq" => Self::Sq,
            "sw" => Self::Sw,
            "gl" => Self::Gl,
            "mr" => Self::Mr,
            "pa" => Self::Pa,
            "si" => Self::Si,
            "km" => Self::Km,
            "sn" => Self::Sn,
            "yo" => Self::Yo,
            "so" => Self::So,
            "af" => Self::Af,
            "oc" => Self::Oc,
            "ka" => Self::Ka,
            "be" => Self::Be,
            "tg" => Self::Tg,
            "sd" => Self::Sd,
            "gu" => Self::Gu,
            "am" => Self::Am,
            "yi" => Self::Yi,
            "lo" => Self::Lo,
            "uz" => Self::Uz,
            "fo" => Self::Fo,
            "ht" => Self::Ht,
            "ps" => Self::Ps,
            "tk" => Self::Tk,
            "nn" => Self::Nn,
            "mt" => Self::Mt,
            "sa" => Self::Sa,
            "lb" => Self::Lb,
            "my" => Self::My,
            "bo" => Self::Bo,
            "tl" => Self::Tl,
            "mg" => Self::Mg,
            "as" => Self::As,
            "tt" => Self::Tt,
            "haw" => Self::Haw,
            "ln" => Self::Ln,
            "ha" => Self::Ha,
            "ba" => Self::Ba,
            "jw" => Self::Jw,
            "su" => Self::Su,
            "yue" => Self::Yue,
            _ => return None,
        })
    }

    /// Converts a language code into a [`Lang`], never failing.
    ///
    /// Known codes are matched ignoring ASCII case — including every casing of
    /// the three-letter codes (`"haw"`, `"HaW"`, `"YUE"`, ...) — and map to
    /// their named variant. Anything else, including the empty string, is
    /// wrapped verbatim (original casing preserved) in [`Lang::Other`].
    ///
    /// Despite the name, the three-letter codes `haw` and `yue` are accepted
    /// as well.
    pub fn from_iso639_1(s: &str) -> Self {
        Self::named_variant_for_code(s).unwrap_or_else(|| Self::Other(SmolStr::new(s)))
    }

    /// Converts a language code into a named [`Lang`] variant.
    ///
    /// Matching ignores ASCII case. Returns `None` for any code without a
    /// named variant; this function never produces [`Lang::Other`].
    pub fn try_from_iso639_1(s: &str) -> Option<Self> {
        Self::named_variant_for_code(s)
    }
}
/// Formats a [`Lang`] as the same code string that `as_str` returns.
impl core::fmt::Display for Lang {
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let code: &str = self.as_str();
        out.write_str(code)
    }
}
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
const _: () = {
    // Serialized form is the bare code string produced by `as_str`
    // (e.g. `Lang::En` -> "en", `Lang::Other("xx")` -> "xx").
    impl serde::Serialize for Lang {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            serializer.serialize_str(self.as_str())
        }
    }

    // Visitor that validates and canonicalises a language-code string.
    //
    // Going through a visitor (rather than deserializing a `&str`) accepts
    // borrowed, transient and owned strings alike, so input that cannot be
    // borrowed zero-copy — escaped JSON strings, reader-based or value-based
    // deserializers — is handled too.
    struct LangVisitor;

    impl<'de> serde::de::Visitor<'de> for LangVisitor {
        type Value = Lang;

        fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
            f.write_str("a language code of 1 to 8 ASCII letters")
        }

        fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            if s.is_empty() {
                return Err(E::custom("Lang code is empty"));
            }
            // Hard cap on the length of a code accepted from untrusted input.
            if s.len() > 8 {
                return Err(E::custom(format_args!(
                    "Lang code longer than 8 bytes ({} bytes); whisper.cpp codes are 2-3 ASCII letters",
                    s.len()
                )));
            }
            if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                return Err(E::custom(
                    "Lang code must be ASCII letters [a-zA-Z] only (no digits, dashes, or non-ASCII)",
                ));
            }
            if s.bytes().all(|b| b.is_ascii_lowercase()) {
                // Already canonical: no lowercased copy needed.
                Ok(Lang::from_iso639_1(s))
            } else {
                use smol_str::StrExt;
                // Lowercase first so unknown codes are stored canonically in
                // `Other` ("XX" and "xx" compare and hash equal afterwards).
                let lowered = s.to_ascii_lowercase_smolstr();
                Ok(Lang::try_from_iso639_1(&lowered).unwrap_or(Lang::Other(lowered)))
            }
        }
    }

    // Accepts only a string; any other shape (such as a map like
    // `{"Other": "xx"}`) is rejected by the visitor's default methods.
    impl<'de> serde::Deserialize<'de> for Lang {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            deserializer.deserialize_str(LangVisitor)
        }
    }
};
#[cfg(test)]
mod tests {
    use super::*;

    /// Every named variant must survive `as_str` -> `from_iso639_1` unchanged.
    #[test]
    fn named_variants_canonicalise() {
        #[rustfmt::skip]
        let known = [
            Lang::En, Lang::Zh, Lang::De, Lang::Es, Lang::Ru, Lang::Ko, Lang::Fr, Lang::Ja, Lang::Pt, Lang::Tr,
            Lang::Pl, Lang::Ca, Lang::Nl, Lang::Ar, Lang::Sv, Lang::It, Lang::Id, Lang::Hi, Lang::Fi, Lang::Vi,
            Lang::He, Lang::Uk, Lang::El, Lang::Ms, Lang::Cs, Lang::Ro, Lang::Da, Lang::Hu, Lang::Ta, Lang::No,
            Lang::Th, Lang::Ur, Lang::Hr, Lang::Bg, Lang::Lt, Lang::La, Lang::Mi, Lang::Ml, Lang::Cy, Lang::Sk,
            Lang::Te, Lang::Fa, Lang::Lv, Lang::Bn, Lang::Sr, Lang::Az, Lang::Sl, Lang::Kn, Lang::Et, Lang::Mk,
            Lang::Br, Lang::Eu, Lang::Is, Lang::Hy, Lang::Ne, Lang::Mn, Lang::Bs, Lang::Kk, Lang::Sq, Lang::Sw,
            Lang::Gl, Lang::Mr, Lang::Pa, Lang::Si, Lang::Km, Lang::Sn, Lang::Yo, Lang::So, Lang::Af, Lang::Oc,
            Lang::Ka, Lang::Be, Lang::Tg, Lang::Sd, Lang::Gu, Lang::Am, Lang::Yi, Lang::Lo, Lang::Uz, Lang::Fo,
            Lang::Ht, Lang::Ps, Lang::Tk, Lang::Nn, Lang::Mt, Lang::Sa, Lang::Lb, Lang::My, Lang::Bo, Lang::Tl,
            Lang::Mg, Lang::As, Lang::Tt, Lang::Haw, Lang::Ln, Lang::Ha, Lang::Ba, Lang::Jw, Lang::Su, Lang::Yue,
        ];
        assert_eq!(
            known.len(),
            100,
            "must keep the 100-variant Appendix C list in sync"
        );
        for expected in &known {
            let parsed = Lang::from_iso639_1(expected.as_str());
            assert_eq!(&parsed, expected, "round-trip failed for {:?}", expected);
            assert!(
                !matches!(parsed, Lang::Other(_)),
                "{:?} canonicalised to Other; this breaks Eq/Hash",
                expected
            );
        }
    }

    /// A code with no named variant is kept verbatim inside `Other`.
    #[test]
    fn unknown_codes_land_in_other() {
        let parsed = Lang::from_iso639_1("zzz");
        assert_eq!(parsed, Lang::Other(SmolStr::new("zzz")));
        assert_eq!(parsed.as_str(), "zzz");
    }

    /// `Other` -> `as_str` -> `from_iso639_1` yields an equal value.
    #[test]
    fn other_round_trips_via_as_str() {
        let original = Lang::Other(SmolStr::new("xx"));
        assert_eq!(original.as_str(), "xx");
        let reparsed = Lang::from_iso639_1(original.as_str());
        assert_eq!(reparsed, original);
    }

    /// Named variants serialize as their lowercase code, not the variant name.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_named_variant_serializes_as_lowercase_iso() {
        let en = serde_json::to_string(&Lang::En).expect("serialize");
        assert_eq!(
            en, "\"en\"",
            "Lang::En must serialize as \"en\", not \"En\""
        );
        let yue = serde_json::to_string(&Lang::Yue).expect("serialize");
        assert_eq!(yue, "\"yue\"");
    }

    /// `Other` serializes as a bare string, not as a tagged map.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_other_serializes_as_inner_string() {
        let encoded = serde_json::to_string(&Lang::Other(SmolStr::new("xx"))).expect("serialize");
        assert_eq!(
            encoded, "\"xx\"",
            "Lang::Other(\"xx\") must serialize as \"xx\""
        );
    }

    /// JSON -> `Lang` -> JSON is lossless for a named variant.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_named_variant_round_trips() {
        let wire = "\"en\"";
        let decoded: Lang = serde_json::from_str(wire).expect("deserialize");
        assert_eq!(decoded, Lang::En);
        assert_eq!(serde_json::to_string(&decoded).unwrap(), wire);
    }

    /// JSON -> `Lang` -> JSON is lossless for an unknown lowercase code.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_unknown_iso_code_round_trips_via_other() {
        let wire = "\"xx\"";
        let decoded: Lang = serde_json::from_str(wire).expect("deserialize");
        assert_eq!(decoded, Lang::Other(SmolStr::new("xx")));
        assert_eq!(serde_json::to_string(&decoded).unwrap(), wire);
    }

    /// Known codes must decode to the named variant, never to `Other`.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_deserializes_known_codes_to_named_variants() {
        let en: Lang = serde_json::from_str("\"en\"").unwrap();
        assert!(matches!(en, Lang::En), "must canonicalise to Lang::En");
        let yue: Lang = serde_json::from_str("\"yue\"").unwrap();
        assert!(matches!(yue, Lang::Yue));
    }

    /// Any ASCII casing of a known code decodes to the same named variant.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_accepts_any_case_for_named_variant() {
        let casings = ["\"en\"", "\"EN\"", "\"En\"", "\"eN\""];
        for input in casings {
            let decoded: Lang = serde_json::from_str(input).expect(input);
            assert_eq!(
                decoded,
                Lang::En,
                "input {input} must canonicalise to Lang::En"
            );
            assert_eq!(serde_json::to_string(&decoded).unwrap(), "\"en\"");
        }
    }

    /// Unknown codes are lowercased before being stored in `Other`.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_lowercases_unknown_code_into_other() {
        let upper: Lang = serde_json::from_str("\"XX\"").expect("deserialize");
        assert_eq!(upper, Lang::Other(SmolStr::new("xx")));
        let mixed: Lang = serde_json::from_str("\"Xx\"").expect("deserialize");
        assert_eq!(mixed, Lang::Other(SmolStr::new("xx")));
    }

    /// The empty string is not a valid code.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_rejects_empty_string() {
        let outcome: Result<Lang, _> = serde_json::from_str("\"\"");
        assert!(outcome.is_err());
    }

    /// Codes longer than eight bytes are refused.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_rejects_overlong_code() {
        let outcome: Result<Lang, _> = serde_json::from_str("\"abcdefghi\"");
        assert!(outcome.is_err(), "9-byte code must be rejected");
    }

    /// Only ASCII letters are allowed in a code.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_rejects_non_ascii_letters() {
        let cases = [
            ("\"français\"", "non-ASCII must be rejected"),
            ("\"a-b\"", "dash must be rejected"),
            ("\"a1b\"", "digits must be rejected"),
        ];
        for (wire, why) in cases {
            let outcome: Result<Lang, _> = serde_json::from_str(wire);
            assert!(outcome.is_err(), "{}", why);
        }
    }

    /// The externally-tagged map form of `Other` is not accepted.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_rejects_legacy_other_as_map() {
        let outcome: Result<Lang, _> = serde_json::from_str(r#"{"Other":"xx"}"#);
        assert!(
            outcome.is_err(),
            "legacy Other-as-map encoding must be rejected"
        );
    }
}