/// Converts a single ARPABET symbol into its IPA transcription.
///
/// A trailing stress digit (`0`, `1` or `2`) is removed before the lookup. The digit only
/// affects `AH` and `ER`: stress `0` yields `ə` / `ɚ`, while any other stress — or no stress
/// digit at all — yields `ʌ` / `ɝ`. Every other symbol is resolved through `arpabet_table`
/// with the stress digit ignored.
///
/// Returns `None` for the empty string and for symbols that have no mapping.
#[must_use]
pub fn to_ipa(arpabet: &str) -> Option<String> {
    // Only an explicit `0` marker selects the unstressed variants of `AH` / `ER`.
    let unstressed = arpabet.ends_with('0');
    // At most one trailing stress digit is stripped; `"AH00"` keeps an inner `0` and so
    // falls through to an unsuccessful table lookup.
    let base = arpabet
        .strip_suffix(|c: char| matches!(c, '0'..='2'))
        .unwrap_or(arpabet);

    let ipa = match (base, unstressed) {
        ("AH", true) => "ə",
        ("AH", false) => "ʌ",
        ("ER", true) => "ɚ",
        ("ER", false) => "ɝ",
        // The empty base (from `""` or a bare digit) is not in the table, so it maps to `None`.
        _ => arpabet_table(base)?,
    };
    Some(ipa.to_owned())
}
/// Converts a sequence of ARPABET symbols to IPA, leniently.
///
/// Symbols for which [`to_ipa`] returns `None` are dropped without any indication, so the
/// result may be shorter than the input. Order of the remaining symbols is preserved.
#[must_use]
pub fn convert_sequence<S: AsRef<str>>(arpabet: &[S]) -> Vec<String> {
    let mut ipa = Vec::new();
    for symbol in arpabet {
        if let Some(phone) = to_ipa(symbol.as_ref()) {
            ipa.push(phone);
        }
    }
    ipa
}
/// Error produced by [`try_convert_sequence_strict`] when a token has no IPA mapping.
///
/// The offending token is stored verbatim and can be retrieved with
/// [`BadArpabetToken::token`]. The type implements [`std::error::Error`], so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BadArpabetToken {
    token: String,
}

impl BadArpabetToken {
    /// Creates an error recording `token` as the input that could not be converted.
    #[must_use]
    pub fn new(token: impl Into<String>) -> Self {
        Self {
            token: token.into(),
        }
    }

    /// Returns the rejected token exactly as it was supplied.
    #[inline]
    #[must_use]
    pub fn token(&self) -> &str {
        &self.token
    }
}

impl std::fmt::Display for BadArpabetToken {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Debug-format the token so empty or whitespace-only input stays visible in the message.
        write!(f, "unrecognized ARPABET token: {:?}", self.token)
    }
}

impl std::error::Error for BadArpabetToken {}
/// Converts every token in `arpabet` to IPA, rejecting the whole sequence on the first
/// token that has no mapping.
///
/// # Errors
///
/// Returns a [`BadArpabetToken`] holding the first token for which [`to_ipa`] yields
/// `None`. Tokens after that one are not examined.
pub fn try_convert_sequence_strict<S: AsRef<str>>(
    arpabet: &[S],
) -> Result<Vec<String>, BadArpabetToken> {
    let mut converted = Vec::with_capacity(arpabet.len());
    for entry in arpabet {
        let symbol = entry.as_ref();
        let ipa = to_ipa(symbol).ok_or_else(|| BadArpabetToken::new(symbol))?;
        converted.push(ipa);
    }
    Ok(converted)
}
/// Looks up the IPA transcription of a stress-free ARPABET symbol.
///
/// `AH` and `ER` are deliberately absent: their output depends on stress, which `to_ipa`
/// resolves before consulting this table. Matching is exact and case-sensitive; any symbol
/// not listed yields `None`.
fn arpabet_table(base: &str) -> Option<&'static str> {
    const TABLE: &[(&str, &str)] = &[
        // Vowels and diphthongs.
        ("AA", "ɑ"),
        ("AE", "æ"),
        ("AO", "ɔ"),
        ("AW", "aʊ"),
        ("AY", "aɪ"),
        ("EH", "ɛ"),
        ("EY", "eɪ"),
        ("IH", "ɪ"),
        ("IY", "i"),
        ("OW", "oʊ"),
        ("OY", "ɔɪ"),
        ("UH", "ʊ"),
        ("UW", "u"),
        // Consonants.
        ("B", "b"),
        ("CH", "tʃ"),
        ("D", "d"),
        ("DH", "ð"),
        ("F", "f"),
        ("G", "ɡ"),
        ("HH", "h"),
        ("JH", "dʒ"),
        ("K", "k"),
        ("L", "l"),
        ("M", "m"),
        ("N", "n"),
        ("NG", "ŋ"),
        ("P", "p"),
        ("R", "ɹ"),
        ("S", "s"),
        ("SH", "ʃ"),
        ("T", "t"),
        ("TH", "θ"),
        ("V", "v"),
        ("W", "w"),
        ("Y", "j"),
        ("Z", "z"),
        ("ZH", "ʒ"),
    ];

    TABLE
        .iter()
        .find(|&&(symbol, _)| symbol == base)
        .map(|&(_, ipa)| ipa)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Spot-checks consonants, including the two-letter symbols.
    #[test]
    fn maps_consonants() {
        let expected = [
            ("TH", "θ"),
            ("SH", "ʃ"),
            ("NG", "ŋ"),
            ("HH", "h"),
            ("CH", "tʃ"),
            ("JH", "dʒ"),
            ("ZH", "ʒ"),
        ];
        for &(arpa, ipa) in &expected {
            assert_eq!(to_ipa(arpa).as_deref(), Some(ipa));
        }
    }

    // `AH` is the schwa only when explicitly marked unstressed.
    #[test]
    fn maps_ah_with_stress() {
        for &(arpa, ipa) in &[("AH0", "ə"), ("AH1", "ʌ"), ("AH2", "ʌ")] {
            assert_eq!(to_ipa(arpa).as_deref(), Some(ipa));
        }
    }

    // `ER` follows the same stress rule as `AH`.
    #[test]
    fn maps_er_with_stress() {
        for &(arpa, ipa) in &[("ER0", "ɚ"), ("ER1", "ɝ"), ("ER2", "ɝ")] {
            assert_eq!(to_ipa(arpa).as_deref(), Some(ipa));
        }
    }

    // For every other vowel the stress digit is stripped and ignored.
    #[test]
    fn maps_regular_vowels() {
        let expected = [
            ("AA0", "ɑ"),
            ("AA1", "ɑ"),
            ("AE1", "æ"),
            ("EY0", "eɪ"),
            ("OW1", "oʊ"),
            ("AW0", "aʊ"),
            ("AY1", "aɪ"),
            ("OY0", "ɔɪ"),
        ];
        for &(arpa, ipa) in &expected {
            assert_eq!(to_ipa(arpa).as_deref(), Some(ipa));
        }
    }

    #[test]
    fn returns_none_for_unknown_and_empty() {
        for &input in &["XX", ""] {
            assert_eq!(to_ipa(input), None);
        }
    }

    #[test]
    fn converts_full_sequence() {
        assert_eq!(
            convert_sequence(&["HH", "AH0", "L", "OW1"]),
            ["h", "ə", "l", "oʊ"]
        );
    }

    #[test]
    fn converts_sequence_skips_unknown() {
        assert_eq!(convert_sequence(&["HH", "XX", "L"]), ["h", "l"]);
    }

    #[test]
    fn try_convert_sequence_strict_round_trips_known() {
        let outcome = try_convert_sequence_strict(&["HH", "AH0", "L", "OW1"]);
        assert_eq!(outcome.unwrap(), ["h", "ə", "l", "oʊ"]);
    }

    // The error must name the first bad token ("XX"), not a later one ("YY").
    #[test]
    fn try_convert_sequence_strict_errors_on_first_unknown() {
        assert_eq!(
            try_convert_sequence_strict(&["HH", "XX", "YY", "L"]),
            Err(BadArpabetToken::new("XX"))
        );
    }

    #[test]
    fn arpabet_convert_sequence_lax_still_drops_unknown() {
        assert_eq!(convert_sequence(&["HH", "XX", "L", "ZZ"]), ["h", "l"]);
    }

    #[test]
    fn covers_full_consonant_inventory() {
        const CONSONANTS: [(&str, &str); 24] = [
            ("B", "b"),
            ("CH", "tʃ"),
            ("D", "d"),
            ("DH", "ð"),
            ("F", "f"),
            ("G", "ɡ"),
            ("HH", "h"),
            ("JH", "dʒ"),
            ("K", "k"),
            ("L", "l"),
            ("M", "m"),
            ("N", "n"),
            ("NG", "ŋ"),
            ("P", "p"),
            ("R", "ɹ"),
            ("S", "s"),
            ("SH", "ʃ"),
            ("T", "t"),
            ("TH", "θ"),
            ("V", "v"),
            ("W", "w"),
            ("Y", "j"),
            ("Z", "z"),
            ("ZH", "ʒ"),
        ];
        CONSONANTS.iter().for_each(|&(arpa, ipa)| {
            assert_eq!(
                to_ipa(arpa).as_deref(),
                Some(ipa),
                "consonant {arpa} → {ipa}"
            );
        });
    }

    #[test]
    fn covers_full_vowel_inventory_with_primary_stress() {
        const VOWELS: [(&str, &str); 15] = [
            ("AA1", "ɑ"),
            ("AE1", "æ"),
            ("AH1", "ʌ"),
            ("AO1", "ɔ"),
            ("AW1", "aʊ"),
            ("AY1", "aɪ"),
            ("EH1", "ɛ"),
            ("ER1", "ɝ"),
            ("EY1", "eɪ"),
            ("IH1", "ɪ"),
            ("IY1", "i"),
            ("OW1", "oʊ"),
            ("OY1", "ɔɪ"),
            ("UH1", "ʊ"),
            ("UW1", "u"),
        ];
        VOWELS.iter().for_each(|&(arpa, ipa)| {
            assert_eq!(to_ipa(arpa).as_deref(), Some(ipa), "vowel {arpa} → {ipa}");
        });
    }
}