/// Code-point lookup table for the printable ASCII range `0x20..=0x7f`.
///
/// Entry `i` is the `u32` code point substituted for the ASCII byte
/// `0x20 + i` (see [`ipa1_char`]). Many bytes map to themselves; the rest map
/// to non-ASCII code points such as `0x259` for `@` and `0x2d0` for `:`.
///
/// Rows below hold 16 entries each, so a row corresponds to one
/// high nibble of the input byte.
pub const IPA1: [u32; 96] = [
    // 0x20..=0x2f
    0x20, 0x21, 0x22, 0x2b0, 0x24, 0x25, 0x0e6, 0x2c8, 0x28, 0x29, 0x27e, 0x2b, 0x2cc, 0x2d, 0x2e, 0x2f,
    // 0x30..=0x3f
    0x252, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37, 0x275, 0x39, 0x2d0, 0x2b2, 0x3c, 0x3d, 0x3e, 0x294,
    // 0x40..=0x4f
    0x259, 0x251, 0x3b2, 0xe7, 0xf0, 0x25b, 0x46, 0x262, 0x127, 0x26a, 0x25f, 0x4b, 0x26b, 0x271, 0x14b, 0x254,
    // 0x50..=0x5f
    0x3a6, 0x263, 0x280, 0x283, 0x3b8, 0x28a, 0x28c, 0x153, 0x3c7, 0xf8, 0x292, 0x32a, 0x5c, 0x5d, 0x5e, 0x5f,
    // 0x60..=0x6f
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x261, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    // 0x70..=0x7f
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x303, 0x7f,
];
/// Maps one mnemonic byte to the code point that represents it.
///
/// Bytes in `0x20..=0x7f` are looked up in [`IPA1`]; every other byte is
/// returned unchanged, widened to `u32`.
pub fn ipa1_char(c: u8) -> u32 {
    match c {
        // The table starts at 0x20, so rebase the byte before indexing.
        0x20..=0x7f => IPA1[usize::from(c - 0x20)],
        _ => u32::from(c),
    }
}
/// Appends the UTF-8 encoding of code point `cp` to `buf`.
///
/// * `cp == 0` appends nothing.
/// * A valid Unicode scalar value is appended as its 1–4 byte UTF-8 sequence.
/// * A value that is not a Unicode scalar value (a surrogate in
///   `0xD800..=0xDFFF`, or anything above `0x10FFFF`) is appended as
///   U+FFFD REPLACEMENT CHARACTER, so `buf` never receives bytes that would
///   make an otherwise valid UTF-8 buffer invalid.
pub fn encode_utf8(cp: u32, buf: &mut Vec<u8>) {
    if cp == 0 {
        return;
    }
    // `char::from_u32` rejects exactly the values that have no UTF-8 encoding.
    let ch = char::from_u32(cp).unwrap_or(char::REPLACEMENT_CHARACTER);
    let mut scratch = [0u8; 4];
    buf.extend_from_slice(ch.encode_utf8(&mut scratch).as_bytes());
}
/// Converts a packed phoneme mnemonic into a UTF-8 string.
///
/// `mnemonic` holds up to four bytes, read least-significant byte first.
/// Each byte is mapped through [`ipa1_char`] and appended via
/// [`encode_utf8`], with these exceptions:
///
/// * a zero byte or `/` ends the conversion;
/// * `#` ends the conversion when `is_vowel` is true (otherwise it is
///   converted like any other byte);
/// * an ASCII digit in any position after the first is dropped.
///
/// Returns an empty string if the collected bytes are not valid UTF-8.
pub fn mnemonic_to_ipa(mnemonic: u32, is_vowel: bool) -> String {
    let mut utf8 = Vec::new();
    for (idx, byte) in mnemonic.to_le_bytes().iter().copied().enumerate() {
        match byte {
            0 | b'/' => break,
            b'#' if is_vowel => break,
            // The first byte is always either emitted or terminates the loop,
            // so "not the first character" is the same as a non-zero index.
            b'0'..=b'9' if idx > 0 => continue,
            _ => encode_utf8(ipa1_char(byte), &mut utf8),
        }
    }
    String::from_utf8(utf8).unwrap_or_default()
}
/// `(phoneme code, IPA string)` pairs that take precedence over
/// mnemonic-based conversion.
///
/// [`phoneme_ipa_lang`] scans this table in order and returns the string of
/// the first entry whose code matches, when overrides are enabled.
///
/// NOTE(review): the `EN_` prefix suggests these are English-specific;
/// the phoneme table the codes index into is not visible here — confirm.
pub static EN_IPA_OVERRIDES: &[(u8, &str)] = &[
    // Consonant + U+0329 (combining vertical line below).
    (41, "m\u{0329}"),
    (42, "n\u{0329}"),
    (43, "\u{014b}\u{0329}"),
    (45, "l\u{0329}"),
    // U+0259, alone and followed by `l`.
    (111, "\u{0259}"),
    (118, "\u{0259}l"),
    // Four codes sharing U+0252.
    (129, "\u{0252}"),
    (130, "\u{0252}"),
    (131, "\u{0252}"),
    (132, "\u{0252}"),
    // Two-character sequences.
    (144, "\u{0259}\u{028a}"),
    (145, "\u{0259}\u{028a}"),
    (156, "\u{0259}\u{0279}"),
    (157, "\u{028c}\u{0279}"),
];
/// Returns the IPA string for a phoneme, with the override table enabled.
///
/// Equivalent to calling [`phoneme_ipa_lang`] with `use_en_overrides` set
/// to `true`.
pub fn phoneme_ipa(code: u8, mnemonic: u32, is_vowel: bool) -> String {
    // This entry point always consults `EN_IPA_OVERRIDES` first.
    let use_en_overrides = true;
    phoneme_ipa_lang(code, mnemonic, is_vowel, use_en_overrides)
}
/// Returns the IPA string for a phoneme, optionally consulting
/// [`EN_IPA_OVERRIDES`].
///
/// When `use_en_overrides` is true and the table has an entry for `code`,
/// the first matching entry's string is returned. In every other case the
/// result is `mnemonic_to_ipa(mnemonic, is_vowel)`.
pub fn phoneme_ipa_lang(code: u8, mnemonic: u32, is_vowel: bool, use_en_overrides: bool) -> String {
    let overridden = if use_en_overrides {
        EN_IPA_OVERRIDES
            .iter()
            .find(|entry| entry.0 == code)
            .map(|entry| entry.1)
    } else {
        None
    };

    match overridden {
        Some(ipa) => ipa.to_string(),
        None => mnemonic_to_ipa(mnemonic, is_vowel),
    }
}
/// Primary stress mark: the single code point U+02C8.
pub const IPA_STRESS_PRIMARY: &str = "\u{02c8}";
/// Secondary stress mark: the single code point U+02CC.
pub const IPA_STRESS_SECONDARY: &str = "\u{02cc}";
/// Length mark: the single code point U+02D0.
pub const IPA_LENGTH_MARK: &str = "\u{02d0}";
/// Three-state stress selector: none, primary, or secondary.
///
/// NOTE(review): the name suggests this records a stress mark that has been
/// seen but not yet written out; no use of the type is visible in this
/// section — confirm against the caller.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PendingStress {
/// No stress.
None,
/// Primary stress.
Primary,
/// Secondary stress.
Secondary,
}
// NOTE(review): the three `PH_*` values below are not referenced by any code
// visible in this section. Their names and small distinct values suggest
// phoneme *type* tags, separate from the `PHON_*` phoneme codes — confirm.

// Value 1; named for stress phonemes.
#[allow(missing_docs)]
pub const PH_STRESS: u8 = 1;
// Value 2; named for vowel phonemes.
#[allow(missing_docs)]
pub const PH_VOWEL: u8 = 2;
// Value 0; named for pause phonemes.
#[allow(missing_docs)]
pub const PH_PAUSE: u8 = 0;
// Phoneme codes with fixed numeric values.
//
// The eight `PHON_STRESS_*` codes are exactly the set accepted by
// `is_stress_code`. The values of `PHON_PAUSE`, `PHON_PAUSE_SHORT`,
// `PHON_PAUSE_NOLINK`, `PHON_END_WORD` and `PHON_PAUSE_CLAUSE` appear (as
// literals) in the set accepted by `is_pause_code`.
//
// NOTE(review): the meaning of the individual suffixes (`_U`, `_D`, `_2`,
// `_3`, `_P`, `_P2`, `_PREV`, `_TONIC`) is not established by this section —
// confirm against the phoneme table definition before documenting further.

/// Stress code 2; accepted by [`is_stress_code`].
pub const PHON_STRESS_U: u8 = 2;
/// Stress code 3; accepted by [`is_stress_code`].
pub const PHON_STRESS_D: u8 = 3;
/// Stress code 4; accepted by [`is_stress_code`].
pub const PHON_STRESS_2: u8 = 4;
/// Stress code 5; accepted by [`is_stress_code`].
pub const PHON_STRESS_3: u8 = 5;
/// Stress code 6; accepted by [`is_stress_code`].
pub const PHON_STRESS_P: u8 = 6;
/// Stress code 7; accepted by [`is_stress_code`].
pub const PHON_STRESS_P2: u8 = 7;
/// Stress code 8; accepted by [`is_stress_code`].
pub const PHON_STRESS_PREV: u8 = 8;
/// Pause code 9; its value is in the set accepted by [`is_pause_code`].
pub const PHON_PAUSE: u8 = 9;
/// Pause code 10; its value is in the set accepted by [`is_pause_code`].
pub const PHON_PAUSE_SHORT: u8 = 10;
/// Pause code 11; its value is in the set accepted by [`is_pause_code`].
pub const PHON_PAUSE_NOLINK: u8 = 11;
/// Code 12; not referenced by the code visible in this section.
pub const PHON_LENGTHEN: u8 = 12;
/// Code 13; not referenced by the code visible in this section.
pub const PHON_SCHWA: u8 = 13;
/// Code 14; not referenced by the code visible in this section.
pub const PHON_SCHWA_SHORT: u8 = 14;
/// Code 15; its value is in the set accepted by [`is_pause_code`].
pub const PHON_END_WORD: u8 = 15;
/// Stress code 26; accepted by [`is_stress_code`].
pub const PHON_STRESS_TONIC: u8 = 26;
/// Pause code 27; its value is in the set accepted by [`is_pause_code`].
pub const PHON_PAUSE_CLAUSE: u8 = 27;
#[inline]
pub fn is_stress_code(code: u8) -> bool {
matches!(code, PHON_STRESS_U | PHON_STRESS_D | PHON_STRESS_2 | PHON_STRESS_3
| PHON_STRESS_P | PHON_STRESS_P2 | PHON_STRESS_PREV | PHON_STRESS_TONIC)
}
/// Returns `true` when `code` is one of the phoneme codes treated as a pause:
/// 9, 10, 11, 15, 17, 21, 22, 23, 24 or 27.
///
/// The values 9, 10, 11, 15 and 27 equal `PHON_PAUSE`, `PHON_PAUSE_SHORT`,
/// `PHON_PAUSE_NOLINK`, `PHON_END_WORD` and `PHON_PAUSE_CLAUSE`; the remaining
/// values have no named constant in this section.
#[inline]
pub fn is_pause_code(code: u8) -> bool {
    const PAUSE_CODES: [u8; 10] = [9, 10, 11, 15, 17, 21, 22, 23, 24, 27];
    PAUSE_CODES.contains(&code)
}
// Unit tests for the IPA table, mnemonic conversion, override lookup and the
// UTF-8 encoder.
#[cfg(test)]
mod tests {
use super::*;
// Spot-checks table entries whose mapping differs from plain ASCII.
#[test]
fn ipa1_spot_checks() {
assert_eq!(ipa1_char(b'@'), 0x259);
assert_eq!(ipa1_char(b':'), 0x2d0);
assert_eq!(ipa1_char(b'D'), 0xf0);
assert_eq!(ipa1_char(b'T'), 0x3b8);
assert_eq!(ipa1_char(b'N'), 0x14b);
assert_eq!(ipa1_char(b'S'), 0x283);
assert_eq!(ipa1_char(b'Z'), 0x292);
assert_eq!(ipa1_char(b'g'), 0x261);
assert_eq!(ipa1_char(b'V'), 0x28c);
assert_eq!(ipa1_char(b'I'), 0x26a);
assert_eq!(ipa1_char(b'U'), 0x28a);
assert_eq!(ipa1_char(b'3'), 0x25c);
}
// A single-byte mnemonic converts to a single mapped character.
#[test]
fn mnemonic_ipa_simple() {
let at_mnem: u32 = b'@' as u32;
assert_eq!(mnemonic_to_ipa(at_mnem, true), "ə");
}
// Mnemonic bytes are packed low byte first: `3` then `:`.
#[test]
fn mnemonic_ipa_colon() {
let mnem: u32 = b'3' as u32 | ((b':' as u32) << 8);
assert_eq!(mnemonic_to_ipa(mnem, true), "ɜː");
}
// A digit after the first character is dropped from the output.
#[test]
fn mnemonic_ipa_digit_skipped() {
let mnem: u32 = b'I' as u32 | ((b'2' as u32) << 8);
assert_eq!(mnemonic_to_ipa(mnem, true), "ɪ");
}
// A leading digit is converted; `#` then ends conversion because is_vowel is true.
#[test]
fn mnemonic_ipa_hash_vowel() {
let mnem: u32 = b'0' as u32 | ((b'#' as u32) << 8);
assert_eq!(mnemonic_to_ipa(mnem, true), "ɒ");
}
// Code 144 is in EN_IPA_OVERRIDES, so the table string wins over the
// mnemonic conversion (which would start with a plain `o`).
// The test name mirrors the mnemonic spelling `oU`, hence the lint allowance.
#[test]
#[allow(non_snake_case)]
fn override_oU() {
let mnem: u32 = b'o' as u32 | ((b'U' as u32) << 8);
let ipa = phoneme_ipa(144, mnem, true);
assert_eq!(ipa, "əʊ");
}
// Code 132 is in EN_IPA_OVERRIDES; without the override `O` would map to
// U+0254 rather than U+0252.
// The test name mirrors the mnemonic spelling `O2`, hence the lint allowance.
#[test]
#[allow(non_snake_case)]
fn override_O2() {
let mnem: u32 = b'O' as u32 | ((b'2' as u32) << 8);
let ipa = phoneme_ipa(132, mnem, true);
assert_eq!(ipa, "ɒ");
}
// Code points below 0x80 are written as a single byte.
#[test]
fn encode_utf8_ascii() {
let mut v = Vec::new();
encode_utf8(b'h' as u32, &mut v);
assert_eq!(v, b"h");
}
// Code points in 0x80..0x800 are written as a two-byte sequence.
#[test]
fn encode_utf8_2byte() {
let mut v = Vec::new();
encode_utf8(0x259, &mut v); assert_eq!(String::from_utf8(v).unwrap(), "ə");
}
}