use crate::espeak::EspeakG2P;
use crate::espeak::EspeakIpaTokenizer;
use std::collections::HashMap;
/// Prints a comparison table for the Misaki phoneme conversion test words.
///
/// For every test word the eSpeak IPA string is taken from the hard-coded
/// sample when one is given, or produced by `EspeakG2P::text_to_ipa`
/// otherwise, and printed next to the expected Misaki phoneme string. The
/// word is then passed through `EspeakIpaTokenizer::encode` while the
/// `DEBUG_PHONEMES` environment variable is set to `"1"`.
///
/// # Errors
///
/// Returns an error if the G2P engine or the tokenizer cannot be constructed,
/// or if converting or encoding any of the test words fails.
// Manual diagnostic entry point: allowed to be unused so that builds which
// never call it stay free of dead-code warnings.
#[allow(dead_code)]
pub fn test_misaki_conversion() -> Result<(), Box<dyn std::error::Error>> {
    println!("Testing Misaki phoneme conversion according to spec\n");
    let g2p = EspeakG2P::new()?;

    // (word, raw eSpeak IPA or "" to query the G2P engine, expected Misaki phonemes)
    let test_cases = [
        ("merchantship", "mˈɜːt^ʃəntʃˌɪp", "mˈɜɹʧəntʃˌɪp"),
        ("yes", "", "jˈɛs"),
        ("get", "", "ɡɛt"),
        ("sung", "", "sˈʌŋ"),
        ("red", "", "ɹˈɛd"),
        ("shin", "", "ʃˈɪn"),
        ("Asia", "", "ˈAʒə"),
        ("than", "", "ðən"),
        ("thin", "", "θˈɪn"),
        ("jump", "", "ʤˈʌmp"),
        ("chump", "", "ʧˈʌmp"),
        ("easy", "", "ˈizi"),
        ("flu", "", "flˈu"),
        ("spa", "", "spˈɑ"),
        ("all", "", "ˈɔl"),
        ("bed", "", "bˈɛd"),
        ("brick", "", "bɹˈɪk"),
        ("wood", "", "wˈʊd"),
        ("sun", "", "sˈʌn"),
        ("hey", "", "hˈA"),
        ("high", "", "hˈI"),
        ("how", "", "hˌW"),
        ("soy", "", "sˈY"),
        ("ash", "", "ˈæʃ"),
        ("butter", "", "bˈʌɾəɹ"),
        ("boxes", "", "bˈɑksᵻz"),
        ("pixel", "", "pˈɪksᵊl"),
    ];

    // Minimal vocabulary: "$" is id 0, each phoneme character gets its
    // 1-based position in `phonemes`, and the space character is id 100.
    let mut vocab = HashMap::new();
    vocab.insert("$".to_string(), 0);
    let phonemes = "AIWYbdfhijklmnpstuvwzðŋɑɔəɛɜɡɪɹʃʊʌʒʤʧˈˌθᵊOæɾᵻ";
    for (i, ch) in phonemes.chars().enumerate() {
        vocab.insert(ch.to_string(), (i + 1) as i64);
    }
    vocab.insert(" ".to_string(), 100);
    let tokenizer = EspeakIpaTokenizer::new(vocab)?;

    println!("Testing individual word conversions:");
    println!(
        "{:<15} {:<25} {:<25} {:<25}",
        "Word", "Espeak IPA", "Expected", "Actual"
    );
    println!("{}", "-".repeat(90));

    for (word, espeak_raw, expected) in test_cases {
        // Prefer the hard-coded eSpeak sample; otherwise ask the G2P engine.
        let espeak_ipa = if espeak_raw.is_empty() {
            g2p.text_to_ipa(word)?
        } else {
            espeak_raw.to_string()
        };
        // Emit the table row so the computed IPA and the expected phonemes
        // are actually visible under the header printed above.
        println!("{:<15} {:<25} {:<25}", word, espeak_ipa, expected);

        // NOTE(review): presumably DEBUG_PHONEMES makes the tokenizer print its
        // converted phonemes (the "Actual" column) — confirm against the
        // tokenizer implementation.
        // NOTE(review): mutating the process environment is not thread-safe;
        // this helper is expected to run single-threaded.
        std::env::set_var("DEBUG_PHONEMES", "1");
        let encoded = tokenizer.encode(word, None);
        // Clear the flag before propagating any error so that a failed
        // encode does not leave it set for the rest of the process.
        std::env::remove_var("DEBUG_PHONEMES");
        let _ = encoded?;
    }
    Ok(())
}