use crate::tts::vocab::VOCAB;
use lazy_static::lazy_static;
use misaki_rs::{Language, G2P};
use regex::Regex;
use std::sync::Arc;
// Regexes used by `Phonemizer::phonemize` to post-process backend output.
// `lazy_static!` compiles each one on first access and reuses it afterwards.
// The `regex` crate has no look-around, so the context each rule depends on is
// captured in a group and written back by the replacement string.
lazy_static! {
// Matches `hˈʌndɹɪd` when directly preceded by an ASCII lowercase letter, `ɹ` or `ː`.
// Group 1 is that preceding character, group 2 is the `hˈʌndɹɪd` sequence.
static ref PHONEME_PATTERNS: Regex = Regex::new(r"([a-zɹː])(hˈʌndɹɪd)").unwrap();
// Matches a space followed by `z` when the next character is one of the listed
// punctuation marks or a space, or when the input ends there. Group 1 is that
// following character (empty at end of input).
// NOTE(review): `"` is listed three times in the class; the repeats are redundant.
// Possibly typographic quotes that were lost in transcription — confirm.
static ref Z_PATTERN: Regex = Regex::new(r#" z([;:,.!?¡¿—…"«»"" ]|$)"#).unwrap();
// Matches `nˈaɪn` immediately followed by `ti`; group 1 is the `nˈaɪn` part.
// NOTE(review): nothing constrains what follows `ti`, so longer sequences that merely
// start with `nˈaɪnti` match as well — confirm that is intended.
static ref NINETY_PATTERN: Regex = Regex::new(r"(nˈaɪn)ti").unwrap();
}
/// Grapheme-to-phoneme backend that wraps a `misaki_rs` [`G2P`] instance.
#[derive(Clone)]
struct MisakiBackend {
/// Converter held behind an `Arc`, so cloning the backend shares the same `G2P`
/// instead of building a new one.
g2p: Arc<G2P>,
}
impl MisakiBackend {
    /// Builds a backend for the language code `lang`.
    ///
    /// `"b"` selects [`Language::EnglishGB`]; `"a"` and every other value select
    /// [`Language::EnglishUS`].
    fn new(lang: &str) -> Self {
        let language = if lang == "b" {
            Language::EnglishGB
        } else {
            Language::EnglishUS
        };
        let g2p = Arc::new(G2P::new(language));
        Self { g2p }
    }

    /// Runs the wrapped converter on `text` and returns the phoneme string.
    ///
    /// Only the first element of the tuple produced by `G2P::g2p` is kept. When the
    /// call yields no value, the failure is discarded and an empty string is returned.
    fn phonemize(&self, text: &str) -> String {
        let phonemes = self.g2p.g2p(text).map(|(phonemes, _rest)| phonemes);
        phonemes.unwrap_or_default()
    }
}
/// Public phonemizer: pairs a language code with the backend built for that code.
#[derive(Clone)]
pub struct Phonemizer {
/// Language code exactly as passed to `Phonemizer::new`; `phonemize` compares it with
/// `"a"` to decide whether the `NINETY_PATTERN` rewrite applies.
lang: String,
/// Backend constructed from the same language code.
backend: MisakiBackend,
}
impl Phonemizer {
    /// Creates a phonemizer for the language code `lang`.
    ///
    /// The code is stored as given and is also used to construct the backend.
    pub fn new(lang: &str) -> Self {
        Phonemizer {
            lang: lang.to_string(),
            backend: MisakiBackend::new(lang),
        }
    }

    /// Converts `text` into a phoneme string that contains only symbols present in `VOCAB`.
    ///
    /// Processing order:
    /// 1. run the backend on `text`;
    /// 2. apply two fixed whole-sequence substitutions;
    /// 3. map `ʲ`→`j`, `r`→`ɹ`, `x`→`k`, `ɬ`→`l`;
    /// 4. insert a space between `hˈʌndɹɪd` and a directly preceding ASCII lowercase
    ///    letter, `ɹ` or `ː`;
    /// 5. remove the space in front of a `z` that is followed by one of the punctuation
    ///    marks listed in `Z_PATTERN`, a space, or the end of the input;
    /// 6. when the language code is `"a"`, rewrite `ti` to `di` after `nˈaɪn`, except
    ///    where a length mark `ː` follows;
    /// 7. drop every character that is not a key of `VOCAB` and trim surrounding whitespace.
    ///
    /// `_normalize` is accepted for interface compatibility but is not used.
    pub fn phonemize(&self, text: &str, _normalize: bool) -> String {
        let mut ps = self
            .backend
            .phonemize(text)
            // Whole-sequence corrections come first, before any single-symbol mapping.
            .replace("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ")
            .replace("kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ")
            // Single-symbol mappings.
            .replace("ʲ", "j")
            .replace("r", "ɹ")
            .replace("x", "k")
            .replace("ɬ", "l");

        ps = PHONEME_PATTERNS.replace_all(&ps, "$1 $2").into_owned();
        ps = Z_PATTERN.replace_all(&ps, "z$1").into_owned();

        if self.lang == "a" {
            ps = Self::voice_ninety(&ps);
        }

        // Discard anything outside the vocabulary.
        ps.retain(|c| VOCAB.contains_key(&c));
        ps.trim().to_string()
    }

    /// Rewrites `ti` to `di` after `nˈaɪn`, leaving the match alone when `ː` follows.
    ///
    /// `NINETY_PATTERN` by itself also matches the start of `nˈaɪntiː…`. The `regex`
    /// crate has no look-ahead, so the following character is checked here instead.
    fn voice_ninety(ps: &str) -> String {
        let mut out = String::with_capacity(ps.len());
        let mut copied_to = 0;
        for caps in NINETY_PATTERN.captures_iter(ps) {
            if let (Some(whole), Some(stem)) = (caps.get(0), caps.get(1)) {
                // A match end is always a char boundary, so this slice cannot panic.
                if ps[whole.end()..].starts_with('ː') {
                    continue;
                }
                out.push_str(&ps[copied_to..whole.start()]);
                out.push_str(stem.as_str());
                out.push_str("di");
                copied_to = whole.end();
            }
        }
        out.push_str(&ps[copied_to..]);
        out
    }
}