use std::collections::HashMap;
use std::process::Command;
use crate::execution::types::ExecutorResult;
use crate::runtime_adapter::AdapterError;
use super::PhonemizerBackend;
/// Phonemizer backend that shells out to the `espeak-ng` command-line tool.
pub struct EspeakBackend {
    /// Voice/language identifier handed to `espeak-ng` through its `-v` flag.
    language: String,
}

impl EspeakBackend {
    /// Builds a backend that will phonemize text with the given `language` voice.
    ///
    /// The value is stored as-is; it is not validated here.
    pub fn new(language: String) -> Self {
        EspeakBackend { language }
    }
}
/// Punctuation characters that `phonemize_raw` copies verbatim into its output;
/// the text between them is phonemized one segment at a time.
const PRESERVED_PUNCTUATION: &[char] = &[',', '.', ';', ':', '!', '?'];
impl EspeakBackend {
    /// Runs `espeak-ng --ipa -q -v <language> -- <text>` and returns its standard
    /// output with every run of whitespace (including newlines) collapsed to a
    /// single space.
    ///
    /// # Errors
    ///
    /// Returns `AdapterError::InvalidInput` when the process cannot be started
    /// (for example because `espeak-ng` is not installed) or when it exits with a
    /// non-success status; in the latter case the message carries the captured
    /// standard error.
    fn run_espeak(&self, text: &str) -> ExecutorResult<String> {
        let output = Command::new("espeak-ng")
            .args(["--ipa", "-q", "-v", self.language.as_str()])
            // `--` ends option parsing. Without it, text that begins with `-`
            // (e.g. "-5 degrees", or a segment trimmed down to a leading dash)
            // would be interpreted by espeak-ng as command-line options.
            .arg("--")
            .arg(text)
            .output()
            .map_err(|e| {
                AdapterError::InvalidInput(format!(
                    "Failed to run espeak-ng. Is it installed? Error: {}. \
                     Install with: brew install espeak-ng (macOS) or apt-get install espeak-ng (Linux)",
                    e
                ))
            })?;

        if !output.status.success() {
            return Err(AdapterError::InvalidInput(format!(
                "espeak-ng failed: {}",
                String::from_utf8_lossy(&output.stderr)
            )));
        }

        // Lossy decoding: invalid UTF-8 bytes become U+FFFD instead of failing.
        let phonemes = String::from_utf8_lossy(&output.stdout);
        // Output may span several lines; normalise it to one space-separated line.
        Ok(phonemes.split_whitespace().collect::<Vec<_>>().join(" "))
    }
}
impl PhonemizerBackend for EspeakBackend {
    /// Phonemizes `text` with espeak-ng and keeps only the characters that appear
    /// as keys in `tokens_map`, then trims surrounding whitespace.
    ///
    /// Blank input (empty or whitespace-only) yields an empty string without
    /// starting a subprocess, matching how `phonemize_raw` skips blank segments.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `run_espeak`.
    fn phonemize(&self, text: &str, tokens_map: &HashMap<char, i64>) -> ExecutorResult<String> {
        if text.trim().is_empty() {
            return Ok(String::new());
        }

        let phonemes = self.run_espeak(text)?;
        // Only the key set matters here; the mapped i64 values are not read.
        let filtered: String = phonemes
            .chars()
            .filter(|c| tokens_map.contains_key(c))
            .collect();
        Ok(filtered.trim().to_string())
    }

    /// Phonemizes `text` without any vocabulary filtering, keeping the characters
    /// listed in `PRESERVED_PUNCTUATION` in place.
    ///
    /// The text is cut at every preserved punctuation character. Each non-blank
    /// piece is phonemized by its own espeak-ng invocation, and the punctuation
    /// character is appended directly after the phonemes that precede it.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by `run_espeak`.
    fn phonemize_raw(&self, text: &str) -> ExecutorResult<String> {
        let mut result = String::new();
        let mut segment = String::new();

        // Phonemizes the pending segment (if it is not blank), appends the phonemes
        // to `out`, and empties the segment buffer for the next piece of text.
        let flush = |seg: &mut String, out: &mut String| -> ExecutorResult<()> {
            let trimmed = seg.trim();
            if !trimmed.is_empty() {
                let phones = self.run_espeak(trimmed)?;
                if !phones.is_empty() {
                    // Separate from earlier output (phonemes or punctuation) with
                    // exactly one space.
                    if !out.is_empty() && !out.ends_with(' ') {
                        out.push(' ');
                    }
                    out.push_str(&phones);
                }
            }
            seg.clear();
            Ok(())
        };

        for c in text.chars() {
            if PRESERVED_PUNCTUATION.contains(&c) {
                // Emit what came before the punctuation, then the mark itself with
                // no space in front of it.
                flush(&mut segment, &mut result)?;
                result.push(c);
            } else {
                segment.push(c);
            }
        }
        // Handle any text after the last punctuation character.
        flush(&mut segment, &mut result)?;
        Ok(result)
    }

    /// Returns the fixed display name of this backend.
    fn name(&self) -> &'static str {
        "EspeakNG"
    }
}