use std::path::{Path, PathBuf};
use crate::addons::ivr_editor::settings::IvrEditorSettings;
/// Known voices as `(identifier, display label)` pairs.
///
/// The identifier is the machine-readable voice name (for example
/// `en-US-AriaNeural`); the label is a human-readable description of the
/// language and speaker.
// NOTE(review): presumably these identifiers are the values accepted as the
// `voice` argument of the synthesis functions in this file — confirm with callers.
pub const VOICES: &[(&str, &str)] = &[
    // English
    ("en-US-AriaNeural", "English (US) - Aria"),
    ("en-US-GuyNeural", "English (US) - Guy"),
    ("en-US-JennyNeural", "English (US) - Jenny"),
    ("en-GB-SoniaNeural", "English (UK) - Sonia"),
    // Chinese, Japanese, Korean
    ("zh-CN-XiaoxiaoNeural", "Chinese (CN) - Xiaoxiao"),
    ("zh-CN-YunxiNeural", "Chinese (CN) - Yunxi"),
    ("ja-JP-NanamiNeural", "Japanese - Nanami"),
    ("ko-KR-SunHiNeural", "Korean - SunHi"),
    // European languages
    ("es-ES-ElviraNeural", "Spanish (ES) - Elvira"),
    ("fr-FR-DeniseNeural", "French - Denise"),
    ("de-DE-KatjaNeural", "German - Katja"),
];
pub async fn synthesize(text: &str, voice: &str, filename: &str) -> Result<PathBuf, anyhow::Error> {
if text.trim().is_empty() {
anyhow::bail!("text must not be empty");
}
let dir = Path::new("storage/sounds/ivr");
tokio::fs::create_dir_all(dir).await?;
let sanitized = sanitize_filename(filename);
let out_path = dir.join(format!("{}.mp3", sanitized));
let voice = if voice.is_empty() {
"en-US-AriaNeural"
} else {
voice
};
let output = tokio::process::Command::new("edge-cli")
.arg("speak")
.arg("--text")
.arg(text)
.arg("--voice")
.arg(voice)
.arg("--write-media")
.arg(out_path.to_str().unwrap_or("output.mp3"))
.output()
.await
.map_err(|e| anyhow::anyhow!("failed to run edge-cli (is it installed?): {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("edge-cli failed: {}", stderr);
}
Ok(out_path)
}
/// Reduces a caller-supplied name to a string that is safe to use as a file stem.
///
/// Only the file stem of `name` is kept (leading directories and the final
/// extension are dropped); if no UTF-8 stem can be extracted, the whole input
/// is used instead. Every character that is not alphanumeric (Unicode-aware),
/// `-`, or `_` is then replaced with `_`.
fn sanitize_filename(name: &str) -> String {
    let base = match Path::new(name).file_stem().and_then(|os| os.to_str()) {
        Some(stem) => stem,
        None => name,
    };

    let mut cleaned = String::with_capacity(base.len());
    for ch in base.chars() {
        let allowed = ch.is_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if allowed { ch } else { '_' });
    }
    cleaned
}
/// Builds the cache key for a `(voice, text)` pair.
///
/// The key is the hex encoding of the SHA-256 digest of `voice`, a `|`
/// separator, and `text`, in that order.
fn generate_cache_key(voice: &str, text: &str) -> String {
    use sha2::{Digest, Sha256};

    // Assemble "<voice>|<text>" in a single pre-sized buffer.
    let mut material = String::with_capacity(voice.len() + 1 + text.len());
    material.push_str(voice);
    material.push('|');
    material.push_str(text);

    hex::encode(Sha256::digest(material.as_bytes()))
}
/// Returns the path of the cached MP3 for a `(voice, text)` pair.
///
/// The file lives in the `tts_cache_dir` taken from freshly loaded
/// `IvrEditorSettings` and is named `<cache key>.mp3`, where the key comes
/// from `generate_cache_key`. Nothing is created on disk.
fn get_cache_path(voice: &str, text: &str) -> PathBuf {
    let config = IvrEditorSettings::load();
    let file_name = format!("{}.mp3", generate_cache_key(voice, text));

    let mut cache_file = Path::new(&config.tts_cache_dir).to_path_buf();
    cache_file.push(file_name);
    cache_file
}
/// Outcome of a cache-aware synthesis request.
#[derive(Debug)]
pub struct SynthesizeResult {
    /// Location of the MP3 file for the requested voice and text.
    pub path: PathBuf,
    /// `true` when an existing cache file was reused, `false` when the audio
    /// was generated by this call.
    pub cached: bool,
}
/// Synthesizes `text` with `edge-cli`, reusing a previously generated file
/// when one exists for the same voice and text.
///
/// The cache location comes from `get_cache_path`. An empty `voice` selects
/// `en-US-AriaNeural`. On a cache hit the tool is not invoked and the result
/// has `cached: true`; otherwise the cache directory is created if needed,
/// the audio is generated, and the result has `cached: false`.
///
/// # Errors
///
/// Returns an error when:
/// - `text` is empty or whitespace-only,
/// - the cache directory cannot be created,
/// - `edge-cli` cannot be started, or exits with a non-zero status (its
///   stderr is included in the message).
pub async fn synthesize_with_cache(
    text: &str,
    voice: &str,
) -> Result<SynthesizeResult, anyhow::Error> {
    if text.trim().is_empty() {
        anyhow::bail!("text must not be empty");
    }

    let voice = if voice.is_empty() {
        "en-US-AriaNeural"
    } else {
        voice
    };

    // `get_cache_path` loads the settings itself. The cache directory is
    // derived from its result below instead of loading the settings a second
    // time, so the directory that gets created always matches the file path.
    let cache_path = get_cache_path(voice, text);

    // Non-blocking existence check. Only a non-empty regular file counts as a
    // hit, so an empty leftover from an interrupted run is regenerated.
    let is_hit = matches!(
        tokio::fs::metadata(&cache_path).await,
        Ok(meta) if meta.is_file() && meta.len() > 0
    );
    if is_hit {
        return Ok(SynthesizeResult {
            path: cache_path,
            cached: true,
        });
    }

    if let Some(cache_dir) = cache_path.parent() {
        tokio::fs::create_dir_all(cache_dir).await?;
    }

    let output = tokio::process::Command::new("edge-cli")
        .arg("speak")
        .arg("--text")
        .arg(text)
        .arg("--voice")
        .arg(voice)
        .arg("--write-media")
        // Pass the path as an OS string. Converting through `to_str()` with a
        // literal fallback could make the tool write to a different file than
        // the one returned to the caller.
        .arg(&cache_path)
        .output()
        .await
        .map_err(|e| anyhow::anyhow!("failed to run edge-cli (is it installed?): {}", e))?;

    if !output.status.success() {
        // Best effort: drop whatever the failed run may have written so it is
        // not served as a cache hit later. A removal error (typically "not
        // found") is deliberately ignored; the tool failure is what we report.
        let _ = tokio::fs::remove_file(&cache_path).await;
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("edge-cli failed: {}", stderr);
    }

    Ok(SynthesizeResult {
        path: cache_path,
        cached: false,
    })
}