use std::sync::Arc;
use thiserror::Error;
use super::g2p::{G2pError, Lang, Phonemizer};
use super::kokoro::KokoroEngine;
/// Errors surfaced by the TTS bundle layer.
///
/// `Display` text comes from the `#[error(...)]` attributes below.
#[derive(Debug, Error)]
pub enum TtsError {
    /// The requested voice id was not found in the bundle's voice list.
    /// Carries the id that was asked for.
    #[error("voice '{0}' not registered on this bundle")]
    UnknownVoice(String),

    /// A grapheme-to-phoneme failure. `#[from]` lets `?` convert a
    /// [`G2pError`] into this variant automatically.
    #[error("G2P error: {0}")]
    G2p(#[from] G2pError),

    /// Synthesis was requested on a bundle that has no engine attached.
    #[error("neural synthesis requires an engine; this bundle was created without one")]
    NotImplemented,

    /// A loading failure described by a message.
    // NOTE(review): not constructed anywhere in this file — presumably used by
    // model/asset loading code elsewhere; confirm against callers.
    #[error("load error: {0}")]
    Load(String),

    /// An error reported by the synthesis engine, kept as its string form.
    #[error("engine error: {0}")]
    Engine(String),
}
/// A single entry in a bundle's voice catalog.
#[derive(Debug, Clone)]
pub struct Voice {
    /// Identifier used to look the voice up (e.g. `"af_alloy"`).
    pub id: String,
    /// Language handed to the phonemizer when text is phonemized for this voice.
    pub lang: Lang,
    /// Label for the voice (e.g. `"Alloy"`).
    pub display_name: String,
}
impl Voice {
pub fn new(id: impl Into<String>, lang: Lang, display_name: impl Into<String>) -> Self {
Self {
id: id.into(),
lang,
display_name: display_name.into(),
}
}
}
/// Per-call tuning knobs for synthesis.
#[derive(Debug, Clone)]
pub struct SynthesizeOptions {
    /// Speed value forwarded to the engine; the default is `1.0`.
    // NOTE(review): presumably a rate multiplier where larger means faster
    // speech — confirm against the engine's contract.
    pub speed: f32,
}

impl Default for SynthesizeOptions {
    /// Returns options with `speed` set to `1.0`.
    fn default() -> Self {
        SynthesizeOptions { speed: 1.0 }
    }
}
/// A voice catalog paired with an optional synthesis engine.
pub struct TtsBundle {
    /// Voices registered on this bundle; searched by id.
    voices: Vec<Voice>,
    /// Sample rate associated with this bundle. Set at construction and
    /// overwritten with the engine's own value when an engine is attached.
    // NOTE(review): units are presumably Hz — confirm.
    pub sample_rate: u32,
    /// Shared engine handle; `None` for a bundle created without an engine.
    engine: Option<Arc<KokoroEngine>>,
}
impl TtsBundle {
pub fn scaffolding(voices: Vec<Voice>, sample_rate: u32) -> Self {
Self {
voices,
sample_rate,
engine: None,
}
}
pub fn with_engine(mut self, engine: Arc<KokoroEngine>) -> Self {
self.sample_rate = engine.sample_rate();
self.engine = Some(engine);
self
}
pub fn has_engine(&self) -> bool {
self.engine.is_some()
}
pub fn voices(&self) -> &[Voice] {
&self.voices
}
pub fn voice(&self, id: &str) -> Option<&Voice> {
self.voices.iter().find(|v| v.id == id)
}
pub fn phonemize(&self, text: &str, voice_id: &str) -> Result<Vec<String>, TtsError> {
let voice = self
.voice(voice_id)
.ok_or_else(|| TtsError::UnknownVoice(voice_id.to_string()))?;
let phonemizer = Phonemizer::new(voice.lang)?;
Ok(phonemizer.text_to_phonemes(text)?)
}
pub fn synthesize(
&self,
text: &str,
voice_id: &str,
options: &SynthesizeOptions,
) -> Result<Vec<f32>, TtsError> {
match &self.engine {
Some(engine) => engine
.synthesize(text, voice_id, options.speed)
.map_err(|e| TtsError::Engine(e.to_string())),
None => {
let _phonemes = self.phonemize(text, voice_id)?;
let _ = options.speed;
Err(TtsError::NotImplemented)
}
}
}
}
/// Returns the built-in voice catalog: five `Lang::EnUs` voices followed by
/// four `Lang::EnGb` voices.
pub fn default_kokoro_voices() -> Vec<Voice> {
    // (voice id, display name) pairs, grouped by language.
    let us_english = [
        ("af_alloy", "Alloy"),
        ("af_nova", "Nova"),
        ("af_bella", "Bella"),
        ("am_adam", "Adam"),
        ("am_michael", "Michael"),
    ];
    let gb_english = [
        ("bf_alice", "Alice"),
        ("bf_emma", "Emma"),
        ("bm_daniel", "Daniel"),
        ("bm_george", "George"),
    ];

    let us_voices = us_english
        .iter()
        .map(|&(id, label)| Voice::new(id, Lang::EnUs, label));
    let gb_voices = gb_english
        .iter()
        .map(|&(id, label)| Voice::new(id, Lang::EnGb, label));

    // US entries first, then GB, in table order.
    us_voices.chain(gb_voices).collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Engine-less bundle over the default catalog with a sample rate of 24_000.
    fn scaffold() -> TtsBundle {
        TtsBundle::scaffolding(default_kokoro_voices(), 24_000)
    }

    /// Synthesizes "hello" with default options on an engine-less bundle and
    /// returns the error; panics if synthesis unexpectedly succeeds.
    fn synth_err(voice_id: &str) -> TtsError {
        scaffold()
            .synthesize("hello", voice_id, &SynthesizeOptions::default())
            .unwrap_err()
    }

    #[test]
    fn scaffolding_bundle_exposes_voices() {
        let bundle = scaffold();
        assert_eq!(bundle.sample_rate, 24_000);
        assert!(bundle.voice("af_alloy").is_some());
        assert!(bundle.voice("not-a-voice").is_none());
    }

    #[test]
    fn synthesize_returns_not_implemented() {
        let err = synth_err("af_alloy");
        // With G2P compiled in, phonemization succeeds and the missing engine
        // is what gets reported; without it, the G2P layer fails first.
        #[cfg(feature = "tts-g2p")]
        assert!(matches!(err, TtsError::NotImplemented));
        #[cfg(not(feature = "tts-g2p"))]
        assert!(matches!(err, TtsError::G2p(G2pError::FeatureDisabled)));
    }

    #[test]
    fn synthesize_validates_voice_first() {
        // The voice lookup happens before any phonemizer is built, so this
        // holds regardless of the `tts-g2p` feature.
        let err = synth_err("ghost_voice");
        assert!(matches!(err, TtsError::UnknownVoice(_)));
    }

    #[test]
    fn default_catalog_has_known_voices() {
        let catalog = default_kokoro_voices();
        let has_voice = |wanted: &str| catalog.iter().any(|v| v.id == wanted);
        assert!(has_voice("af_nova"));
        assert!(has_voice("bm_george"));
    }
}