use std::path::{Path, PathBuf};
use crate::error::{Error, Result};
use crate::phoneme::PhonemeData;
use crate::synthesize::{PcmBuffer, Synthesizer, VoiceParams};
use crate::translate::{default_data_dir, Translator};
/// Identifies one tunable engine setting.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Parameter {
    /// Speaking rate; the engine stores it clamped to `80..=450`.
    Rate,
    /// Output volume; the engine stores it clamped to `0..=200`.
    Volume,
    /// Base pitch; the engine stores it clamped to `0..=100`.
    Pitch,
    /// Pitch range; the engine stores it clamped to `0..=100`.
    Range,
    /// Punctuation setting; the engine in this file ignores writes to it and
    /// reads it back as `0`.
    Punctuation,
    /// Capitals setting; the engine in this file ignores writes to it and
    /// reads it back as `0`.
    Capitals,
    /// Gap between words; the engine stores the value unclamped.
    WordGap,
}
/// Describes the voice an engine should speak with.
///
/// The derived `Default` leaves `language` and `name` unset, uses the default
/// [`Gender`] and an `age` of `0`.
#[derive(Debug, Clone, Default)]
pub struct VoiceSpec {
    /// Language identifier; consulted first when the effective language is
    /// resolved.
    pub language: Option<String>,
    /// Voice name; used as the effective language when `language` is `None`.
    pub name: Option<String>,
    /// Requested speaker gender.
    pub gender: Gender,
    /// Requested speaker age.
    /// NOTE(review): unit and the meaning of `0` are not shown in this file;
    /// presumably years with `0` meaning "unspecified" — confirm.
    pub age: u8,
}
impl VoiceSpec {
pub fn by_name(lang: &str) -> Self {
VoiceSpec {
language: Some(lang.to_string()),
..Default::default()
}
}
pub fn builder() -> VoiceSpecBuilder {
VoiceSpecBuilder::default()
}
pub(crate) fn effective_lang(&self) -> &str {
self.language
.as_deref()
.or(self.name.as_deref())
.unwrap_or("en")
}
}
/// Chainable builder that accumulates a [`VoiceSpec`].
///
/// The derived `Default` starts from `VoiceSpec::default()`.
#[derive(Debug, Default)]
pub struct VoiceSpecBuilder {
    /// The spec being assembled; handed out unchanged by `build`.
    spec: VoiceSpec,
}
impl VoiceSpecBuilder {
pub fn language(mut self, lang: &str) -> Self {
self.spec.language = Some(lang.to_string());
self
}
pub fn name(mut self, name: &str) -> Self {
self.spec.name = Some(name.to_string());
self
}
pub fn gender(mut self, gender: Gender) -> Self {
self.spec.gender = gender;
self
}
pub fn age(mut self, age: u8) -> Self {
self.spec.age = age;
self
}
pub fn build(self) -> VoiceSpec {
self.spec
}
}
/// Speaker gender requested in a voice specification.
///
/// Each variant has an explicit discriminant, and `Unknown` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Gender {
    /// No gender preference (the `Default` value).
    #[default]
    Unknown = 0,
    /// Male voice.
    Male = 1,
    /// Female voice.
    Female = 2,
    /// Neutral voice.
    Neutral = 3,
}
/// A marker produced alongside synthesized audio.
#[derive(Debug, Clone)]
pub struct SynthEvent {
    /// What kind of event this is.
    pub kind: EventKind,
    /// Position in the input text the event refers to. The only producer in
    /// this file fills it with `text.len()`, i.e. a byte offset.
    pub text_position: usize,
    /// Position in the audio stream, in milliseconds.
    pub audio_position_ms: u32,
}
/// Classifies a synthesis event.
///
/// Only `MsgTerminated` is emitted by the code in this file; the remaining
/// variants are declared but not produced here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind {
    /// Word marker.
    /// NOTE(review): the meaning of the `u32` payload is not established in
    /// this file — confirm against the producer.
    Word(u32),
    /// Sentence marker.
    Sentence,
    /// End marker.
    End,
    /// Emitted once after the whole message has been synthesized.
    MsgTerminated,
    /// Phoneme marker carrying a string payload (presumably the phoneme
    /// name — confirm).
    Phoneme(String),
}
/// Selects how synthesized audio is delivered.
///
/// A single mode exists at present, and it is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OutputMode {
    /// The only available mode.
    /// NOTE(review): presumably audio is returned to the caller rather than
    /// played — nothing in this file uses the type, so confirm.
    #[default]
    Retrieval,
}
/// Text-to-speech engine handle: a voice selection, prosody settings and the
/// data directory that translators and phoneme data are loaded from.
pub struct EspeakNg {
    /// Currently selected voice.
    voice_spec: VoiceSpec,
    /// Speaking rate, kept within `80..=450` by the setters (initially 175).
    rate: u32,
    /// Volume, kept within `0..=200` by the setters (initially 100).
    volume: u32,
    /// Base pitch, kept within `0..=100` by the setters (initially 50).
    pitch: u32,
    /// Pitch range, kept within `0..=100` by the setters (initially 50).
    range: u32,
    /// Word gap, stored exactly as supplied (initially 0).
    word_gap: i32,
    /// Directory holding the engine's data files.
    data_dir: PathBuf,
}
impl EspeakNg {
pub fn new(lang: &str) -> Result<Self> {
Self::with_data_dir(lang, Path::new(&default_data_dir()))
}
pub fn with_data_dir(lang: &str, data_dir: &Path) -> Result<Self> {
if !data_dir.exists() {
return Err(Error::DataPath(format!(
"espeak-ng data directory not found: {}",
data_dir.display()
)));
}
Ok(EspeakNg {
voice_spec: VoiceSpec::by_name(lang),
rate: 175,
volume: 100,
pitch: 50,
range: 50,
word_gap: 0,
data_dir: data_dir.to_path_buf(),
})
}
pub fn builder() -> Builder {
Builder::default()
}
pub fn set_voice(&mut self, lang: &str) {
self.voice_spec = VoiceSpec::by_name(lang);
}
pub fn set_voice_by_spec(&mut self, spec: VoiceSpec) {
self.voice_spec = spec;
}
pub fn set_parameter(&mut self, param: Parameter, value: i32) {
match param {
Parameter::Rate => self.rate = value.clamp(80, 450) as u32,
Parameter::Volume => self.volume = value.clamp(0, 200) as u32,
Parameter::Pitch => self.pitch = value.clamp(0, 100) as u32,
Parameter::Range => self.range = value.clamp(0, 100) as u32,
Parameter::WordGap => self.word_gap = value,
Parameter::Punctuation | Parameter::Capitals => { }
}
}
pub fn set_parameter_relative(&mut self, param: Parameter, delta: i32) {
let current = self.get_parameter(param);
self.set_parameter(param, current + delta);
}
pub fn get_parameter(&self, param: Parameter) -> i32 {
match param {
Parameter::Rate => self.rate as i32,
Parameter::Volume => self.volume as i32,
Parameter::Pitch => self.pitch as i32,
Parameter::Range => self.range as i32,
Parameter::WordGap => self.word_gap,
Parameter::Punctuation | Parameter::Capitals => 0,
}
}
pub const fn sample_rate(&self) -> u32 {
22050
}
pub fn text_to_phonemes(&self, text: &str) -> Result<String> {
let translator = self.make_translator()?;
translator.text_to_ipa(text)
}
pub fn synth(&self, text: &str) -> Result<(PcmBuffer, u32)> {
let translator = self.make_translator()?;
let mut phdata = self.load_phdata()?;
phdata.select_table_by_name(self.voice_spec.effective_lang())
.map_err(|_| Error::VoiceNotFound(
self.voice_spec.effective_lang().to_string()
))?;
let codes = translator.translate_to_codes(text)?;
let voice = self.make_voice_params();
let synth = Synthesizer::new(voice);
let samples = synth.synthesize_codes(&codes, &phdata)?;
Ok((samples, self.sample_rate()))
}
pub fn synth_with_events(&self, text: &str) -> Result<(PcmBuffer, u32, Vec<SynthEvent>)> {
let (samples, rate) = self.synth(text)?;
let events = vec![SynthEvent {
kind: EventKind::MsgTerminated,
text_position: text.len(),
audio_position_ms: samples.len() as u32 * 1000 / rate,
}];
Ok((samples, rate, events))
}
pub fn version() -> &'static str {
env!("CARGO_PKG_VERSION")
}
pub fn data_path(&self) -> &Path {
&self.data_dir
}
pub fn current_voice(&self) -> &VoiceSpec {
&self.voice_spec
}
fn make_translator(&self) -> Result<Translator> {
Translator::new(
self.voice_spec.effective_lang(),
Some(&self.data_dir),
)
}
fn load_phdata(&self) -> Result<PhonemeData> {
PhonemeData::load(&self.data_dir)
.map_err(|_| Error::VoiceNotFound(
format!("phoneme data not found in {}", self.data_dir.display())
))
}
fn make_voice_params(&self) -> VoiceParams {
let speed_percent = (self.rate * 100 / 175).clamp(50, 400);
let pitch_hz = 59 + self.pitch * 118 / 100;
let amplitude = (self.volume / 2).clamp(0, 100);
VoiceParams {
speed_percent,
pitch_hz,
amplitude,
..VoiceParams::default()
}
}
}
/// Collects engine settings before an `EspeakNg` is constructed.
#[derive(Debug)]
pub struct Builder {
    /// Language string used to select the voice.
    lang: String,
    /// Speaking rate, already clamped by the setter.
    rate: u32,
    /// Volume, already clamped by the setter.
    volume: u32,
    /// Base pitch, already clamped by the setter.
    pitch: u32,
    /// Pitch range, already clamped by the setter.
    range: u32,
    /// Explicit data directory; `None` means the default directory is used
    /// when the engine is built.
    data_dir: Option<PathBuf>,
}
impl Default for Builder {
fn default() -> Self {
Builder {
lang: "en".to_string(),
rate: 175,
volume: 100,
pitch: 50,
range: 50,
data_dir: None,
}
}
}
impl Builder {
pub fn voice(mut self, lang: &str) -> Self {
self.lang = lang.to_string();
self
}
pub fn rate(mut self, wpm: u32) -> Self {
self.rate = wpm.clamp(80, 450);
self
}
pub fn volume(mut self, vol: u32) -> Self {
self.volume = vol.clamp(0, 200);
self
}
pub fn pitch(mut self, pitch: u32) -> Self {
self.pitch = pitch.clamp(0, 100);
self
}
pub fn range(mut self, range: u32) -> Self {
self.range = range.clamp(0, 100);
self
}
pub fn data_dir(mut self, path: &Path) -> Self {
self.data_dir = Some(path.to_path_buf());
self
}
pub fn build(self) -> Result<EspeakNg> {
let dir = self.data_dir
.unwrap_or_else(|| PathBuf::from(default_data_dir()));
let mut engine = EspeakNg::with_data_dir(&self.lang, &dir)?;
engine.rate = self.rate;
engine.volume = self.volume;
engine.pitch = self.pitch;
engine.range = self.range;
Ok(engine)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default data directory as a `PathBuf`.
    fn default_dir() -> PathBuf {
        PathBuf::from(default_data_dir())
    }

    /// An English engine, or `None` when the default data directory is
    /// missing (the dependent tests then pass without asserting anything).
    fn english_engine() -> Option<EspeakNg> {
        if default_dir().exists() {
            Some(EspeakNg::new("en").unwrap())
        } else {
            None
        }
    }

    /// An English engine, or `None` when `en_dict` is missing from the
    /// default data directory.
    fn english_engine_with_dict() -> Option<EspeakNg> {
        if default_dir().join("en_dict").exists() {
            Some(EspeakNg::new("en").unwrap())
        } else {
            None
        }
    }

    #[test]
    fn builder_default_values() {
        let defaults = Builder::default();
        assert_eq!(defaults.lang, "en");
        assert_eq!(defaults.rate, 175);
        assert_eq!(defaults.pitch, 50);
        assert_eq!(defaults.volume, 100);
    }

    #[test]
    fn voice_spec_by_name() {
        let spec = VoiceSpec::by_name("de");
        assert_eq!(spec.effective_lang(), "de");
    }

    #[test]
    fn voice_spec_builder() {
        let spec = VoiceSpec::builder()
            .language("fr")
            .gender(Gender::Female)
            .age(25)
            .build();
        assert_eq!(spec.language.as_deref(), Some("fr"));
        assert_eq!(spec.gender, Gender::Female);
        assert_eq!(spec.age, 25);
    }

    #[test]
    fn engine_new_missing_dir() {
        let outcome = EspeakNg::with_data_dir("en", Path::new("/nonexistent/path"));
        assert!(outcome.is_err());
    }

    #[test]
    fn engine_sample_rate() {
        if let Some(engine) = english_engine() {
            assert_eq!(engine.sample_rate(), 22050);
        }
    }

    #[test]
    fn engine_set_get_parameter() {
        if let Some(mut engine) = english_engine() {
            engine.set_parameter(Parameter::Rate, 200);
            assert_eq!(engine.get_parameter(Parameter::Rate), 200);
            engine.set_parameter(Parameter::Pitch, 70);
            assert_eq!(engine.get_parameter(Parameter::Pitch), 70);
            // Out-of-range values are clamped to the nearest bound.
            engine.set_parameter(Parameter::Rate, 9999);
            assert_eq!(engine.get_parameter(Parameter::Rate), 450);
            engine.set_parameter(Parameter::Rate, -9999);
            assert_eq!(engine.get_parameter(Parameter::Rate), 80);
        }
    }

    #[test]
    fn engine_set_parameter_relative() {
        if let Some(mut engine) = english_engine() {
            engine.set_parameter(Parameter::Pitch, 50);
            engine.set_parameter_relative(Parameter::Pitch, 10);
            assert_eq!(engine.get_parameter(Parameter::Pitch), 60);
        }
    }

    #[test]
    fn engine_text_to_phonemes_en() {
        if let Some(engine) = english_engine_with_dict() {
            let ipa = engine.text_to_phonemes("hello").unwrap();
            assert!(ipa.contains('h'), "expected IPA with 'h', got: {ipa}");
        }
    }

    #[test]
    fn engine_synth_returns_samples() {
        if let Some(engine) = english_engine_with_dict() {
            let (samples, rate) = engine.synth("hello").unwrap();
            assert_eq!(rate, 22050);
            assert!(!samples.is_empty());
        }
    }

    #[test]
    fn engine_version_nonempty() {
        assert!(!EspeakNg::version().is_empty());
    }

    #[test]
    fn engine_builder_chain() {
        if !default_dir().exists() {
            return;
        }
        let engine = EspeakNg::builder()
            .voice("en")
            .rate(200)
            .pitch(60)
            .volume(80)
            .build()
            .unwrap();
        assert_eq!(engine.get_parameter(Parameter::Rate), 200);
        assert_eq!(engine.get_parameter(Parameter::Pitch), 60);
        assert_eq!(engine.get_parameter(Parameter::Volume), 80);
    }
}