use crate::types::{SpeakOptions, TtsResult, Voice, WordBoundary};
use std::fmt;
/// Borrowed, mutable callback that receives a slice of bytes.
///
/// The default `TtsEngine::synth_to_bytes` passes one of these to `speak` and
/// appends every slice it is handed to the returned buffer.
pub type OnAudioCallback<'a> = &'a mut dyn FnMut(&[u8]);
/// Borrowed, mutable callback taking a string slice and two `f32` values.
///
/// NOTE(review): presumably the word text followed by its timing (start/end or
/// offset/duration); the meaning and units of the two floats are not visible
/// here -- confirm against the engine implementations.
pub type OnBoundaryCallback<'a> = &'a mut dyn FnMut(&str, f32, f32);
/// Borrowed, mutable callback taking no arguments (by name, a "start" notification).
pub type OnStartCallback<'a> = &'a mut dyn FnMut();
/// Borrowed, mutable callback taking no arguments (by name, an "end" notification).
pub type OnEndCallback<'a> = &'a mut dyn FnMut();
/// Borrowed, mutable callback taking a string slice (by name, an error message).
pub type OnErrorCallback<'a> = &'a mut dyn FnMut(&str);
/// Converts Speech Markdown in `text` to SSML for the requested platform.
///
/// `platform` selects the SSML dialect: `"azure"` maps to
/// `Platform::MicrosoftAzure`, `"google"` to `Platform::GoogleAssistant`, and
/// every other value to `Platform::AmazonAlexa`.
///
/// Returns `(ssml, true)` when the parser recognises `text` as Speech Markdown
/// and the conversion succeeds. In every other case (not Speech Markdown, or
/// the conversion returned an error) the input is returned unchanged together
/// with `false`.
#[cfg(feature = "cloud")]
#[must_use]
pub fn preprocess_speech_markdown(text: &str, platform: &str) -> (String, bool) {
    use speechmarkdown_rust::{Platform, SpeechMarkdownParser};

    if SpeechMarkdownParser::is_speech_markdown(text) {
        let target = match platform {
            "azure" => Platform::MicrosoftAzure,
            "google" => Platform::GoogleAssistant,
            _ => Platform::AmazonAlexa,
        };
        if let Ok(ssml) = SpeechMarkdownParser::to_ssml(text, target) {
            return (ssml, true);
        }
    }

    // Either nothing to convert or the conversion failed: pass the text through.
    (text.to_string(), false)
}
/// Passthrough used when the `cloud` feature is disabled.
///
/// No Speech Markdown processing is performed: the result is always an owned
/// copy of `text` paired with `false`. `_platform` is ignored.
#[cfg(not(feature = "cloud"))]
#[must_use]
pub fn preprocess_speech_markdown(text: &str, _platform: &str) -> (String, bool) {
    let unchanged = String::from(text);
    (unchanged, false)
}
#[allow(clippy::missing_errors_doc)]
pub trait TtsEngine: Send + Sync + fmt::Debug {
#[allow(clippy::too_many_arguments)]
fn speak(
&self,
text: &str,
voice: Option<&str>,
rate: f32,
pitch: f32,
volume: f32,
on_audio: Option<OnAudioCallback>,
on_boundary: Option<OnBoundaryCallback>,
) -> TtsResult<()>;
fn speak_with_options(
&self,
text: &str,
options: Option<&SpeakOptions>,
on_audio: Option<OnAudioCallback>,
on_boundary: Option<OnBoundaryCallback>,
) -> TtsResult<()> {
let opts = options.cloned().unwrap_or_default();
self.speak(
text,
opts.voice.as_deref(),
opts.effective_rate(),
opts.effective_pitch(),
opts.effective_volume(),
on_audio,
on_boundary,
)
}
#[allow(clippy::too_many_arguments)]
fn speak_sync(
&self,
text: &str,
voice: Option<&str>,
rate: f32,
pitch: f32,
volume: f32,
on_audio: Option<OnAudioCallback>,
on_boundary: Option<OnBoundaryCallback>,
) -> TtsResult<()>;
fn stop(&self) -> TtsResult<()>;
fn pause(&self) -> TtsResult<()> {
Ok(())
}
fn resume(&self) -> TtsResult<()> {
Ok(())
}
fn get_voices(&self) -> TtsResult<Vec<Voice>>;
fn engine_id(&self) -> &'static str;
fn check_credentials(&self) -> TtsResult<bool> {
match self.get_voices() {
Ok(_) => Ok(true),
Err(_) => Ok(false),
}
}
fn synth_to_bytes(
&self,
text: &str,
voice: Option<&str>,
rate: f32,
pitch: f32,
volume: f32,
) -> TtsResult<Vec<u8>> {
let mut buf = Vec::new();
self.speak(
text,
voice,
rate,
pitch,
volume,
Some(&mut |chunk: &[u8]| {
buf.extend_from_slice(chunk);
}),
None,
)?;
Ok(buf)
}
fn synth_to_bytes_with_options(
&self,
text: &str,
options: Option<&SpeakOptions>,
) -> TtsResult<Vec<u8>> {
let opts = options.cloned().unwrap_or_default();
self.synth_to_bytes(
text,
opts.voice.as_deref(),
opts.effective_rate(),
opts.effective_pitch(),
opts.effective_volume(),
)
}
fn synth_with_boundaries(
&self,
text: &str,
voice: Option<&str>,
rate: f32,
pitch: f32,
volume: f32,
) -> TtsResult<(Vec<u8>, Vec<WordBoundary>)> {
let audio = self.synth_to_bytes(text, voice, rate, pitch, volume)?;
let boundaries = estimate_word_boundaries(text);
Ok((audio, boundaries))
}
}
/// Estimates word timings for `text` at a fixed pace of 150 words per minute.
///
/// Delegates to `estimate_word_boundaries_with_wpm`; see that function for how
/// individual word durations are derived.
#[must_use]
pub fn estimate_word_boundaries(text: &str) -> Vec<WordBoundary> {
    const DEFAULT_WORDS_PER_MINUTE: f64 = 150.0;
    estimate_word_boundaries_with_wpm(text, DEFAULT_WORDS_PER_MINUTE)
}
/// Estimates word timings for `text` from its whitespace-separated words and a
/// speaking pace, without consulting any speech engine.
///
/// Each word gets a base duration of `60_000 / words_per_minute` milliseconds,
/// scaled by `word.len() / 5` (byte length, clamped to `0.5..=2.0`) and never
/// shorter than 1 ms. Offsets are the running sum of the preceding durations,
/// starting at 0.
///
/// A `words_per_minute` that is zero, negative or NaN falls back to 150 wpm
/// instead of producing degenerate timings. Offsets saturate at `u64::MAX`
/// rather than overflowing when the pace is extremely small.
///
/// Returns an empty vector when `text` contains no words.
#[must_use]
// `usize as f64` only loses precision for words longer than 2^53 bytes.
#[allow(clippy::cast_precision_loss)]
pub fn estimate_word_boundaries_with_wpm(text: &str, words_per_minute: f64) -> Vec<WordBoundary> {
    const FALLBACK_WORDS_PER_MINUTE: f64 = 150.0;

    // `NaN > 0.0` is false, so this single comparison rejects NaN, zero and
    // negative paces alike.
    let pace = if words_per_minute > 0.0 {
        words_per_minute
    } else {
        FALLBACK_WORDS_PER_MINUTE
    };
    let ms_per_word = 60_000.0 / pace;

    let mut current_ms: u64 = 0;
    // `split_whitespace` never yields empty items, so no extra filtering is needed.
    text.split_whitespace()
        .map(|word| {
            let length_factor = (word.len() as f64 / 5.0).clamp(0.5, 2.0);
            // Float-to-int `as` saturates, so a huge product becomes `u64::MAX`.
            let duration = ((ms_per_word * length_factor) as u64).max(1);
            let offset = current_ms;
            // Saturate: with very small paces a single duration can already be `u64::MAX`.
            current_ms = current_ms.saturating_add(duration);
            WordBoundary {
                text: word.to_string(),
                offset,
                duration,
            }
        })
        .collect()
}