1use crate::types::{SpeakOptions, TtsResult, Voice, WordBoundary};
4use std::fmt;
5
/// Borrowed, mutable callback that is handed a slice of bytes.
///
/// Within this module it is used to receive audio data chunk by chunk
/// (see `TtsEngine::synth_to_bytes`, which appends every chunk to a buffer).
pub type OnAudioCallback<'a> = &'a mut dyn FnMut(&[u8]);

/// Borrowed, mutable callback that is handed a string slice and two `f32` values.
///
/// NOTE(review): presumably the word text followed by its start and end
/// position in the audio; the meaning and units of the two floats are not
/// visible from this file — confirm against the engine implementations.
pub type OnBoundaryCallback<'a> = &'a mut dyn FnMut(&str, f32, f32);

/// Borrowed, mutable callback that takes no arguments.
///
/// NOTE(review): by its name, intended to fire when playback/synthesis
/// starts; it is not used in this part of the file — confirm.
pub type OnStartCallback<'a> = &'a mut dyn FnMut();

/// Borrowed, mutable callback that takes no arguments.
///
/// NOTE(review): by its name, intended to fire when playback/synthesis
/// ends; it is not used in this part of the file — confirm.
pub type OnEndCallback<'a> = &'a mut dyn FnMut();

/// Borrowed, mutable callback that is handed a string slice.
///
/// NOTE(review): by its name, the string is presumably an error message;
/// it is not used in this part of the file — confirm.
pub type OnErrorCallback<'a> = &'a mut dyn FnMut(&str);
20
/// Converts Speech Markdown in `text` to SSML for the given `platform`.
///
/// Returns `(ssml, true)` when `text` is recognised as Speech Markdown and the
/// conversion succeeds. In every other case (plain text, or a conversion
/// error) the input is returned unchanged together with `false`.
///
/// `platform` is matched by exact string: `"azure"` selects Microsoft Azure,
/// `"google"` selects Google Assistant, and any other value selects Amazon
/// Alexa.
#[cfg(feature = "cloud")]
#[must_use]
pub fn preprocess_speech_markdown(text: &str, platform: &str) -> (String, bool) {
    use speechmarkdown_rust::{Platform, SpeechMarkdownParser};

    if SpeechMarkdownParser::is_speech_markdown(text) {
        // Unknown platform names deliberately fall through to Alexa.
        let target = match platform {
            "azure" => Platform::MicrosoftAzure,
            "google" => Platform::GoogleAssistant,
            _ => Platform::AmazonAlexa,
        };

        if let Ok(ssml) = SpeechMarkdownParser::to_ssml(text, target) {
            return (ssml, true);
        }
    }

    // Either not Speech Markdown or the conversion failed: pass through.
    (text.to_string(), false)
}
43
/// Pass-through variant used when the `cloud` feature is disabled.
///
/// No Speech Markdown processing is performed: the input is copied into an
/// owned `String` and the flag is always `false`. `_platform` is ignored.
#[cfg(not(feature = "cloud"))]
#[must_use]
pub fn preprocess_speech_markdown(text: &str, _platform: &str) -> (String, bool) {
    let unchanged = String::from(text);
    (unchanged, false)
}
49
50#[allow(clippy::missing_errors_doc)]
54pub trait TtsEngine: Send + Sync + fmt::Debug {
55 #[allow(clippy::too_many_arguments)]
57 fn speak(
58 &self,
59 text: &str,
60 voice: Option<&str>,
61 rate: f32,
62 pitch: f32,
63 volume: f32,
64 on_audio: Option<OnAudioCallback>,
65 on_boundary: Option<OnBoundaryCallback>,
66 ) -> TtsResult<()>;
67
68 fn speak_with_options(
70 &self,
71 text: &str,
72 options: Option<&SpeakOptions>,
73 on_audio: Option<OnAudioCallback>,
74 on_boundary: Option<OnBoundaryCallback>,
75 ) -> TtsResult<()> {
76 let opts = options.cloned().unwrap_or_default();
77 self.speak(
78 text,
79 opts.voice.as_deref(),
80 opts.effective_rate(),
81 opts.effective_pitch(),
82 opts.effective_volume(),
83 on_audio,
84 on_boundary,
85 )
86 }
87
88 #[allow(clippy::too_many_arguments)]
90 fn speak_sync(
91 &self,
92 text: &str,
93 voice: Option<&str>,
94 rate: f32,
95 pitch: f32,
96 volume: f32,
97 on_audio: Option<OnAudioCallback>,
98 on_boundary: Option<OnBoundaryCallback>,
99 ) -> TtsResult<()>;
100
101 fn stop(&self) -> TtsResult<()>;
103
104 fn pause(&self) -> TtsResult<()> {
106 Ok(())
107 }
108
109 fn resume(&self) -> TtsResult<()> {
111 Ok(())
112 }
113
114 fn get_voices(&self) -> TtsResult<Vec<Voice>>;
116
117 fn engine_id(&self) -> &'static str;
119
120 fn check_credentials(&self) -> TtsResult<bool> {
123 match self.get_voices() {
124 Ok(_) => Ok(true),
125 Err(_) => Ok(false),
126 }
127 }
128
129 fn synth_to_bytes(
132 &self,
133 text: &str,
134 voice: Option<&str>,
135 rate: f32,
136 pitch: f32,
137 volume: f32,
138 ) -> TtsResult<Vec<u8>> {
139 let mut buf = Vec::new();
140 self.speak(
141 text,
142 voice,
143 rate,
144 pitch,
145 volume,
146 Some(&mut |chunk: &[u8]| {
147 buf.extend_from_slice(chunk);
148 }),
149 None,
150 )?;
151 Ok(buf)
152 }
153
154 fn synth_to_bytes_with_options(
156 &self,
157 text: &str,
158 options: Option<&SpeakOptions>,
159 ) -> TtsResult<Vec<u8>> {
160 let opts = options.cloned().unwrap_or_default();
161 self.synth_to_bytes(
162 text,
163 opts.voice.as_deref(),
164 opts.effective_rate(),
165 opts.effective_pitch(),
166 opts.effective_volume(),
167 )
168 }
169
170 fn synth_with_boundaries(
172 &self,
173 text: &str,
174 voice: Option<&str>,
175 rate: f32,
176 pitch: f32,
177 volume: f32,
178 ) -> TtsResult<(Vec<u8>, Vec<WordBoundary>)> {
179 let audio = self.synth_to_bytes(text, voice, rate, pitch, volume)?;
180 let boundaries = estimate_word_boundaries(text);
181 Ok((audio, boundaries))
182 }
183}
184
185#[must_use]
188#[allow(clippy::cast_precision_loss)]
189pub fn estimate_word_boundaries(text: &str) -> Vec<WordBoundary> {
190 estimate_word_boundaries_with_wpm(text, 150.0)
191}
192
193#[must_use]
196#[allow(clippy::cast_precision_loss)]
197pub fn estimate_word_boundaries_with_wpm(text: &str, words_per_minute: f64) -> Vec<WordBoundary> {
198 let words: Vec<&str> = text.split_whitespace().filter(|w| !w.is_empty()).collect();
199 if words.is_empty() {
200 return Vec::new();
201 }
202
203 let ms_per_word = 60_000.0 / words_per_minute;
204
205 let mut boundaries = Vec::with_capacity(words.len());
206 let mut current_ms: u64 = 0;
207
208 for word in &words {
209 let length_factor = (word.len() as f64 / 5.0).clamp(0.5, 2.0);
210 let duration = (ms_per_word * length_factor) as u64;
211 let duration = duration.max(1);
212
213 boundaries.push(WordBoundary {
214 text: (*word).to_string(),
215 offset: current_ms,
216 duration,
217 });
218 current_ms += duration;
219 }
220
221 boundaries
222}