1use std::collections::HashMap;
4use std::fmt;
5use std::os::raw::c_char;
6
/// Gender classification attached to a voice.
///
/// `Unknown` is the value [`normalize_gender`] produces for every input it
/// does not recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Gender {
    /// Rendered as `Male`.
    Male,
    /// Rendered as `Female`.
    Female,
    /// Rendered as `Unknown`.
    Unknown,
}

impl fmt::Display for Gender {
    /// Writes the variant name (`Male`, `Female` or `Unknown`).
    ///
    /// The label is emitted through [`fmt::Formatter::pad`] so that width,
    /// fill, alignment and precision flags (for example `{:<8}`) are applied
    /// instead of being silently dropped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Male => "Male",
            Self::Female => "Female",
            Self::Unknown => "Unknown",
        };
        f.pad(label)
    }
}

/// Maps a free-form gender string onto a [`Gender`].
///
/// Leading and trailing whitespace is ignored and the comparison is ASCII
/// case-insensitive, so `"FEMALE"`, `"female"` and `" Female\n"` all yield
/// [`Gender::Female`]. Every other input, including the empty string, yields
/// [`Gender::Unknown`].
#[must_use]
pub fn normalize_gender(value: &str) -> Gender {
    // Compare in place: no lowercase copy of the input is allocated.
    let value = value.trim();
    if value.eq_ignore_ascii_case("female") {
        Gender::Female
    } else if value.eq_ignore_ascii_case("male") {
        Gender::Male
    } else {
        Gender::Unknown
    }
}
34
/// One language entry, stored in three textual forms.
///
/// All three fields are plain strings; nothing in this file validates them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LanguageCode {
    /// Language tag; the field name points at BCP 47 (presumably values such
    /// as `en-US`) — confirm against the code that fills it in.
    pub bcp47: String,
    /// Language code; the field name points at ISO 639-3 — TODO confirm.
    pub iso639_3: String,
    /// Presumably a human-readable language name — TODO confirm.
    pub display: String,
}
45
46#[derive(Debug, Clone)]
49pub struct Voice {
50 pub id: String,
52 pub name: String,
54 pub gender: Gender,
56 pub provider: String,
58 pub language_codes: Vec<LanguageCode>,
60}
61
62impl Voice {
63 #[must_use]
65 pub fn primary_language(&self) -> &str {
66 self.language_codes.first().map_or("", |l| l.bcp47.as_str())
67 }
68}
69
/// Audio formats known to this module.
///
/// The `Display` implementation renders each variant as a short lowercase
/// label (`mp3`, `wav`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioFormat {
    /// Rendered as `mp3`.
    Mp3,
    /// Rendered as `wav`.
    Wav,
    /// Rendered as `ogg`.
    Ogg,
    /// Rendered as `opus`.
    Opus,
    /// Rendered as `aac`.
    Aac,
    /// Rendered as `flac`.
    Flac,
    /// Rendered as `pcm`.
    Pcm,
}

impl fmt::Display for AudioFormat {
    /// Writes the lowercase label of the format.
    ///
    /// The label goes through [`fmt::Formatter::pad`] so that width, fill,
    /// alignment and precision flags (for example `{:>5}`) are applied
    /// instead of being silently dropped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Mp3 => "mp3",
            Self::Wav => "wav",
            Self::Ogg => "ogg",
            Self::Opus => "opus",
            Self::Aac => "aac",
            Self::Flac => "flac",
            Self::Pcm => "pcm",
        };
        f.pad(label)
    }
}
95
/// Named speech-rate presets.
///
/// Each preset maps to a fixed numeric value via [`SpeechRate::rate_value`].
/// The enum has no payload, so it derives `Eq` alongside `PartialEq`, in line
/// with the other fieldless enums in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpeechRate {
    /// Maps to `0.5`.
    XSlow,
    /// Maps to `0.75`.
    Slow,
    /// Maps to `1.0`.
    Medium,
    /// Maps to `1.25`.
    Fast,
    /// Maps to `1.5`.
    XFast,
}

impl SpeechRate {
    /// Returns the numeric value associated with this preset.
    ///
    /// The values run from `0.5` (`XSlow`) to `1.5` (`XFast`) in steps of
    /// `0.25`, with `Medium` at `1.0`.
    #[must_use]
    pub fn rate_value(self) -> f32 {
        match self {
            Self::XSlow => 0.5,
            Self::Slow => 0.75,
            Self::Medium => 1.0,
            Self::Fast => 1.25,
            Self::XFast => 1.5,
        }
    }
}
119
/// Named speech-pitch presets.
///
/// Each preset maps to a fixed numeric value via
/// [`SpeechPitch::pitch_value`]. The enum has no payload, so it derives `Eq`
/// alongside `PartialEq`, in line with the other fieldless enums in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpeechPitch {
    /// Maps to `0.5`.
    XLow,
    /// Maps to `0.75`.
    Low,
    /// Maps to `1.0`.
    Medium,
    /// Maps to `1.25`.
    High,
    /// Maps to `1.5`.
    XHigh,
}

impl SpeechPitch {
    /// Returns the numeric value associated with this preset.
    ///
    /// The values run from `0.5` (`XLow`) to `1.5` (`XHigh`) in steps of
    /// `0.25`, with `Medium` at `1.0`.
    #[must_use]
    pub fn pitch_value(self) -> f32 {
        match self {
            Self::XLow => 0.5,
            Self::Low => 0.75,
            Self::Medium => 1.0,
            Self::High => 1.25,
            Self::XHigh => 1.5,
        }
    }
}
143
144#[derive(Debug, Clone, Default)]
146pub struct SpeakOptions {
147 pub rate: Option<f32>,
149 pub speech_rate: Option<SpeechRate>,
151 pub pitch: Option<f32>,
153 pub speech_pitch: Option<SpeechPitch>,
155 pub volume: Option<f32>,
157 pub voice: Option<String>,
159 pub format: Option<AudioFormat>,
161 pub use_speech_markdown: bool,
163 pub use_word_boundary: bool,
165 pub raw_ssml: bool,
167 pub extra: HashMap<String, String>,
169}
170
171impl SpeakOptions {
172 #[must_use]
174 pub fn effective_rate(&self) -> f32 {
175 self.rate
176 .or_else(|| self.speech_rate.map(SpeechRate::rate_value))
177 .unwrap_or(1.0)
178 }
179
180 #[must_use]
182 pub fn effective_pitch(&self) -> f32 {
183 self.pitch
184 .or_else(|| self.speech_pitch.map(SpeechPitch::pitch_value))
185 .unwrap_or(1.0)
186 }
187
188 #[must_use]
190 pub fn effective_volume(&self) -> f32 {
191 self.volume.unwrap_or(1.0)
192 }
193}
194
/// A piece of text together with an offset and a duration.
///
/// Going by the name, this describes where a word falls in synthesized
/// audio; the units of `offset` and `duration` are not established in this
/// file — TODO confirm with the code that produces these values.
#[derive(Debug, Clone, PartialEq)]
pub struct WordBoundary {
    /// The text this boundary refers to.
    pub text: String,
    /// Start position (unit not shown here).
    pub offset: u64,
    /// Length (unit not shown here).
    pub duration: u64,
}
206
/// Descriptive record for one engine.
///
/// Plain data only; nothing in this file interprets the fields.
#[derive(Debug, Clone)]
pub struct EngineDescriptor {
    /// Identifier of the engine.
    pub id: String,
    /// Name of the engine; presumably human-readable — TODO confirm.
    pub name: String,
    /// Whether the engine needs credentials, going by the field name.
    pub needs_credentials: bool,
    /// String that, going by its name, holds JSON describing the credential
    /// keys; its schema is not shown in this file.
    pub credential_keys_json: String,
}
219
220#[derive(Debug, Clone)]
222pub struct SherpaModelInfo {
223 pub id: String,
225 pub model_type: String,
227 pub name: String,
229 pub language: Vec<SherpaLanguage>,
231 pub sample_rate: u32,
233 pub num_speakers: u32,
235 pub url: String,
237 pub compression: bool,
239 pub filesize_mb: f64,
241}
242
/// Language entry used by `SherpaModelInfo::language`.
///
/// All fields are free-form strings; their formats are not established in
/// this file.
#[derive(Debug, Clone)]
pub struct SherpaLanguage {
    /// Language code (which standard it follows is not shown here).
    pub lang_code: String,
    /// Name of the language.
    pub language_name: String,
    /// Country associated with the entry.
    pub country: String,
}
253
/// C-layout voice record made of raw `char` pointers.
///
/// `#[repr(C)]` fixes the field order and layout, so fields must not be
/// reordered. NOTE(review): the pointers are presumably NUL-terminated C
/// strings, but who allocates and frees them is not visible in this file —
/// confirm before dereferencing or releasing any of them.
#[repr(C)]
pub struct tts_voice {
    /// Voice identifier.
    pub id: *mut c_char,
    /// Voice name.
    pub name: *mut c_char,
    /// Voice language (exact format not shown here).
    pub language: *mut c_char,
    /// Voice gender as text.
    pub gender: *mut c_char,
    /// Engine the voice belongs to, going by the field name.
    pub engine: *mut c_char,
}
268
/// C-layout engine record.
///
/// `#[repr(C)]` fixes the field order and layout, so fields must not be
/// reordered. NOTE(review): the pointers are presumably NUL-terminated C
/// strings, but who allocates and frees them is not visible in this file —
/// confirm before dereferencing or releasing any of them.
#[repr(C)]
pub struct tts_engine_info {
    /// Engine identifier.
    pub id: *mut c_char,
    /// Engine name.
    pub name: *mut c_char,
    /// Whether the engine needs credentials, going by the field name.
    pub needs_credentials: bool,
    /// Going by the name, JSON text describing the credential keys; its
    /// schema is not shown in this file.
    pub credential_keys_json: *mut c_char,
}
281
/// Error type carrying nothing but a message string.
///
/// The wrapped `String` is public, so callers can build a value directly with
/// `TtsError(message)` and read the text back through `.0`.
#[derive(Debug)]
pub struct TtsError(pub String);

impl fmt::Display for TtsError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

/// Uses the trait's default methods, so `source()` reports no underlying
/// cause.
impl std::error::Error for TtsError {}
293
294impl From<anyhow::Error> for TtsError {
295 fn from(e: anyhow::Error) -> Self {
296 TtsError(e.to_string())
297 }
298}
299
300pub type TtsResult<T> = Result<T, TtsError>;