1use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum TTSModel {
10 #[serde(rename = "ssfm-v30")]
12 SsfmV30,
13 #[serde(rename = "ssfm-v21")]
15 SsfmV21,
16}
17
18impl Default for TTSModel {
19 fn default() -> Self {
20 TTSModel::SsfmV30
21 }
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "lowercase")]
27pub enum EmotionPreset {
28 Normal,
30 Happy,
32 Sad,
34 Angry,
36 Whisper,
38 #[serde(rename = "toneup")]
40 ToneUp,
41 #[serde(rename = "tonedown")]
43 ToneDown,
44}
45
46impl Default for EmotionPreset {
47 fn default() -> Self {
48 EmotionPreset::Normal
49 }
50}
51
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum AudioFormat {
56 Wav,
58 Mp3,
60}
61
62impl Default for AudioFormat {
63 fn default() -> Self {
64 AudioFormat::Wav
65 }
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70#[serde(rename_all = "lowercase")]
71pub enum Gender {
72 Male,
73 Female,
74}
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum Age {
80 Child,
82 Teenager,
84 YoungAdult,
86 MiddleAge,
88 Elder,
90}
91
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
94pub enum UseCase {
95 Announcer,
96 Anime,
97 Audiobook,
98 Conversational,
99 Documentary,
100 #[serde(rename = "E-learning")]
101 ELearning,
102 Rapper,
103 Game,
104 #[serde(rename = "Tiktok/Reels")]
105 TikTokReels,
106 News,
107 Podcast,
108 Voicemail,
109 Ads,
110}
111
112#[derive(Debug, Clone, Default, Serialize, Deserialize)]
114pub struct Output {
115 #[serde(skip_serializing_if = "Option::is_none")]
117 pub volume: Option<i32>,
118 #[serde(skip_serializing_if = "Option::is_none")]
120 pub audio_pitch: Option<i32>,
121 #[serde(skip_serializing_if = "Option::is_none")]
123 pub audio_tempo: Option<f64>,
124 #[serde(skip_serializing_if = "Option::is_none")]
126 pub audio_format: Option<AudioFormat>,
127}
128
129impl Output {
130 pub fn new() -> Self {
132 Self::default()
133 }
134
135 pub fn volume(mut self, volume: i32) -> Self {
137 self.volume = Some(volume.clamp(0, 200));
138 self
139 }
140
141 pub fn audio_pitch(mut self, pitch: i32) -> Self {
143 self.audio_pitch = Some(pitch.clamp(-12, 12));
144 self
145 }
146
147 pub fn audio_tempo(mut self, tempo: f64) -> Self {
149 self.audio_tempo = Some(tempo.clamp(0.5, 2.0));
150 self
151 }
152
153 pub fn audio_format(mut self, format: AudioFormat) -> Self {
155 self.audio_format = Some(format);
156 self
157 }
158}
159
160#[derive(Debug, Clone, Default, Serialize, Deserialize)]
162pub struct Prompt {
163 #[serde(skip_serializing_if = "Option::is_none")]
165 pub emotion_preset: Option<EmotionPreset>,
166 #[serde(skip_serializing_if = "Option::is_none")]
168 pub emotion_intensity: Option<f64>,
169}
170
171impl Prompt {
172 pub fn new() -> Self {
174 Self::default()
175 }
176
177 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
179 self.emotion_preset = Some(preset);
180 self
181 }
182
183 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
185 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
186 self
187 }
188}
189
190#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct PresetPrompt {
193 pub emotion_type: String,
195 #[serde(skip_serializing_if = "Option::is_none")]
197 pub emotion_preset: Option<EmotionPreset>,
198 #[serde(skip_serializing_if = "Option::is_none")]
200 pub emotion_intensity: Option<f64>,
201}
202
203impl Default for PresetPrompt {
204 fn default() -> Self {
205 Self {
206 emotion_type: "preset".to_string(),
207 emotion_preset: None,
208 emotion_intensity: None,
209 }
210 }
211}
212
213impl PresetPrompt {
214 pub fn new() -> Self {
216 Self::default()
217 }
218
219 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
221 self.emotion_preset = Some(preset);
222 self
223 }
224
225 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
227 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
228 self
229 }
230}
231
232#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct SmartPrompt {
235 pub emotion_type: String,
237 #[serde(skip_serializing_if = "Option::is_none")]
239 pub previous_text: Option<String>,
240 #[serde(skip_serializing_if = "Option::is_none")]
242 pub next_text: Option<String>,
243}
244
245impl Default for SmartPrompt {
246 fn default() -> Self {
247 Self {
248 emotion_type: "smart".to_string(),
249 previous_text: None,
250 next_text: None,
251 }
252 }
253}
254
255impl SmartPrompt {
256 pub fn new() -> Self {
258 Self::default()
259 }
260
261 pub fn previous_text(mut self, text: impl Into<String>) -> Self {
263 self.previous_text = Some(text.into());
264 self
265 }
266
267 pub fn next_text(mut self, text: impl Into<String>) -> Self {
269 self.next_text = Some(text.into());
270 self
271 }
272}
273
274#[derive(Debug, Clone, Serialize, Deserialize)]
276#[serde(untagged)]
277pub enum TTSPrompt {
278 Basic(Prompt),
280 Preset(PresetPrompt),
282 Smart(SmartPrompt),
284}
285
286impl From<Prompt> for TTSPrompt {
287 fn from(prompt: Prompt) -> Self {
288 TTSPrompt::Basic(prompt)
289 }
290}
291
292impl From<PresetPrompt> for TTSPrompt {
293 fn from(prompt: PresetPrompt) -> Self {
294 TTSPrompt::Preset(prompt)
295 }
296}
297
298impl From<SmartPrompt> for TTSPrompt {
299 fn from(prompt: SmartPrompt) -> Self {
300 TTSPrompt::Smart(prompt)
301 }
302}
303
304#[derive(Debug, Clone, Serialize, Deserialize)]
306pub struct TTSRequest {
307 pub voice_id: String,
309 pub text: String,
311 pub model: TTSModel,
313 #[serde(skip_serializing_if = "Option::is_none")]
315 pub language: Option<String>,
316 #[serde(skip_serializing_if = "Option::is_none")]
318 pub prompt: Option<TTSPrompt>,
319 #[serde(skip_serializing_if = "Option::is_none")]
321 pub output: Option<Output>,
322 #[serde(skip_serializing_if = "Option::is_none")]
324 pub seed: Option<i32>,
325}
326
327impl TTSRequest {
328 pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
330 Self {
331 voice_id: voice_id.into(),
332 text: text.into(),
333 model,
334 language: None,
335 prompt: None,
336 output: None,
337 seed: None,
338 }
339 }
340
341 pub fn language(mut self, language: impl Into<String>) -> Self {
343 self.language = Some(language.into());
344 self
345 }
346
347 pub fn prompt(mut self, prompt: impl Into<TTSPrompt>) -> Self {
349 self.prompt = Some(prompt.into());
350 self
351 }
352
353 pub fn output(mut self, output: Output) -> Self {
355 self.output = Some(output);
356 self
357 }
358
359 pub fn seed(mut self, seed: i32) -> Self {
361 self.seed = Some(seed);
362 self
363 }
364}
365
366#[derive(Debug, Clone)]
368pub struct TTSResponse {
369 pub audio_data: Vec<u8>,
371 pub duration: f64,
373 pub format: AudioFormat,
375}
376
377#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct ModelInfo {
380 pub version: TTSModel,
382 pub emotions: Vec<String>,
384}
385
386#[derive(Debug, Clone, Serialize, Deserialize)]
388pub struct VoiceV2 {
389 pub voice_id: String,
391 pub voice_name: String,
393 pub models: Vec<ModelInfo>,
395 #[serde(skip_serializing_if = "Option::is_none")]
397 pub gender: Option<Gender>,
398 #[serde(skip_serializing_if = "Option::is_none")]
400 pub age: Option<Age>,
401 #[serde(skip_serializing_if = "Option::is_none")]
403 pub use_cases: Option<Vec<String>>,
404}
405
406#[derive(Debug, Clone, Default)]
408pub struct VoicesV2Filter {
409 pub model: Option<TTSModel>,
411 pub gender: Option<Gender>,
413 pub age: Option<Age>,
415 pub use_cases: Option<UseCase>,
417}
418
419impl VoicesV2Filter {
420 pub fn new() -> Self {
422 Self::default()
423 }
424
425 pub fn model(mut self, model: TTSModel) -> Self {
427 self.model = Some(model);
428 self
429 }
430
431 pub fn gender(mut self, gender: Gender) -> Self {
433 self.gender = Some(gender);
434 self
435 }
436
437 pub fn age(mut self, age: Age) -> Self {
439 self.age = Some(age);
440 self
441 }
442
443 pub fn use_cases(mut self, use_case: UseCase) -> Self {
445 self.use_cases = Some(use_case);
446 self
447 }
448}
449
450#[derive(Debug, Clone, Serialize, Deserialize)]
452pub struct ErrorResponse {
453 pub detail: String,
455}