1use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum TTSModel {
10 #[serde(rename = "ssfm-v30")]
12 SsfmV30,
13 #[serde(rename = "ssfm-v21")]
15 SsfmV21,
16}
17
18impl Default for TTSModel {
19 fn default() -> Self {
20 TTSModel::SsfmV30
21 }
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "lowercase")]
27pub enum EmotionPreset {
28 Normal,
30 Happy,
32 Sad,
34 Angry,
36 Whisper,
38 #[serde(rename = "toneup")]
40 ToneUp,
41 #[serde(rename = "tonedown")]
43 ToneDown,
44}
45
46impl Default for EmotionPreset {
47 fn default() -> Self {
48 EmotionPreset::Normal
49 }
50}
51
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum AudioFormat {
56 Wav,
58 Mp3,
60}
61
62impl Default for AudioFormat {
63 fn default() -> Self {
64 AudioFormat::Wav
65 }
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70#[serde(rename_all = "lowercase")]
71pub enum Gender {
72 Male,
73 Female,
74}
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum Age {
80 Child,
82 Teenager,
84 YoungAdult,
86 MiddleAge,
88 Elder,
90}
91
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
94pub enum UseCase {
95 Announcer,
96 Anime,
97 Audiobook,
98 Conversational,
99 Documentary,
100 #[serde(rename = "E-learning")]
101 ELearning,
102 Rapper,
103 Game,
104 #[serde(rename = "Tiktok/Reels")]
105 TikTokReels,
106 News,
107 Podcast,
108 Voicemail,
109 Ads,
110}
111
112#[derive(Debug, Clone, Default, Serialize, Deserialize)]
114pub struct Output {
115 #[serde(skip_serializing_if = "Option::is_none")]
118 pub volume: Option<i32>,
119 #[serde(skip_serializing_if = "Option::is_none")]
122 pub target_lufs: Option<f64>,
123 #[serde(skip_serializing_if = "Option::is_none")]
125 pub audio_pitch: Option<i32>,
126 #[serde(skip_serializing_if = "Option::is_none")]
128 pub audio_tempo: Option<f64>,
129 #[serde(skip_serializing_if = "Option::is_none")]
131 pub audio_format: Option<AudioFormat>,
132}
133
134impl Output {
135 pub fn new() -> Self {
137 Self::default()
138 }
139
140 pub fn volume(mut self, volume: i32) -> Self {
143 self.volume = Some(volume.clamp(0, 200));
144 self
145 }
146
147 pub fn target_lufs(mut self, lufs: f64) -> Self {
149 self.target_lufs = Some(lufs.clamp(-70.0, 0.0));
150 self
151 }
152
153 pub fn audio_pitch(mut self, pitch: i32) -> Self {
155 self.audio_pitch = Some(pitch.clamp(-12, 12));
156 self
157 }
158
159 pub fn audio_tempo(mut self, tempo: f64) -> Self {
161 self.audio_tempo = Some(tempo.clamp(0.5, 2.0));
162 self
163 }
164
165 pub fn audio_format(mut self, format: AudioFormat) -> Self {
167 self.audio_format = Some(format);
168 self
169 }
170}
171
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
174pub struct Prompt {
175 #[serde(skip_serializing_if = "Option::is_none")]
177 pub emotion_preset: Option<EmotionPreset>,
178 #[serde(skip_serializing_if = "Option::is_none")]
180 pub emotion_intensity: Option<f64>,
181}
182
183impl Prompt {
184 pub fn new() -> Self {
186 Self::default()
187 }
188
189 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
191 self.emotion_preset = Some(preset);
192 self
193 }
194
195 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
197 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
198 self
199 }
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct PresetPrompt {
205 pub emotion_type: String,
207 #[serde(skip_serializing_if = "Option::is_none")]
209 pub emotion_preset: Option<EmotionPreset>,
210 #[serde(skip_serializing_if = "Option::is_none")]
212 pub emotion_intensity: Option<f64>,
213}
214
215impl Default for PresetPrompt {
216 fn default() -> Self {
217 Self {
218 emotion_type: "preset".to_string(),
219 emotion_preset: None,
220 emotion_intensity: None,
221 }
222 }
223}
224
225impl PresetPrompt {
226 pub fn new() -> Self {
228 Self::default()
229 }
230
231 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
233 self.emotion_preset = Some(preset);
234 self
235 }
236
237 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
239 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
240 self
241 }
242}
243
244#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct SmartPrompt {
247 pub emotion_type: String,
249 #[serde(skip_serializing_if = "Option::is_none")]
251 pub previous_text: Option<String>,
252 #[serde(skip_serializing_if = "Option::is_none")]
254 pub next_text: Option<String>,
255}
256
257impl Default for SmartPrompt {
258 fn default() -> Self {
259 Self {
260 emotion_type: "smart".to_string(),
261 previous_text: None,
262 next_text: None,
263 }
264 }
265}
266
267impl SmartPrompt {
268 pub fn new() -> Self {
270 Self::default()
271 }
272
273 pub fn previous_text(mut self, text: impl Into<String>) -> Self {
275 self.previous_text = Some(text.into());
276 self
277 }
278
279 pub fn next_text(mut self, text: impl Into<String>) -> Self {
281 self.next_text = Some(text.into());
282 self
283 }
284}
285
286#[derive(Debug, Clone, Serialize, Deserialize)]
288#[serde(untagged)]
289pub enum TTSPrompt {
290 Basic(Prompt),
292 Preset(PresetPrompt),
294 Smart(SmartPrompt),
296}
297
298impl From<Prompt> for TTSPrompt {
299 fn from(prompt: Prompt) -> Self {
300 TTSPrompt::Basic(prompt)
301 }
302}
303
304impl From<PresetPrompt> for TTSPrompt {
305 fn from(prompt: PresetPrompt) -> Self {
306 TTSPrompt::Preset(prompt)
307 }
308}
309
310impl From<SmartPrompt> for TTSPrompt {
311 fn from(prompt: SmartPrompt) -> Self {
312 TTSPrompt::Smart(prompt)
313 }
314}
315
316#[derive(Debug, Clone, Serialize, Deserialize)]
318pub struct TTSRequest {
319 pub voice_id: String,
321 pub text: String,
323 pub model: TTSModel,
325 #[serde(skip_serializing_if = "Option::is_none")]
327 pub language: Option<String>,
328 #[serde(skip_serializing_if = "Option::is_none")]
330 pub prompt: Option<TTSPrompt>,
331 #[serde(skip_serializing_if = "Option::is_none")]
333 pub output: Option<Output>,
334 #[serde(skip_serializing_if = "Option::is_none")]
336 pub seed: Option<i32>,
337}
338
339impl TTSRequest {
340 pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
342 Self {
343 voice_id: voice_id.into(),
344 text: text.into(),
345 model,
346 language: None,
347 prompt: None,
348 output: None,
349 seed: None,
350 }
351 }
352
353 pub fn language(mut self, language: impl Into<String>) -> Self {
355 self.language = Some(language.into());
356 self
357 }
358
359 pub fn prompt(mut self, prompt: impl Into<TTSPrompt>) -> Self {
361 self.prompt = Some(prompt.into());
362 self
363 }
364
365 pub fn output(mut self, output: Output) -> Self {
367 self.output = Some(output);
368 self
369 }
370
371 pub fn seed(mut self, seed: i32) -> Self {
373 self.seed = Some(seed);
374 self
375 }
376}
377
378#[derive(Debug, Clone)]
380pub struct TTSResponse {
381 pub audio_data: Vec<u8>,
383 pub duration: f64,
385 pub format: AudioFormat,
387}
388
389#[derive(Debug, Clone, Serialize, Deserialize)]
391pub struct ModelInfo {
392 pub version: TTSModel,
394 pub emotions: Vec<String>,
396}
397
398#[derive(Debug, Clone, Serialize, Deserialize)]
400pub struct VoiceV2 {
401 pub voice_id: String,
403 pub voice_name: String,
405 pub models: Vec<ModelInfo>,
407 #[serde(skip_serializing_if = "Option::is_none")]
409 pub gender: Option<Gender>,
410 #[serde(skip_serializing_if = "Option::is_none")]
412 pub age: Option<Age>,
413 #[serde(skip_serializing_if = "Option::is_none")]
415 pub use_cases: Option<Vec<String>>,
416}
417
418#[derive(Debug, Clone, Default)]
420pub struct VoicesV2Filter {
421 pub model: Option<TTSModel>,
423 pub gender: Option<Gender>,
425 pub age: Option<Age>,
427 pub use_cases: Option<UseCase>,
429}
430
431impl VoicesV2Filter {
432 pub fn new() -> Self {
434 Self::default()
435 }
436
437 pub fn model(mut self, model: TTSModel) -> Self {
439 self.model = Some(model);
440 self
441 }
442
443 pub fn gender(mut self, gender: Gender) -> Self {
445 self.gender = Some(gender);
446 self
447 }
448
449 pub fn age(mut self, age: Age) -> Self {
451 self.age = Some(age);
452 self
453 }
454
455 pub fn use_cases(mut self, use_case: UseCase) -> Self {
457 self.use_cases = Some(use_case);
458 self
459 }
460}
461
462#[derive(Debug, Clone, Serialize, Deserialize)]
464pub struct ErrorResponse {
465 pub detail: String,
467}