1use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum TTSModel {
10 #[serde(rename = "ssfm-v30")]
12 SsfmV30,
13 #[serde(rename = "ssfm-v21")]
15 SsfmV21,
16}
17
18impl Default for TTSModel {
19 fn default() -> Self {
20 TTSModel::SsfmV30
21 }
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "lowercase")]
27pub enum EmotionPreset {
28 Normal,
30 Happy,
32 Sad,
34 Angry,
36 Whisper,
38 #[serde(rename = "toneup")]
40 ToneUp,
41 #[serde(rename = "tonedown")]
43 ToneDown,
44}
45
46impl Default for EmotionPreset {
47 fn default() -> Self {
48 EmotionPreset::Normal
49 }
50}
51
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum AudioFormat {
56 Wav,
58 Mp3,
60}
61
62impl Default for AudioFormat {
63 fn default() -> Self {
64 AudioFormat::Wav
65 }
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70#[serde(rename_all = "lowercase")]
71pub enum Gender {
72 Male,
73 Female,
74}
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum Age {
80 Child,
82 Teenager,
84 YoungAdult,
86 MiddleAge,
88 Elder,
90}
91
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
94pub enum UseCase {
95 Announcer,
96 Anime,
97 Audiobook,
98 Conversational,
99 Documentary,
100 #[serde(rename = "E-learning")]
101 ELearning,
102 Rapper,
103 Game,
104 #[serde(rename = "Tiktok/Reels")]
105 TikTokReels,
106 News,
107 Podcast,
108 Voicemail,
109 Ads,
110}
111
112#[derive(Debug, Clone, Default, Serialize, Deserialize)]
114pub struct Output {
115 #[serde(skip_serializing_if = "Option::is_none")]
118 pub volume: Option<i32>,
119 #[serde(skip_serializing_if = "Option::is_none")]
122 pub target_lufs: Option<f64>,
123 #[serde(skip_serializing_if = "Option::is_none")]
125 pub audio_pitch: Option<i32>,
126 #[serde(skip_serializing_if = "Option::is_none")]
128 pub audio_tempo: Option<f64>,
129 #[serde(skip_serializing_if = "Option::is_none")]
131 pub audio_format: Option<AudioFormat>,
132}
133
134impl Output {
135 pub fn new() -> Self {
137 Self::default()
138 }
139
140 pub fn volume(mut self, volume: i32) -> Self {
143 self.volume = Some(volume.clamp(0, 200));
144 self
145 }
146
147 pub fn target_lufs(mut self, lufs: f64) -> Self {
149 self.target_lufs = Some(lufs.clamp(-70.0, 0.0));
150 self
151 }
152
153 pub fn audio_pitch(mut self, pitch: i32) -> Self {
155 self.audio_pitch = Some(pitch.clamp(-12, 12));
156 self
157 }
158
159 pub fn audio_tempo(mut self, tempo: f64) -> Self {
161 self.audio_tempo = Some(tempo.clamp(0.5, 2.0));
162 self
163 }
164
165 pub fn audio_format(mut self, format: AudioFormat) -> Self {
167 self.audio_format = Some(format);
168 self
169 }
170}
171
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
177pub struct OutputStream {
178 #[serde(skip_serializing_if = "Option::is_none")]
180 pub audio_pitch: Option<i32>,
181 #[serde(skip_serializing_if = "Option::is_none")]
183 pub audio_tempo: Option<f64>,
184 #[serde(skip_serializing_if = "Option::is_none")]
186 pub audio_format: Option<AudioFormat>,
187}
188
189impl OutputStream {
190 pub fn new() -> Self {
192 Self::default()
193 }
194
195 pub fn audio_pitch(mut self, pitch: i32) -> Self {
197 self.audio_pitch = Some(pitch.clamp(-12, 12));
198 self
199 }
200
201 pub fn audio_tempo(mut self, tempo: f64) -> Self {
203 self.audio_tempo = Some(tempo.clamp(0.5, 2.0));
204 self
205 }
206
207 pub fn audio_format(mut self, format: AudioFormat) -> Self {
209 self.audio_format = Some(format);
210 self
211 }
212}
213
214#[derive(Debug, Clone, Default, Serialize, Deserialize)]
216pub struct Prompt {
217 #[serde(skip_serializing_if = "Option::is_none")]
219 pub emotion_preset: Option<EmotionPreset>,
220 #[serde(skip_serializing_if = "Option::is_none")]
222 pub emotion_intensity: Option<f64>,
223}
224
225impl Prompt {
226 pub fn new() -> Self {
228 Self::default()
229 }
230
231 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
233 self.emotion_preset = Some(preset);
234 self
235 }
236
237 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
239 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
240 self
241 }
242}
243
244#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct PresetPrompt {
247 pub emotion_type: String,
249 #[serde(skip_serializing_if = "Option::is_none")]
251 pub emotion_preset: Option<EmotionPreset>,
252 #[serde(skip_serializing_if = "Option::is_none")]
254 pub emotion_intensity: Option<f64>,
255}
256
257impl Default for PresetPrompt {
258 fn default() -> Self {
259 Self {
260 emotion_type: "preset".to_string(),
261 emotion_preset: None,
262 emotion_intensity: None,
263 }
264 }
265}
266
267impl PresetPrompt {
268 pub fn new() -> Self {
270 Self::default()
271 }
272
273 pub fn emotion_preset(mut self, preset: EmotionPreset) -> Self {
275 self.emotion_preset = Some(preset);
276 self
277 }
278
279 pub fn emotion_intensity(mut self, intensity: f64) -> Self {
281 self.emotion_intensity = Some(intensity.clamp(0.0, 2.0));
282 self
283 }
284}
285
286#[derive(Debug, Clone, Serialize, Deserialize)]
288pub struct SmartPrompt {
289 pub emotion_type: String,
291 #[serde(skip_serializing_if = "Option::is_none")]
293 pub previous_text: Option<String>,
294 #[serde(skip_serializing_if = "Option::is_none")]
296 pub next_text: Option<String>,
297}
298
299impl Default for SmartPrompt {
300 fn default() -> Self {
301 Self {
302 emotion_type: "smart".to_string(),
303 previous_text: None,
304 next_text: None,
305 }
306 }
307}
308
309impl SmartPrompt {
310 pub fn new() -> Self {
312 Self::default()
313 }
314
315 pub fn previous_text(mut self, text: impl Into<String>) -> Self {
317 self.previous_text = Some(text.into());
318 self
319 }
320
321 pub fn next_text(mut self, text: impl Into<String>) -> Self {
323 self.next_text = Some(text.into());
324 self
325 }
326}
327
328#[derive(Debug, Clone, Serialize, Deserialize)]
330#[serde(untagged)]
331pub enum TTSPrompt {
332 Basic(Prompt),
334 Preset(PresetPrompt),
336 Smart(SmartPrompt),
338}
339
340impl From<Prompt> for TTSPrompt {
341 fn from(prompt: Prompt) -> Self {
342 TTSPrompt::Basic(prompt)
343 }
344}
345
346impl From<PresetPrompt> for TTSPrompt {
347 fn from(prompt: PresetPrompt) -> Self {
348 TTSPrompt::Preset(prompt)
349 }
350}
351
352impl From<SmartPrompt> for TTSPrompt {
353 fn from(prompt: SmartPrompt) -> Self {
354 TTSPrompt::Smart(prompt)
355 }
356}
357
358#[derive(Debug, Clone, Serialize, Deserialize)]
360pub struct TTSRequest {
361 pub voice_id: String,
363 pub text: String,
365 pub model: TTSModel,
367 #[serde(skip_serializing_if = "Option::is_none")]
369 pub language: Option<String>,
370 #[serde(skip_serializing_if = "Option::is_none")]
372 pub prompt: Option<TTSPrompt>,
373 #[serde(skip_serializing_if = "Option::is_none")]
375 pub output: Option<Output>,
376 #[serde(skip_serializing_if = "Option::is_none")]
378 pub seed: Option<i32>,
379}
380
381impl TTSRequest {
382 pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
384 Self {
385 voice_id: voice_id.into(),
386 text: text.into(),
387 model,
388 language: None,
389 prompt: None,
390 output: None,
391 seed: None,
392 }
393 }
394
395 pub fn language(mut self, language: impl Into<String>) -> Self {
397 self.language = Some(language.into());
398 self
399 }
400
401 pub fn prompt(mut self, prompt: impl Into<TTSPrompt>) -> Self {
403 self.prompt = Some(prompt.into());
404 self
405 }
406
407 pub fn output(mut self, output: Output) -> Self {
409 self.output = Some(output);
410 self
411 }
412
413 pub fn seed(mut self, seed: i32) -> Self {
415 self.seed = Some(seed);
416 self
417 }
418}
419
420#[derive(Debug, Clone, Serialize, Deserialize)]
425pub struct TTSRequestStream {
426 pub voice_id: String,
428 pub text: String,
430 pub model: TTSModel,
432 #[serde(skip_serializing_if = "Option::is_none")]
434 pub language: Option<String>,
435 #[serde(skip_serializing_if = "Option::is_none")]
437 pub prompt: Option<TTSPrompt>,
438 #[serde(skip_serializing_if = "Option::is_none")]
440 pub output: Option<OutputStream>,
441 #[serde(skip_serializing_if = "Option::is_none")]
443 pub seed: Option<i32>,
444}
445
446impl TTSRequestStream {
447 pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
449 Self {
450 voice_id: voice_id.into(),
451 text: text.into(),
452 model,
453 language: None,
454 prompt: None,
455 output: None,
456 seed: None,
457 }
458 }
459
460 pub fn language(mut self, language: impl Into<String>) -> Self {
462 self.language = Some(language.into());
463 self
464 }
465
466 pub fn prompt(mut self, prompt: impl Into<TTSPrompt>) -> Self {
468 self.prompt = Some(prompt.into());
469 self
470 }
471
472 pub fn output(mut self, output: OutputStream) -> Self {
474 self.output = Some(output);
475 self
476 }
477
478 pub fn seed(mut self, seed: i32) -> Self {
480 self.seed = Some(seed);
481 self
482 }
483}
484
485#[derive(Debug, Clone)]
487pub struct TTSResponse {
488 pub audio_data: Vec<u8>,
490 pub duration: f64,
492 pub format: AudioFormat,
494}
495
496#[derive(Debug, Clone, Serialize, Deserialize)]
498pub struct ModelInfo {
499 pub version: TTSModel,
501 pub emotions: Vec<String>,
503}
504
505#[derive(Debug, Clone, Serialize, Deserialize)]
507pub struct VoiceV2 {
508 pub voice_id: String,
510 pub voice_name: String,
512 pub models: Vec<ModelInfo>,
514 #[serde(skip_serializing_if = "Option::is_none")]
516 pub gender: Option<Gender>,
517 #[serde(skip_serializing_if = "Option::is_none")]
519 pub age: Option<Age>,
520 #[serde(skip_serializing_if = "Option::is_none")]
522 pub use_cases: Option<Vec<String>>,
523}
524
525#[derive(Debug, Clone, Default)]
527pub struct VoicesV2Filter {
528 pub model: Option<TTSModel>,
530 pub gender: Option<Gender>,
532 pub age: Option<Age>,
534 pub use_cases: Option<UseCase>,
536}
537
538impl VoicesV2Filter {
539 pub fn new() -> Self {
541 Self::default()
542 }
543
544 pub fn model(mut self, model: TTSModel) -> Self {
546 self.model = Some(model);
547 self
548 }
549
550 pub fn gender(mut self, gender: Gender) -> Self {
552 self.gender = Some(gender);
553 self
554 }
555
556 pub fn age(mut self, age: Age) -> Self {
558 self.age = Some(age);
559 self
560 }
561
562 pub fn use_cases(mut self, use_case: UseCase) -> Self {
564 self.use_cases = Some(use_case);
565 self
566 }
567}
568
569#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
571#[serde(rename_all = "lowercase")]
572pub enum PlanTier {
573 Free,
575 Lite,
577 Plus,
579 Custom,
581}
582
583#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
585pub struct Credits {
586 pub plan_credits: i64,
588 pub used_credits: i64,
590}
591
592#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
594pub struct Limits {
595 pub concurrency_limit: i64,
597}
598
599#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
601pub struct SubscriptionResponse {
602 pub plan: PlanTier,
604 pub credits: Credits,
606 pub limits: Limits,
608}
609
610#[derive(Debug, Clone, Serialize, Deserialize)]
612pub struct ErrorResponse {
613 pub detail: String,
615}