use serde::{Deserialize, Serialize};
/// Model version identifier.
///
/// Each variant carries an explicit serde name, so it is (de)serialized as
/// the string `"ssfm-v30"` or `"ssfm-v21"` rather than the Rust identifier.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub enum TTSModel {
    /// Serialized as `"ssfm-v30"`.
    #[serde(rename = "ssfm-v30")]
    SsfmV30,
    /// Serialized as `"ssfm-v21"`.
    #[serde(rename = "ssfm-v21")]
    SsfmV21,
}
impl Default for TTSModel {
fn default() -> Self {
TTSModel::SsfmV30
}
}
/// Named emotion preset.
///
/// Variants are (de)serialized in lowercase (`Happy` <-> `"happy"`).
/// `ToneUp` and `ToneDown` additionally spell out their names explicitly as
/// `"toneup"` and `"tonedown"`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum EmotionPreset {
    Normal,
    Happy,
    Sad,
    Angry,
    Whisper,
    /// Serialized as `"toneup"`.
    #[serde(rename = "toneup")]
    ToneUp,
    /// Serialized as `"tonedown"`.
    #[serde(rename = "tonedown")]
    ToneDown,
}
impl Default for EmotionPreset {
fn default() -> Self {
EmotionPreset::Normal
}
}
/// Audio container/encoding selector.
///
/// (De)serialized in lowercase: `"wav"` or `"mp3"`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum AudioFormat {
    Wav,
    Mp3,
}
impl Default for AudioFormat {
fn default() -> Self {
AudioFormat::Wav
}
}
/// Voice gender attribute.
///
/// (De)serialized in lowercase: `"male"` or `"female"`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Gender {
    Male,
    Female,
}
/// Voice age-group attribute.
///
/// (De)serialized in `snake_case`, so multi-word variants gain an underscore
/// (`YoungAdult` <-> `"young_adult"`, `MiddleAge` <-> `"middle_age"`).
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Age {
    Child,
    Teenager,
    YoungAdult,
    MiddleAge,
    Elder,
}
/// Use-case category for a voice.
///
/// There is no container-level rename rule, so each variant is
/// (de)serialized under its Rust identifier (`Announcer` <-> `"Announcer"`),
/// except for the two variants that carry an explicit name below.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub enum UseCase {
    Announcer,
    Anime,
    Audiobook,
    Conversational,
    Documentary,
    /// Serialized as `"E-learning"`.
    #[serde(rename = "E-learning")]
    ELearning,
    Rapper,
    Game,
    /// Serialized as `"Tiktok/Reels"`.
    #[serde(rename = "Tiktok/Reels")]
    TikTokReels,
    News,
    Podcast,
    Voicemail,
    Ads,
}
/// Optional audio output settings.
///
/// Every field is optional; a field left as `None` is omitted from the
/// serialized form entirely. The fields are public, so values assigned
/// directly are stored as given; only the builder methods on this type
/// clamp their arguments.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Output {
    /// Volume setting; the `volume` builder clamps it to `0..=200`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub volume: Option<i32>,
    /// Loudness target; the `target_lufs` builder clamps it to `-70.0..=0.0`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_lufs: Option<f64>,
    /// Pitch adjustment; the `audio_pitch` builder clamps it to `-12..=12`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_pitch: Option<i32>,
    /// Tempo multiplier; the `audio_tempo` builder clamps it to `0.5..=2.0`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_tempo: Option<f64>,
    /// Requested audio format.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_format: Option<AudioFormat>,
}
/// Consuming builder for [`Output`].
///
/// Each setter takes `self` by value and returns the updated value, so calls
/// are meant to be chained. Numeric setters clamp their argument into a fixed
/// range before storing it. For the `f64` setters a NaN argument is stored as
/// NaN, because `f64::clamp` returns NaN for a NaN receiver.
impl Output {
    /// Creates an `Output` with every field unset.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets `volume`, clamped to `0..=200`.
    pub fn volume(self, volume: i32) -> Self {
        Self {
            volume: Some(volume.clamp(0, 200)),
            ..self
        }
    }

    /// Sets `target_lufs`, clamped to `-70.0..=0.0`.
    pub fn target_lufs(self, lufs: f64) -> Self {
        Self {
            target_lufs: Some(lufs.clamp(-70.0, 0.0)),
            ..self
        }
    }

    /// Sets `audio_pitch`, clamped to `-12..=12`.
    pub fn audio_pitch(self, pitch: i32) -> Self {
        Self {
            audio_pitch: Some(pitch.clamp(-12, 12)),
            ..self
        }
    }

    /// Sets `audio_tempo`, clamped to `0.5..=2.0`.
    pub fn audio_tempo(self, tempo: f64) -> Self {
        Self {
            audio_tempo: Some(tempo.clamp(0.5, 2.0)),
            ..self
        }
    }

    /// Sets `audio_format`.
    pub fn audio_format(self, format: AudioFormat) -> Self {
        Self {
            audio_format: Some(format),
            ..self
        }
    }
}
/// Optional audio output settings used by [`TTSRequestStream`].
///
/// Every field is optional; a field left as `None` is omitted from the
/// serialized form. The fields are public, so values assigned directly are
/// stored as given; only the builder methods on this type clamp.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct OutputStream {
    /// Pitch adjustment; the `audio_pitch` builder clamps it to `-12..=12`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_pitch: Option<i32>,
    /// Tempo multiplier; the `audio_tempo` builder clamps it to `0.5..=2.0`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_tempo: Option<f64>,
    /// Requested audio format.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_format: Option<AudioFormat>,
}
/// Consuming builder for [`OutputStream`].
///
/// Setters take `self` by value and return the updated value. A NaN tempo is
/// stored as NaN, because `f64::clamp` returns NaN for a NaN receiver.
impl OutputStream {
    /// Creates an `OutputStream` with every field unset.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets `audio_pitch`, clamped to `-12..=12`.
    pub fn audio_pitch(self, pitch: i32) -> Self {
        Self {
            audio_pitch: Some(pitch.clamp(-12, 12)),
            ..self
        }
    }

    /// Sets `audio_tempo`, clamped to `0.5..=2.0`.
    pub fn audio_tempo(self, tempo: f64) -> Self {
        Self {
            audio_tempo: Some(tempo.clamp(0.5, 2.0)),
            ..self
        }
    }

    /// Sets `audio_format`.
    pub fn audio_format(self, format: AudioFormat) -> Self {
        Self {
            audio_format: Some(format),
            ..self
        }
    }
}
/// Emotion settings without an `emotion_type` discriminator.
///
/// Both fields are optional and are omitted from the serialized form when
/// `None`.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Prompt {
    /// Emotion preset to apply.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub emotion_preset: Option<EmotionPreset>,
    /// Emotion strength; the `emotion_intensity` builder clamps it to
    /// `0.0..=2.0`, direct assignment does not.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub emotion_intensity: Option<f64>,
}
/// Consuming builder for [`Prompt`].
impl Prompt {
    /// Creates a `Prompt` with both fields unset.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets `emotion_preset`.
    pub fn emotion_preset(self, preset: EmotionPreset) -> Self {
        Self {
            emotion_preset: Some(preset),
            ..self
        }
    }

    /// Sets `emotion_intensity`, clamped to `0.0..=2.0`.
    ///
    /// A NaN argument is stored as NaN (`f64::clamp` returns NaN for NaN).
    pub fn emotion_intensity(self, intensity: f64) -> Self {
        Self {
            emotion_intensity: Some(intensity.clamp(0.0, 2.0)),
            ..self
        }
    }
}
/// Emotion settings tagged with an `emotion_type` string.
///
/// `emotion_type` is always serialized; the two optional fields are omitted
/// when `None`. The `Default` implementation for this type sets
/// `emotion_type` to `"preset"`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PresetPrompt {
    /// Discriminator string; a free-form `String`, not validated here.
    pub emotion_type: String,
    /// Emotion preset to apply.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub emotion_preset: Option<EmotionPreset>,
    /// Emotion strength; the `emotion_intensity` builder clamps it to
    /// `0.0..=2.0`, direct assignment does not.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub emotion_intensity: Option<f64>,
}
/// The default `PresetPrompt` has `emotion_type == "preset"` and both
/// optional fields unset.
impl Default for PresetPrompt {
    fn default() -> Self {
        let emotion_type = String::from("preset");
        Self {
            emotion_type,
            emotion_preset: None,
            emotion_intensity: None,
        }
    }
}
/// Consuming builder for [`PresetPrompt`].
impl PresetPrompt {
    /// Creates a `PresetPrompt` equal to `PresetPrompt::default()`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets `emotion_preset`.
    pub fn emotion_preset(self, preset: EmotionPreset) -> Self {
        Self {
            emotion_preset: Some(preset),
            ..self
        }
    }

    /// Sets `emotion_intensity`, clamped to `0.0..=2.0`.
    ///
    /// A NaN argument is stored as NaN (`f64::clamp` returns NaN for NaN).
    pub fn emotion_intensity(self, intensity: f64) -> Self {
        Self {
            emotion_intensity: Some(intensity.clamp(0.0, 2.0)),
            ..self
        }
    }
}
/// Prompt tagged with an `emotion_type` string and optional surrounding text.
///
/// `emotion_type` is always serialized; the two optional fields are omitted
/// when `None`. The `Default` implementation for this type sets
/// `emotion_type` to `"smart"`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SmartPrompt {
    /// Discriminator string; a free-form `String`, not validated here.
    pub emotion_type: String,
    /// Text preceding the text being synthesized, if supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_text: Option<String>,
    /// Text following the text being synthesized, if supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub next_text: Option<String>,
}
/// The default `SmartPrompt` has `emotion_type == "smart"` and no
/// surrounding text.
impl Default for SmartPrompt {
    fn default() -> Self {
        let emotion_type = String::from("smart");
        Self {
            emotion_type,
            previous_text: None,
            next_text: None,
        }
    }
}
/// Consuming builder for [`SmartPrompt`].
impl SmartPrompt {
    /// Creates a `SmartPrompt` equal to `SmartPrompt::default()`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets `previous_text` from anything convertible into a `String`.
    pub fn previous_text(self, text: impl Into<String>) -> Self {
        Self {
            previous_text: Some(text.into()),
            ..self
        }
    }

    /// Sets `next_text` from anything convertible into a `String`.
    pub fn next_text(self, text: impl Into<String>) -> Self {
        Self {
            next_text: Some(text.into()),
            ..self
        }
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum TTSPrompt {
Basic(Prompt),
Preset(PresetPrompt),
Smart(SmartPrompt),
}
impl From<Prompt> for TTSPrompt {
fn from(prompt: Prompt) -> Self {
TTSPrompt::Basic(prompt)
}
}
impl From<PresetPrompt> for TTSPrompt {
fn from(prompt: PresetPrompt) -> Self {
TTSPrompt::Preset(prompt)
}
}
impl From<SmartPrompt> for TTSPrompt {
fn from(prompt: SmartPrompt) -> Self {
TTSPrompt::Smart(prompt)
}
}
/// Text-to-speech request payload.
///
/// `voice_id`, `text` and `model` are always serialized; each optional field
/// is omitted from the serialized form when `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TTSRequest {
    /// Identifier of the voice to use.
    pub voice_id: String,
    /// Text to synthesize.
    pub text: String,
    /// Model version to use.
    pub model: TTSModel,
    /// Language string; format is not validated here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
    /// Emotion/prompt settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt: Option<TTSPrompt>,
    /// Audio output settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output: Option<Output>,
    /// Seed value; presumably for reproducible generation (TODO confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
}
/// Constructor and consuming builder for [`TTSRequest`].
impl TTSRequest {
    /// Creates a request from the three required values; every optional
    /// field starts as `None`.
    pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
        let voice_id = voice_id.into();
        let text = text.into();
        Self {
            voice_id,
            text,
            model,
            language: None,
            prompt: None,
            output: None,
            seed: None,
        }
    }

    /// Sets `language`.
    pub fn language(self, language: impl Into<String>) -> Self {
        Self {
            language: Some(language.into()),
            ..self
        }
    }

    /// Sets `prompt` from anything convertible into a [`TTSPrompt`].
    pub fn prompt(self, prompt: impl Into<TTSPrompt>) -> Self {
        Self {
            prompt: Some(prompt.into()),
            ..self
        }
    }

    /// Sets `output`.
    pub fn output(self, output: Output) -> Self {
        Self {
            output: Some(output),
            ..self
        }
    }

    /// Sets `seed`.
    pub fn seed(self, seed: i32) -> Self {
        Self {
            seed: Some(seed),
            ..self
        }
    }
}
/// Text-to-speech request payload whose `output` is an [`OutputStream`].
///
/// `voice_id`, `text` and `model` are always serialized; each optional field
/// is omitted from the serialized form when `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TTSRequestStream {
    /// Identifier of the voice to use.
    pub voice_id: String,
    /// Text to synthesize.
    pub text: String,
    /// Model version to use.
    pub model: TTSModel,
    /// Language string; format is not validated here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
    /// Emotion/prompt settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt: Option<TTSPrompt>,
    /// Audio output settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output: Option<OutputStream>,
    /// Seed value; presumably for reproducible generation (TODO confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
}
/// Constructor and consuming builder for [`TTSRequestStream`].
impl TTSRequestStream {
    /// Creates a request from the three required values; every optional
    /// field starts as `None`.
    pub fn new(voice_id: impl Into<String>, text: impl Into<String>, model: TTSModel) -> Self {
        let voice_id = voice_id.into();
        let text = text.into();
        Self {
            voice_id,
            text,
            model,
            language: None,
            prompt: None,
            output: None,
            seed: None,
        }
    }

    /// Sets `language`.
    pub fn language(self, language: impl Into<String>) -> Self {
        Self {
            language: Some(language.into()),
            ..self
        }
    }

    /// Sets `prompt` from anything convertible into a [`TTSPrompt`].
    pub fn prompt(self, prompt: impl Into<TTSPrompt>) -> Self {
        Self {
            prompt: Some(prompt.into()),
            ..self
        }
    }

    /// Sets `output`.
    pub fn output(self, output: OutputStream) -> Self {
        Self {
            output: Some(output),
            ..self
        }
    }

    /// Sets `seed`.
    pub fn seed(self, seed: i32) -> Self {
        Self {
            seed: Some(seed),
            ..self
        }
    }
}
/// Result of a synthesis call.
///
/// This type derives neither `Serialize` nor `Deserialize`; it is a plain
/// in-memory value.
#[derive(Clone, Debug)]
pub struct TTSResponse {
    /// Raw audio bytes.
    pub audio_data: Vec<u8>,
    /// Audio duration; presumably in seconds (TODO confirm against the API).
    pub duration: f64,
    /// Format of `audio_data`.
    pub format: AudioFormat,
}
/// A model version paired with a list of emotion names.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ModelInfo {
    /// Model version this entry describes.
    pub version: TTSModel,
    /// Emotion names as plain strings (not parsed into `EmotionPreset`).
    pub emotions: Vec<String>,
}
/// Description of a single voice.
///
/// `voice_id`, `voice_name` and `models` are required; the remaining fields
/// are optional and omitted from the serialized form when `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct VoiceV2 {
    /// Identifier of the voice.
    pub voice_id: String,
    /// Display name of the voice.
    pub voice_name: String,
    /// Per-model information for this voice.
    pub models: Vec<ModelInfo>,
    /// Gender attribute, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gender: Option<Gender>,
    /// Age-group attribute, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub age: Option<Age>,
    /// Use-case labels as plain strings (not parsed into `UseCase`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub use_cases: Option<Vec<String>>,
}
/// Optional filter criteria for voices.
///
/// All criteria default to `None`. This type derives neither `Serialize` nor
/// `Deserialize`.
#[derive(Clone, Debug, Default)]
pub struct VoicesV2Filter {
    /// Restrict to this model version.
    pub model: Option<TTSModel>,
    /// Restrict to this gender.
    pub gender: Option<Gender>,
    /// Restrict to this age group.
    pub age: Option<Age>,
    /// Restrict to this use case. Despite the plural name, the field holds a
    /// single [`UseCase`].
    pub use_cases: Option<UseCase>,
}
/// Consuming builder for [`VoicesV2Filter`].
impl VoicesV2Filter {
    /// Creates a filter with no criteria set.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets the `model` criterion.
    pub fn model(self, model: TTSModel) -> Self {
        Self {
            model: Some(model),
            ..self
        }
    }

    /// Sets the `gender` criterion.
    pub fn gender(self, gender: Gender) -> Self {
        Self {
            gender: Some(gender),
            ..self
        }
    }

    /// Sets the `age` criterion.
    pub fn age(self, age: Age) -> Self {
        Self {
            age: Some(age),
            ..self
        }
    }

    /// Sets the `use_cases` criterion to a single use case, replacing any
    /// previously set value.
    pub fn use_cases(self, use_case: UseCase) -> Self {
        Self {
            use_cases: Some(use_case),
            ..self
        }
    }
}
/// Subscription plan tier.
///
/// (De)serialized in lowercase: `"free"`, `"lite"`, `"plus"`, `"custom"`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum PlanTier {
    Free,
    Lite,
    Plus,
    Custom,
}
/// Credit counters of a subscription.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct Credits {
    /// Credits granted by the plan.
    pub plan_credits: i64,
    /// Credits already used.
    pub used_credits: i64,
}
/// Limits attached to a subscription.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct Limits {
    /// Concurrency limit; presumably the maximum number of simultaneous
    /// requests (TODO confirm against the API).
    pub concurrency_limit: i64,
}
/// Subscription details: the plan tier together with its credits and limits.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct SubscriptionResponse {
    /// Plan tier of the subscription.
    pub plan: PlanTier,
    /// Credit counters.
    pub credits: Credits,
    /// Limits that apply to the subscription.
    pub limits: Limits,
}
/// Error payload consisting of a single `detail` message.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ErrorResponse {
    /// Error description text.
    pub detail: String,
}