Skip to main content

zai_rs/model/text_to_audio/
request.rs

1use serde::Serialize;
2use validator::Validate;
3
4use super::super::traits::*;
5
6#[derive(Debug, Clone, Serialize, Validate)]
7pub struct TextToAudioBody<N>
8where
9    N: ModelName + TextToAudio + Serialize,
10{
11    /// TTS model (e.g., cogtts)
12    pub model: N,
13
14    /// Text to convert to speech (max 4096)
15    #[serde(skip_serializing_if = "Option::is_none")]
16    #[validate(length(max = 4096))]
17    pub input: Option<String>,
18
19    /// Voice preset
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub voice: Option<Voice>,
22
23    /// Speed in [0.5, 2]
24    #[serde(skip_serializing_if = "Option::is_none")]
25    #[validate(range(min = 0.5, max = 2.0))]
26    pub speed: Option<f32>,
27
28    /// Volume in (0, 10]; we validate as [0.0, 10.0] for simplicity
29    #[serde(skip_serializing_if = "Option::is_none")]
30    #[validate(range(min = 0.0, max = 10.0))]
31    pub volume: Option<f32>,
32
33    /// Output audio format
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub response_format: Option<TtsAudioFormat>,
36
37    /// Watermark toggle
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub watermark_enabled: Option<bool>,
40}
41
42impl<N> TextToAudioBody<N>
43where
44    N: ModelName + TextToAudio + Serialize,
45{
46    pub fn new(model: N) -> Self {
47        Self {
48            model,
49            input: None,
50            voice: Some(Voice::Tongtong),
51            speed: None,
52            volume: None,
53            response_format: Some(TtsAudioFormat::Wav),
54            watermark_enabled: None,
55        }
56    }
57
58    pub fn with_input(mut self, input: impl Into<String>) -> Self {
59        self.input = Some(input.into());
60        self
61    }
62
63    pub fn with_voice(mut self, voice: Voice) -> Self {
64        self.voice = Some(voice);
65        self
66    }
67
68    pub fn with_speed(mut self, speed: f32) -> Self {
69        self.speed = Some(speed);
70        self
71    }
72
73    pub fn with_volume(mut self, volume: f32) -> Self {
74        self.volume = Some(volume);
75        self
76    }
77
78    pub fn with_response_format(mut self, fmt: TtsAudioFormat) -> Self {
79        self.response_format = Some(fmt);
80        self
81    }
82
83    pub fn with_watermark_enabled(mut self, enabled: bool) -> Self {
84        self.watermark_enabled = Some(enabled);
85        self
86    }
87}
88
/// Voice presets that can be selected for speech synthesis.
///
/// The enum is fieldless, so it is `Copy` and can be compared, hashed and used
/// as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Voice {
    /// The `tongtong` voice.
    Tongtong,
    /// The `chuichui` voice.
    Chuichui,
    /// The `xiaochen` voice.
    Xiaochen,
    /// The `jam` voice.
    Jam,
    /// The `kazi` voice.
    Kazi,
    /// The `douji` voice.
    Douji,
    /// The `luodo` voice.
    Luodo,
}
99
100impl serde::Serialize for Voice {
101    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
102    where
103        S: serde::Serializer,
104    {
105        let s = match self {
106            Voice::Tongtong => "tongtong",
107            Voice::Chuichui => "chuichui",
108            Voice::Xiaochen => "xiaochen",
109            Voice::Jam => "jam",
110            Voice::Kazi => "kazi",
111            Voice::Douji => "douji",
112            Voice::Luodo => "luodo",
113        };
114        serializer.serialize_str(s)
115    }
116}
117
/// Audio formats that can be requested for the synthesized output.
///
/// The enum is fieldless, so it is `Copy` and can be compared, hashed and used
/// as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TtsAudioFormat {
    /// The `wav` format.
    Wav,
}
122
123impl serde::Serialize for TtsAudioFormat {
124    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
125    where
126        S: serde::Serializer,
127    {
128        let s = match self {
129            TtsAudioFormat::Wav => "wav",
130        };
131        serializer.serialize_str(s)
132    }
133}