1use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
10#[serde(rename_all = "lowercase")]
11pub enum AudioFormat {
12 #[default]
14 Wav,
15 Mp3,
17 Pcm,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
23#[serde(rename_all = "snake_case")]
24pub enum Voice {
25 #[default]
27 MimoDefault,
28 DefaultEn,
30 DefaultZh,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct Audio {
37 #[serde(skip_serializing_if = "Option::is_none")]
39 pub format: Option<AudioFormat>,
40 #[serde(skip_serializing_if = "Option::is_none")]
42 pub voice: Option<Voice>,
43}
44
45impl Audio {
46 pub fn new() -> Self {
58 Self {
59 format: None,
60 voice: None,
61 }
62 }
63
64 pub fn format(mut self, format: AudioFormat) -> Self {
66 self.format = Some(format);
67 self
68 }
69
70 pub fn voice(mut self, voice: Voice) -> Self {
72 self.voice = Some(voice);
73 self
74 }
75
76 pub fn wav() -> Self {
78 Self::new().format(AudioFormat::Wav)
79 }
80
81 pub fn mp3() -> Self {
83 Self::new().format(AudioFormat::Mp3)
84 }
85
86 pub fn pcm() -> Self {
88 Self::new().format(AudioFormat::Pcm)
89 }
90}
91
92impl Default for Audio {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct ResponseAudio {
101 pub id: String,
103 pub data: String,
105 #[serde(skip_serializing_if = "Option::is_none")]
107 pub expires_at: Option<i64>,
108 #[serde(skip_serializing_if = "Option::is_none")]
110 pub transcript: Option<String>,
111}
112
113impl ResponseAudio {
114 pub fn decode_data(&self) -> Result<Vec<u8>, base64::DecodeError> {
137 use base64::Engine;
138 base64::engine::general_purpose::STANDARD.decode(&self.data)
139 }
140
141 pub fn transcript(&self) -> Option<&str> {
143 self.transcript.as_deref()
144 }
145
146 pub fn is_expired(&self) -> bool {
148 if let Some(expires_at) = self.expires_at {
149 let now = std::time::SystemTime::now()
150 .duration_since(std::time::UNIX_EPOCH)
151 .unwrap()
152 .as_secs() as i64;
153 now > expires_at
154 } else {
155 false
156 }
157 }
158}
159
160#[derive(Debug, Clone, Serialize, Deserialize)]
162pub struct DeltaAudio {
163 pub id: String,
165 pub data: String,
167 #[serde(skip_serializing_if = "Option::is_none")]
169 pub expires_at: Option<i64>,
170 #[serde(skip_serializing_if = "Option::is_none")]
172 pub transcript: Option<String>,
173}
174
175impl DeltaAudio {
176 pub fn decode_data(&self) -> Result<Vec<u8>, base64::DecodeError> {
178 use base64::Engine;
179 base64::engine::general_purpose::STANDARD.decode(&self.data)
180 }
181}
182
/// Ordered collection of style tags that can be prepended to a text as a
/// `<style>…</style>` prefix.
#[derive(Debug, Clone, Default)]
pub struct TtsStyle {
    /// Style tags in insertion order.
    styles: Vec<String>,
}

impl TtsStyle {
    /// Creates a style set with no tags.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder step: appends `style` after any tags already present and
    /// returns the extended set.
    pub fn with_style(self, style: impl Into<String>) -> Self {
        let mut styles = self.styles;
        styles.push(style.into());
        Self { styles }
    }

    /// Produces `text` prefixed with `<style>…</style>`, where the tags are
    /// joined by single spaces. With no tags, `text` is returned unchanged
    /// (as an owned copy).
    pub fn apply(&self, text: &str) -> String {
        match self.styles.as_slice() {
            [] => text.to_owned(),
            tags => {
                let mut styled = String::from("<style>");
                styled.push_str(&tags.join(" "));
                styled.push_str("</style>");
                styled.push_str(text);
                styled
            }
        }
    }
}
237
238pub fn styled_text(style: &str, text: &str) -> String {
249 TtsStyle::new().with_style(style).apply(text)
250}
251
#[cfg(test)]
mod tests {
    //! Unit tests for the audio configuration types, the response payload
    //! helpers and the TTS style helper.

    use super::*;
    use base64::Engine;

    #[test]
    fn test_audio_format_default() {
        let format = AudioFormat::default();
        assert_eq!(format, AudioFormat::Wav);
    }

    #[test]
    fn test_voice_default() {
        let voice = Voice::default();
        assert_eq!(voice, Voice::MimoDefault);
    }

    #[test]
    fn test_audio_config() {
        let audio = Audio::wav().voice(Voice::DefaultZh);
        assert_eq!(audio.format, Some(AudioFormat::Wav));
        assert_eq!(audio.voice, Some(Voice::DefaultZh));
    }

    #[test]
    fn test_audio_default_is_empty() {
        let audio = Audio::default();
        assert_eq!(audio.format, None);
        assert_eq!(audio.voice, None);
    }

    #[test]
    fn test_audio_serialization() {
        let audio = Audio::mp3().voice(Voice::DefaultEn);
        let json = serde_json::to_string(&audio).unwrap();
        assert!(json.contains("\"format\":\"mp3\""));
        assert!(json.contains("\"voice\":\"default_en\""));
    }

    #[test]
    fn test_audio_serialization_skips_unset_fields() {
        // Both fields carry `skip_serializing_if = "Option::is_none"`.
        let json = serde_json::to_string(&Audio::new()).unwrap();
        assert_eq!(json, "{}");
    }

    #[test]
    fn test_audio_deserialization() {
        let audio: Audio =
            serde_json::from_str(r#"{"format":"pcm","voice":"mimo_default"}"#).unwrap();
        assert_eq!(audio.format, Some(AudioFormat::Pcm));
        assert_eq!(audio.voice, Some(Voice::MimoDefault));
    }

    #[test]
    fn test_audio_formats() {
        assert_eq!(Audio::wav().format, Some(AudioFormat::Wav));
        assert_eq!(Audio::mp3().format, Some(AudioFormat::Mp3));
        assert_eq!(Audio::pcm().format, Some(AudioFormat::Pcm));
    }

    #[test]
    fn test_tts_style_single() {
        let text = TtsStyle::new().with_style("开心").apply("Hello");
        assert_eq!(text, "<style>开心</style>Hello");
    }

    #[test]
    fn test_tts_style_multiple() {
        let text = TtsStyle::new()
            .with_style("开心")
            .with_style("变快")
            .apply("Hello");
        assert!(text.starts_with("<style>"));
        assert!(text.contains("开心"));
        assert!(text.contains("变快"));
        assert!(text.ends_with("Hello"));
        // Tags are joined with a single space, in insertion order.
        assert_eq!(text, "<style>开心 变快</style>Hello");
    }

    #[test]
    fn test_tts_style_empty() {
        let text = TtsStyle::new().apply("Hello");
        assert_eq!(text, "Hello");
    }

    #[test]
    fn test_styled_text_helper() {
        let text = styled_text("东北话", "哎呀妈呀");
        assert_eq!(text, "<style>东北话</style>哎呀妈呀");
    }

    #[test]
    fn test_response_audio_decode() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: base64::engine::general_purpose::STANDARD.encode(b"test audio data"),
            expires_at: None,
            transcript: Some("test".to_string()),
        };

        let decoded = audio.decode_data().unwrap();
        assert_eq!(decoded, b"test audio data");
    }

    #[test]
    fn test_response_audio_decode_invalid() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: "not base64!!".to_string(),
            expires_at: None,
            transcript: None,
        };

        assert!(audio.decode_data().is_err());
    }

    #[test]
    fn test_response_audio_transcript() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: String::new(),
            expires_at: None,
            transcript: Some("Hello world".to_string()),
        };

        assert_eq!(audio.transcript(), Some("Hello world"));
    }

    #[test]
    fn test_response_audio_is_expired() {
        let with_expiry = |expires_at| ResponseAudio {
            id: "test-id".to_string(),
            data: String::new(),
            expires_at,
            transcript: None,
        };

        // No expiry recorded: never reported as expired.
        assert!(!with_expiry(None).is_expired());
        // The Unix epoch itself is in the past for any sane clock.
        assert!(with_expiry(Some(0)).is_expired());
        // The far future is never reached.
        assert!(!with_expiry(Some(i64::MAX)).is_expired());
    }

    #[test]
    fn test_delta_audio_decode() {
        let delta = DeltaAudio {
            id: "delta-id".to_string(),
            data: base64::engine::general_purpose::STANDARD.encode(b"chunk"),
            expires_at: None,
            transcript: None,
        };

        assert_eq!(delta.decode_data().unwrap(), b"chunk");
    }
}