1use crate::error::Result;
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
11#[serde(rename_all = "lowercase")]
12pub enum AudioFormat {
13 #[default]
15 Wav,
16 Mp3,
18 Pcm,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
24#[serde(rename_all = "snake_case")]
25pub enum Voice {
26 #[default]
28 MimoDefault,
29 DefaultEn,
31 DefaultZh,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct Audio {
38 #[serde(skip_serializing_if = "Option::is_none")]
40 pub format: Option<AudioFormat>,
41 #[serde(skip_serializing_if = "Option::is_none")]
43 pub voice: Option<Voice>,
44}
45
46impl Audio {
47 pub fn new() -> Self {
59 Self {
60 format: None,
61 voice: None,
62 }
63 }
64
65 pub fn format(mut self, format: AudioFormat) -> Self {
67 self.format = Some(format);
68 self
69 }
70
71 pub fn voice(mut self, voice: Voice) -> Self {
73 self.voice = Some(voice);
74 self
75 }
76
77 pub fn wav() -> Self {
79 Self::new().format(AudioFormat::Wav)
80 }
81
82 pub fn mp3() -> Self {
84 Self::new().format(AudioFormat::Mp3)
85 }
86
87 pub fn pcm() -> Self {
89 Self::new().format(AudioFormat::Pcm)
90 }
91}
92
93impl Default for Audio {
94 fn default() -> Self {
95 Self::new()
96 }
97}
98
99#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ResponseAudio {
102 pub id: String,
104 pub data: String,
106 #[serde(skip_serializing_if = "Option::is_none")]
108 pub expires_at: Option<i64>,
109 #[serde(skip_serializing_if = "Option::is_none")]
111 pub transcript: Option<String>,
112}
113
114impl ResponseAudio {
115 pub fn decode_data(&self) -> Result<Vec<u8>> {
138 use base64::Engine;
139 base64::engine::general_purpose::STANDARD.decode(&self.data).map_err(Into::into)
140 }
141
142 pub fn transcript(&self) -> Option<&str> {
144 self.transcript.as_deref()
145 }
146
147 pub fn is_expired(&self) -> bool {
149 if let Some(expires_at) = self.expires_at {
150 let now = std::time::SystemTime::now()
151 .duration_since(std::time::UNIX_EPOCH)
152 .unwrap()
153 .as_secs() as i64;
154 now > expires_at
155 } else {
156 false
157 }
158 }
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct DeltaAudio {
164 pub id: String,
166 pub data: String,
168 #[serde(skip_serializing_if = "Option::is_none")]
170 pub expires_at: Option<i64>,
171 #[serde(skip_serializing_if = "Option::is_none")]
173 pub transcript: Option<String>,
174}
175
176impl DeltaAudio {
177 pub fn decode_data(&self) -> Result<Vec<u8>> {
179 use base64::Engine;
180 base64::engine::general_purpose::STANDARD.decode(&self.data).map_err(Into::into)
181 }
182}
183
/// Ordered collection of style labels that `apply` turns into a
/// `<style>…</style>` prefix on a piece of text.
///
/// The `Default` value holds no styles.
#[derive(Debug, Clone, Default)]
pub struct TtsStyle {
    /// Style labels in insertion order.
    styles: Vec<String>,
}
192
193impl TtsStyle {
194 pub fn new() -> Self {
196 Self { styles: Vec::new() }
197 }
198
199 pub fn with_style(mut self, style: impl Into<String>) -> Self {
223 self.styles.push(style.into());
224 self
225 }
226
227 pub fn apply(&self, text: &str) -> String {
231 if self.styles.is_empty() {
232 text.to_string()
233 } else {
234 format!("<style>{}</style>{}", self.styles.join(" "), text)
235 }
236 }
237}
238
239pub fn styled_text(style: &str, text: &str) -> String {
250 TtsStyle::new().with_style(style).apply(text)
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine;

    /// Builds a `ResponseAudio` fixture with an empty payload and the given expiry.
    fn response_with_expiry(expires_at: Option<i64>) -> ResponseAudio {
        ResponseAudio {
            id: "test-id".to_string(),
            data: String::new(),
            expires_at,
            transcript: None,
        }
    }

    #[test]
    fn test_audio_format_default() {
        let format = AudioFormat::default();
        assert_eq!(format, AudioFormat::Wav);
    }

    #[test]
    fn test_voice_default() {
        let voice = Voice::default();
        assert_eq!(voice, Voice::MimoDefault);
    }

    #[test]
    fn test_audio_config() {
        let audio = Audio::wav().voice(Voice::DefaultZh);
        assert_eq!(audio.format, Some(AudioFormat::Wav));
        assert_eq!(audio.voice, Some(Voice::DefaultZh));
    }

    #[test]
    fn test_audio_serialization() {
        let audio = Audio::mp3().voice(Voice::DefaultEn);
        let json = serde_json::to_string(&audio).unwrap();
        assert!(json.contains("\"format\":\"mp3\""));
        assert!(json.contains("\"voice\":\"default_en\""));
    }

    // Unset fields must be skipped entirely, not written as `null`.
    #[test]
    fn test_audio_default_serializes_empty() {
        let json = serde_json::to_string(&Audio::default()).unwrap();
        assert_eq!(json, "{}");
    }

    #[test]
    fn test_audio_deserialization() {
        let audio: Audio =
            serde_json::from_str(r#"{"format":"pcm","voice":"mimo_default"}"#).unwrap();
        assert_eq!(audio.format, Some(AudioFormat::Pcm));
        assert_eq!(audio.voice, Some(Voice::MimoDefault));

        // Missing optional fields deserialize as `None`.
        let empty: Audio = serde_json::from_str("{}").unwrap();
        assert_eq!(empty.format, None);
        assert_eq!(empty.voice, None);
    }

    #[test]
    fn test_audio_formats() {
        assert_eq!(Audio::wav().format, Some(AudioFormat::Wav));
        assert_eq!(Audio::mp3().format, Some(AudioFormat::Mp3));
        assert_eq!(Audio::pcm().format, Some(AudioFormat::Pcm));
    }

    #[test]
    fn test_tts_style_single() {
        let text = TtsStyle::new().with_style("开心").apply("Hello");
        assert_eq!(text, "<style>开心</style>Hello");
    }

    // Deliberately loose: checks that both styles appear without pinning the
    // exact separator between them.
    #[test]
    fn test_tts_style_multiple() {
        let text = TtsStyle::new()
            .with_style("开心")
            .with_style("变快")
            .apply("Hello");
        assert!(text.starts_with("<style>"));
        assert!(text.contains("开心"));
        assert!(text.contains("变快"));
        assert!(text.ends_with("Hello"));
    }

    #[test]
    fn test_tts_style_empty() {
        let text = TtsStyle::new().apply("Hello");
        assert_eq!(text, "Hello");
    }

    #[test]
    fn test_styled_text_helper() {
        let text = styled_text("东北话", "哎呀妈呀");
        assert_eq!(text, "<style>东北话</style>哎呀妈呀");
    }

    #[test]
    fn test_response_audio_decode() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: base64::engine::general_purpose::STANDARD.encode(b"test audio data"),
            expires_at: None,
            transcript: Some("test".to_string()),
        };

        let decoded = audio.decode_data().unwrap();
        assert_eq!(decoded, b"test audio data");
    }

    #[test]
    fn test_response_audio_decode_invalid() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: "not base64!!".to_string(),
            expires_at: None,
            transcript: None,
        };

        assert!(audio.decode_data().is_err());
    }

    #[test]
    fn test_response_audio_transcript() {
        let audio = ResponseAudio {
            id: "test-id".to_string(),
            data: String::new(),
            expires_at: None,
            transcript: Some("Hello world".to_string()),
        };

        assert_eq!(audio.transcript(), Some("Hello world"));
    }

    #[test]
    fn test_response_audio_transcript_missing() {
        assert_eq!(response_with_expiry(None).transcript(), None);
    }

    #[test]
    fn test_response_audio_expiry() {
        // No recorded expiry never counts as expired.
        assert!(!response_with_expiry(None).is_expired());
        // The epoch itself is long past.
        assert!(response_with_expiry(Some(0)).is_expired());
        // The far future has not arrived yet.
        assert!(!response_with_expiry(Some(i64::MAX)).is_expired());
    }

    #[test]
    fn test_delta_audio_decode() {
        let delta = DeltaAudio {
            id: "delta-id".to_string(),
            data: base64::engine::general_purpose::STANDARD.encode(b"chunk"),
            expires_at: None,
            transcript: None,
        };

        assert_eq!(delta.decode_data().unwrap(), b"chunk");
    }
}