mimo_api/client.rs
1//! HTTP client for the MiMo API.
2
3use {
4 crate::{
5 error::{Error, Result},
6 types::*,
7 },
8 eventsource_stream::Eventsource,
9 futures::{StreamExt, stream::BoxStream},
10 reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue},
11 std::env,
12 tokio::{fs::File, io::AsyncWriteExt},
13};
14
/// Default base URL of the MiMo API; `Client::new` stores it as the client's base URL.
const API_BASE_URL: &str = "https://api.xiaomimimo.com/v1";
/// Name of the environment variable that `Client::from_env` reads the API key from.
const ENV_API_KEY: &str = "XIAOMI_API_KEY";
17
18/// HTTP client for the MiMo API.
19#[derive(Debug, Clone)]
20pub struct Client {
21 /// The underlying HTTP client.
22 http_client: reqwest::Client,
23 /// The API key for authentication.
24 api_key: String,
25 /// The base URL for the API.
26 base_url: String,
27}
28
29impl Client {
30 /// Create a new client with the given API key.
31 ///
32 /// # Example
33 ///
34 /// ```rust
35 /// use mimo_api::Client;
36 ///
37 /// let client = Client::new("your-api-key");
38 /// ```
39 pub fn new(api_key: impl Into<String>) -> Self {
40 Self {
41 http_client: reqwest::Client::new(),
42 api_key: api_key.into(),
43 base_url: API_BASE_URL.to_string(),
44 }
45 }
46
47 /// Create a new client from the `XIAOMI_API_KEY` environment variable.
48 ///
49 /// # Errors
50 ///
51 /// Returns an error if the `XIAOMI_API_KEY` environment variable is not set.
52 ///
53 /// # Example
54 ///
55 /// ```rust,no_run
56 /// use mimo_api::Client;
57 ///
58 /// // Assuming XIAOMI_API_KEY is set in environment
59 /// let client = Client::from_env()?;
60 /// # Ok::<(), Box<dyn std::error::Error>>(())
61 /// ```
62 pub fn from_env() -> Result<Self> {
63 let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
64 Ok(Self::new(api_key))
65 }
66
67 /// Set a custom base URL for the API.
68 ///
69 /// This is useful for testing or using a custom API endpoint.
70 pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
71 self.base_url = base_url.into();
72 self
73 }
74
75 /// Build headers for the request.
76 fn build_headers(&self) -> Result<HeaderMap> {
77 let mut headers = HeaderMap::new();
78 headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
79 headers.insert(
80 "api-key",
81 HeaderValue::from_str(&self.api_key)
82 .map_err(|_| Error::InvalidParameter("Invalid API key".into()))?,
83 );
84 Ok(headers)
85 }
86
87 /// Send a chat completion request.
88 ///
89 /// # Example
90 ///
91 /// ```rust,no_run
92 /// use mimo_api::{Client, ChatRequest, Message};
93 ///
94 /// #[tokio::main]
95 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
96 /// let client = Client::from_env()?;
97 /// let request = ChatRequest::new("mimo-v2-flash")
98 /// .message(Message::user("Hello!"));
99 /// let response = client.chat(request).await?;
100 /// println!("{}", response.choices[0].message.content);
101 /// Ok(())
102 /// }
103 /// ```
104 pub async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
105 let url = format!("{}/chat/completions", self.base_url);
106 let headers = self.build_headers()?;
107
108 let response = self
109 .http_client
110 .post(&url)
111 .headers(headers)
112 .json(&request)
113 .send()
114 .await?;
115
116 let status = response.status();
117 if !status.is_success() {
118 let error_text = response.text().await.unwrap_or_default();
119 return Err(Error::api_error(status.as_u16(), error_text));
120 }
121
122 response.json().await.map_err(Error::from)
123 }
124
125 /// Send a chat completion request with streaming response.
126 ///
127 /// Returns a stream of `StreamChunk` objects.
128 ///
129 /// # Example
130 ///
131 /// ```rust,no_run
132 /// use mimo_api::{Client, ChatRequest, Message};
133 /// use futures::StreamExt;
134 ///
135 /// #[tokio::main]
136 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
137 /// let client = Client::from_env()?;
138 /// let request = ChatRequest::new("mimo-v2-flash")
139 /// .message(Message::user("Tell me a story."))
140 /// .stream(true);
141 ///
142 /// let mut stream = client.chat_stream(request).await?;
143 /// while let Some(chunk) = stream.next().await {
144 /// match chunk {
145 /// Ok(chunk) => {
146 /// if let Some(content) = &chunk.choices[0].delta.content {
147 /// print!("{}", content);
148 /// }
149 /// }
150 /// Err(e) => eprintln!("Error: {}", e),
151 /// }
152 /// }
153 /// Ok(())
154 /// }
155 /// ```
156 pub async fn chat_stream(
157 &self,
158 request: ChatRequest,
159 ) -> Result<BoxStream<'static, Result<StreamChunk>>> {
160 let mut request = request;
161 request.stream = Some(true);
162
163 let url = format!("{}/chat/completions", self.base_url);
164 let headers = self.build_headers()?;
165
166 let response = self
167 .http_client
168 .post(&url)
169 .headers(headers)
170 .json(&request)
171 .send()
172 .await?;
173
174 let status = response.status();
175 if !status.is_success() {
176 let error_text = response.text().await.unwrap_or_default();
177 return Err(Error::api_error(status.as_u16(), error_text));
178 }
179
180 let stream = response
181 .bytes_stream()
182 .eventsource()
183 .filter_map(|event| async move {
184 match event {
185 Ok(event) => {
186 if event.data == "[DONE]" {
187 None
188 } else {
189 match serde_json::from_str::<StreamChunk>(&event.data) {
190 Ok(chunk) => Some(Ok(chunk)),
191 Err(e) => Some(Err(Error::StreamError(e.to_string()))),
192 }
193 }
194 }
195 Err(e) => Some(Err(Error::StreamError(e.to_string()))),
196 }
197 })
198 .boxed();
199
200 Ok(stream)
201 }
202
203 /// Create a text-to-speech request builder.
204 ///
205 /// This method creates a builder for synthesizing speech from text using the `mimo-v2-tts` model.
206 ///
207 /// # Arguments
208 ///
209 /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
210 ///
211 /// # Example
212 ///
213 /// ```rust,no_run
214 /// use mimo_api::{Client, Voice};
215 ///
216 /// #[tokio::main]
217 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
218 /// let client = Client::from_env()?;
219 ///
220 /// let response = client.tts("Hello, world!")
221 /// .voice(Voice::DefaultEn)
222 /// .send()
223 /// .await?;
224 ///
225 /// let audio = response.audio()?;
226 /// let audio_bytes = audio.decode_data()?;
227 /// tokio::fs::write("output.wav", audio_bytes).await?;
228 /// Ok(())
229 /// }
230 /// ```
231 pub fn tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
232 TtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
233 }
234
235 /// Create a text-to-speech request builder with styled text.
236 ///
237 /// This method allows you to apply style controls to the synthesized speech.
238 ///
239 /// # Example
240 ///
241 /// ```rust,no_run
242 /// use mimo_api::{Client, Voice};
243 ///
244 /// #[tokio::main]
245 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
246 /// let client = Client::from_env()?;
247 ///
248 /// // Synthesize speech with "开心" (happy) style
249 /// let response = client.tts_styled("开心", "明天就是周五了,真开心!")
250 /// .voice(Voice::DefaultZh)
251 /// .send()
252 /// .await?;
253 ///
254 /// let audio = response.audio()?;
255 /// let audio_bytes = audio.decode_data()?;
256 /// tokio::fs::write("output.wav", audio_bytes).await?;
257 /// Ok(())
258 /// }
259 /// ```
260 pub fn tts_styled(&self, style: &str, text: &str) -> TtsRequestBuilder {
261 TtsRequestBuilder::new(
262 self.clone(),
263 Model::MiMoV2Tts.as_str(),
264 styled_text(style, text),
265 )
266 }
267
268 /// Create a text-to-speech request builder using the MiMo V2.5 TTS model.
269 ///
270 /// This method uses the updated TTS model with more preset voices.
271 ///
272 /// # Arguments
273 ///
274 /// * `text` - The text to synthesize.
275 ///
276 /// # Example
277 ///
278 /// ```rust,no_run
279 /// use mimo_api::{Client, Voice};
280 ///
281 /// #[tokio::main]
282 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
283 /// let client = Client::from_env()?;
284 ///
285 /// let response = client.v25_tts("Hello, world!")
286 /// .voice(Voice::Mia)
287 /// .send()
288 /// .await?;
289 ///
290 /// let audio = response.audio()?;
291 /// let audio_bytes = audio.decode_data()?;
292 /// tokio::fs::write("output.wav", audio_bytes).await?;
293 /// Ok(())
294 /// }
295 /// ```
296 pub fn v25_tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
297 TtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
298 }
299
300 /// Create a TTS request builder with voice design (MiMo V2.5 TTS VoiceDesign).
301 ///
302 /// This method uses text description to design a custom voice.
303 /// The `user_message` is REQUIRED and should contain the voice description.
304 ///
305 /// # Arguments
306 ///
307 /// * `text` - The text to synthesize.
308 ///
309 /// # Example
310 ///
311 /// ```rust,no_run
312 /// use mimo_api::Client;
313 ///
314 /// #[tokio::main]
315 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
316 /// let client = Client::from_env()?;
317 ///
318 /// let response = client.v25_tts_voice_design("Hello, world!")
319 /// .user_message("Give me a young male tone.")
320 /// .send()
321 /// .await?;
322 ///
323 /// let audio = response.audio()?;
324 /// let audio_bytes = audio.decode_data()?;
325 /// tokio::fs::write("output.wav", audio_bytes).await?;
326 /// Ok(())
327 /// }
328 /// ```
329 pub fn v25_tts_voice_design(&self, text: impl Into<String>) -> TtsRequestBuilder {
330 TtsRequestBuilder::new(
331 self.clone(),
332 Model::MiMoV25TtsVoiceDesign.as_str(),
333 text.into(),
334 )
335 }
336
337 /// Create a TTS request builder with voice clone (MiMo V2.5 TTS VoiceClone).
338 ///
339 /// This method uses an audio sample to clone a voice.
340 /// Use `Voice::custom()` or `Voice::from_audio_file()` to set the voice.
341 ///
342 /// # Arguments
343 ///
344 /// * `text` - The text to synthesize.
345 ///
346 /// # Example
347 ///
348 /// ```rust,no_run
349 /// use mimo_api::{Client, Voice};
350 ///
351 /// #[tokio::main]
352 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
353 /// let client = Client::from_env()?;
354 ///
355 /// let voice = Voice::from_audio_file("voice_sample.mp3").await?;
356 ///
357 /// let response = client.v25_tts_voice_clone("Hello, world!")
358 /// .voice(voice)
359 /// .send()
360 /// .await?;
361 ///
362 /// let audio = response.audio()?;
363 /// let audio_bytes = audio.decode_data()?;
364 /// tokio::fs::write("output.wav", audio_bytes).await?;
365 /// Ok(())
366 /// }
367 /// ```
368 pub fn v25_tts_voice_clone(&self, text: impl Into<String>) -> TtsRequestBuilder {
369 TtsRequestBuilder::new(
370 self.clone(),
371 Model::MiMoV25TtsVoiceClone.as_str(),
372 text.into(),
373 )
374 }
375
376 /// Create a streaming text-to-speech request builder.
377 ///
378 /// This method creates a builder for streaming speech synthesis using the `mimo-v2-tts` model.
379 /// Streaming TTS delivers audio data in real-time chunks.
380 ///
381 /// # Arguments
382 ///
383 /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
384 ///
385 /// # Example
386 ///
387 /// ```rust,no_run
388 /// use mimo_api::{Client, Voice};
389 /// use futures::StreamExt;
390 /// use tokio::fs::File;
391 /// use tokio::io::AsyncWriteExt;
392 ///
393 /// #[tokio::main]
394 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
395 /// let client = Client::from_env()?;
396 ///
397 /// let mut stream = client.tts_stream("Hello, world!")
398 /// .voice(Voice::DefaultEn)
399 /// .send()
400 /// .await?;
401 ///
402 /// let mut file = File::create("output.pcm").await?;
403 /// let mut total_bytes = 0;
404 ///
405 /// while let Some(chunk) = stream.next().await {
406 /// let audio_bytes = chunk?;
407 /// file.write_all(&audio_bytes).await?;
408 /// total_bytes += audio_bytes.len();
409 /// }
410 ///
411 /// println!("Total bytes: {}", total_bytes);
412 /// Ok(())
413 /// }
414 /// ```
415 pub fn tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
416 StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
417 }
418
419 /// Create a streaming text-to-speech request builder with styled text.
420 ///
421 /// This method allows you to apply style controls to the streaming synthesized speech.
422 ///
423 /// # Arguments
424 ///
425 /// * `style` - The style to apply (e.g., "开心", "悲伤", "变快", "变慢")
426 /// * `text` - The text to synthesize
427 ///
428 /// # Example
429 ///
430 /// ```rust,no_run
431 /// use mimo_api::{Client, Voice};
432 /// use futures::StreamExt;
433 /// use tokio::fs::File;
434 /// use tokio::io::AsyncWriteExt;
435 ///
436 /// #[tokio::main]
437 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
438 /// let client = Client::from_env()?;
439 ///
440 /// // Synthesize speech with "开心" (happy) style
441 /// let mut stream = client.tts_styled_stream("开心", "明天就是周五了,真开心!")
442 /// .voice(Voice::DefaultZh)
443 /// .send()
444 /// .await?;
445 ///
446 /// let mut file = File::create("output.pcm").await?;
447 /// let mut total_bytes = 0;
448 ///
449 /// while let Some(chunk) = stream.next().await {
450 /// let audio_bytes = chunk?;
451 /// file.write_all(&audio_bytes).await?;
452 /// total_bytes += audio_bytes.len();
453 /// }
454 ///
455 /// println!("Total bytes: {}", total_bytes);
456 /// Ok(())
457 /// }
458 /// ```
459 pub fn tts_styled_stream(&self, style: &str, text: &str) -> StreamingTtsRequestBuilder {
460 StreamingTtsRequestBuilder::new(
461 self.clone(),
462 Model::MiMoV2Tts.as_str(),
463 styled_text(style, text),
464 )
465 }
466
467 /// Create a streaming TTS request builder using MiMo V2.5 TTS model.
468 ///
469 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
470 /// The streaming API currently returns results in compatibility mode.
471 pub fn v25_tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
472 StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
473 }
474
475 /// Create a streaming TTS request builder with voice design.
476 ///
477 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
478 pub fn v25_tts_voice_design_stream(
479 &self,
480 text: impl Into<String>,
481 ) -> StreamingTtsRequestBuilder {
482 StreamingTtsRequestBuilder::new(
483 self.clone(),
484 Model::MiMoV25TtsVoiceDesign.as_str(),
485 text.into(),
486 )
487 }
488
489 /// Create a streaming TTS request builder with voice clone.
490 ///
491 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
492 pub fn v25_tts_voice_clone_stream(
493 &self,
494 text: impl Into<String>,
495 ) -> StreamingTtsRequestBuilder {
496 StreamingTtsRequestBuilder::new(
497 self.clone(),
498 Model::MiMoV25TtsVoiceClone.as_str(),
499 text.into(),
500 )
501 }
502}
503
/// Builder for text-to-speech requests.
///
/// This builder provides a fluent API for configuring TTS requests.
/// Calling `send` consumes the builder and issues a chat completion request
/// through the embedded client.
#[derive(Debug, Clone)]
pub struct TtsRequestBuilder {
    /// Client used to send the request.
    client: Client,
    /// Name of the model the request is sent to.
    model: String,
    /// Text to synthesize; sent as the `assistant` message.
    text: String,
    /// Optional `user` message, placed before the text to synthesize.
    user_message: Option<String>,
    /// Voice used for synthesis (omitted from the request for voice-design models).
    voice: Voice,
    /// Audio format requested for the output.
    format: AudioFormat,
}
516
517impl TtsRequestBuilder {
518 /// Create a new TTS request builder.
519 fn new(client: Client, model: impl Into<String>, text: String) -> Self {
520 Self {
521 client,
522 model: model.into(),
523 text,
524 user_message: None,
525 voice: Voice::default(),
526 format: AudioFormat::default(),
527 }
528 }
529
530 /// Set the voice for synthesis.
531 ///
532 /// Available voices:
533 /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
534 /// - `Voice::DefaultEn` - Default English female voice
535 /// - `Voice::DefaultZh` - Default Chinese female voice
536 pub fn voice(mut self, voice: Voice) -> Self {
537 self.voice = voice;
538 self
539 }
540
541 /// Set the audio output format.
542 ///
543 /// Available formats:
544 /// - `AudioFormat::Wav` - WAV format (recommended for high quality)
545 /// - `AudioFormat::Mp3` - MP3 format (smaller file size)
546 /// - `AudioFormat::Pcm` - PCM format (for streaming)
547 pub fn format(mut self, format: AudioFormat) -> Self {
548 self.format = format;
549 self
550 }
551
552 /// Add a user message to influence the synthesis style.
553 ///
554 /// The user message can help adjust the tone and style of the synthesized speech.
555 pub fn user_message(mut self, message: impl Into<String>) -> Self {
556 self.user_message = Some(message.into());
557 self
558 }
559
560 /// Send the TTS request and return the response.
561 ///
562 /// # Returns
563 ///
564 /// A `TtsResponse` containing the synthesized audio data.
565 ///
566 /// # Example
567 ///
568 /// ```rust,no_run
569 /// use mimo_api::{Client, Voice, AudioFormat};
570 ///
571 /// #[tokio::main]
572 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
573 /// let client = Client::from_env()?;
574 ///
575 /// let response = client.tts("Hello, world!")
576 /// .voice(Voice::DefaultEn)
577 /// .format(AudioFormat::Mp3)
578 /// .send()
579 /// .await?;
580 ///
581 /// let audio = response.audio()?;
582 /// println!("Audio ID: {}", audio.id);
583 /// println!("Transcript: {:?}", audio.transcript());
584 /// Ok(())
585 /// }
586 /// ```
587 pub async fn send(self) -> Result<TtsResponse> {
588 let mut messages = Vec::new();
589
590 // Add optional user message
591 if let Some(user_msg) = self.user_message {
592 messages.push(Message::user(MessageContent::Text(user_msg)));
593 }
594
595 // Add assistant message with text to synthesize
596 messages.push(Message::assistant(MessageContent::Text(self.text)));
597
598 // Voice design model does not support audio.voice parameter
599 // Model name is "mimo-v2.5-tts-voicedesign" (no hyphen between voice and design)
600 let is_voice_design = self.model.contains("voicedesign");
601
602 let audio = if is_voice_design {
603 // Voice design model only supports format, not voice
604 Some(Audio {
605 format: Some(self.format),
606 voice: None,
607 })
608 } else {
609 Some(Audio {
610 format: Some(self.format),
611 voice: Some(self.voice),
612 })
613 };
614
615 let request = ChatRequest {
616 model: self.model,
617 messages,
618 audio,
619 ..Default::default()
620 };
621
622 let response = self.client.chat(request).await?;
623 Ok(TtsResponse(response))
624 }
625}
626
/// Response from a text-to-speech request.
///
/// A thin wrapper around the `ChatResponse` returned by the chat completion
/// call. The wrapped value is public and can also be taken out with
/// `into_inner`.
#[derive(Debug, Clone)]
pub struct TtsResponse(pub ChatResponse);
630
631impl TtsResponse {
632 /// Get the audio data from the response.
633 ///
634 /// # Errors
635 ///
636 /// Returns an error if no audio data is present in the response.
637 pub fn audio(&self) -> Result<&ResponseAudio> {
638 self.0
639 .choices
640 .first()
641 .and_then(|c| c.message.audio.as_ref())
642 .ok_or_else(|| Error::InvalidResponse("No audio data in response".into()))
643 }
644
645 /// Get the content text from the response.
646 pub fn content(&self) -> Option<&str> {
647 self.0.choices.first().map(|c| c.message.content.as_str())
648 }
649
650 /// Get the underlying chat response.
651 pub fn into_inner(self) -> ChatResponse {
652 self.0
653 }
654}
655
/// Builder for streaming text-to-speech requests.
///
/// This builder provides a fluent API for configuring streaming TTS requests.
/// Unlike `TtsRequestBuilder` it has no audio format setting: `send` always
/// requests `AudioFormat::Pcm16`.
#[derive(Debug, Clone)]
pub struct StreamingTtsRequestBuilder {
    /// Client used to send the request.
    client: Client,
    /// Name of the model the request is sent to.
    model: String,
    /// Text to synthesize; sent as the `assistant` message.
    text: String,
    /// Optional `user` message, placed before the text to synthesize.
    user_message: Option<String>,
    /// Voice used for synthesis.
    voice: Voice,
}
667
668impl StreamingTtsRequestBuilder {
669 /// Create a new streaming TTS request builder.
670 fn new(client: Client, model: impl Into<String>, text: String) -> Self {
671 Self {
672 client,
673 model: model.into(),
674 text,
675 user_message: None,
676 voice: Voice::default(),
677 }
678 }
679
680 /// Set the voice for synthesis.
681 ///
682 /// Available voices:
683 /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
684 /// - `Voice::DefaultEn` - Default English female voice
685 /// - `Voice::DefaultZh` - Default Chinese female voice
686 ///
687 /// # Example
688 ///
689 /// ```rust,no_run
690 /// use mimo_api::{Client, Voice};
691 ///
692 /// #[tokio::main]
693 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
694 /// let client = Client::from_env()?;
695 ///
696 /// let stream = client.tts_stream("Hello!")
697 /// .voice(Voice::DefaultEn)
698 /// .send()
699 /// .await?;
700 ///
701 /// Ok(())
702 /// }
703 /// ```
704 pub fn voice(mut self, voice: Voice) -> Self {
705 self.voice = voice;
706 self
707 }
708
709 /// Add a user message to influence the synthesis style.
710 ///
711 /// The user message can help adjust the tone and style of the synthesized speech.
712 ///
713 /// # Example
714 ///
715 /// ```rust,no_run
716 /// use mimo_api::Client;
717 ///
718 /// #[tokio::main]
719 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
720 /// let client = Client::from_env()?;
721 ///
722 /// let stream = client.tts_stream("Hello there!")
723 /// .user_message("Speak in a friendly, conversational tone")
724 /// .send()
725 /// .await?;
726 ///
727 /// Ok(())
728 /// }
729 /// ```
730 pub fn user_message(mut self, message: impl Into<String>) -> Self {
731 self.user_message = Some(message.into());
732 self
733 }
734
735 /// Send the streaming TTS request and return the response stream.
736 ///
737 /// # Returns
738 ///
739 /// A `StreamingTtsResponse` that yields audio data chunks.
740 ///
741 /// # Example
742 ///
743 /// ```rust,no_run
744 /// use mimo_api::{Client, Voice};
745 /// use futures::StreamExt;
746 /// use tokio::fs::File;
747 /// use tokio::io::AsyncWriteExt;
748 ///
749 /// #[tokio::main]
750 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
751 /// let client = Client::from_env()?;
752 ///
753 /// let mut stream = client.tts_stream("Hello, world!")
754 /// .voice(Voice::DefaultEn)
755 /// .send()
756 /// .await?;
757 ///
758 /// let mut file = File::create("output.pcm").await?;
759 /// let mut total_bytes = 0;
760 ///
761 /// while let Some(result) = stream.next().await {
762 /// let audio_bytes = result?;
763 /// file.write_all(&audio_bytes).await?;
764 /// total_bytes += audio_bytes.len();
765 /// }
766 ///
767 /// println!("Total bytes: {}", total_bytes);
768 /// Ok(())
769 /// }
770 /// ```
771 pub async fn send(self) -> Result<StreamingTtsResponse> {
772 let mut messages = Vec::new();
773
774 // Add optional user message
775 if let Some(user_msg) = self.user_message {
776 messages.push(Message::user(MessageContent::Text(user_msg)));
777 }
778
779 // Add assistant message with text to synthesize
780 messages.push(Message::assistant(MessageContent::Text(self.text)));
781
782 let request = ChatRequest {
783 model: self.model,
784 messages,
785 stream: Some(true),
786 audio: Some(Audio {
787 format: Some(AudioFormat::Pcm16), // PCM16 is recommended for streaming
788 voice: Some(self.voice),
789 }),
790 ..Default::default()
791 };
792
793 let stream = self.client.chat_stream(request).await?;
794 Ok(StreamingTtsResponse::new(stream))
795 }
796}
797
/// Response from a streaming text-to-speech request.
///
/// This type wraps the underlying stream and provides convenience methods
/// for consuming audio data. It also implements `futures::Stream`, yielding
/// the decoded audio bytes of each chunk that carries audio.
pub struct StreamingTtsResponse {
    /// Underlying stream of parsed chat completion chunks.
    stream: BoxStream<'static, Result<StreamChunk>>,
    /// Running total of decoded audio bytes seen so far.
    total_bytes: u64,
    /// Number of chunks that contained audio data so far.
    chunk_count: u32,
}
807
808impl StreamingTtsResponse {
809 /// Create a new streaming TTS response.
810 fn new(stream: BoxStream<'static, Result<StreamChunk>>) -> Self {
811 Self {
812 stream,
813 total_bytes: 0,
814 chunk_count: 0,
815 }
816 }
817
818 /// Collect all audio chunks and return them as a single byte vector.
819 ///
820 /// This is a convenience method for non-streaming use cases where you
821 /// want to wait for all audio data before processing it.
822 ///
823 /// # Example
824 ///
825 /// ```rust,no_run
826 /// use mimo_api::Client;
827 ///
828 /// #[tokio::main]
829 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
830 /// let client = Client::from_env()?;
831 ///
832 /// let mut stream = client.tts_stream("Hello, world!").send().await?;
833 /// let audio_bytes = stream.collect_audio().await?;
834 ///
835 /// tokio::fs::write("output.pcm", &audio_bytes).await?;
836 /// println!("Total bytes: {}", audio_bytes.len());
837 ///
838 /// Ok(())
839 /// }
840 /// ```
841 pub async fn collect_audio(&mut self) -> Result<Vec<u8>> {
842 let mut all_bytes = Vec::new();
843
844 while let Some(chunk) = self.stream.next().await {
845 if let Some(audio_bytes) = self.process_chunk(chunk?)? {
846 all_bytes.extend(audio_bytes);
847 }
848 }
849
850 Ok(all_bytes)
851 }
852
853 /// Save all audio chunks to a file.
854 ///
855 /// This is a convenience method that collects all audio data and writes it to a file.
856 ///
857 /// # Example
858 ///
859 /// ```rust,no_run
860 /// use mimo_api::Client;
861 ///
862 /// #[tokio::main]
863 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
864 /// let client = Client::from_env()?;
865 ///
866 /// let mut stream: mimo_api::StreamingTtsResponse = client.tts_stream("Hello, world!").send().await?;
867 /// stream.save_to_file("output.pcm").await?;
868 ///
869 /// println!("Audio saved to file");
870 ///
871 /// Ok(())
872 /// }
873 /// ```
874 pub async fn save_to_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<()> {
875 let mut file = File::create(path).await?;
876
877 while let Some(chunk) = self.stream.next().await {
878 if let Some(audio_bytes) = self.process_chunk(chunk?)? {
879 file.write_all(&audio_bytes).await?;
880 }
881 }
882
883 file.flush().await?;
884 Ok(())
885 }
886
887 /// Process a stream chunk and return audio bytes if present.
888 fn process_chunk(&mut self, chunk: StreamChunk) -> Result<Option<Vec<u8>>> {
889 if !chunk.choices.is_empty()
890 && let Some(audio) = &chunk.choices[0].delta.audio
891 {
892 let bytes = audio.decode_data()?;
893 self.total_bytes += bytes.len() as u64;
894 self.chunk_count += 1;
895 return Ok(Some(bytes));
896 }
897 Ok(None)
898 }
899
900 /// Get the total number of bytes received so far.
901 pub fn total_bytes(&self) -> u64 {
902 self.total_bytes
903 }
904
905 /// Get the number of audio chunks received so far.
906 pub fn chunk_count(&self) -> u32 {
907 self.chunk_count
908 }
909}
910
911impl futures::Stream for StreamingTtsResponse {
912 type Item = Result<Vec<u8>>;
913
914 fn poll_next(
915 mut self: std::pin::Pin<&mut Self>,
916 cx: &mut std::task::Context<'_>,
917 ) -> std::task::Poll<Option<Self::Item>> {
918 // Process chunks until we find one with audio data or the stream ends
919 loop {
920 match std::pin::Pin::new(&mut self.stream).poll_next(cx) {
921 std::task::Poll::Ready(Some(Ok(chunk))) => {
922 // Check if this is the final chunk with finish_reason
923 let is_final = chunk
924 .choices
925 .first()
926 .and_then(|c| c.finish_reason.as_ref())
927 .is_some();
928
929 match self.process_chunk(chunk) {
930 Ok(Some(bytes)) => {
931 // Return audio data from this chunk
932 return std::task::Poll::Ready(Some(Ok(bytes)));
933 }
934 Ok(None) => {
935 // No audio data in this chunk
936 if is_final {
937 // Stream has ended, no more audio data
938 return std::task::Poll::Ready(None);
939 }
940 // Continue to next chunk
941 continue;
942 }
943 Err(e) => return std::task::Poll::Ready(Some(Err(e))),
944 }
945 }
946 std::task::Poll::Ready(Some(Err(e))) => {
947 let error_msg = format!("Stream error: {}", e);
948 return std::task::Poll::Ready(Some(Err(Error::StreamError(error_msg))));
949 }
950 std::task::Poll::Ready(None) => {
951 // Stream has ended normally
952 return std::task::Poll::Ready(None);
953 }
954 std::task::Poll::Pending => return std::task::Poll::Pending,
955 }
956 }
957 }
958}