Skip to main content

mimo_api/
client.rs

1//! HTTP client for the MiMo API.
2
3use {
4    crate::{
5        error::{Error, Result},
6        types::*,
7    },
8    eventsource_stream::Eventsource,
9    futures::{StreamExt, stream::BoxStream},
10    reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue},
11    std::env,
12    tokio::{fs::File, io::AsyncWriteExt},
13};
14
15const API_BASE_URL: &str = "https://api.xiaomimimo.com/v1";
16const ENV_API_KEY: &str = "XIAOMI_API_KEY";
17
18/// HTTP client for the MiMo API.
19#[derive(Debug, Clone)]
20pub struct Client {
21    /// The underlying HTTP client.
22    http_client: reqwest::Client,
23    /// The API key for authentication.
24    api_key: String,
25    /// The base URL for the API.
26    base_url: String,
27}
28
29impl Client {
30    /// Create a new client with the given API key.
31    ///
32    /// # Example
33    ///
34    /// ```rust
35    /// use mimo_api::Client;
36    ///
37    /// let client = Client::new("your-api-key");
38    /// ```
39    pub fn new(api_key: impl Into<String>) -> Self {
40        Self {
41            http_client: reqwest::Client::new(),
42            api_key: api_key.into(),
43            base_url: API_BASE_URL.to_string(),
44        }
45    }
46
47    /// Create a new client from the `XIAOMI_API_KEY` environment variable.
48    ///
49    /// # Errors
50    ///
51    /// Returns an error if the `XIAOMI_API_KEY` environment variable is not set.
52    ///
53    /// # Example
54    ///
55    /// ```rust,no_run
56    /// use mimo_api::Client;
57    ///
58    /// // Assuming XIAOMI_API_KEY is set in environment
59    /// let client = Client::from_env()?;
60    /// # Ok::<(), Box<dyn std::error::Error>>(())
61    /// ```
62    pub fn from_env() -> Result<Self> {
63        let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
64        Ok(Self::new(api_key))
65    }
66
67    /// Set a custom base URL for the API.
68    ///
69    /// This is useful for testing or using a custom API endpoint.
70    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
71        self.base_url = base_url.into();
72        self
73    }
74
75    /// Build headers for the request.
76    fn build_headers(&self) -> Result<HeaderMap> {
77        let mut headers = HeaderMap::new();
78        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
79        headers.insert(
80            "api-key",
81            HeaderValue::from_str(&self.api_key)
82                .map_err(|_| Error::InvalidParameter("Invalid API key".into()))?,
83        );
84        Ok(headers)
85    }
86
87    /// Send a chat completion request.
88    ///
89    /// # Example
90    ///
91    /// ```rust,no_run
92    /// use mimo_api::{Client, ChatRequest, Message};
93    ///
94    /// #[tokio::main]
95    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
96    ///     let client = Client::from_env()?;
97    ///     let request = ChatRequest::new("mimo-v2-flash")
98    ///         .message(Message::user("Hello!"));
99    ///     let response = client.chat(request).await?;
100    ///     println!("{}", response.choices[0].message.content);
101    ///     Ok(())
102    /// }
103    /// ```
104    pub async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
105        let url = format!("{}/chat/completions", self.base_url);
106        let headers = self.build_headers()?;
107
108        let response = self
109            .http_client
110            .post(&url)
111            .headers(headers)
112            .json(&request)
113            .send()
114            .await?;
115
116        let status = response.status();
117        if !status.is_success() {
118            let error_text = response.text().await.unwrap_or_default();
119            return Err(Error::api_error(status.as_u16(), error_text));
120        }
121
122        response.json().await.map_err(Error::from)
123    }
124
125    /// Send a chat completion request with streaming response.
126    ///
127    /// Returns a stream of `StreamChunk` objects.
128    ///
129    /// # Example
130    ///
131    /// ```rust,no_run
132    /// use mimo_api::{Client, ChatRequest, Message};
133    /// use futures::StreamExt;
134    ///
135    /// #[tokio::main]
136    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
137    ///     let client = Client::from_env()?;
138    ///     let request = ChatRequest::new("mimo-v2-flash")
139    ///         .message(Message::user("Tell me a story."))
140    ///         .stream(true);
141    ///     
142    ///     let mut stream = client.chat_stream(request).await?;
143    ///     while let Some(chunk) = stream.next().await {
144    ///         match chunk {
145    ///             Ok(chunk) => {
146    ///                 if let Some(content) = &chunk.choices[0].delta.content {
147    ///                     print!("{}", content);
148    ///                 }
149    ///             }
150    ///             Err(e) => eprintln!("Error: {}", e),
151    ///         }
152    ///     }
153    ///     Ok(())
154    /// }
155    /// ```
156    pub async fn chat_stream(
157        &self,
158        request: ChatRequest,
159    ) -> Result<BoxStream<'static, Result<StreamChunk>>> {
160        let mut request = request;
161        request.stream = Some(true);
162
163        let url = format!("{}/chat/completions", self.base_url);
164        let headers = self.build_headers()?;
165
166        let response = self
167            .http_client
168            .post(&url)
169            .headers(headers)
170            .json(&request)
171            .send()
172            .await?;
173
174        let status = response.status();
175        if !status.is_success() {
176            let error_text = response.text().await.unwrap_or_default();
177            return Err(Error::api_error(status.as_u16(), error_text));
178        }
179
180        let stream = response
181            .bytes_stream()
182            .eventsource()
183            .filter_map(|event| async move {
184                match event {
185                    Ok(event) => {
186                        if event.data == "[DONE]" {
187                            None
188                        } else {
189                            match serde_json::from_str::<StreamChunk>(&event.data) {
190                                Ok(chunk) => Some(Ok(chunk)),
191                                Err(e) => Some(Err(Error::StreamError(e.to_string()))),
192                            }
193                        }
194                    }
195                    Err(e) => Some(Err(Error::StreamError(e.to_string()))),
196                }
197            })
198            .boxed();
199
200        Ok(stream)
201    }
202
203    /// Create a text-to-speech request builder.
204    ///
205    /// This method creates a builder for synthesizing speech from text using the `mimo-v2-tts` model.
206    ///
207    /// # Arguments
208    ///
209    /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
210    ///
211    /// # Example
212    ///
213    /// ```rust,no_run
214    /// use mimo_api::{Client, Voice};
215    ///
216    /// #[tokio::main]
217    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
218    ///     let client = Client::from_env()?;
219    ///     
220    ///     let response = client.tts("Hello, world!")
221    ///         .voice(Voice::DefaultEn)
222    ///         .send()
223    ///         .await?;
224    ///     
225    ///     let audio = response.audio()?;
226    ///     let audio_bytes = audio.decode_data()?;
227    ///     tokio::fs::write("output.wav", audio_bytes).await?;
228    ///     Ok(())
229    /// }
230    /// ```
231    pub fn tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
232        TtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
233    }
234
235    /// Create a text-to-speech request builder with styled text.
236    ///
237    /// This method allows you to apply style controls to the synthesized speech.
238    ///
239    /// # Example
240    ///
241    /// ```rust,no_run
242    /// use mimo_api::{Client, Voice};
243    ///
244    /// #[tokio::main]
245    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
246    ///     let client = Client::from_env()?;
247    ///
248    ///     // Synthesize speech with "开心" (happy) style
249    ///     let response = client.tts_styled("开心", "明天就是周五了,真开心!")
250    ///         .voice(Voice::DefaultZh)
251    ///         .send()
252    ///         .await?;
253    ///
254    ///     let audio = response.audio()?;
255    ///     let audio_bytes = audio.decode_data()?;
256    ///     tokio::fs::write("output.wav", audio_bytes).await?;
257    ///     Ok(())
258    /// }
259    /// ```
260    pub fn tts_styled(&self, style: &str, text: &str) -> TtsRequestBuilder {
261        TtsRequestBuilder::new(
262            self.clone(),
263            Model::MiMoV2Tts.as_str(),
264            styled_text(style, text),
265        )
266    }
267
268    /// Create a text-to-speech request builder using the MiMo V2.5 TTS model.
269    ///
270    /// This method uses the updated TTS model with more preset voices.
271    ///
272    /// # Arguments
273    ///
274    /// * `text` - The text to synthesize.
275    ///
276    /// # Example
277    ///
278    /// ```rust,no_run
279    /// use mimo_api::{Client, Voice};
280    ///
281    /// #[tokio::main]
282    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
283    ///     let client = Client::from_env()?;
284    ///
285    ///     let response = client.v25_tts("Hello, world!")
286    ///         .voice(Voice::Mia)
287    ///         .send()
288    ///         .await?;
289    ///
290    ///     let audio = response.audio()?;
291    ///     let audio_bytes = audio.decode_data()?;
292    ///     tokio::fs::write("output.wav", audio_bytes).await?;
293    ///     Ok(())
294    /// }
295    /// ```
296    pub fn v25_tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
297        TtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
298    }
299
300    /// Create a TTS request builder with voice design (MiMo V2.5 TTS VoiceDesign).
301    ///
302    /// This method uses text description to design a custom voice.
303    /// The `user_message` is REQUIRED and should contain the voice description.
304    ///
305    /// # Arguments
306    ///
307    /// * `text` - The text to synthesize.
308    ///
309    /// # Example
310    ///
311    /// ```rust,no_run
312    /// use mimo_api::Client;
313    ///
314    /// #[tokio::main]
315    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
316    ///     let client = Client::from_env()?;
317    ///
318    ///     let response = client.v25_tts_voice_design("Hello, world!")
319    ///         .user_message("Give me a young male tone.")
320    ///         .send()
321    ///         .await?;
322    ///
323    ///     let audio = response.audio()?;
324    ///     let audio_bytes = audio.decode_data()?;
325    ///     tokio::fs::write("output.wav", audio_bytes).await?;
326    ///     Ok(())
327    /// }
328    /// ```
329    pub fn v25_tts_voice_design(&self, text: impl Into<String>) -> TtsRequestBuilder {
330        TtsRequestBuilder::new(
331            self.clone(),
332            Model::MiMoV25TtsVoiceDesign.as_str(),
333            text.into(),
334        )
335    }
336
337    /// Create a TTS request builder with voice clone (MiMo V2.5 TTS VoiceClone).
338    ///
339    /// This method uses an audio sample to clone a voice.
340    /// Use `Voice::custom()` or `Voice::from_audio_file()` to set the voice.
341    ///
342    /// # Arguments
343    ///
344    /// * `text` - The text to synthesize.
345    ///
346    /// # Example
347    ///
348    /// ```rust,no_run
349    /// use mimo_api::{Client, Voice};
350    ///
351    /// #[tokio::main]
352    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
353    ///     let client = Client::from_env()?;
354    ///
355    ///     let voice = Voice::from_audio_file("voice_sample.mp3").await?;
356    ///
357    ///     let response = client.v25_tts_voice_clone("Hello, world!")
358    ///         .voice(voice)
359    ///         .send()
360    ///         .await?;
361    ///
362    ///     let audio = response.audio()?;
363    ///     let audio_bytes = audio.decode_data()?;
364    ///     tokio::fs::write("output.wav", audio_bytes).await?;
365    ///     Ok(())
366    /// }
367    /// ```
368    pub fn v25_tts_voice_clone(&self, text: impl Into<String>) -> TtsRequestBuilder {
369        TtsRequestBuilder::new(
370            self.clone(),
371            Model::MiMoV25TtsVoiceClone.as_str(),
372            text.into(),
373        )
374    }
375
376    /// Create a streaming text-to-speech request builder.
377    ///
378    /// This method creates a builder for streaming speech synthesis using the `mimo-v2-tts` model.
379    /// Streaming TTS delivers audio data in real-time chunks.
380    ///
381    /// # Arguments
382    ///
383    /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
384    ///
385    /// # Example
386    ///
387    /// ```rust,no_run
388    /// use mimo_api::{Client, Voice};
389    /// use futures::StreamExt;
390    /// use tokio::fs::File;
391    /// use tokio::io::AsyncWriteExt;
392    ///
393    /// #[tokio::main]
394    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
395    ///     let client = Client::from_env()?;
396    ///
397    ///     let mut stream = client.tts_stream("Hello, world!")
398    ///         .voice(Voice::DefaultEn)
399    ///         .send()
400    ///         .await?;
401    ///
402    ///     let mut file = File::create("output.pcm").await?;
403    ///     let mut total_bytes = 0;
404    ///
405    ///     while let Some(chunk) = stream.next().await {
406    ///         let audio_bytes = chunk?;
407    ///         file.write_all(&audio_bytes).await?;
408    ///         total_bytes += audio_bytes.len();
409    ///     }
410    ///
411    ///     println!("Total bytes: {}", total_bytes);
412    ///     Ok(())
413    /// }
414    /// ```
415    pub fn tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
416        StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
417    }
418
419    /// Create a streaming text-to-speech request builder with styled text.
420    ///
421    /// This method allows you to apply style controls to the streaming synthesized speech.
422    ///
423    /// # Arguments
424    ///
425    /// * `style` - The style to apply (e.g., "开心", "悲伤", "变快", "变慢")
426    /// * `text` - The text to synthesize
427    ///
428    /// # Example
429    ///
430    /// ```rust,no_run
431    /// use mimo_api::{Client, Voice};
432    /// use futures::StreamExt;
433    /// use tokio::fs::File;
434    /// use tokio::io::AsyncWriteExt;
435    ///
436    /// #[tokio::main]
437    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
438    ///     let client = Client::from_env()?;
439    ///
440    ///     // Synthesize speech with "开心" (happy) style
441    ///     let mut stream = client.tts_styled_stream("开心", "明天就是周五了,真开心!")
442    ///         .voice(Voice::DefaultZh)
443    ///         .send()
444    ///         .await?;
445    ///
446    ///     let mut file = File::create("output.pcm").await?;
447    ///     let mut total_bytes = 0;
448    ///
449    ///     while let Some(chunk) = stream.next().await {
450    ///         let audio_bytes = chunk?;
451    ///         file.write_all(&audio_bytes).await?;
452    ///         total_bytes += audio_bytes.len();
453    ///     }
454    ///
455    ///     println!("Total bytes: {}", total_bytes);
456    ///     Ok(())
457    /// }
458    /// ```
459    pub fn tts_styled_stream(&self, style: &str, text: &str) -> StreamingTtsRequestBuilder {
460        StreamingTtsRequestBuilder::new(
461            self.clone(),
462            Model::MiMoV2Tts.as_str(),
463            styled_text(style, text),
464        )
465    }
466
467    /// Create a streaming TTS request builder using MiMo V2.5 TTS model.
468    ///
469    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
470    /// The streaming API currently returns results in compatibility mode.
471    pub fn v25_tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
472        StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
473    }
474
475    /// Create a streaming TTS request builder with voice design.
476    ///
477    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
478    pub fn v25_tts_voice_design_stream(
479        &self,
480        text: impl Into<String>,
481    ) -> StreamingTtsRequestBuilder {
482        StreamingTtsRequestBuilder::new(
483            self.clone(),
484            Model::MiMoV25TtsVoiceDesign.as_str(),
485            text.into(),
486        )
487    }
488
489    /// Create a streaming TTS request builder with voice clone.
490    ///
491    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
492    pub fn v25_tts_voice_clone_stream(
493        &self,
494        text: impl Into<String>,
495    ) -> StreamingTtsRequestBuilder {
496        StreamingTtsRequestBuilder::new(
497            self.clone(),
498            Model::MiMoV25TtsVoiceClone.as_str(),
499            text.into(),
500        )
501    }
502}
503
504/// Builder for text-to-speech requests.
505///
506/// This builder provides a fluent API for configuring TTS requests.
507#[derive(Debug, Clone)]
508pub struct TtsRequestBuilder {
509    client: Client,
510    model: String,
511    text: String,
512    user_message: Option<String>,
513    voice: Voice,
514    format: AudioFormat,
515}
516
517impl TtsRequestBuilder {
518    /// Create a new TTS request builder.
519    fn new(client: Client, model: impl Into<String>, text: String) -> Self {
520        Self {
521            client,
522            model: model.into(),
523            text,
524            user_message: None,
525            voice: Voice::default(),
526            format: AudioFormat::default(),
527        }
528    }
529
530    /// Set the voice for synthesis.
531    ///
532    /// Available voices:
533    /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
534    /// - `Voice::DefaultEn` - Default English female voice
535    /// - `Voice::DefaultZh` - Default Chinese female voice
536    pub fn voice(mut self, voice: Voice) -> Self {
537        self.voice = voice;
538        self
539    }
540
541    /// Set the audio output format.
542    ///
543    /// Available formats:
544    /// - `AudioFormat::Wav` - WAV format (recommended for high quality)
545    /// - `AudioFormat::Mp3` - MP3 format (smaller file size)
546    /// - `AudioFormat::Pcm` - PCM format (for streaming)
547    pub fn format(mut self, format: AudioFormat) -> Self {
548        self.format = format;
549        self
550    }
551
552    /// Add a user message to influence the synthesis style.
553    ///
554    /// The user message can help adjust the tone and style of the synthesized speech.
555    pub fn user_message(mut self, message: impl Into<String>) -> Self {
556        self.user_message = Some(message.into());
557        self
558    }
559
560    /// Send the TTS request and return the response.
561    ///
562    /// # Returns
563    ///
564    /// A `TtsResponse` containing the synthesized audio data.
565    ///
566    /// # Example
567    ///
568    /// ```rust,no_run
569    /// use mimo_api::{Client, Voice, AudioFormat};
570    ///
571    /// #[tokio::main]
572    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
573    ///     let client = Client::from_env()?;
574    ///     
575    ///     let response = client.tts("Hello, world!")
576    ///         .voice(Voice::DefaultEn)
577    ///         .format(AudioFormat::Mp3)
578    ///         .send()
579    ///         .await?;
580    ///     
581    ///     let audio = response.audio()?;
582    ///     println!("Audio ID: {}", audio.id);
583    ///     println!("Transcript: {:?}", audio.transcript());
584    ///     Ok(())
585    /// }
586    /// ```
587    pub async fn send(self) -> Result<TtsResponse> {
588        let mut messages = Vec::new();
589
590        // Add optional user message
591        if let Some(user_msg) = self.user_message {
592            messages.push(Message::user(MessageContent::Text(user_msg)));
593        }
594
595        // Add assistant message with text to synthesize
596        messages.push(Message::assistant(MessageContent::Text(self.text)));
597
598        // Voice design model does not support audio.voice parameter
599        // Model name is "mimo-v2.5-tts-voicedesign" (no hyphen between voice and design)
600        let is_voice_design = self.model.contains("voicedesign");
601
602        let audio = if is_voice_design {
603            // Voice design model only supports format, not voice
604            Some(Audio {
605                format: Some(self.format),
606                voice: None,
607            })
608        } else {
609            Some(Audio {
610                format: Some(self.format),
611                voice: Some(self.voice),
612            })
613        };
614
615        let request = ChatRequest {
616            model: self.model,
617            messages,
618            audio,
619            ..Default::default()
620        };
621
622        let response = self.client.chat(request).await?;
623        Ok(TtsResponse(response))
624    }
625}
626
627/// Response from a text-to-speech request.
628#[derive(Debug, Clone)]
629pub struct TtsResponse(pub ChatResponse);
630
631impl TtsResponse {
632    /// Get the audio data from the response.
633    ///
634    /// # Errors
635    ///
636    /// Returns an error if no audio data is present in the response.
637    pub fn audio(&self) -> Result<&ResponseAudio> {
638        self.0
639            .choices
640            .first()
641            .and_then(|c| c.message.audio.as_ref())
642            .ok_or_else(|| Error::InvalidResponse("No audio data in response".into()))
643    }
644
645    /// Get the content text from the response.
646    pub fn content(&self) -> Option<&str> {
647        self.0.choices.first().map(|c| c.message.content.as_str())
648    }
649
650    /// Get the underlying chat response.
651    pub fn into_inner(self) -> ChatResponse {
652        self.0
653    }
654}
655
656/// Builder for streaming text-to-speech requests.
657///
658/// This builder provides a fluent API for configuring streaming TTS requests.
659#[derive(Debug, Clone)]
660pub struct StreamingTtsRequestBuilder {
661    client: Client,
662    model: String,
663    text: String,
664    user_message: Option<String>,
665    voice: Voice,
666}
667
668impl StreamingTtsRequestBuilder {
669    /// Create a new streaming TTS request builder.
670    fn new(client: Client, model: impl Into<String>, text: String) -> Self {
671        Self {
672            client,
673            model: model.into(),
674            text,
675            user_message: None,
676            voice: Voice::default(),
677        }
678    }
679
680    /// Set the voice for synthesis.
681    ///
682    /// Available voices:
683    /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
684    /// - `Voice::DefaultEn` - Default English female voice
685    /// - `Voice::DefaultZh` - Default Chinese female voice
686    ///
687    /// # Example
688    ///
689    /// ```rust,no_run
690    /// use mimo_api::{Client, Voice};
691    ///
692    /// #[tokio::main]
693    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
694    ///     let client = Client::from_env()?;
695    ///
696    ///     let stream = client.tts_stream("Hello!")
697    ///         .voice(Voice::DefaultEn)
698    ///         .send()
699    ///         .await?;
700    ///
701    ///     Ok(())
702    /// }
703    /// ```
704    pub fn voice(mut self, voice: Voice) -> Self {
705        self.voice = voice;
706        self
707    }
708
709    /// Add a user message to influence the synthesis style.
710    ///
711    /// The user message can help adjust the tone and style of the synthesized speech.
712    ///
713    /// # Example
714    ///
715    /// ```rust,no_run
716    /// use mimo_api::Client;
717    ///
718    /// #[tokio::main]
719    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
720    ///     let client = Client::from_env()?;
721    ///
722    ///     let stream = client.tts_stream("Hello there!")
723    ///         .user_message("Speak in a friendly, conversational tone")
724    ///         .send()
725    ///         .await?;
726    ///
727    ///     Ok(())
728    /// }
729    /// ```
730    pub fn user_message(mut self, message: impl Into<String>) -> Self {
731        self.user_message = Some(message.into());
732        self
733    }
734
735    /// Send the streaming TTS request and return the response stream.
736    ///
737    /// # Returns
738    ///
739    /// A `StreamingTtsResponse` that yields audio data chunks.
740    ///
741    /// # Example
742    ///
743    /// ```rust,no_run
744    /// use mimo_api::{Client, Voice};
745    /// use futures::StreamExt;
746    /// use tokio::fs::File;
747    /// use tokio::io::AsyncWriteExt;
748    ///
749    /// #[tokio::main]
750    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
751    ///     let client = Client::from_env()?;
752    ///
753    ///     let mut stream = client.tts_stream("Hello, world!")
754    ///         .voice(Voice::DefaultEn)
755    ///         .send()
756    ///         .await?;
757    ///
758    ///     let mut file = File::create("output.pcm").await?;
759    ///     let mut total_bytes = 0;
760    ///
761    ///     while let Some(result) = stream.next().await {
762    ///         let audio_bytes = result?;
763    ///         file.write_all(&audio_bytes).await?;
764    ///         total_bytes += audio_bytes.len();
765    ///     }
766    ///
767    ///     println!("Total bytes: {}", total_bytes);
768    ///     Ok(())
769    /// }
770    /// ```
771    pub async fn send(self) -> Result<StreamingTtsResponse> {
772        let mut messages = Vec::new();
773
774        // Add optional user message
775        if let Some(user_msg) = self.user_message {
776            messages.push(Message::user(MessageContent::Text(user_msg)));
777        }
778
779        // Add assistant message with text to synthesize
780        messages.push(Message::assistant(MessageContent::Text(self.text)));
781
782        let request = ChatRequest {
783            model: self.model,
784            messages,
785            stream: Some(true),
786            audio: Some(Audio {
787                format: Some(AudioFormat::Pcm16), // PCM16 is recommended for streaming
788                voice: Some(self.voice),
789            }),
790            ..Default::default()
791        };
792
793        let stream = self.client.chat_stream(request).await?;
794        Ok(StreamingTtsResponse::new(stream))
795    }
796}
797
798/// Response from a streaming text-to-speech request.
799///
800/// This type wraps the underlying stream and provides convenience methods
801/// for consuming audio data.
802pub struct StreamingTtsResponse {
803    stream: BoxStream<'static, Result<StreamChunk>>,
804    total_bytes: u64,
805    chunk_count: u32,
806}
807
808impl StreamingTtsResponse {
809    /// Create a new streaming TTS response.
810    fn new(stream: BoxStream<'static, Result<StreamChunk>>) -> Self {
811        Self {
812            stream,
813            total_bytes: 0,
814            chunk_count: 0,
815        }
816    }
817
818    /// Collect all audio chunks and return them as a single byte vector.
819    ///
820    /// This is a convenience method for non-streaming use cases where you
821    /// want to wait for all audio data before processing it.
822    ///
823    /// # Example
824    ///
825    /// ```rust,no_run
826    /// use mimo_api::Client;
827    ///
828    /// #[tokio::main]
829    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
830    ///     let client = Client::from_env()?;
831    ///
832    ///     let mut stream = client.tts_stream("Hello, world!").send().await?;
833    ///     let audio_bytes = stream.collect_audio().await?;
834    ///
835    ///     tokio::fs::write("output.pcm", &audio_bytes).await?;
836    ///     println!("Total bytes: {}", audio_bytes.len());
837    ///
838    ///     Ok(())
839    /// }
840    /// ```
841    pub async fn collect_audio(&mut self) -> Result<Vec<u8>> {
842        let mut all_bytes = Vec::new();
843
844        while let Some(chunk) = self.stream.next().await {
845            if let Some(audio_bytes) = self.process_chunk(chunk?)? {
846                all_bytes.extend(audio_bytes);
847            }
848        }
849
850        Ok(all_bytes)
851    }
852
853    /// Save all audio chunks to a file.
854    ///
855    /// This is a convenience method that collects all audio data and writes it to a file.
856    ///
857    /// # Example
858    ///
859    /// ```rust,no_run
860    /// use mimo_api::Client;
861    ///
862    /// #[tokio::main]
863    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
864    ///     let client = Client::from_env()?;
865    ///
866    ///     let mut stream: mimo_api::StreamingTtsResponse = client.tts_stream("Hello, world!").send().await?;
867    ///     stream.save_to_file("output.pcm").await?;
868    ///
869    ///     println!("Audio saved to file");
870    ///
871    ///     Ok(())
872    /// }
873    /// ```
874    pub async fn save_to_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<()> {
875        let mut file = File::create(path).await?;
876
877        while let Some(chunk) = self.stream.next().await {
878            if let Some(audio_bytes) = self.process_chunk(chunk?)? {
879                file.write_all(&audio_bytes).await?;
880            }
881        }
882
883        file.flush().await?;
884        Ok(())
885    }
886
887    /// Process a stream chunk and return audio bytes if present.
888    fn process_chunk(&mut self, chunk: StreamChunk) -> Result<Option<Vec<u8>>> {
889        if !chunk.choices.is_empty()
890            && let Some(audio) = &chunk.choices[0].delta.audio
891        {
892            let bytes = audio.decode_data()?;
893            self.total_bytes += bytes.len() as u64;
894            self.chunk_count += 1;
895            return Ok(Some(bytes));
896        }
897        Ok(None)
898    }
899
900    /// Get the total number of bytes received so far.
901    pub fn total_bytes(&self) -> u64 {
902        self.total_bytes
903    }
904
905    /// Get the number of audio chunks received so far.
906    pub fn chunk_count(&self) -> u32 {
907        self.chunk_count
908    }
909}
910
911impl futures::Stream for StreamingTtsResponse {
912    type Item = Result<Vec<u8>>;
913
914    fn poll_next(
915        mut self: std::pin::Pin<&mut Self>,
916        cx: &mut std::task::Context<'_>,
917    ) -> std::task::Poll<Option<Self::Item>> {
918        // Process chunks until we find one with audio data or the stream ends
919        loop {
920            match std::pin::Pin::new(&mut self.stream).poll_next(cx) {
921                std::task::Poll::Ready(Some(Ok(chunk))) => {
922                    // Check if this is the final chunk with finish_reason
923                    let is_final = chunk
924                        .choices
925                        .first()
926                        .and_then(|c| c.finish_reason.as_ref())
927                        .is_some();
928
929                    match self.process_chunk(chunk) {
930                        Ok(Some(bytes)) => {
931                            // Return audio data from this chunk
932                            return std::task::Poll::Ready(Some(Ok(bytes)));
933                        }
934                        Ok(None) => {
935                            // No audio data in this chunk
936                            if is_final {
937                                // Stream has ended, no more audio data
938                                return std::task::Poll::Ready(None);
939                            }
940                            // Continue to next chunk
941                            continue;
942                        }
943                        Err(e) => return std::task::Poll::Ready(Some(Err(e))),
944                    }
945                }
946                std::task::Poll::Ready(Some(Err(e))) => {
947                    let error_msg = format!("Stream error: {}", e);
948                    return std::task::Poll::Ready(Some(Err(Error::StreamError(error_msg))));
949                }
950                std::task::Poll::Ready(None) => {
951                    // Stream has ended normally
952                    return std::task::Poll::Ready(None);
953                }
954                std::task::Poll::Pending => return std::task::Poll::Pending,
955            }
956        }
957    }
958}