Skip to main content

mimo_api/
client.rs

1//! HTTP client for the MiMo API.
2
3use {
4    crate::{
5        error::{Error, Result},
6        types::*,
7    },
8    eventsource_stream::Eventsource,
9    futures::{StreamExt, stream::BoxStream},
10    reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue},
11    std::env,
12    tokio::{fs::File, io::AsyncWriteExt},
13};
14
/// Default base URL for the MiMo API; `Client::new` starts from this value.
const API_BASE_URL: &str = "https://api.xiaomimimo.com/v1";
/// Name of the environment variable the API key is read from.
const ENV_API_KEY: &str = "XIAOMI_API_KEY";

// Plan-specific base URLs for different regional clusters
/// Plan API base URL for the China cluster (used by `Client::plan_cn`).
const PLAN_BASE_URL_CN: &str = "https://token-plan-cn.xiaomimimo.com/v1";
/// Plan API base URL for the Singapore cluster (used by `Client::plan_sgp`).
const PLAN_BASE_URL_SGP: &str = "https://token-plan-sgp.xiaomimimo.com/v1";
/// Plan API base URL for the Europe cluster (used by `Client::plan_ams`).
const PLAN_BASE_URL_AMS: &str = "https://token-plan-ams.xiaomimimo.com/v1";
22
23/// HTTP client for the MiMo API.
24#[derive(Debug, Clone)]
25pub struct Client {
26    /// The underlying HTTP client.
27    http_client: reqwest::Client,
28    /// The API key for authentication.
29    api_key: String,
30    /// The base URL for the API.
31    base_url: String,
32}
33
34impl Client {
35    /// Create a new client with the given API key.
36    ///
37    /// # Example
38    ///
39    /// ```rust
40    /// use mimo_api::Client;
41    ///
42    /// let client = Client::new("your-api-key");
43    /// ```
44    pub fn new(api_key: impl Into<String>) -> Self {
45        Self {
46            http_client: reqwest::Client::new(),
47            api_key: api_key.into(),
48            base_url: API_BASE_URL.to_string(),
49        }
50    }
51
52    /// Create a new client from the `XIAOMI_API_KEY` environment variable.
53    ///
54    /// # Errors
55    ///
56    /// Returns an error if the `XIAOMI_API_KEY` environment variable is not set.
57    ///
58    /// # Example
59    ///
60    /// ```rust,no_run
61    /// use mimo_api::Client;
62    ///
63    /// // Assuming XIAOMI_API_KEY is set in environment
64    /// let client = Client::from_env()?;
65    /// # Ok::<(), Box<dyn std::error::Error>>(())
66    /// ```
67    pub fn from_env() -> Result<Self> {
68        let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
69        Ok(Self::new(api_key))
70    }
71
72    /// Set a custom base URL for the API.
73    ///
74    /// This is useful for testing or using a custom API endpoint.
75    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
76        self.base_url = base_url.into();
77        self
78    }
79
80    // ========== Plan-specific convenience methods ==========
81
82    /// Create a new client for the Plan API using the **China cluster**.
83    ///
84    /// This uses the base URL: `https://token-plan-cn.xiaomimimo.com/v1`
85    ///
86    /// # Example
87    ///
88    /// ```rust
89    /// use mimo_api::Client;
90    ///
91    /// let client = Client::plan_cn("your-api-key");
92    /// ```
93    pub fn plan_cn(api_key: impl Into<String>) -> Self {
94        Self::new(api_key).with_base_url(PLAN_BASE_URL_CN)
95    }
96
97    /// Create a new client for the Plan API using the **Singapore cluster**.
98    ///
99    /// This uses the base URL: `https://token-plan-sgp.xiaomimimo.com/v1`
100    ///
101    /// # Example
102    ///
103    /// ```rust
104    /// use mimo_api::Client;
105    ///
106    /// let client = Client::plan_sgp("your-api-key");
107    /// ```
108    pub fn plan_sgp(api_key: impl Into<String>) -> Self {
109        Self::new(api_key).with_base_url(PLAN_BASE_URL_SGP)
110    }
111
112    /// Create a new client for the Plan API using the **Europe cluster**.
113    ///
114    /// This uses the base URL: `https://token-plan-ams.xiaomimimo.com/v1`
115    ///
116    /// # Example
117    ///
118    /// ```rust
119    /// use mimo_api::Client;
120    ///
121    /// let client = Client::plan_ams("your-api-key");
122    /// ```
123    pub fn plan_ams(api_key: impl Into<String>) -> Self {
124        Self::new(api_key).with_base_url(PLAN_BASE_URL_AMS)
125    }
126
127    /// Create a new client for the Plan API from environment variable,
128    /// using the **China cluster**.
129    ///
130    /// Uses the `XIAOMI_API_KEY` environment variable.
131    ///
132    /// # Example
133    ///
134    /// ```rust,no_run
135    /// use mimo_api::Client;
136    ///
137    /// let client = Client::plan_cn_from_env()?;
138    /// # Ok::<(), Box<dyn std::error::Error>>(())
139    /// ```
140    pub fn plan_cn_from_env() -> Result<Self> {
141        let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
142        Ok(Self::plan_cn(api_key))
143    }
144
145    /// Create a new client for the Plan API from environment variable,
146    /// using the **Singapore cluster**.
147    ///
148    /// Uses the `XIAOMI_API_KEY` environment variable.
149    ///
150    /// # Example
151    ///
152    /// ```rust,no_run
153    /// use mimo_api::Client;
154    ///
155    /// let client = Client::plan_sgp_from_env()?;
156    /// # Ok::<(), Box<dyn std::error::Error>>(())
157    /// ```
158    pub fn plan_sgp_from_env() -> Result<Self> {
159        let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
160        Ok(Self::plan_sgp(api_key))
161    }
162
163    /// Create a new client for the Plan API from environment variable,
164    /// using the **Europe cluster**.
165    ///
166    /// Uses the `XIAOMI_API_KEY` environment variable.
167    ///
168    /// # Example
169    ///
170    /// ```rust,no_run
171    /// use mimo_api::Client;
172    ///
173    /// let client = Client::plan_ams_from_env()?;
174    /// # Ok::<(), Box<dyn std::error::Error>>(())
175    /// ```
176    pub fn plan_ams_from_env() -> Result<Self> {
177        let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
178        Ok(Self::plan_ams(api_key))
179    }
180
181    /// Build headers for the request.
182    fn build_headers(&self) -> Result<HeaderMap> {
183        let mut headers = HeaderMap::new();
184        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
185        headers.insert(
186            "api-key",
187            HeaderValue::from_str(&self.api_key)
188                .map_err(|_| Error::InvalidParameter("Invalid API key".into()))?,
189        );
190        Ok(headers)
191    }
192
193    /// Send a chat completion request.
194    ///
195    /// # Example
196    ///
197    /// ```rust,no_run
198    /// use mimo_api::{Client, ChatRequest, Message};
199    ///
200    /// #[tokio::main]
201    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
202    ///     let client = Client::from_env()?;
203    ///     let request = ChatRequest::new("mimo-v2-flash")
204    ///         .message(Message::user("Hello!"));
205    ///     let response = client.chat(request).await?;
206    ///     println!("{}", response.choices[0].message.content);
207    ///     Ok(())
208    /// }
209    /// ```
210    pub async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
211        let url = format!("{}/chat/completions", self.base_url);
212        let headers = self.build_headers()?;
213
214        let response = self
215            .http_client
216            .post(&url)
217            .headers(headers)
218            .json(&request)
219            .send()
220            .await?;
221
222        let status = response.status();
223        if !status.is_success() {
224            let error_text = response.text().await.unwrap_or_default();
225            return Err(Error::api_error(status.as_u16(), error_text));
226        }
227
228        response.json().await.map_err(Error::from)
229    }
230
231    /// Send a chat completion request with streaming response.
232    ///
233    /// Returns a stream of `StreamChunk` objects.
234    ///
235    /// # Example
236    ///
237    /// ```rust,no_run
238    /// use mimo_api::{Client, ChatRequest, Message};
239    /// use futures::StreamExt;
240    ///
241    /// #[tokio::main]
242    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
243    ///     let client = Client::from_env()?;
244    ///     let request = ChatRequest::new("mimo-v2-flash")
245    ///         .message(Message::user("Tell me a story."))
246    ///         .stream(true);
247    ///     
248    ///     let mut stream = client.chat_stream(request).await?;
249    ///     while let Some(chunk) = stream.next().await {
250    ///         match chunk {
251    ///             Ok(chunk) => {
252    ///                 if let Some(content) = &chunk.choices[0].delta.content {
253    ///                     print!("{}", content);
254    ///                 }
255    ///             }
256    ///             Err(e) => eprintln!("Error: {}", e),
257    ///         }
258    ///     }
259    ///     Ok(())
260    /// }
261    /// ```
262    pub async fn chat_stream(
263        &self,
264        request: ChatRequest,
265    ) -> Result<BoxStream<'static, Result<StreamChunk>>> {
266        let mut request = request;
267        request.stream = Some(true);
268
269        let url = format!("{}/chat/completions", self.base_url);
270        let headers = self.build_headers()?;
271
272        let response = self
273            .http_client
274            .post(&url)
275            .headers(headers)
276            .json(&request)
277            .send()
278            .await?;
279
280        let status = response.status();
281        if !status.is_success() {
282            let error_text = response.text().await.unwrap_or_default();
283            return Err(Error::api_error(status.as_u16(), error_text));
284        }
285
286        let stream = response
287            .bytes_stream()
288            .eventsource()
289            .filter_map(|event| async move {
290                match event {
291                    Ok(event) => {
292                        if event.data == "[DONE]" {
293                            None
294                        } else {
295                            match serde_json::from_str::<StreamChunk>(&event.data) {
296                                Ok(chunk) => Some(Ok(chunk)),
297                                Err(e) => Some(Err(Error::StreamError(e.to_string()))),
298                            }
299                        }
300                    }
301                    Err(e) => Some(Err(Error::StreamError(e.to_string()))),
302                }
303            })
304            .boxed();
305
306        Ok(stream)
307    }
308
309    /// Create a text-to-speech request builder.
310    ///
311    /// This method creates a builder for synthesizing speech from text using the `mimo-v2-tts` model.
312    ///
313    /// # Arguments
314    ///
315    /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
316    ///
317    /// # Example
318    ///
319    /// ```rust,no_run
320    /// use mimo_api::{Client, Voice};
321    ///
322    /// #[tokio::main]
323    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
324    ///     let client = Client::from_env()?;
325    ///     
326    ///     let response = client.tts("Hello, world!")
327    ///         .voice(Voice::DefaultEn)
328    ///         .send()
329    ///         .await?;
330    ///     
331    ///     let audio = response.audio()?;
332    ///     let audio_bytes = audio.decode_data()?;
333    ///     tokio::fs::write("output.wav", audio_bytes).await?;
334    ///     Ok(())
335    /// }
336    /// ```
337    pub fn tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
338        TtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
339    }
340
341    /// Create a text-to-speech request builder with styled text.
342    ///
343    /// This method allows you to apply style controls to the synthesized speech.
344    ///
345    /// # Example
346    ///
347    /// ```rust,no_run
348    /// use mimo_api::{Client, Voice};
349    ///
350    /// #[tokio::main]
351    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
352    ///     let client = Client::from_env()?;
353    ///
354    ///     // Synthesize speech with "开心" (happy) style
355    ///     let response = client.tts_styled("开心", "明天就是周五了,真开心!")
356    ///         .voice(Voice::DefaultZh)
357    ///         .send()
358    ///         .await?;
359    ///
360    ///     let audio = response.audio()?;
361    ///     let audio_bytes = audio.decode_data()?;
362    ///     tokio::fs::write("output.wav", audio_bytes).await?;
363    ///     Ok(())
364    /// }
365    /// ```
366    pub fn tts_styled(&self, style: &str, text: &str) -> TtsRequestBuilder {
367        TtsRequestBuilder::new(
368            self.clone(),
369            Model::MiMoV2Tts.as_str(),
370            styled_text(style, text),
371        )
372    }
373
374    /// Create a text-to-speech request builder using the MiMo V2.5 TTS model.
375    ///
376    /// This method uses the updated TTS model with more preset voices.
377    ///
378    /// # Arguments
379    ///
380    /// * `text` - The text to synthesize.
381    ///
382    /// # Example
383    ///
384    /// ```rust,no_run
385    /// use mimo_api::{Client, Voice};
386    ///
387    /// #[tokio::main]
388    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
389    ///     let client = Client::from_env()?;
390    ///
391    ///     let response = client.v25_tts("Hello, world!")
392    ///         .voice(Voice::Mia)
393    ///         .send()
394    ///         .await?;
395    ///
396    ///     let audio = response.audio()?;
397    ///     let audio_bytes = audio.decode_data()?;
398    ///     tokio::fs::write("output.wav", audio_bytes).await?;
399    ///     Ok(())
400    /// }
401    /// ```
402    pub fn v25_tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
403        TtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
404    }
405
406    /// Create a TTS request builder with voice design (MiMo V2.5 TTS VoiceDesign).
407    ///
408    /// This method uses text description to design a custom voice.
409    /// The `user_message` is REQUIRED and should contain the voice description.
410    ///
411    /// # Arguments
412    ///
413    /// * `text` - The text to synthesize.
414    ///
415    /// # Example
416    ///
417    /// ```rust,no_run
418    /// use mimo_api::Client;
419    ///
420    /// #[tokio::main]
421    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
422    ///     let client = Client::from_env()?;
423    ///
424    ///     let response = client.v25_tts_voice_design("Hello, world!")
425    ///         .user_message("Give me a young male tone.")
426    ///         .send()
427    ///         .await?;
428    ///
429    ///     let audio = response.audio()?;
430    ///     let audio_bytes = audio.decode_data()?;
431    ///     tokio::fs::write("output.wav", audio_bytes).await?;
432    ///     Ok(())
433    /// }
434    /// ```
435    pub fn v25_tts_voice_design(&self, text: impl Into<String>) -> TtsRequestBuilder {
436        TtsRequestBuilder::new(
437            self.clone(),
438            Model::MiMoV25TtsVoiceDesign.as_str(),
439            text.into(),
440        )
441    }
442
443    /// Create a TTS request builder with voice clone (MiMo V2.5 TTS VoiceClone).
444    ///
445    /// This method uses an audio sample to clone a voice.
446    /// Use `Voice::custom()` or `Voice::from_audio_file()` to set the voice.
447    ///
448    /// # Arguments
449    ///
450    /// * `text` - The text to synthesize.
451    ///
452    /// # Example
453    ///
454    /// ```rust,no_run
455    /// use mimo_api::{Client, Voice};
456    ///
457    /// #[tokio::main]
458    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
459    ///     let client = Client::from_env()?;
460    ///
461    ///     let voice = Voice::from_audio_file("voice_sample.mp3").await?;
462    ///
463    ///     let response = client.v25_tts_voice_clone("Hello, world!")
464    ///         .voice(voice)
465    ///         .send()
466    ///         .await?;
467    ///
468    ///     let audio = response.audio()?;
469    ///     let audio_bytes = audio.decode_data()?;
470    ///     tokio::fs::write("output.wav", audio_bytes).await?;
471    ///     Ok(())
472    /// }
473    /// ```
474    pub fn v25_tts_voice_clone(&self, text: impl Into<String>) -> TtsRequestBuilder {
475        TtsRequestBuilder::new(
476            self.clone(),
477            Model::MiMoV25TtsVoiceClone.as_str(),
478            text.into(),
479        )
480    }
481
482    /// Create a streaming text-to-speech request builder.
483    ///
484    /// This method creates a builder for streaming speech synthesis using the `mimo-v2-tts` model.
485    /// Streaming TTS delivers audio data in real-time chunks.
486    ///
487    /// # Arguments
488    ///
489    /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
490    ///
491    /// # Example
492    ///
493    /// ```rust,no_run
494    /// use mimo_api::{Client, Voice};
495    /// use futures::StreamExt;
496    /// use tokio::fs::File;
497    /// use tokio::io::AsyncWriteExt;
498    ///
499    /// #[tokio::main]
500    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
501    ///     let client = Client::from_env()?;
502    ///
503    ///     let mut stream = client.tts_stream("Hello, world!")
504    ///         .voice(Voice::DefaultEn)
505    ///         .send()
506    ///         .await?;
507    ///
508    ///     let mut file = File::create("output.pcm").await?;
509    ///     let mut total_bytes = 0;
510    ///
511    ///     while let Some(chunk) = stream.next().await {
512    ///         let audio_bytes = chunk?;
513    ///         file.write_all(&audio_bytes).await?;
514    ///         total_bytes += audio_bytes.len();
515    ///     }
516    ///
517    ///     println!("Total bytes: {}", total_bytes);
518    ///     Ok(())
519    /// }
520    /// ```
521    pub fn tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
522        StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
523    }
524
525    /// Create a streaming text-to-speech request builder with styled text.
526    ///
527    /// This method allows you to apply style controls to the streaming synthesized speech.
528    ///
529    /// # Arguments
530    ///
531    /// * `style` - The style to apply (e.g., "开心", "悲伤", "变快", "变慢")
532    /// * `text` - The text to synthesize
533    ///
534    /// # Example
535    ///
536    /// ```rust,no_run
537    /// use mimo_api::{Client, Voice};
538    /// use futures::StreamExt;
539    /// use tokio::fs::File;
540    /// use tokio::io::AsyncWriteExt;
541    ///
542    /// #[tokio::main]
543    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
544    ///     let client = Client::from_env()?;
545    ///
546    ///     // Synthesize speech with "开心" (happy) style
547    ///     let mut stream = client.tts_styled_stream("开心", "明天就是周五了,真开心!")
548    ///         .voice(Voice::DefaultZh)
549    ///         .send()
550    ///         .await?;
551    ///
552    ///     let mut file = File::create("output.pcm").await?;
553    ///     let mut total_bytes = 0;
554    ///
555    ///     while let Some(chunk) = stream.next().await {
556    ///         let audio_bytes = chunk?;
557    ///         file.write_all(&audio_bytes).await?;
558    ///         total_bytes += audio_bytes.len();
559    ///     }
560    ///
561    ///     println!("Total bytes: {}", total_bytes);
562    ///     Ok(())
563    /// }
564    /// ```
565    pub fn tts_styled_stream(&self, style: &str, text: &str) -> StreamingTtsRequestBuilder {
566        StreamingTtsRequestBuilder::new(
567            self.clone(),
568            Model::MiMoV2Tts.as_str(),
569            styled_text(style, text),
570        )
571    }
572
573    /// Create a streaming TTS request builder using MiMo V2.5 TTS model.
574    ///
575    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
576    /// The streaming API currently returns results in compatibility mode.
577    pub fn v25_tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
578        StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
579    }
580
581    /// Create a streaming TTS request builder with voice design.
582    ///
583    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
584    pub fn v25_tts_voice_design_stream(
585        &self,
586        text: impl Into<String>,
587    ) -> StreamingTtsRequestBuilder {
588        StreamingTtsRequestBuilder::new(
589            self.clone(),
590            Model::MiMoV25TtsVoiceDesign.as_str(),
591            text.into(),
592        )
593    }
594
595    /// Create a streaming TTS request builder with voice clone.
596    ///
597    /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
598    pub fn v25_tts_voice_clone_stream(
599        &self,
600        text: impl Into<String>,
601    ) -> StreamingTtsRequestBuilder {
602        StreamingTtsRequestBuilder::new(
603            self.clone(),
604            Model::MiMoV25TtsVoiceClone.as_str(),
605            text.into(),
606        )
607    }
608}
609
610/// Builder for text-to-speech requests.
611///
612/// This builder provides a fluent API for configuring TTS requests.
613#[derive(Debug, Clone)]
614pub struct TtsRequestBuilder {
615    client: Client,
616    model: String,
617    text: String,
618    user_message: Option<String>,
619    voice: Voice,
620    format: AudioFormat,
621}
622
623impl TtsRequestBuilder {
624    /// Create a new TTS request builder.
625    fn new(client: Client, model: impl Into<String>, text: String) -> Self {
626        Self {
627            client,
628            model: model.into(),
629            text,
630            user_message: None,
631            voice: Voice::default(),
632            format: AudioFormat::default(),
633        }
634    }
635
636    /// Set the voice for synthesis.
637    ///
638    /// Available voices:
639    /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
640    /// - `Voice::DefaultEn` - Default English female voice
641    /// - `Voice::DefaultZh` - Default Chinese female voice
642    pub fn voice(mut self, voice: Voice) -> Self {
643        self.voice = voice;
644        self
645    }
646
647    /// Set the audio output format.
648    ///
649    /// Available formats:
650    /// - `AudioFormat::Wav` - WAV format (recommended for high quality)
651    /// - `AudioFormat::Mp3` - MP3 format (smaller file size)
652    /// - `AudioFormat::Pcm` - PCM format (for streaming)
653    pub fn format(mut self, format: AudioFormat) -> Self {
654        self.format = format;
655        self
656    }
657
658    /// Add a user message to influence the synthesis style.
659    ///
660    /// The user message can help adjust the tone and style of the synthesized speech.
661    pub fn user_message(mut self, message: impl Into<String>) -> Self {
662        self.user_message = Some(message.into());
663        self
664    }
665
666    /// Send the TTS request and return the response.
667    ///
668    /// # Returns
669    ///
670    /// A `TtsResponse` containing the synthesized audio data.
671    ///
672    /// # Example
673    ///
674    /// ```rust,no_run
675    /// use mimo_api::{Client, Voice, AudioFormat};
676    ///
677    /// #[tokio::main]
678    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
679    ///     let client = Client::from_env()?;
680    ///     
681    ///     let response = client.tts("Hello, world!")
682    ///         .voice(Voice::DefaultEn)
683    ///         .format(AudioFormat::Mp3)
684    ///         .send()
685    ///         .await?;
686    ///     
687    ///     let audio = response.audio()?;
688    ///     println!("Audio ID: {}", audio.id);
689    ///     println!("Transcript: {:?}", audio.transcript());
690    ///     Ok(())
691    /// }
692    /// ```
693    pub async fn send(self) -> Result<TtsResponse> {
694        let mut messages = Vec::new();
695
696        // Add optional user message
697        if let Some(user_msg) = self.user_message {
698            messages.push(Message::user(MessageContent::Text(user_msg)));
699        }
700
701        // Add assistant message with text to synthesize
702        messages.push(Message::assistant(MessageContent::Text(self.text)));
703
704        // Voice design model does not support audio.voice parameter
705        // Model name is "mimo-v2.5-tts-voicedesign" (no hyphen between voice and design)
706        let is_voice_design = self.model.contains("voicedesign");
707
708        let audio = if is_voice_design {
709            // Voice design model only supports format, not voice
710            Some(Audio {
711                format: Some(self.format),
712                voice: None,
713            })
714        } else {
715            Some(Audio {
716                format: Some(self.format),
717                voice: Some(self.voice),
718            })
719        };
720
721        let request = ChatRequest {
722            model: self.model,
723            messages,
724            audio,
725            ..Default::default()
726        };
727
728        let response = self.client.chat(request).await?;
729        Ok(TtsResponse(response))
730    }
731}
732
733/// Response from a text-to-speech request.
734#[derive(Debug, Clone)]
735pub struct TtsResponse(pub ChatResponse);
736
737impl TtsResponse {
738    /// Get the audio data from the response.
739    ///
740    /// # Errors
741    ///
742    /// Returns an error if no audio data is present in the response.
743    pub fn audio(&self) -> Result<&ResponseAudio> {
744        self.0
745            .choices
746            .first()
747            .and_then(|c| c.message.audio.as_ref())
748            .ok_or_else(|| Error::InvalidResponse("No audio data in response".into()))
749    }
750
751    /// Get the content text from the response.
752    pub fn content(&self) -> Option<&str> {
753        self.0.choices.first().map(|c| c.message.content.as_str())
754    }
755
756    /// Get the underlying chat response.
757    pub fn into_inner(self) -> ChatResponse {
758        self.0
759    }
760}
761
762/// Builder for streaming text-to-speech requests.
763///
764/// This builder provides a fluent API for configuring streaming TTS requests.
765#[derive(Debug, Clone)]
766pub struct StreamingTtsRequestBuilder {
767    client: Client,
768    model: String,
769    text: String,
770    user_message: Option<String>,
771    voice: Voice,
772}
773
774impl StreamingTtsRequestBuilder {
775    /// Create a new streaming TTS request builder.
776    fn new(client: Client, model: impl Into<String>, text: String) -> Self {
777        Self {
778            client,
779            model: model.into(),
780            text,
781            user_message: None,
782            voice: Voice::default(),
783        }
784    }
785
786    /// Set the voice for synthesis.
787    ///
788    /// Available voices:
789    /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
790    /// - `Voice::DefaultEn` - Default English female voice
791    /// - `Voice::DefaultZh` - Default Chinese female voice
792    ///
793    /// # Example
794    ///
795    /// ```rust,no_run
796    /// use mimo_api::{Client, Voice};
797    ///
798    /// #[tokio::main]
799    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
800    ///     let client = Client::from_env()?;
801    ///
802    ///     let stream = client.tts_stream("Hello!")
803    ///         .voice(Voice::DefaultEn)
804    ///         .send()
805    ///         .await?;
806    ///
807    ///     Ok(())
808    /// }
809    /// ```
810    pub fn voice(mut self, voice: Voice) -> Self {
811        self.voice = voice;
812        self
813    }
814
815    /// Add a user message to influence the synthesis style.
816    ///
817    /// The user message can help adjust the tone and style of the synthesized speech.
818    ///
819    /// # Example
820    ///
821    /// ```rust,no_run
822    /// use mimo_api::Client;
823    ///
824    /// #[tokio::main]
825    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
826    ///     let client = Client::from_env()?;
827    ///
828    ///     let stream = client.tts_stream("Hello there!")
829    ///         .user_message("Speak in a friendly, conversational tone")
830    ///         .send()
831    ///         .await?;
832    ///
833    ///     Ok(())
834    /// }
835    /// ```
836    pub fn user_message(mut self, message: impl Into<String>) -> Self {
837        self.user_message = Some(message.into());
838        self
839    }
840
841    /// Send the streaming TTS request and return the response stream.
842    ///
843    /// # Returns
844    ///
845    /// A `StreamingTtsResponse` that yields audio data chunks.
846    ///
847    /// # Example
848    ///
849    /// ```rust,no_run
850    /// use mimo_api::{Client, Voice};
851    /// use futures::StreamExt;
852    /// use tokio::fs::File;
853    /// use tokio::io::AsyncWriteExt;
854    ///
855    /// #[tokio::main]
856    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
857    ///     let client = Client::from_env()?;
858    ///
859    ///     let mut stream = client.tts_stream("Hello, world!")
860    ///         .voice(Voice::DefaultEn)
861    ///         .send()
862    ///         .await?;
863    ///
864    ///     let mut file = File::create("output.pcm").await?;
865    ///     let mut total_bytes = 0;
866    ///
867    ///     while let Some(result) = stream.next().await {
868    ///         let audio_bytes = result?;
869    ///         file.write_all(&audio_bytes).await?;
870    ///         total_bytes += audio_bytes.len();
871    ///     }
872    ///
873    ///     println!("Total bytes: {}", total_bytes);
874    ///     Ok(())
875    /// }
876    /// ```
877    pub async fn send(self) -> Result<StreamingTtsResponse> {
878        let mut messages = Vec::new();
879
880        // Add optional user message
881        if let Some(user_msg) = self.user_message {
882            messages.push(Message::user(MessageContent::Text(user_msg)));
883        }
884
885        // Add assistant message with text to synthesize
886        messages.push(Message::assistant(MessageContent::Text(self.text)));
887
888        let request = ChatRequest {
889            model: self.model,
890            messages,
891            stream: Some(true),
892            audio: Some(Audio {
893                format: Some(AudioFormat::Pcm16), // PCM16 is recommended for streaming
894                voice: Some(self.voice),
895            }),
896            ..Default::default()
897        };
898
899        let stream = self.client.chat_stream(request).await?;
900        Ok(StreamingTtsResponse::new(stream))
901    }
902}
903
/// Response from a streaming text-to-speech request.
///
/// This type wraps the underlying stream and provides convenience methods
/// for consuming audio data.
pub struct StreamingTtsResponse {
    /// Underlying stream of chat completion chunks.
    stream: BoxStream<'static, Result<StreamChunk>>,
    // NOTE(review): presumably a running count of audio bytes seen so far;
    // starts at 0 — confirm against the code that updates it.
    total_bytes: u64,
    // NOTE(review): presumably the number of chunks processed so far;
    // starts at 0 — confirm against the code that updates it.
    chunk_count: u32,
}
913
914impl StreamingTtsResponse {
915    /// Create a new streaming TTS response.
916    fn new(stream: BoxStream<'static, Result<StreamChunk>>) -> Self {
917        Self {
918            stream,
919            total_bytes: 0,
920            chunk_count: 0,
921        }
922    }
923
924    /// Collect all audio chunks and return them as a single byte vector.
925    ///
926    /// This is a convenience method for non-streaming use cases where you
927    /// want to wait for all audio data before processing it.
928    ///
929    /// # Example
930    ///
931    /// ```rust,no_run
932    /// use mimo_api::Client;
933    ///
934    /// #[tokio::main]
935    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
936    ///     let client = Client::from_env()?;
937    ///
938    ///     let mut stream = client.tts_stream("Hello, world!").send().await?;
939    ///     let audio_bytes = stream.collect_audio().await?;
940    ///
941    ///     tokio::fs::write("output.pcm", &audio_bytes).await?;
942    ///     println!("Total bytes: {}", audio_bytes.len());
943    ///
944    ///     Ok(())
945    /// }
946    /// ```
947    pub async fn collect_audio(&mut self) -> Result<Vec<u8>> {
948        let mut all_bytes = Vec::new();
949
950        while let Some(chunk) = self.stream.next().await {
951            if let Some(audio_bytes) = self.process_chunk(chunk?)? {
952                all_bytes.extend(audio_bytes);
953            }
954        }
955
956        Ok(all_bytes)
957    }
958
959    /// Save all audio chunks to a file.
960    ///
961    /// This is a convenience method that collects all audio data and writes it to a file.
962    ///
963    /// # Example
964    ///
965    /// ```rust,no_run
966    /// use mimo_api::Client;
967    ///
968    /// #[tokio::main]
969    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
970    ///     let client = Client::from_env()?;
971    ///
972    ///     let mut stream: mimo_api::StreamingTtsResponse = client.tts_stream("Hello, world!").send().await?;
973    ///     stream.save_to_file("output.pcm").await?;
974    ///
975    ///     println!("Audio saved to file");
976    ///
977    ///     Ok(())
978    /// }
979    /// ```
980    pub async fn save_to_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<()> {
981        let mut file = File::create(path).await?;
982
983        while let Some(chunk) = self.stream.next().await {
984            if let Some(audio_bytes) = self.process_chunk(chunk?)? {
985                file.write_all(&audio_bytes).await?;
986            }
987        }
988
989        file.flush().await?;
990        Ok(())
991    }
992
993    /// Process a stream chunk and return audio bytes if present.
994    fn process_chunk(&mut self, chunk: StreamChunk) -> Result<Option<Vec<u8>>> {
995        if !chunk.choices.is_empty()
996            && let Some(audio) = &chunk.choices[0].delta.audio
997        {
998            let bytes = audio.decode_data()?;
999            self.total_bytes += bytes.len() as u64;
1000            self.chunk_count += 1;
1001            return Ok(Some(bytes));
1002        }
1003        Ok(None)
1004    }
1005
1006    /// Get the total number of bytes received so far.
1007    pub fn total_bytes(&self) -> u64 {
1008        self.total_bytes
1009    }
1010
1011    /// Get the number of audio chunks received so far.
1012    pub fn chunk_count(&self) -> u32 {
1013        self.chunk_count
1014    }
1015}
1016
1017impl futures::Stream for StreamingTtsResponse {
1018    type Item = Result<Vec<u8>>;
1019
1020    fn poll_next(
1021        mut self: std::pin::Pin<&mut Self>,
1022        cx: &mut std::task::Context<'_>,
1023    ) -> std::task::Poll<Option<Self::Item>> {
1024        // Process chunks until we find one with audio data or the stream ends
1025        loop {
1026            match std::pin::Pin::new(&mut self.stream).poll_next(cx) {
1027                std::task::Poll::Ready(Some(Ok(chunk))) => {
1028                    // Check if this is the final chunk with finish_reason
1029                    let is_final = chunk
1030                        .choices
1031                        .first()
1032                        .and_then(|c| c.finish_reason.as_ref())
1033                        .is_some();
1034
1035                    match self.process_chunk(chunk) {
1036                        Ok(Some(bytes)) => {
1037                            // Return audio data from this chunk
1038                            return std::task::Poll::Ready(Some(Ok(bytes)));
1039                        }
1040                        Ok(None) => {
1041                            // No audio data in this chunk
1042                            if is_final {
1043                                // Stream has ended, no more audio data
1044                                return std::task::Poll::Ready(None);
1045                            }
1046                            // Continue to next chunk
1047                            continue;
1048                        }
1049                        Err(e) => return std::task::Poll::Ready(Some(Err(e))),
1050                    }
1051                }
1052                std::task::Poll::Ready(Some(Err(e))) => {
1053                    let error_msg = format!("Stream error: {}", e);
1054                    return std::task::Poll::Ready(Some(Err(Error::StreamError(error_msg))));
1055                }
1056                std::task::Poll::Ready(None) => {
1057                    // Stream has ended normally
1058                    return std::task::Poll::Ready(None);
1059                }
1060                std::task::Poll::Pending => return std::task::Poll::Pending,
1061            }
1062        }
1063    }
1064}