mimo_api/client.rs
1//! HTTP client for the MiMo API.
2
3use {
4 crate::{
5 error::{Error, Result},
6 types::*,
7 },
8 eventsource_stream::Eventsource,
9 futures::{StreamExt, stream::BoxStream},
10 reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue},
11 std::env,
12 tokio::{fs::File, io::AsyncWriteExt},
13};
14
/// Name of the environment variable the `*_from_env` constructors read the API key from.
const ENV_API_KEY: &str = "XIAOMI_API_KEY";

/// Base URL that `Client::new` starts with.
const API_BASE_URL: &str = "https://api.xiaomimimo.com/v1";

// Plan API base URLs, one per regional cluster.
/// Plan API base URL for the Europe (AMS) cluster.
const PLAN_BASE_URL_AMS: &str = "https://token-plan-ams.xiaomimimo.com/v1";
/// Plan API base URL for the China (CN) cluster.
const PLAN_BASE_URL_CN: &str = "https://token-plan-cn.xiaomimimo.com/v1";
/// Plan API base URL for the Singapore (SGP) cluster.
const PLAN_BASE_URL_SGP: &str = "https://token-plan-sgp.xiaomimimo.com/v1";
22
23/// HTTP client for the MiMo API.
24#[derive(Debug, Clone)]
25pub struct Client {
26 /// The underlying HTTP client.
27 http_client: reqwest::Client,
28 /// The API key for authentication.
29 api_key: String,
30 /// The base URL for the API.
31 base_url: String,
32}
33
34impl Client {
35 /// Create a new client with the given API key.
36 ///
37 /// # Example
38 ///
39 /// ```rust
40 /// use mimo_api::Client;
41 ///
42 /// let client = Client::new("your-api-key");
43 /// ```
44 pub fn new(api_key: impl Into<String>) -> Self {
45 Self {
46 http_client: reqwest::Client::new(),
47 api_key: api_key.into(),
48 base_url: API_BASE_URL.to_string(),
49 }
50 }
51
52 /// Create a new client from the `XIAOMI_API_KEY` environment variable.
53 ///
54 /// # Errors
55 ///
56 /// Returns an error if the `XIAOMI_API_KEY` environment variable is not set.
57 ///
58 /// # Example
59 ///
60 /// ```rust,no_run
61 /// use mimo_api::Client;
62 ///
63 /// // Assuming XIAOMI_API_KEY is set in environment
64 /// let client = Client::from_env()?;
65 /// # Ok::<(), Box<dyn std::error::Error>>(())
66 /// ```
67 pub fn from_env() -> Result<Self> {
68 let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
69 Ok(Self::new(api_key))
70 }
71
72 /// Set a custom base URL for the API.
73 ///
74 /// This is useful for testing or using a custom API endpoint.
75 pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
76 self.base_url = base_url.into();
77 self
78 }
79
80 // ========== Plan-specific convenience methods ==========
81
82 /// Create a new client for the Plan API using the **China cluster**.
83 ///
84 /// This uses the base URL: `https://token-plan-cn.xiaomimimo.com/v1`
85 ///
86 /// # Example
87 ///
88 /// ```rust
89 /// use mimo_api::Client;
90 ///
91 /// let client = Client::plan_cn("your-api-key");
92 /// ```
93 pub fn plan_cn(api_key: impl Into<String>) -> Self {
94 Self::new(api_key).with_base_url(PLAN_BASE_URL_CN)
95 }
96
97 /// Create a new client for the Plan API using the **Singapore cluster**.
98 ///
99 /// This uses the base URL: `https://token-plan-sgp.xiaomimimo.com/v1`
100 ///
101 /// # Example
102 ///
103 /// ```rust
104 /// use mimo_api::Client;
105 ///
106 /// let client = Client::plan_sgp("your-api-key");
107 /// ```
108 pub fn plan_sgp(api_key: impl Into<String>) -> Self {
109 Self::new(api_key).with_base_url(PLAN_BASE_URL_SGP)
110 }
111
112 /// Create a new client for the Plan API using the **Europe cluster**.
113 ///
114 /// This uses the base URL: `https://token-plan-ams.xiaomimimo.com/v1`
115 ///
116 /// # Example
117 ///
118 /// ```rust
119 /// use mimo_api::Client;
120 ///
121 /// let client = Client::plan_ams("your-api-key");
122 /// ```
123 pub fn plan_ams(api_key: impl Into<String>) -> Self {
124 Self::new(api_key).with_base_url(PLAN_BASE_URL_AMS)
125 }
126
127 /// Create a new client for the Plan API from environment variable,
128 /// using the **China cluster**.
129 ///
130 /// Uses the `XIAOMI_API_KEY` environment variable.
131 ///
132 /// # Example
133 ///
134 /// ```rust,no_run
135 /// use mimo_api::Client;
136 ///
137 /// let client = Client::plan_cn_from_env()?;
138 /// # Ok::<(), Box<dyn std::error::Error>>(())
139 /// ```
140 pub fn plan_cn_from_env() -> Result<Self> {
141 let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
142 Ok(Self::plan_cn(api_key))
143 }
144
145 /// Create a new client for the Plan API from environment variable,
146 /// using the **Singapore cluster**.
147 ///
148 /// Uses the `XIAOMI_API_KEY` environment variable.
149 ///
150 /// # Example
151 ///
152 /// ```rust,no_run
153 /// use mimo_api::Client;
154 ///
155 /// let client = Client::plan_sgp_from_env()?;
156 /// # Ok::<(), Box<dyn std::error::Error>>(())
157 /// ```
158 pub fn plan_sgp_from_env() -> Result<Self> {
159 let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
160 Ok(Self::plan_sgp(api_key))
161 }
162
163 /// Create a new client for the Plan API from environment variable,
164 /// using the **Europe cluster**.
165 ///
166 /// Uses the `XIAOMI_API_KEY` environment variable.
167 ///
168 /// # Example
169 ///
170 /// ```rust,no_run
171 /// use mimo_api::Client;
172 ///
173 /// let client = Client::plan_ams_from_env()?;
174 /// # Ok::<(), Box<dyn std::error::Error>>(())
175 /// ```
176 pub fn plan_ams_from_env() -> Result<Self> {
177 let api_key = env::var(ENV_API_KEY).map_err(|_| Error::MissingApiKey)?;
178 Ok(Self::plan_ams(api_key))
179 }
180
181 /// Build headers for the request.
182 fn build_headers(&self) -> Result<HeaderMap> {
183 let mut headers = HeaderMap::new();
184 headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
185 headers.insert(
186 "api-key",
187 HeaderValue::from_str(&self.api_key)
188 .map_err(|_| Error::InvalidParameter("Invalid API key".into()))?,
189 );
190 Ok(headers)
191 }
192
193 /// Send a chat completion request.
194 ///
195 /// # Example
196 ///
197 /// ```rust,no_run
198 /// use mimo_api::{Client, ChatRequest, Message};
199 ///
200 /// #[tokio::main]
201 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
202 /// let client = Client::from_env()?;
203 /// let request = ChatRequest::new("mimo-v2-flash")
204 /// .message(Message::user("Hello!"));
205 /// let response = client.chat(request).await?;
206 /// println!("{}", response.choices[0].message.content);
207 /// Ok(())
208 /// }
209 /// ```
210 pub async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
211 let url = format!("{}/chat/completions", self.base_url);
212 let headers = self.build_headers()?;
213
214 let response = self
215 .http_client
216 .post(&url)
217 .headers(headers)
218 .json(&request)
219 .send()
220 .await?;
221
222 let status = response.status();
223 if !status.is_success() {
224 let error_text = response.text().await.unwrap_or_default();
225 return Err(Error::api_error(status.as_u16(), error_text));
226 }
227
228 response.json().await.map_err(Error::from)
229 }
230
231 /// Send a chat completion request with streaming response.
232 ///
233 /// Returns a stream of `StreamChunk` objects.
234 ///
235 /// # Example
236 ///
237 /// ```rust,no_run
238 /// use mimo_api::{Client, ChatRequest, Message};
239 /// use futures::StreamExt;
240 ///
241 /// #[tokio::main]
242 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
243 /// let client = Client::from_env()?;
244 /// let request = ChatRequest::new("mimo-v2-flash")
245 /// .message(Message::user("Tell me a story."))
246 /// .stream(true);
247 ///
248 /// let mut stream = client.chat_stream(request).await?;
249 /// while let Some(chunk) = stream.next().await {
250 /// match chunk {
251 /// Ok(chunk) => {
252 /// if let Some(content) = &chunk.choices[0].delta.content {
253 /// print!("{}", content);
254 /// }
255 /// }
256 /// Err(e) => eprintln!("Error: {}", e),
257 /// }
258 /// }
259 /// Ok(())
260 /// }
261 /// ```
262 pub async fn chat_stream(
263 &self,
264 request: ChatRequest,
265 ) -> Result<BoxStream<'static, Result<StreamChunk>>> {
266 let mut request = request;
267 request.stream = Some(true);
268
269 let url = format!("{}/chat/completions", self.base_url);
270 let headers = self.build_headers()?;
271
272 let response = self
273 .http_client
274 .post(&url)
275 .headers(headers)
276 .json(&request)
277 .send()
278 .await?;
279
280 let status = response.status();
281 if !status.is_success() {
282 let error_text = response.text().await.unwrap_or_default();
283 return Err(Error::api_error(status.as_u16(), error_text));
284 }
285
286 let stream = response
287 .bytes_stream()
288 .eventsource()
289 .filter_map(|event| async move {
290 match event {
291 Ok(event) => {
292 if event.data == "[DONE]" {
293 None
294 } else {
295 match serde_json::from_str::<StreamChunk>(&event.data) {
296 Ok(chunk) => Some(Ok(chunk)),
297 Err(e) => Some(Err(Error::StreamError(e.to_string()))),
298 }
299 }
300 }
301 Err(e) => Some(Err(Error::StreamError(e.to_string()))),
302 }
303 })
304 .boxed();
305
306 Ok(stream)
307 }
308
309 /// Create a text-to-speech request builder.
310 ///
311 /// This method creates a builder for synthesizing speech from text using the `mimo-v2-tts` model.
312 ///
313 /// # Arguments
314 ///
315 /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
316 ///
317 /// # Example
318 ///
319 /// ```rust,no_run
320 /// use mimo_api::{Client, Voice};
321 ///
322 /// #[tokio::main]
323 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
324 /// let client = Client::from_env()?;
325 ///
326 /// let response = client.tts("Hello, world!")
327 /// .voice(Voice::DefaultEn)
328 /// .send()
329 /// .await?;
330 ///
331 /// let audio = response.audio()?;
332 /// let audio_bytes = audio.decode_data()?;
333 /// tokio::fs::write("output.wav", audio_bytes).await?;
334 /// Ok(())
335 /// }
336 /// ```
337 pub fn tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
338 TtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
339 }
340
341 /// Create a text-to-speech request builder with styled text.
342 ///
343 /// This method allows you to apply style controls to the synthesized speech.
344 ///
345 /// # Example
346 ///
347 /// ```rust,no_run
348 /// use mimo_api::{Client, Voice};
349 ///
350 /// #[tokio::main]
351 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
352 /// let client = Client::from_env()?;
353 ///
354 /// // Synthesize speech with "开心" (happy) style
355 /// let response = client.tts_styled("开心", "明天就是周五了,真开心!")
356 /// .voice(Voice::DefaultZh)
357 /// .send()
358 /// .await?;
359 ///
360 /// let audio = response.audio()?;
361 /// let audio_bytes = audio.decode_data()?;
362 /// tokio::fs::write("output.wav", audio_bytes).await?;
363 /// Ok(())
364 /// }
365 /// ```
366 pub fn tts_styled(&self, style: &str, text: &str) -> TtsRequestBuilder {
367 TtsRequestBuilder::new(
368 self.clone(),
369 Model::MiMoV2Tts.as_str(),
370 styled_text(style, text),
371 )
372 }
373
374 /// Create a text-to-speech request builder using the MiMo V2.5 TTS model.
375 ///
376 /// This method uses the updated TTS model with more preset voices.
377 ///
378 /// # Arguments
379 ///
380 /// * `text` - The text to synthesize.
381 ///
382 /// # Example
383 ///
384 /// ```rust,no_run
385 /// use mimo_api::{Client, Voice};
386 ///
387 /// #[tokio::main]
388 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
389 /// let client = Client::from_env()?;
390 ///
391 /// let response = client.v25_tts("Hello, world!")
392 /// .voice(Voice::Mia)
393 /// .send()
394 /// .await?;
395 ///
396 /// let audio = response.audio()?;
397 /// let audio_bytes = audio.decode_data()?;
398 /// tokio::fs::write("output.wav", audio_bytes).await?;
399 /// Ok(())
400 /// }
401 /// ```
402 pub fn v25_tts(&self, text: impl Into<String>) -> TtsRequestBuilder {
403 TtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
404 }
405
406 /// Create a TTS request builder with voice design (MiMo V2.5 TTS VoiceDesign).
407 ///
408 /// This method uses text description to design a custom voice.
409 /// The `user_message` is REQUIRED and should contain the voice description.
410 ///
411 /// # Arguments
412 ///
413 /// * `text` - The text to synthesize.
414 ///
415 /// # Example
416 ///
417 /// ```rust,no_run
418 /// use mimo_api::Client;
419 ///
420 /// #[tokio::main]
421 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
422 /// let client = Client::from_env()?;
423 ///
424 /// let response = client.v25_tts_voice_design("Hello, world!")
425 /// .user_message("Give me a young male tone.")
426 /// .send()
427 /// .await?;
428 ///
429 /// let audio = response.audio()?;
430 /// let audio_bytes = audio.decode_data()?;
431 /// tokio::fs::write("output.wav", audio_bytes).await?;
432 /// Ok(())
433 /// }
434 /// ```
435 pub fn v25_tts_voice_design(&self, text: impl Into<String>) -> TtsRequestBuilder {
436 TtsRequestBuilder::new(
437 self.clone(),
438 Model::MiMoV25TtsVoiceDesign.as_str(),
439 text.into(),
440 )
441 }
442
443 /// Create a TTS request builder with voice clone (MiMo V2.5 TTS VoiceClone).
444 ///
445 /// This method uses an audio sample to clone a voice.
446 /// Use `Voice::custom()` or `Voice::from_audio_file()` to set the voice.
447 ///
448 /// # Arguments
449 ///
450 /// * `text` - The text to synthesize.
451 ///
452 /// # Example
453 ///
454 /// ```rust,no_run
455 /// use mimo_api::{Client, Voice};
456 ///
457 /// #[tokio::main]
458 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
459 /// let client = Client::from_env()?;
460 ///
461 /// let voice = Voice::from_audio_file("voice_sample.mp3").await?;
462 ///
463 /// let response = client.v25_tts_voice_clone("Hello, world!")
464 /// .voice(voice)
465 /// .send()
466 /// .await?;
467 ///
468 /// let audio = response.audio()?;
469 /// let audio_bytes = audio.decode_data()?;
470 /// tokio::fs::write("output.wav", audio_bytes).await?;
471 /// Ok(())
472 /// }
473 /// ```
474 pub fn v25_tts_voice_clone(&self, text: impl Into<String>) -> TtsRequestBuilder {
475 TtsRequestBuilder::new(
476 self.clone(),
477 Model::MiMoV25TtsVoiceClone.as_str(),
478 text.into(),
479 )
480 }
481
482 /// Create a streaming text-to-speech request builder.
483 ///
484 /// This method creates a builder for streaming speech synthesis using the `mimo-v2-tts` model.
485 /// Streaming TTS delivers audio data in real-time chunks.
486 ///
487 /// # Arguments
488 ///
489 /// * `text` - The text to synthesize. This text will be placed in an `assistant` message.
490 ///
491 /// # Example
492 ///
493 /// ```rust,no_run
494 /// use mimo_api::{Client, Voice};
495 /// use futures::StreamExt;
496 /// use tokio::fs::File;
497 /// use tokio::io::AsyncWriteExt;
498 ///
499 /// #[tokio::main]
500 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
501 /// let client = Client::from_env()?;
502 ///
503 /// let mut stream = client.tts_stream("Hello, world!")
504 /// .voice(Voice::DefaultEn)
505 /// .send()
506 /// .await?;
507 ///
508 /// let mut file = File::create("output.pcm").await?;
509 /// let mut total_bytes = 0;
510 ///
511 /// while let Some(chunk) = stream.next().await {
512 /// let audio_bytes = chunk?;
513 /// file.write_all(&audio_bytes).await?;
514 /// total_bytes += audio_bytes.len();
515 /// }
516 ///
517 /// println!("Total bytes: {}", total_bytes);
518 /// Ok(())
519 /// }
520 /// ```
521 pub fn tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
522 StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV2Tts.as_str(), text.into())
523 }
524
525 /// Create a streaming text-to-speech request builder with styled text.
526 ///
527 /// This method allows you to apply style controls to the streaming synthesized speech.
528 ///
529 /// # Arguments
530 ///
531 /// * `style` - The style to apply (e.g., "开心", "悲伤", "变快", "变慢")
532 /// * `text` - The text to synthesize
533 ///
534 /// # Example
535 ///
536 /// ```rust,no_run
537 /// use mimo_api::{Client, Voice};
538 /// use futures::StreamExt;
539 /// use tokio::fs::File;
540 /// use tokio::io::AsyncWriteExt;
541 ///
542 /// #[tokio::main]
543 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
544 /// let client = Client::from_env()?;
545 ///
546 /// // Synthesize speech with "开心" (happy) style
547 /// let mut stream = client.tts_styled_stream("开心", "明天就是周五了,真开心!")
548 /// .voice(Voice::DefaultZh)
549 /// .send()
550 /// .await?;
551 ///
552 /// let mut file = File::create("output.pcm").await?;
553 /// let mut total_bytes = 0;
554 ///
555 /// while let Some(chunk) = stream.next().await {
556 /// let audio_bytes = chunk?;
557 /// file.write_all(&audio_bytes).await?;
558 /// total_bytes += audio_bytes.len();
559 /// }
560 ///
561 /// println!("Total bytes: {}", total_bytes);
562 /// Ok(())
563 /// }
564 /// ```
565 pub fn tts_styled_stream(&self, style: &str, text: &str) -> StreamingTtsRequestBuilder {
566 StreamingTtsRequestBuilder::new(
567 self.clone(),
568 Model::MiMoV2Tts.as_str(),
569 styled_text(style, text),
570 )
571 }
572
573 /// Create a streaming TTS request builder using MiMo V2.5 TTS model.
574 ///
575 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
576 /// The streaming API currently returns results in compatibility mode.
577 pub fn v25_tts_stream(&self, text: impl Into<String>) -> StreamingTtsRequestBuilder {
578 StreamingTtsRequestBuilder::new(self.clone(), Model::MiMoV25Tts.as_str(), text.into())
579 }
580
581 /// Create a streaming TTS request builder with voice design.
582 ///
583 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
584 pub fn v25_tts_voice_design_stream(
585 &self,
586 text: impl Into<String>,
587 ) -> StreamingTtsRequestBuilder {
588 StreamingTtsRequestBuilder::new(
589 self.clone(),
590 Model::MiMoV25TtsVoiceDesign.as_str(),
591 text.into(),
592 )
593 }
594
595 /// Create a streaming TTS request builder with voice clone.
596 ///
597 /// Note: Low-latency streaming for V2.5 TTS series is not yet available.
598 pub fn v25_tts_voice_clone_stream(
599 &self,
600 text: impl Into<String>,
601 ) -> StreamingTtsRequestBuilder {
602 StreamingTtsRequestBuilder::new(
603 self.clone(),
604 Model::MiMoV25TtsVoiceClone.as_str(),
605 text.into(),
606 )
607 }
608}
609
610/// Builder for text-to-speech requests.
611///
612/// This builder provides a fluent API for configuring TTS requests.
613#[derive(Debug, Clone)]
614pub struct TtsRequestBuilder {
615 client: Client,
616 model: String,
617 text: String,
618 user_message: Option<String>,
619 voice: Voice,
620 format: AudioFormat,
621}
622
623impl TtsRequestBuilder {
624 /// Create a new TTS request builder.
625 fn new(client: Client, model: impl Into<String>, text: String) -> Self {
626 Self {
627 client,
628 model: model.into(),
629 text,
630 user_message: None,
631 voice: Voice::default(),
632 format: AudioFormat::default(),
633 }
634 }
635
636 /// Set the voice for synthesis.
637 ///
638 /// Available voices:
639 /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
640 /// - `Voice::DefaultEn` - Default English female voice
641 /// - `Voice::DefaultZh` - Default Chinese female voice
642 pub fn voice(mut self, voice: Voice) -> Self {
643 self.voice = voice;
644 self
645 }
646
647 /// Set the audio output format.
648 ///
649 /// Available formats:
650 /// - `AudioFormat::Wav` - WAV format (recommended for high quality)
651 /// - `AudioFormat::Mp3` - MP3 format (smaller file size)
652 /// - `AudioFormat::Pcm` - PCM format (for streaming)
653 pub fn format(mut self, format: AudioFormat) -> Self {
654 self.format = format;
655 self
656 }
657
658 /// Add a user message to influence the synthesis style.
659 ///
660 /// The user message can help adjust the tone and style of the synthesized speech.
661 pub fn user_message(mut self, message: impl Into<String>) -> Self {
662 self.user_message = Some(message.into());
663 self
664 }
665
666 /// Send the TTS request and return the response.
667 ///
668 /// # Returns
669 ///
670 /// A `TtsResponse` containing the synthesized audio data.
671 ///
672 /// # Example
673 ///
674 /// ```rust,no_run
675 /// use mimo_api::{Client, Voice, AudioFormat};
676 ///
677 /// #[tokio::main]
678 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
679 /// let client = Client::from_env()?;
680 ///
681 /// let response = client.tts("Hello, world!")
682 /// .voice(Voice::DefaultEn)
683 /// .format(AudioFormat::Mp3)
684 /// .send()
685 /// .await?;
686 ///
687 /// let audio = response.audio()?;
688 /// println!("Audio ID: {}", audio.id);
689 /// println!("Transcript: {:?}", audio.transcript());
690 /// Ok(())
691 /// }
692 /// ```
693 pub async fn send(self) -> Result<TtsResponse> {
694 let mut messages = Vec::new();
695
696 // Add optional user message
697 if let Some(user_msg) = self.user_message {
698 messages.push(Message::user(MessageContent::Text(user_msg)));
699 }
700
701 // Add assistant message with text to synthesize
702 messages.push(Message::assistant(MessageContent::Text(self.text)));
703
704 // Voice design model does not support audio.voice parameter
705 // Model name is "mimo-v2.5-tts-voicedesign" (no hyphen between voice and design)
706 let is_voice_design = self.model.contains("voicedesign");
707
708 let audio = if is_voice_design {
709 // Voice design model only supports format, not voice
710 Some(Audio {
711 format: Some(self.format),
712 voice: None,
713 })
714 } else {
715 Some(Audio {
716 format: Some(self.format),
717 voice: Some(self.voice),
718 })
719 };
720
721 let request = ChatRequest {
722 model: self.model,
723 messages,
724 audio,
725 ..Default::default()
726 };
727
728 let response = self.client.chat(request).await?;
729 Ok(TtsResponse(response))
730 }
731}
732
733/// Response from a text-to-speech request.
734#[derive(Debug, Clone)]
735pub struct TtsResponse(pub ChatResponse);
736
737impl TtsResponse {
738 /// Get the audio data from the response.
739 ///
740 /// # Errors
741 ///
742 /// Returns an error if no audio data is present in the response.
743 pub fn audio(&self) -> Result<&ResponseAudio> {
744 self.0
745 .choices
746 .first()
747 .and_then(|c| c.message.audio.as_ref())
748 .ok_or_else(|| Error::InvalidResponse("No audio data in response".into()))
749 }
750
751 /// Get the content text from the response.
752 pub fn content(&self) -> Option<&str> {
753 self.0.choices.first().map(|c| c.message.content.as_str())
754 }
755
756 /// Get the underlying chat response.
757 pub fn into_inner(self) -> ChatResponse {
758 self.0
759 }
760}
761
762/// Builder for streaming text-to-speech requests.
763///
764/// This builder provides a fluent API for configuring streaming TTS requests.
765#[derive(Debug, Clone)]
766pub struct StreamingTtsRequestBuilder {
767 client: Client,
768 model: String,
769 text: String,
770 user_message: Option<String>,
771 voice: Voice,
772}
773
774impl StreamingTtsRequestBuilder {
775 /// Create a new streaming TTS request builder.
776 fn new(client: Client, model: impl Into<String>, text: String) -> Self {
777 Self {
778 client,
779 model: model.into(),
780 text,
781 user_message: None,
782 voice: Voice::default(),
783 }
784 }
785
786 /// Set the voice for synthesis.
787 ///
788 /// Available voices:
789 /// - `Voice::MimoDefault` - MiMo default voice (balanced tone)
790 /// - `Voice::DefaultEn` - Default English female voice
791 /// - `Voice::DefaultZh` - Default Chinese female voice
792 ///
793 /// # Example
794 ///
795 /// ```rust,no_run
796 /// use mimo_api::{Client, Voice};
797 ///
798 /// #[tokio::main]
799 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
800 /// let client = Client::from_env()?;
801 ///
802 /// let stream = client.tts_stream("Hello!")
803 /// .voice(Voice::DefaultEn)
804 /// .send()
805 /// .await?;
806 ///
807 /// Ok(())
808 /// }
809 /// ```
810 pub fn voice(mut self, voice: Voice) -> Self {
811 self.voice = voice;
812 self
813 }
814
815 /// Add a user message to influence the synthesis style.
816 ///
817 /// The user message can help adjust the tone and style of the synthesized speech.
818 ///
819 /// # Example
820 ///
821 /// ```rust,no_run
822 /// use mimo_api::Client;
823 ///
824 /// #[tokio::main]
825 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
826 /// let client = Client::from_env()?;
827 ///
828 /// let stream = client.tts_stream("Hello there!")
829 /// .user_message("Speak in a friendly, conversational tone")
830 /// .send()
831 /// .await?;
832 ///
833 /// Ok(())
834 /// }
835 /// ```
836 pub fn user_message(mut self, message: impl Into<String>) -> Self {
837 self.user_message = Some(message.into());
838 self
839 }
840
841 /// Send the streaming TTS request and return the response stream.
842 ///
843 /// # Returns
844 ///
845 /// A `StreamingTtsResponse` that yields audio data chunks.
846 ///
847 /// # Example
848 ///
849 /// ```rust,no_run
850 /// use mimo_api::{Client, Voice};
851 /// use futures::StreamExt;
852 /// use tokio::fs::File;
853 /// use tokio::io::AsyncWriteExt;
854 ///
855 /// #[tokio::main]
856 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
857 /// let client = Client::from_env()?;
858 ///
859 /// let mut stream = client.tts_stream("Hello, world!")
860 /// .voice(Voice::DefaultEn)
861 /// .send()
862 /// .await?;
863 ///
864 /// let mut file = File::create("output.pcm").await?;
865 /// let mut total_bytes = 0;
866 ///
867 /// while let Some(result) = stream.next().await {
868 /// let audio_bytes = result?;
869 /// file.write_all(&audio_bytes).await?;
870 /// total_bytes += audio_bytes.len();
871 /// }
872 ///
873 /// println!("Total bytes: {}", total_bytes);
874 /// Ok(())
875 /// }
876 /// ```
877 pub async fn send(self) -> Result<StreamingTtsResponse> {
878 let mut messages = Vec::new();
879
880 // Add optional user message
881 if let Some(user_msg) = self.user_message {
882 messages.push(Message::user(MessageContent::Text(user_msg)));
883 }
884
885 // Add assistant message with text to synthesize
886 messages.push(Message::assistant(MessageContent::Text(self.text)));
887
888 let request = ChatRequest {
889 model: self.model,
890 messages,
891 stream: Some(true),
892 audio: Some(Audio {
893 format: Some(AudioFormat::Pcm16), // PCM16 is recommended for streaming
894 voice: Some(self.voice),
895 }),
896 ..Default::default()
897 };
898
899 let stream = self.client.chat_stream(request).await?;
900 Ok(StreamingTtsResponse::new(stream))
901 }
902}
903
904/// Response from a streaming text-to-speech request.
905///
906/// This type wraps the underlying stream and provides convenience methods
907/// for consuming audio data.
908pub struct StreamingTtsResponse {
909 stream: BoxStream<'static, Result<StreamChunk>>,
910 total_bytes: u64,
911 chunk_count: u32,
912}
913
914impl StreamingTtsResponse {
915 /// Create a new streaming TTS response.
916 fn new(stream: BoxStream<'static, Result<StreamChunk>>) -> Self {
917 Self {
918 stream,
919 total_bytes: 0,
920 chunk_count: 0,
921 }
922 }
923
924 /// Collect all audio chunks and return them as a single byte vector.
925 ///
926 /// This is a convenience method for non-streaming use cases where you
927 /// want to wait for all audio data before processing it.
928 ///
929 /// # Example
930 ///
931 /// ```rust,no_run
932 /// use mimo_api::Client;
933 ///
934 /// #[tokio::main]
935 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
936 /// let client = Client::from_env()?;
937 ///
938 /// let mut stream = client.tts_stream("Hello, world!").send().await?;
939 /// let audio_bytes = stream.collect_audio().await?;
940 ///
941 /// tokio::fs::write("output.pcm", &audio_bytes).await?;
942 /// println!("Total bytes: {}", audio_bytes.len());
943 ///
944 /// Ok(())
945 /// }
946 /// ```
947 pub async fn collect_audio(&mut self) -> Result<Vec<u8>> {
948 let mut all_bytes = Vec::new();
949
950 while let Some(chunk) = self.stream.next().await {
951 if let Some(audio_bytes) = self.process_chunk(chunk?)? {
952 all_bytes.extend(audio_bytes);
953 }
954 }
955
956 Ok(all_bytes)
957 }
958
959 /// Save all audio chunks to a file.
960 ///
961 /// This is a convenience method that collects all audio data and writes it to a file.
962 ///
963 /// # Example
964 ///
965 /// ```rust,no_run
966 /// use mimo_api::Client;
967 ///
968 /// #[tokio::main]
969 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
970 /// let client = Client::from_env()?;
971 ///
972 /// let mut stream: mimo_api::StreamingTtsResponse = client.tts_stream("Hello, world!").send().await?;
973 /// stream.save_to_file("output.pcm").await?;
974 ///
975 /// println!("Audio saved to file");
976 ///
977 /// Ok(())
978 /// }
979 /// ```
980 pub async fn save_to_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<()> {
981 let mut file = File::create(path).await?;
982
983 while let Some(chunk) = self.stream.next().await {
984 if let Some(audio_bytes) = self.process_chunk(chunk?)? {
985 file.write_all(&audio_bytes).await?;
986 }
987 }
988
989 file.flush().await?;
990 Ok(())
991 }
992
993 /// Process a stream chunk and return audio bytes if present.
994 fn process_chunk(&mut self, chunk: StreamChunk) -> Result<Option<Vec<u8>>> {
995 if !chunk.choices.is_empty()
996 && let Some(audio) = &chunk.choices[0].delta.audio
997 {
998 let bytes = audio.decode_data()?;
999 self.total_bytes += bytes.len() as u64;
1000 self.chunk_count += 1;
1001 return Ok(Some(bytes));
1002 }
1003 Ok(None)
1004 }
1005
1006 /// Get the total number of bytes received so far.
1007 pub fn total_bytes(&self) -> u64 {
1008 self.total_bytes
1009 }
1010
1011 /// Get the number of audio chunks received so far.
1012 pub fn chunk_count(&self) -> u32 {
1013 self.chunk_count
1014 }
1015}
1016
1017impl futures::Stream for StreamingTtsResponse {
1018 type Item = Result<Vec<u8>>;
1019
1020 fn poll_next(
1021 mut self: std::pin::Pin<&mut Self>,
1022 cx: &mut std::task::Context<'_>,
1023 ) -> std::task::Poll<Option<Self::Item>> {
1024 // Process chunks until we find one with audio data or the stream ends
1025 loop {
1026 match std::pin::Pin::new(&mut self.stream).poll_next(cx) {
1027 std::task::Poll::Ready(Some(Ok(chunk))) => {
1028 // Check if this is the final chunk with finish_reason
1029 let is_final = chunk
1030 .choices
1031 .first()
1032 .and_then(|c| c.finish_reason.as_ref())
1033 .is_some();
1034
1035 match self.process_chunk(chunk) {
1036 Ok(Some(bytes)) => {
1037 // Return audio data from this chunk
1038 return std::task::Poll::Ready(Some(Ok(bytes)));
1039 }
1040 Ok(None) => {
1041 // No audio data in this chunk
1042 if is_final {
1043 // Stream has ended, no more audio data
1044 return std::task::Poll::Ready(None);
1045 }
1046 // Continue to next chunk
1047 continue;
1048 }
1049 Err(e) => return std::task::Poll::Ready(Some(Err(e))),
1050 }
1051 }
1052 std::task::Poll::Ready(Some(Err(e))) => {
1053 let error_msg = format!("Stream error: {}", e);
1054 return std::task::Poll::Ready(Some(Err(Error::StreamError(error_msg))));
1055 }
1056 std::task::Poll::Ready(None) => {
1057 // Stream has ended normally
1058 return std::task::Poll::Ready(None);
1059 }
1060 std::task::Poll::Pending => return std::task::Poll::Pending,
1061 }
1062 }
1063 }
1064}