ibm_watson/text-to-speech/synthesis/
mod.rs

1use std::borrow::Cow;
2/// Errors that may be returned in speech synthesis requests
3pub mod errors;
4
5use reqwest::{Method, Request, StatusCode, Url, Version};
6use url::form_urlencoded::byte_serialize;
7
8use self::errors::SynthesisError;
9
10use super::TextToSpeech;
11
/// The service can return audio in the following formats (MIME types).
///
/// Where a variant carries a `sample_rate`, it is the sampling rate of the audio in Hz. For
/// variants where it is an `Option`, `None` selects the default rate noted on that variant.
#[derive(Clone, Copy)]
pub enum AudioFormat {
    /// You must specify the rate of the audio.
    AudioAlaw { sample_rate: u16 },
    /// The service returns audio with a sampling rate of 8,000 Hz.
    AudioBasic,
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioFlac { sample_rate: Option<u16> },
    /// You must specify the rate of the audio. You can optionally specify the endianness of the
    /// audio. The default endianness is little-endian.
    AudioL16 {
        /// Sampling rate of the audio in Hz (required).
        sample_rate: u16,
        /// Byte order of the audio; `None` leaves the choice to the default (little-endian).
        endianess: Option<AudioEndianness>,
    },
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioOgg { sample_rate: Option<u16> },
    /// You can optionally specify the rate of the audio. The default sampling rate is 48,000 Hz,
    /// which is the rate [`AudioFormat::id`] emits when `sample_rate` is `None`.
    AudioOggCodecsOpus { sample_rate: Option<u16> },
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioOggCodecsVorbis { sample_rate: Option<u16> },
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioMp3 { sample_rate: Option<u16> },
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioMpeg { sample_rate: Option<u16> },
    /// You must specify the rate of the audio.
    AudioMulaw { sample_rate: u16 },
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioWav { sample_rate: Option<u16> },
    /// The service returns the audio in the opus codec, with a sampling rate of 48,000 Hz.
    AudioWebm,
    /// The service returns audio with a sampling rate of 48,000 Hz.
    AudioWebmCodecsOpus,
    /// You can optionally specify the rate of the audio. The default sampling rate is 22,050 Hz.
    AudioWebmCodecsVorbis { sample_rate: Option<u16> },
}
47
48impl Default for AudioFormat {
49    /// The default audio format: [`AudioOggCodecsOpus`]
50    ///
51    /// [`AudioOggCodecsOpus`]: Self::AudioOggCodecsOpus
52    fn default() -> Self {
53        AudioFormat::AudioOggCodecsOpus {
54            sample_rate: Some(48000),
55        }
56    }
57}
58
59impl AudioFormat {
60    /// The value that the server expects for a particular format
61    pub fn id(&self) -> Cow<'static, str> {
62        match &self {
63            AudioFormat::AudioAlaw { sample_rate } => {
64                let url = format!("audio/alaw;rate={sample_rate}");
65                serialise_bytes(&url)
66            }
67            AudioFormat::AudioBasic => Cow::from("audio/basic"),
68            AudioFormat::AudioFlac { sample_rate } => {
69                let url = format!("audio/flac;rate={}", sample_rate.unwrap_or(22050));
70                serialise_bytes(&url)
71            }
72            AudioFormat::AudioL16 {
73                sample_rate,
74                endianess: endianness,
75            } => {
76                let url = match endianness {
77                    Some(endianness) => {
78                        format!(
79                            "audio/flac;rate={sample_rate};endianness={}",
80                            endianness.id()
81                        )
82                    }
83                    None => {
84                        format!("audio/flac;rate={sample_rate}")
85                    }
86                };
87                serialise_bytes(&url)
88            }
89            AudioFormat::AudioOgg { sample_rate } => {
90                let url = format!("audio/ogg;rate={}", sample_rate.unwrap_or(22050));
91                serialise_bytes(&url)
92            }
93            AudioFormat::AudioOggCodecsOpus { sample_rate } => {
94                let url = format!(
95                    "audio/ogg;codecs=opus;rate={}",
96                    match sample_rate {
97                        Some(rate) => *rate,
98                        None => 48000,
99                    }
100                );
101                serialise_bytes(&url)
102            }
103            AudioFormat::AudioOggCodecsVorbis { sample_rate } => {
104                let url = format!(
105                    "audio/ogg;codecs=vorbis;rate={}",
106                    sample_rate.unwrap_or(22050)
107                );
108                serialise_bytes(&url)
109            }
110            AudioFormat::AudioMp3 { sample_rate } => {
111                let url = format!("audio/mp3;rate={}", sample_rate.unwrap_or(22050));
112                serialise_bytes(&url)
113            }
114            AudioFormat::AudioMpeg { sample_rate } => {
115                let url = format!("audio/mpeg;rate={}", sample_rate.unwrap_or(22050));
116                serialise_bytes(&url)
117            }
118            AudioFormat::AudioMulaw { sample_rate } => {
119                let url = format!("audio/mulaw;rate={}", sample_rate);
120                serialise_bytes(&url)
121            }
122            AudioFormat::AudioWav { sample_rate } => {
123                let url = format!("audio/wav;rate={}", sample_rate.unwrap_or(22050));
124                serialise_bytes(&url)
125            }
126            AudioFormat::AudioWebm => serialise_bytes("audio/webm"),
127            AudioFormat::AudioWebmCodecsOpus => serialise_bytes("audio/webm;codecs=opus"),
128            AudioFormat::AudioWebmCodecsVorbis { sample_rate } => {
129                let url = format!(
130                    "audio/webm;codecs=vorbis/rate={}",
131                    sample_rate.unwrap_or(22050)
132                );
133                serialise_bytes(&url)
134            }
135        }
136    }
137}
138
139fn serialise_bytes(url: &str) -> Cow<'static, str> {
140    let url: String = byte_serialize(url.as_bytes()).collect();
141    Cow::from(url)
142}
143
/// The server expects the following values for audio endianness
///
/// The [`Default`] is [`LittleEndian`](Self::LittleEndian).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum AudioEndianness {
    /// Big Endian
    BigEndian,
    /// Little Endian
    #[default]
    LittleEndian,
}
153
154impl AudioEndianness {
155    /// The string value expected by the server for [`AudioEndianness`]
156    ///
157    /// [`AudioEndianness`]: Self
158    pub fn id(&self) -> &str {
159        match self {
160            AudioEndianness::BigEndian => "big-endian",
161            AudioEndianness::LittleEndian => "little-endian",
162        }
163    }
164}
165
166impl TextToSpeech<'_> {
167    /// Synthesises text to audio that is spoken in the [`specified voice`]. The service bases its understanding of the language for the input text on the specified voice. Use a voice that matches the language of the input text.
168    ///
169    /// # Parameters
170    ///
171    /// * `text` - The text to synthesise
172    /// * `format` - The requested [`AudioFormat`] (MIME type) of the audio. Defaults to [`AudioOggCodecsOpus`]
173    /// * `customisation_id` - The customisation ID (GUID) of a custom [`model`] to use for the synthesis. If a custom model is specified, it works only if it matches the [`language`] of the indicated voice. You must make the request with credentials for the instance of the service that owns the custom model. Omit the parameter to use the specified voice with no customisation
174    ///
175    /// [`AudioFormat`]: super::synthesis::AudioFormat
176    /// [`AudioOggCodecsOpus`]: super::synthesis::AudioFormat::AudioOggCodecsOpus
177    /// [`name`]: super::voices::Voice::name
178    /// [`language`]: super::voices::Voice::language
179    /// [`gender`]: super::voices::Voice::gender
180    /// [`specified voice`]: super::TextToSpeech::set_voice()
181    /// [`model`]: super::customisations::Model
182    ///
183    /// # Example
184    /// ``` no_run
185    /// # use ibm_watson::{
186    /// #     auth::IamAuthenticator,
187    /// #     tts::{voices::WatsonVoice, TextToSpeech},
188    /// # };
189    /// # async fn foo()-> Result<(), Box<dyn std::error::Error>> {
190    /// # let auth = IamAuthenticator::new("api_key").await?;
191    /// # let tts = TextToSpeech::new(&auth, "service_url");
192    /// let synth_bytes = tts.synthesise("Hey there", None, None).await?;
193    /// # Ok(())
194    /// # }
195    /// ```
196    pub async fn synthesise(
197        &self,
198        text: impl AsRef<str>,
199        format: Option<AudioFormat>,
200        customisation_id: Option<&str>,
201    ) -> Result<bytes::Bytes, SynthesisError> {
202        let mut url = Url::parse(self.service_url).unwrap();
203        url.set_path("v1/synthesize");
204        url.set_query(customisation_id);
205        url.query_pairs_mut().append_pair("text", text.as_ref());
206        url.query_pairs_mut().append_pair("voice", self.voice.id());
207        if let Some(format) = format {
208            url.query_pairs_mut().append_pair("accept", &format.id());
209        }
210        let mut req = Request::new(Method::GET, url);
211
212        if cfg!(feature = "http2") {
213            *req.version_mut() = Version::HTTP_2;
214        }
215
216        let client = self.get_client();
217        let response = client
218            .execute(req)
219            .await
220            .map_err(|e| SynthesisError::ConnectionError(e.to_string()))?;
221        assert_eq!(response.status(), 200);
222        match response.status() {
223            StatusCode::OK => {
224                let bytes = response.bytes().await.unwrap();
225                Ok(bytes)
226            }
227            StatusCode::NOT_ACCEPTABLE => Err(SynthesisError::NotAcceptable406),
228            StatusCode::UNSUPPORTED_MEDIA_TYPE => Err(SynthesisError::UnsupportedMediaType415),
229            StatusCode::INTERNAL_SERVER_ERROR => Err(SynthesisError::InternalServerError500),
230            StatusCode::SERVICE_UNAVAILABLE => Err(SynthesisError::ServiceUnavailable500),
231            StatusCode::BAD_REQUEST => Err(SynthesisError::BadRequest400),
232            StatusCode::NOT_FOUND => Err(SynthesisError::NotFound404),
233            _ => {
234                unreachable!()
235            }
236        }
237    }
238}