llm_sdk/api/
speech.rs

1use crate::IntoRequest;
2use derive_builder::Builder;
3use reqwest_middleware::{ClientWithMiddleware, RequestBuilder};
4use serde::Serialize;
5
6#[derive(Debug, Clone, Serialize, Builder)]
7#[builder(pattern = "mutable")]
8pub struct SpeechRequest {
9    /// One of the available TTS models: tts-1 or tts-1-hd
10    #[builder(default)]
11    model: SpeechModel,
12    /// The text to generate audio for. The maximum length is 4096 characters.
13    #[builder(setter(into))]
14    input: String,
15    /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide.
16    #[builder(default)]
17    voice: SpeechVoice,
18    /// The format to audio in. Supported formats are mp3, opus, aac, and flac.
19    #[builder(default)]
20    response_format: SpeechResponseFormat,
21    /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.
22    #[builder(default, setter(strip_option))]
23    #[serde(skip_serializing_if = "Option::is_none")]
24    speed: Option<f32>,
25}
26
27#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize)]
28pub enum SpeechModel {
29    #[default]
30    #[serde(rename = "tts-1")]
31    Tts1,
32    #[serde(rename = "tts-1-hd")]
33    Tts1Hd,
34}
35
36#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize)]
37#[serde(rename_all = "snake_case")]
38pub enum SpeechVoice {
39    Alloy,
40    Echo,
41    Fable,
42    Onyx,
43    #[default]
44    Nova,
45    Shimmer,
46}
47
48#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize)]
49#[serde(rename_all = "snake_case")]
50pub enum SpeechResponseFormat {
51    #[default]
52    Mp3,
53    Opus,
54    Aac,
55    Flac,
56}
57
58impl IntoRequest for SpeechRequest {
59    fn into_request(self, base_url: &str, client: ClientWithMiddleware) -> RequestBuilder {
60        let url = format!("{}/audio/speech", base_url);
61        client.post(url).json(&self)
62    }
63}
64
65impl SpeechRequest {
66    pub fn new(input: impl Into<String>) -> Self {
67        SpeechRequestBuilder::default()
68            .input(input)
69            .build()
70            .unwrap()
71    }
72}
73
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SDK;
    use anyhow::Result;

    /// Sends a default speech request through the shared `SDK` client and
    /// panics if the call reports an error.
    #[tokio::test]
    async fn speech_should_work() -> Result<()> {
        let request = SpeechRequest::new("The quick brown fox jumped over the lazy dog.");
        // Only success matters here; the returned payload is not inspected.
        let _audio = SDK.speech(request).await.unwrap();
        Ok(())
    }
}