portkey_sdk/service/
audio.rs

1//! Audio API service.
2//!
//! This module provides methods for audio transcription and translation using Whisper and GPT
//! models, as well as speech generation from text.
4
5use std::future::Future;
6
7use reqwest::multipart::{Form, Part};
8
9use crate::client::PortkeyClient;
10use crate::error::Result;
11use crate::model::{
12    CreateSpeechRequest, CreateTranscriptionRequest, CreateTranslationRequest,
13    TranscriptionResponse, TranslationResponse,
14};
15
/// Trait for Audio API operations.
///
/// Groups the three audio operations exposed by this module: transcription, speech
/// generation and translation. Every method returns `impl Future`, so an implementation
/// may be written with `async fn`, as the `PortkeyClient` implementation in this module is.
pub trait AudioService {
    /// Creates a transcription of an audio file.
    ///
    /// # Arguments
    ///
    /// * `file_data` - The audio file data as bytes
    /// * `file_name` - The name of the audio file; the `PortkeyClient` implementation sends
    ///   it as the filename of the uploaded multipart `file` part
    /// * `request` - The transcription request parameters
    ///
    /// # Returns
    ///
    /// Returns a `TranscriptionResponse` containing the transcribed text and optional metadata.
    ///
    /// # Errors
    ///
    /// The `PortkeyClient` implementation returns an error when the request cannot be sent,
    /// when the server answers with an HTTP error status, or when the response body cannot
    /// be deserialized into a `TranscriptionResponse`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
    /// use portkey_sdk::service::AudioService;
    /// use portkey_sdk::builder::AuthMethod;
    /// use portkey_sdk::model::{CreateTranscriptionRequest, TranscriptionResponseFormat};
    /// use std::fs;
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let config = PortkeyConfig::builder()
    ///     .with_api_key("your-api-key")
    ///     .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
    ///     .build()?;
    /// let client = PortkeyClient::new(config)?;
    ///
    /// let audio_data = fs::read("audio.mp3")?;
    ///
    /// let request = CreateTranscriptionRequest {
    ///     model: "whisper-1".to_string(),
    ///     language: Some("en".to_string()),
    ///     response_format: Some(TranscriptionResponseFormat::Json),
    ///     ..Default::default()
    /// };
    ///
    /// let response = client.create_transcription(
    ///     audio_data,
    ///     "audio.mp3",
    ///     request
    /// ).await?;
    /// # Ok(())
    /// # }
    /// ```
    fn create_transcription(
        &self,
        file_data: Vec<u8>,
        file_name: &str,
        request: CreateTranscriptionRequest,
    ) -> impl Future<Output = Result<TranscriptionResponse>>;

    /// Creates speech audio from text input.
    ///
    /// # Arguments
    ///
    /// * `request` - The speech generation request with text, voice, and options
    ///
    /// # Returns
    ///
    /// Returns audio data as bytes in the specified format.
    ///
    /// # Errors
    ///
    /// The `PortkeyClient` implementation returns an error when the request cannot be sent,
    /// when the server answers with an HTTP error status, or when the response body cannot
    /// be read.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
    /// use portkey_sdk::service::AudioService;
    /// use portkey_sdk::builder::AuthMethod;
    /// use portkey_sdk::model::{CreateSpeechRequest, Voice, SpeechResponseFormat};
    /// use std::fs;
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let config = PortkeyConfig::builder()
    ///     .with_api_key("your-api-key")
    ///     .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
    ///     .build()?;
    /// let client = PortkeyClient::new(config)?;
    ///
    /// let request = CreateSpeechRequest {
    ///     model: "tts-1".to_string(),
    ///     input: "The quick brown fox jumped over the lazy dog.".to_string(),
    ///     voice: Voice::Alloy,
    ///     response_format: Some(SpeechResponseFormat::Mp3),
    ///     speed: Some(1.0),
    /// };
    ///
    /// let audio_data = client.create_speech(request).await?;
    /// fs::write("speech.mp3", audio_data)?;
    /// # Ok(())
    /// # }
    /// ```
    fn create_speech(&self, request: CreateSpeechRequest) -> impl Future<Output = Result<Vec<u8>>>;

    /// Translates audio to English.
    ///
    /// # Arguments
    ///
    /// * `file_data` - The audio file data as bytes
    /// * `file_name` - The name of the audio file; the `PortkeyClient` implementation sends
    ///   it as the filename of the uploaded multipart `file` part
    /// * `request` - The translation request parameters
    ///
    /// # Returns
    ///
    /// Returns a `TranslationResponse` containing the translated text (in English).
    ///
    /// # Errors
    ///
    /// The `PortkeyClient` implementation returns an error when the request cannot be sent,
    /// when the server answers with an HTTP error status, or when the response body cannot
    /// be deserialized into a `TranslationResponse`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
    /// use portkey_sdk::service::AudioService;
    /// use portkey_sdk::builder::AuthMethod;
    /// use portkey_sdk::model::{CreateTranslationRequest, TranscriptionResponseFormat};
    /// use std::fs;
    ///
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let config = PortkeyConfig::builder()
    ///     .with_api_key("your-api-key")
    ///     .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
    ///     .build()?;
    /// let client = PortkeyClient::new(config)?;
    ///
    /// let audio_data = fs::read("german.m4a")?;
    ///
    /// let request = CreateTranslationRequest {
    ///     model: "whisper-1".to_string(),
    ///     prompt: Some("Optional prompt in English".to_string()),
    ///     response_format: Some(TranscriptionResponseFormat::Json),
    ///     ..Default::default()
    /// };
    ///
    /// let response = client.create_translation(
    ///     audio_data,
    ///     "german.m4a",
    ///     request
    /// ).await?;
    /// # Ok(())
    /// # }
    /// ```
    fn create_translation(
        &self,
        file_data: Vec<u8>,
        file_name: &str,
        request: CreateTranslationRequest,
    ) -> impl Future<Output = Result<TranslationResponse>>;
}
163
164impl AudioService for PortkeyClient {
165    async fn create_transcription(
166        &self,
167        file_data: Vec<u8>,
168        file_name: &str,
169        request: CreateTranscriptionRequest,
170    ) -> Result<TranscriptionResponse> {
171        // Build multipart form
172        let file_part = Part::bytes(file_data).file_name(file_name.to_string());
173
174        let mut form = Form::new()
175            .part("file", file_part)
176            .text("model", request.model.clone());
177
178        if let Some(language) = request.language {
179            form = form.text("language", language);
180        }
181
182        if let Some(prompt) = request.prompt {
183            form = form.text("prompt", prompt);
184        }
185
186        if let Some(response_format) = request.response_format {
187            let format_str = match response_format {
188                crate::model::TranscriptionResponseFormat::Json => "json",
189                crate::model::TranscriptionResponseFormat::Text => "text",
190                crate::model::TranscriptionResponseFormat::Srt => "srt",
191                crate::model::TranscriptionResponseFormat::VerboseJson => "verbose_json",
192                crate::model::TranscriptionResponseFormat::Vtt => "vtt",
193            };
194            form = form.text("response_format", format_str);
195        }
196
197        if let Some(temperature) = request.temperature {
198            form = form.text("temperature", temperature.to_string());
199        }
200
201        if let Some(granularities) = request.timestamp_granularities {
202            for granularity in granularities {
203                let granularity_str = match granularity {
204                    crate::model::TimestampGranularity::Word => "word",
205                    crate::model::TimestampGranularity::Segment => "segment",
206                };
207                form = form.text("timestamp_granularities[]", granularity_str);
208            }
209        }
210
211        let response = self
212            .send_multipart(reqwest::Method::POST, "/audio/transcriptions", form)
213            .await?;
214
215        let response = response.error_for_status()?;
216        let transcription_response: TranscriptionResponse = response.json().await?;
217        Ok(transcription_response)
218    }
219
220    async fn create_speech(&self, request: CreateSpeechRequest) -> Result<Vec<u8>> {
221        let response = self
222            .send_json(reqwest::Method::POST, "/audio/speech", &request)
223            .await?;
224
225        let response = response.error_for_status()?;
226        let audio_bytes = response.bytes().await?;
227        Ok(audio_bytes.to_vec())
228    }
229
230    async fn create_translation(
231        &self,
232        file_data: Vec<u8>,
233        file_name: &str,
234        request: CreateTranslationRequest,
235    ) -> Result<TranslationResponse> {
236        // Build multipart form
237        let file_part = Part::bytes(file_data).file_name(file_name.to_string());
238
239        let mut form = Form::new()
240            .part("file", file_part)
241            .text("model", request.model.clone());
242
243        if let Some(prompt) = request.prompt {
244            form = form.text("prompt", prompt);
245        }
246
247        if let Some(response_format) = request.response_format {
248            let format_str = match response_format {
249                crate::model::TranscriptionResponseFormat::Json => "json",
250                crate::model::TranscriptionResponseFormat::Text => "text",
251                crate::model::TranscriptionResponseFormat::Srt => "srt",
252                crate::model::TranscriptionResponseFormat::VerboseJson => "verbose_json",
253                crate::model::TranscriptionResponseFormat::Vtt => "vtt",
254            };
255            form = form.text("response_format", format_str);
256        }
257
258        if let Some(temperature) = request.temperature {
259            form = form.text("temperature", temperature.to_string());
260        }
261
262        let response = self
263            .send_multipart(reqwest::Method::POST, "/audio/translations", form)
264            .await?;
265
266        let response = response.error_for_status()?;
267        let translation_response: TranslationResponse = response.json().await?;
268        Ok(translation_response)
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Explicitly set fields must keep their values when the remaining fields come from
    /// `Default`.
    #[test]
    fn test_create_transcription_request() {
        let expected_model = "whisper-1";
        let expected_language = "en";

        let transcription = CreateTranscriptionRequest {
            model: expected_model.to_string(),
            language: Some(expected_language.to_string()),
            ..Default::default()
        };

        assert_eq!(transcription.model, expected_model);
        assert_eq!(transcription.language.as_deref(), Some(expected_language));
    }
}