portkey_sdk/service/audio.rs
1//! Audio API service.
2//!
//! This module provides methods for audio transcription, translation, and speech generation
//! using Whisper, GPT, and TTS models.
4
5use std::future::Future;
6
7use reqwest::multipart::{Form, Part};
8
9use crate::client::PortkeyClient;
10use crate::error::Result;
11use crate::model::{
12 CreateSpeechRequest, CreateTranscriptionRequest, CreateTranslationRequest,
13 TranscriptionResponse, TranslationResponse,
14};
15
16/// Trait for Audio API operations.
17pub trait AudioService {
18 /// Creates a transcription of an audio file.
19 ///
20 /// # Arguments
21 ///
22 /// * `file_data` - The audio file data as bytes
23 /// * `file_name` - The name of the audio file
24 /// * `request` - The transcription request parameters
25 ///
26 /// # Returns
27 ///
28 /// Returns a `TranscriptionResponse` containing the transcribed text and optional metadata.
29 ///
30 /// # Example
31 ///
32 /// ```no_run
33 /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
34 /// use portkey_sdk::service::AudioService;
35 /// use portkey_sdk::builder::AuthMethod;
36 /// use portkey_sdk::model::{CreateTranscriptionRequest, TranscriptionResponseFormat};
37 /// use std::fs;
38 ///
39 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
40 /// let config = PortkeyConfig::builder()
41 /// .with_api_key("your-api-key")
42 /// .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
43 /// .build()?;
44 /// let client = PortkeyClient::new(config)?;
45 ///
46 /// let audio_data = fs::read("audio.mp3")?;
47 ///
48 /// let request = CreateTranscriptionRequest {
49 /// model: "whisper-1".to_string(),
50 /// language: Some("en".to_string()),
51 /// response_format: Some(TranscriptionResponseFormat::Json),
52 /// ..Default::default()
53 /// };
54 ///
55 /// let response = client.create_transcription(
56 /// audio_data,
57 /// "audio.mp3",
58 /// request
59 /// ).await?;
60 /// # Ok(())
61 /// # }
62 /// ```
63 fn create_transcription(
64 &self,
65 file_data: Vec<u8>,
66 file_name: &str,
67 request: CreateTranscriptionRequest,
68 ) -> impl Future<Output = Result<TranscriptionResponse>>;
69
70 /// Creates speech audio from text input.
71 ///
72 /// # Arguments
73 ///
74 /// * `request` - The speech generation request with text, voice, and options
75 ///
76 /// # Returns
77 ///
78 /// Returns audio data as bytes in the specified format.
79 ///
80 /// # Example
81 ///
82 /// ```no_run
83 /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
84 /// use portkey_sdk::service::AudioService;
85 /// use portkey_sdk::builder::AuthMethod;
86 /// use portkey_sdk::model::{CreateSpeechRequest, Voice, SpeechResponseFormat};
87 /// use std::fs;
88 ///
89 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
90 /// let config = PortkeyConfig::builder()
91 /// .with_api_key("your-api-key")
92 /// .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
93 /// .build()?;
94 /// let client = PortkeyClient::new(config)?;
95 ///
96 /// let request = CreateSpeechRequest {
97 /// model: "tts-1".to_string(),
98 /// input: "The quick brown fox jumped over the lazy dog.".to_string(),
99 /// voice: Voice::Alloy,
100 /// response_format: Some(SpeechResponseFormat::Mp3),
101 /// speed: Some(1.0),
102 /// };
103 ///
104 /// let audio_data = client.create_speech(request).await?;
105 /// fs::write("speech.mp3", audio_data)?;
106 /// # Ok(())
107 /// # }
108 /// ```
109 fn create_speech(&self, request: CreateSpeechRequest) -> impl Future<Output = Result<Vec<u8>>>;
110
111 /// Translates audio to English.
112 ///
113 /// # Arguments
114 ///
115 /// * `file_data` - The audio file data as bytes
116 /// * `file_name` - The name of the audio file
117 /// * `request` - The translation request parameters
118 ///
119 /// # Returns
120 ///
121 /// Returns a `TranslationResponse` containing the translated text (in English).
122 ///
123 /// # Example
124 ///
125 /// ```no_run
126 /// use portkey_sdk::{PortkeyConfig, PortkeyClient};
127 /// use portkey_sdk::service::AudioService;
128 /// use portkey_sdk::builder::AuthMethod;
129 /// use portkey_sdk::model::{CreateTranslationRequest, TranscriptionResponseFormat};
130 /// use std::fs;
131 ///
132 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
133 /// let config = PortkeyConfig::builder()
134 /// .with_api_key("your-api-key")
135 /// .with_auth_method(AuthMethod::virtual_key("your-virtual-key"))
136 /// .build()?;
137 /// let client = PortkeyClient::new(config)?;
138 ///
139 /// let audio_data = fs::read("german.m4a")?;
140 ///
141 /// let request = CreateTranslationRequest {
142 /// model: "whisper-1".to_string(),
143 /// prompt: Some("Optional prompt in English".to_string()),
144 /// response_format: Some(TranscriptionResponseFormat::Json),
145 /// ..Default::default()
146 /// };
147 ///
148 /// let response = client.create_translation(
149 /// audio_data,
150 /// "german.m4a",
151 /// request
152 /// ).await?;
153 /// # Ok(())
154 /// # }
155 /// ```
156 fn create_translation(
157 &self,
158 file_data: Vec<u8>,
159 file_name: &str,
160 request: CreateTranslationRequest,
161 ) -> impl Future<Output = Result<TranslationResponse>>;
162}
163
164impl AudioService for PortkeyClient {
165 async fn create_transcription(
166 &self,
167 file_data: Vec<u8>,
168 file_name: &str,
169 request: CreateTranscriptionRequest,
170 ) -> Result<TranscriptionResponse> {
171 // Build multipart form
172 let file_part = Part::bytes(file_data).file_name(file_name.to_string());
173
174 let mut form = Form::new()
175 .part("file", file_part)
176 .text("model", request.model.clone());
177
178 if let Some(language) = request.language {
179 form = form.text("language", language);
180 }
181
182 if let Some(prompt) = request.prompt {
183 form = form.text("prompt", prompt);
184 }
185
186 if let Some(response_format) = request.response_format {
187 let format_str = match response_format {
188 crate::model::TranscriptionResponseFormat::Json => "json",
189 crate::model::TranscriptionResponseFormat::Text => "text",
190 crate::model::TranscriptionResponseFormat::Srt => "srt",
191 crate::model::TranscriptionResponseFormat::VerboseJson => "verbose_json",
192 crate::model::TranscriptionResponseFormat::Vtt => "vtt",
193 };
194 form = form.text("response_format", format_str);
195 }
196
197 if let Some(temperature) = request.temperature {
198 form = form.text("temperature", temperature.to_string());
199 }
200
201 if let Some(granularities) = request.timestamp_granularities {
202 for granularity in granularities {
203 let granularity_str = match granularity {
204 crate::model::TimestampGranularity::Word => "word",
205 crate::model::TimestampGranularity::Segment => "segment",
206 };
207 form = form.text("timestamp_granularities[]", granularity_str);
208 }
209 }
210
211 let response = self
212 .send_multipart(reqwest::Method::POST, "/audio/transcriptions", form)
213 .await?;
214
215 let response = response.error_for_status()?;
216 let transcription_response: TranscriptionResponse = response.json().await?;
217 Ok(transcription_response)
218 }
219
220 async fn create_speech(&self, request: CreateSpeechRequest) -> Result<Vec<u8>> {
221 let response = self
222 .send_json(reqwest::Method::POST, "/audio/speech", &request)
223 .await?;
224
225 let response = response.error_for_status()?;
226 let audio_bytes = response.bytes().await?;
227 Ok(audio_bytes.to_vec())
228 }
229
230 async fn create_translation(
231 &self,
232 file_data: Vec<u8>,
233 file_name: &str,
234 request: CreateTranslationRequest,
235 ) -> Result<TranslationResponse> {
236 // Build multipart form
237 let file_part = Part::bytes(file_data).file_name(file_name.to_string());
238
239 let mut form = Form::new()
240 .part("file", file_part)
241 .text("model", request.model.clone());
242
243 if let Some(prompt) = request.prompt {
244 form = form.text("prompt", prompt);
245 }
246
247 if let Some(response_format) = request.response_format {
248 let format_str = match response_format {
249 crate::model::TranscriptionResponseFormat::Json => "json",
250 crate::model::TranscriptionResponseFormat::Text => "text",
251 crate::model::TranscriptionResponseFormat::Srt => "srt",
252 crate::model::TranscriptionResponseFormat::VerboseJson => "verbose_json",
253 crate::model::TranscriptionResponseFormat::Vtt => "vtt",
254 };
255 form = form.text("response_format", format_str);
256 }
257
258 if let Some(temperature) = request.temperature {
259 form = form.text("temperature", temperature.to_string());
260 }
261
262 let response = self
263 .send_multipart(reqwest::Method::POST, "/audio/translations", form)
264 .await?;
265
266 let response = response.error_for_status()?;
267 let translation_response: TranslationResponse = response.json().await?;
268 Ok(translation_response)
269 }
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Fields set explicitly must survive the `..Default::default()` struct update.
    #[test]
    fn test_create_transcription_request() {
        let expected_model = "whisper-1";
        let expected_language = "en";

        let request = CreateTranscriptionRequest {
            language: Some(expected_language.to_string()),
            model: expected_model.to_string(),
            ..Default::default()
        };

        assert_eq!(request.model, expected_model);
        assert_eq!(request.language.as_deref(), Some(expected_language));
    }
}