audio_speech/
audio_speech.rs1#![allow(clippy::uninlined_format_args)]
2use openai_client_base::{
31 apis::{audio_api, configuration::Configuration},
32 models::{
33 create_speech_request::{ResponseFormat, StreamFormat},
34 CreateSpeechRequest,
35 },
36};
37use openai_ergonomic::{Client, Error};
38use std::io::Write;
39use std::path::PathBuf;
40
41#[tokio::main]
42async fn main() -> Result<(), Box<dyn std::error::Error>> {
43 println!("š OpenAI Ergonomic - Audio Speech (Text-to-Speech) Example\n");
44
45 let client = match Client::from_env() {
47 Ok(client_builder) => {
48 println!("ā
Client initialized successfully");
49 client_builder.build()
50 }
51 Err(e) => {
52 eprintln!("ā Failed to initialize client: {e}");
53 eprintln!("š” Make sure OPENAI_API_KEY is set in your environment");
54 return Err(e.into());
55 }
56 };
57
58 println!("\nšļø Example 1: Basic Text-to-Speech");
60 println!("===================================");
61
62 match basic_text_to_speech(&client).await {
63 Ok(()) => println!("ā
Basic TTS example completed"),
64 Err(e) => {
65 eprintln!("ā Basic TTS example failed: {e}");
66 handle_api_error(&e);
67 }
68 }
69
70 println!("\nš Example 2: Voice Comparison");
72 println!("===============================");
73
74 match voice_comparison_example(&client).await {
75 Ok(()) => println!("ā
Voice comparison example completed"),
76 Err(e) => {
77 eprintln!("ā Voice comparison example failed: {e}");
78 handle_api_error(&e);
79 }
80 }
81
82 println!("\nšµ Example 3: Audio Format Options");
84 println!("===================================");
85
86 match audio_format_example(&client).await {
87 Ok(()) => println!("ā
Audio format example completed"),
88 Err(e) => {
89 eprintln!("ā Audio format example failed: {e}");
90 handle_api_error(&e);
91 }
92 }
93
94 println!("\nā” Example 4: Speed Control");
96 println!("===========================");
97
98 match speed_control_example(&client).await {
99 Ok(()) => println!("ā
Speed control example completed"),
100 Err(e) => {
101 eprintln!("ā Speed control example failed: {e}");
102 handle_api_error(&e);
103 }
104 }
105
106 println!("\nš” Example 5: Streaming Audio");
108 println!("==============================");
109
110 match streaming_audio_example(&client).await {
111 Ok(()) => println!("ā
Streaming audio example completed"),
112 Err(e) => {
113 eprintln!("ā Streaming audio example failed: {e}");
114 handle_api_error(&e);
115 }
116 }
117
118 println!("\nš All audio speech examples completed! Check the output files in the current directory.");
119 Ok(())
120}
121
122async fn basic_text_to_speech(client: &Client) -> Result<(), Error> {
124 println!("Converting text to speech with default settings...");
125
126 let text = "Hello! This is a demonstration of OpenAI's text-to-speech capabilities using the openai-ergonomic crate.";
127
128 let request = CreateSpeechRequest::builder()
130 .model("tts-1".to_string())
131 .input(text.to_string())
132 .voice("alloy".to_string())
133 .response_format(ResponseFormat::Mp3)
134 .build();
135
136 let configuration = create_configuration(client);
149 let response = audio_api::create_speech()
150 .configuration(&configuration)
151 .create_speech_request(request)
152 .call()
153 .await
154 .map_err(|e| Error::Api {
155 status: 0,
156 message: e.to_string(),
157 error_type: None,
158 error_code: None,
159 })?;
160
161 let audio_data = response.bytes().await.map_err(Error::Http)?;
163 let filename = "basic_speech.mp3";
164 save_audio_file(&audio_data, filename)?;
165
166 println!("šµ Generated speech saved to: {filename}");
167 println!(" Text: \"{text}\"");
168 println!(" Voice: alloy");
169 println!(" Format: mp3");
170 println!(" Size: {} bytes", audio_data.len());
171
172 Ok(())
173}
174
175async fn voice_comparison_example(client: &Client) -> Result<(), Error> {
177 println!("Generating speech with different voices...");
178
179 let text = "The quick brown fox jumps over the lazy dog.";
180 let voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
181
182 let configuration = create_configuration(client);
183
184 for voice in &voices {
185 println!(" š¤ Generating with voice: {voice}");
186
187 let request = CreateSpeechRequest::builder()
188 .model("tts-1".to_string())
189 .input(text.to_string())
190 .voice((*voice).to_string())
191 .response_format(ResponseFormat::Mp3)
192 .build();
193
194 match audio_api::create_speech()
195 .configuration(&configuration)
196 .create_speech_request(request)
197 .call()
198 .await
199 {
200 Ok(response) => {
201 let audio_data = response.bytes().await.map_err(Error::Http)?;
202 let filename = format!("voice_{voice}.mp3");
203 save_audio_file(&audio_data, &filename)?;
204 println!(" ā
Saved to: {filename} ({} bytes)", audio_data.len());
205 }
206 Err(e) => {
207 eprintln!(" ā Failed to generate audio for voice {voice}: {e}");
208 }
209 }
210 }
211
212 println!("\nš” Note: Listen to the generated files to compare different voice characteristics");
213
214 Ok(())
215}
216
217async fn audio_format_example(client: &Client) -> Result<(), Error> {
219 println!("Generating speech in different audio formats...");
220
221 let text = "This demonstrates various audio format options.";
222 let formats = [
223 (ResponseFormat::Mp3, "mp3"),
224 (ResponseFormat::Opus, "opus"),
225 (ResponseFormat::Aac, "aac"),
226 (ResponseFormat::Flac, "flac"),
227 (ResponseFormat::Wav, "wav"),
228 (ResponseFormat::Pcm, "pcm"),
229 ];
230
231 let configuration = create_configuration(client);
232
233 for (format, extension) in &formats {
234 println!(" šµ Generating in format: {extension}");
235
236 let request = CreateSpeechRequest::builder()
237 .model("tts-1".to_string())
238 .input(text.to_string())
239 .voice("nova".to_string())
240 .response_format(*format)
241 .build();
242
243 match audio_api::create_speech()
244 .configuration(&configuration)
245 .create_speech_request(request)
246 .call()
247 .await
248 {
249 Ok(response) => {
250 let audio_data = response.bytes().await.map_err(Error::Http)?;
251 let filename = format!("format_example.{extension}");
252 save_audio_file(&audio_data, &filename)?;
253 println!(" ā
Saved to: {filename} ({} bytes)", audio_data.len());
254 }
255 Err(e) => {
256 eprintln!(" ā Failed to generate audio in format {extension}: {e}");
257 }
258 }
259 }
260
261 println!("\nš” Note: Different formats have different quality/compression trade-offs:");
262 println!(" - MP3: Good compression, widely supported");
263 println!(" - OPUS: Excellent compression for voice, modern codec");
264 println!(" - AAC: Good compression, Apple ecosystem friendly");
265 println!(" - FLAC: Lossless compression, larger files");
266 println!(" - WAV: Uncompressed, largest files, universal support");
267 println!(" - PCM: Raw audio data, suitable for further processing");
268
269 Ok(())
270}
271
272async fn speed_control_example(client: &Client) -> Result<(), Error> {
274 println!("Generating speech at different speeds...");
275
276 let text = "This sentence will be spoken at different speeds to demonstrate the speed control feature.";
277 let speeds = [0.25, 0.5, 1.0, 1.5, 2.0, 4.0];
278
279 let configuration = create_configuration(client);
280
281 for &speed in &speeds {
282 println!(" ā” Generating at speed: {speed}x");
283
284 let request = CreateSpeechRequest::builder()
285 .model("tts-1".to_string())
286 .input(text.to_string())
287 .voice("echo".to_string())
288 .response_format(ResponseFormat::Mp3)
289 .speed(speed)
290 .build();
291
292 match audio_api::create_speech()
293 .configuration(&configuration)
294 .create_speech_request(request)
295 .call()
296 .await
297 {
298 Ok(response) => {
299 let audio_data = response.bytes().await.map_err(Error::Http)?;
300 let filename = format!("speed_{speed}.mp3");
301 save_audio_file(&audio_data, &filename)?;
302 println!(" ā
Saved to: {filename} ({} bytes)", audio_data.len());
303 }
304 Err(e) => {
305 eprintln!(" ā Failed to generate audio at speed {speed}x: {e}");
306 }
307 }
308 }
309
310 println!("\nš” Note: Speed range is 0.25x to 4.0x normal speed");
311 println!(" - 0.25x: Very slow, good for learning pronunciation");
312 println!(" - 1.0x: Normal speed");
313 println!(" - 4.0x: Very fast, good for quick content consumption");
314
315 Ok(())
316}
317
318async fn streaming_audio_example(client: &Client) -> Result<(), Error> {
320 println!("Attempting to generate streaming audio...");
321
322 let text = "This is a longer text that demonstrates streaming audio capabilities. Streaming allows you to start playing audio before the entire generation is complete, which is useful for real-time applications and longer content.";
323
324 let configuration = create_configuration(client);
325
326 let request = CreateSpeechRequest::builder()
328 .model("gpt-4o-mini-tts".to_string())
329 .input(text.to_string())
330 .voice("shimmer".to_string())
331 .response_format(ResponseFormat::Mp3)
332 .stream_format(StreamFormat::Audio)
333 .build();
334
335 println!(" š” Attempting streaming generation...");
336
337 match audio_api::create_speech()
338 .configuration(&configuration)
339 .create_speech_request(request.clone())
340 .call()
341 .await
342 {
343 Ok(response) => {
344 let audio_data = response.bytes().await.map_err(Error::Http)?;
345 let filename = "streaming_example.mp3";
346 save_audio_file(&audio_data, filename)?;
347 println!(
348 " ā
Streaming audio saved to: {filename} ({} bytes)",
349 audio_data.len()
350 );
351
352 println!("\nš” Note: In a real streaming implementation, you would:");
353 println!(" - Process audio chunks as they arrive");
354 println!(" - Start playback before full generation is complete");
355 println!(" - Handle streaming format appropriately");
356 }
357 Err(e) => {
358 eprintln!(" ā ļø Streaming with gpt-4o-mini-tts failed, trying fallback: {e}");
359
360 let fallback_request = CreateSpeechRequest::builder()
362 .model("tts-1-hd".to_string())
363 .input(text.to_string())
364 .voice("shimmer".to_string())
365 .response_format(ResponseFormat::Mp3)
366 .build();
367
368 match audio_api::create_speech()
369 .configuration(&configuration)
370 .create_speech_request(fallback_request)
371 .call()
372 .await
373 {
374 Ok(response) => {
375 let audio_data = response.bytes().await.map_err(Error::Http)?;
376 let filename = "fallback_example.mp3";
377 save_audio_file(&audio_data, filename)?;
378 println!(
379 " ā
Fallback audio saved to: {filename} ({} bytes)",
380 audio_data.len()
381 );
382 }
383 Err(e) => {
384 eprintln!(" ā Fallback generation also failed: {e}");
385 }
386 }
387 }
388 }
389
390 println!("\nš” Note: Streaming support varies by model:");
391 println!(" - gpt-4o-mini-tts: Supports streaming");
392 println!(" - tts-1, tts-1-hd: No streaming support");
393 println!(" - Stream formats: 'sse' (Server-Sent Events) or 'audio' (raw audio chunks)");
394
395 Ok(())
396}
397
398fn create_configuration(client: &Client) -> Configuration {
400 let mut configuration = Configuration::new();
401 configuration.bearer_access_token = Some(client.config().api_key().to_string());
402
403 if let Some(base_url) = client.config().base_url() {
404 configuration.base_path = base_url.to_string();
405 }
406
407 if let Some(org_id) = client.config().organization_id() {
408 configuration.user_agent = Some(format!(
409 "openai-ergonomic/{} org/{}",
410 env!("CARGO_PKG_VERSION"),
411 org_id
412 ));
413 }
414
415 configuration
416}
417
418fn save_audio_file(audio_data: &[u8], filename: &str) -> Result<(), Error> {
420 let path = PathBuf::from(filename);
421 let mut file = std::fs::File::create(&path).map_err(Error::File)?;
422 file.write_all(audio_data).map_err(Error::File)?;
423 Ok(())
424}
425
426fn handle_api_error(error: &Error) {
428 match error {
429 Error::Api {
430 status,
431 message,
432 error_type,
433 error_code,
434 } => {
435 eprintln!("š« API Error [{status}]: {message}");
436 if let Some(error_type) = error_type {
437 eprintln!(" Type: {error_type}");
438 }
439 if let Some(error_code) = error_code {
440 eprintln!(" Code: {error_code}");
441 }
442
443 match *status {
445 401 => eprintln!("š” Check your API key: export OPENAI_API_KEY=\"your-key\""),
446 429 => eprintln!("š” Rate limited - try again in a moment"),
447 500..=599 => eprintln!("š” Server error - try again later"),
448 _ => {}
449 }
450 }
451 Error::InvalidRequest(msg) => {
452 eprintln!("š« Invalid Request: {msg}");
453 eprintln!("š” Check your request parameters");
454 }
455 Error::Config(msg) => {
456 eprintln!("š« Configuration Error: {msg}");
457 eprintln!("š” Check your client configuration");
458 }
459 Error::Http(err) => {
460 eprintln!("š« HTTP Error: {err}");
461 eprintln!("š” Check your network connection");
462 }
463 Error::Json(err) => {
464 eprintln!("š« JSON Error: {err}");
465 eprintln!("š” Response parsing failed - may be a temporary issue");
466 }
467 Error::Authentication(msg) => {
468 eprintln!("š« Authentication Error: {msg}");
469 eprintln!("š” Check your API key");
470 }
471 Error::RateLimit(msg) => {
472 eprintln!("š« Rate Limit Error: {msg}");
473 eprintln!("š” Try again in a moment");
474 }
475 Error::Stream(msg) => {
476 eprintln!("š« Stream Error: {msg}");
477 eprintln!("š” Connection issue with streaming");
478 }
479 Error::File(err) => {
480 eprintln!("š« File Error: {err}");
481 eprintln!("š” Check file permissions and paths");
482 }
483 Error::Builder(msg) => {
484 eprintln!("š« Builder Error: {msg}");
485 eprintln!("š” Check your request builder configuration");
486 }
487 Error::Internal(msg) => {
488 eprintln!("š« Internal Error: {msg}");
489 eprintln!("š” This may be a bug, please report it");
490 }
491 Error::StreamConnection { message } => {
492 eprintln!("š« Stream Connection Error: {message}");
493 eprintln!("š” Check your network connection");
494 }
495 Error::StreamParsing { message, chunk } => {
496 eprintln!("š« Stream Parsing Error: {message}");
497 eprintln!(" Problematic chunk: {chunk}");
498 eprintln!("š” The response stream may be corrupted");
499 }
500 Error::StreamBuffer { message } => {
501 eprintln!("š« Stream Buffer Error: {message}");
502 eprintln!("š” The stream buffer encountered an issue");
503 }
504 }
505}