audio_speech/
audio_speech.rs

1#![allow(clippy::uninlined_format_args)]
2//! Audio Speech (Text-to-Speech) example for the openai-ergonomic crate.
3//!
4//! This example demonstrates text-to-speech functionality using `OpenAI`'s TTS models.
5//! It shows how to generate audio from text with different voices, formats, and options.
6//!
7//! ## Features Demonstrated
8//!
9//! - Basic text-to-speech conversion
10//! - Different voice options (alloy, echo, fable, onyx, nova, shimmer)
11//! - Multiple audio formats (mp3, opus, aac, flac, wav, pcm)
12//! - Speed control for generated audio
13//! - Streaming audio output
14//! - File handling for audio output
15//! - Model selection (tts-1, tts-1-hd, gpt-4o-mini-tts)
16//!
17//! ## Prerequisites
18//!
19//! Set your `OpenAI` API key:
20//! ```bash
21//! export OPENAI_API_KEY="your-key-here"
22//! ```
23//!
24//! ## Usage
25//!
26//! ```bash
27//! cargo run --example audio_speech
28//! ```
29
30use openai_client_base::{
31    apis::{audio_api, configuration::Configuration},
32    models::{
33        create_speech_request::{ResponseFormat, StreamFormat},
34        CreateSpeechRequest,
35    },
36};
37use openai_ergonomic::{Client, Error};
38use std::io::Write;
39use std::path::PathBuf;
40
41#[tokio::main]
42async fn main() -> Result<(), Box<dyn std::error::Error>> {
43    println!("šŸ”Š OpenAI Ergonomic - Audio Speech (Text-to-Speech) Example\n");
44
45    // Initialize client from environment variables
46    let client = match Client::from_env() {
47        Ok(client_builder) => {
48            println!("āœ… Client initialized successfully");
49            client_builder.build()
50        }
51        Err(e) => {
52            eprintln!("āŒ Failed to initialize client: {e}");
53            eprintln!("šŸ’” Make sure OPENAI_API_KEY is set in your environment");
54            return Err(e.into());
55        }
56    };
57
58    // Example 1: Basic Text-to-Speech
59    println!("\nšŸŽ™ļø Example 1: Basic Text-to-Speech");
60    println!("===================================");
61
62    match basic_text_to_speech(&client).await {
63        Ok(()) => println!("āœ… Basic TTS example completed"),
64        Err(e) => {
65            eprintln!("āŒ Basic TTS example failed: {e}");
66            handle_api_error(&e);
67        }
68    }
69
70    // Example 2: Voice Comparison
71    println!("\nšŸŽ­ Example 2: Voice Comparison");
72    println!("===============================");
73
74    match voice_comparison_example(&client).await {
75        Ok(()) => println!("āœ… Voice comparison example completed"),
76        Err(e) => {
77            eprintln!("āŒ Voice comparison example failed: {e}");
78            handle_api_error(&e);
79        }
80    }
81
82    // Example 3: Audio Format Options
83    println!("\nšŸŽµ Example 3: Audio Format Options");
84    println!("===================================");
85
86    match audio_format_example(&client).await {
87        Ok(()) => println!("āœ… Audio format example completed"),
88        Err(e) => {
89            eprintln!("āŒ Audio format example failed: {e}");
90            handle_api_error(&e);
91        }
92    }
93
94    // Example 4: Speed Control
95    println!("\n⚔ Example 4: Speed Control");
96    println!("===========================");
97
98    match speed_control_example(&client).await {
99        Ok(()) => println!("āœ… Speed control example completed"),
100        Err(e) => {
101            eprintln!("āŒ Speed control example failed: {e}");
102            handle_api_error(&e);
103        }
104    }
105
106    // Example 5: Streaming Audio (Note: requires model support)
107    println!("\nšŸ“” Example 5: Streaming Audio");
108    println!("==============================");
109
110    match streaming_audio_example(&client).await {
111        Ok(()) => println!("āœ… Streaming audio example completed"),
112        Err(e) => {
113            eprintln!("āŒ Streaming audio example failed: {e}");
114            handle_api_error(&e);
115        }
116    }
117
118    println!("\nšŸŽ‰ All audio speech examples completed! Check the output files in the current directory.");
119    Ok(())
120}
121
122/// Example 1: Basic text-to-speech with default settings
123async fn basic_text_to_speech(client: &Client) -> Result<(), Error> {
124    println!("Converting text to speech with default settings...");
125
126    let text = "Hello! This is a demonstration of OpenAI's text-to-speech capabilities using the openai-ergonomic crate.";
127
128    // Create speech request with basic settings
129    let request = CreateSpeechRequest::builder()
130        .model("tts-1".to_string())
131        .input(text.to_string())
132        .voice("alloy".to_string())
133        .response_format(ResponseFormat::Mp3)
134        .build();
135
136    // Note: Once audio builders are implemented, this would look like:
137    // let audio_response = client
138    //     .audio()
139    //     .speech()
140    //     .model("tts-1")
141    //     .input(text)
142    //     .voice("alloy")
143    //     .format("mp3")
144    //     .execute()
145    //     .await?;
146
147    // For now, we'll use the base client directly
148    let configuration = create_configuration(client);
149    let response = audio_api::create_speech()
150        .configuration(&configuration)
151        .create_speech_request(request)
152        .call()
153        .await
154        .map_err(|e| Error::Api {
155            status: 0,
156            message: e.to_string(),
157            error_type: None,
158            error_code: None,
159        })?;
160
161    // Save the audio data to file
162    let audio_data = response.bytes().await.map_err(Error::Http)?;
163    let filename = "basic_speech.mp3";
164    save_audio_file(&audio_data, filename)?;
165
166    println!("šŸŽµ Generated speech saved to: {filename}");
167    println!("   Text: \"{text}\"");
168    println!("   Voice: alloy");
169    println!("   Format: mp3");
170    println!("   Size: {} bytes", audio_data.len());
171
172    Ok(())
173}
174
175/// Example 2: Demonstrate different voice options
176async fn voice_comparison_example(client: &Client) -> Result<(), Error> {
177    println!("Generating speech with different voices...");
178
179    let text = "The quick brown fox jumps over the lazy dog.";
180    let voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
181
182    let configuration = create_configuration(client);
183
184    for voice in &voices {
185        println!("  šŸŽ¤ Generating with voice: {voice}");
186
187        let request = CreateSpeechRequest::builder()
188            .model("tts-1".to_string())
189            .input(text.to_string())
190            .voice((*voice).to_string())
191            .response_format(ResponseFormat::Mp3)
192            .build();
193
194        match audio_api::create_speech()
195            .configuration(&configuration)
196            .create_speech_request(request)
197            .call()
198            .await
199        {
200            Ok(response) => {
201                let audio_data = response.bytes().await.map_err(Error::Http)?;
202                let filename = format!("voice_{voice}.mp3");
203                save_audio_file(&audio_data, &filename)?;
204                println!("     āœ… Saved to: {filename} ({} bytes)", audio_data.len());
205            }
206            Err(e) => {
207                eprintln!("     āŒ Failed to generate audio for voice {voice}: {e}");
208            }
209        }
210    }
211
212    println!("\nšŸ’” Note: Listen to the generated files to compare different voice characteristics");
213
214    Ok(())
215}
216
217/// Example 3: Demonstrate different audio formats
218async fn audio_format_example(client: &Client) -> Result<(), Error> {
219    println!("Generating speech in different audio formats...");
220
221    let text = "This demonstrates various audio format options.";
222    let formats = [
223        (ResponseFormat::Mp3, "mp3"),
224        (ResponseFormat::Opus, "opus"),
225        (ResponseFormat::Aac, "aac"),
226        (ResponseFormat::Flac, "flac"),
227        (ResponseFormat::Wav, "wav"),
228        (ResponseFormat::Pcm, "pcm"),
229    ];
230
231    let configuration = create_configuration(client);
232
233    for (format, extension) in &formats {
234        println!("  šŸŽµ Generating in format: {extension}");
235
236        let request = CreateSpeechRequest::builder()
237            .model("tts-1".to_string())
238            .input(text.to_string())
239            .voice("nova".to_string())
240            .response_format(*format)
241            .build();
242
243        match audio_api::create_speech()
244            .configuration(&configuration)
245            .create_speech_request(request)
246            .call()
247            .await
248        {
249            Ok(response) => {
250                let audio_data = response.bytes().await.map_err(Error::Http)?;
251                let filename = format!("format_example.{extension}");
252                save_audio_file(&audio_data, &filename)?;
253                println!("     āœ… Saved to: {filename} ({} bytes)", audio_data.len());
254            }
255            Err(e) => {
256                eprintln!("     āŒ Failed to generate audio in format {extension}: {e}");
257            }
258        }
259    }
260
261    println!("\nšŸ’” Note: Different formats have different quality/compression trade-offs:");
262    println!("   - MP3: Good compression, widely supported");
263    println!("   - OPUS: Excellent compression for voice, modern codec");
264    println!("   - AAC: Good compression, Apple ecosystem friendly");
265    println!("   - FLAC: Lossless compression, larger files");
266    println!("   - WAV: Uncompressed, largest files, universal support");
267    println!("   - PCM: Raw audio data, suitable for further processing");
268
269    Ok(())
270}
271
272/// Example 4: Demonstrate speed control
273async fn speed_control_example(client: &Client) -> Result<(), Error> {
274    println!("Generating speech at different speeds...");
275
276    let text = "This sentence will be spoken at different speeds to demonstrate the speed control feature.";
277    let speeds = [0.25, 0.5, 1.0, 1.5, 2.0, 4.0];
278
279    let configuration = create_configuration(client);
280
281    for &speed in &speeds {
282        println!("  ⚔ Generating at speed: {speed}x");
283
284        let request = CreateSpeechRequest::builder()
285            .model("tts-1".to_string())
286            .input(text.to_string())
287            .voice("echo".to_string())
288            .response_format(ResponseFormat::Mp3)
289            .speed(speed)
290            .build();
291
292        match audio_api::create_speech()
293            .configuration(&configuration)
294            .create_speech_request(request)
295            .call()
296            .await
297        {
298            Ok(response) => {
299                let audio_data = response.bytes().await.map_err(Error::Http)?;
300                let filename = format!("speed_{speed}.mp3");
301                save_audio_file(&audio_data, &filename)?;
302                println!("     āœ… Saved to: {filename} ({} bytes)", audio_data.len());
303            }
304            Err(e) => {
305                eprintln!("     āŒ Failed to generate audio at speed {speed}x: {e}");
306            }
307        }
308    }
309
310    println!("\nšŸ’” Note: Speed range is 0.25x to 4.0x normal speed");
311    println!("   - 0.25x: Very slow, good for learning pronunciation");
312    println!("   - 1.0x: Normal speed");
313    println!("   - 4.0x: Very fast, good for quick content consumption");
314
315    Ok(())
316}
317
318/// Example 5: Demonstrate streaming audio (where supported)
319async fn streaming_audio_example(client: &Client) -> Result<(), Error> {
320    println!("Attempting to generate streaming audio...");
321
322    let text = "This is a longer text that demonstrates streaming audio capabilities. Streaming allows you to start playing audio before the entire generation is complete, which is useful for real-time applications and longer content.";
323
324    let configuration = create_configuration(client);
325
326    // Try with gpt-4o-mini-tts which supports streaming
327    let request = CreateSpeechRequest::builder()
328        .model("gpt-4o-mini-tts".to_string())
329        .input(text.to_string())
330        .voice("shimmer".to_string())
331        .response_format(ResponseFormat::Mp3)
332        .stream_format(StreamFormat::Audio)
333        .build();
334
335    println!("  šŸ“” Attempting streaming generation...");
336
337    match audio_api::create_speech()
338        .configuration(&configuration)
339        .create_speech_request(request.clone())
340        .call()
341        .await
342    {
343        Ok(response) => {
344            let audio_data = response.bytes().await.map_err(Error::Http)?;
345            let filename = "streaming_example.mp3";
346            save_audio_file(&audio_data, filename)?;
347            println!(
348                "     āœ… Streaming audio saved to: {filename} ({} bytes)",
349                audio_data.len()
350            );
351
352            println!("\nšŸ’” Note: In a real streaming implementation, you would:");
353            println!("   - Process audio chunks as they arrive");
354            println!("   - Start playback before full generation is complete");
355            println!("   - Handle streaming format appropriately");
356        }
357        Err(e) => {
358            eprintln!("     āš ļø Streaming with gpt-4o-mini-tts failed, trying fallback: {e}");
359
360            // Fallback to regular generation
361            let fallback_request = CreateSpeechRequest::builder()
362                .model("tts-1-hd".to_string())
363                .input(text.to_string())
364                .voice("shimmer".to_string())
365                .response_format(ResponseFormat::Mp3)
366                .build();
367
368            match audio_api::create_speech()
369                .configuration(&configuration)
370                .create_speech_request(fallback_request)
371                .call()
372                .await
373            {
374                Ok(response) => {
375                    let audio_data = response.bytes().await.map_err(Error::Http)?;
376                    let filename = "fallback_example.mp3";
377                    save_audio_file(&audio_data, filename)?;
378                    println!(
379                        "     āœ… Fallback audio saved to: {filename} ({} bytes)",
380                        audio_data.len()
381                    );
382                }
383                Err(e) => {
384                    eprintln!("     āŒ Fallback generation also failed: {e}");
385                }
386            }
387        }
388    }
389
390    println!("\nšŸ’” Note: Streaming support varies by model:");
391    println!("   - gpt-4o-mini-tts: Supports streaming");
392    println!("   - tts-1, tts-1-hd: No streaming support");
393    println!("   - Stream formats: 'sse' (Server-Sent Events) or 'audio' (raw audio chunks)");
394
395    Ok(())
396}
397
398/// Helper function to create configuration from client
399fn create_configuration(client: &Client) -> Configuration {
400    let mut configuration = Configuration::new();
401    configuration.bearer_access_token = Some(client.config().api_key().to_string());
402
403    if let Some(base_url) = client.config().base_url() {
404        configuration.base_path = base_url.to_string();
405    }
406
407    if let Some(org_id) = client.config().organization_id() {
408        configuration.user_agent = Some(format!(
409            "openai-ergonomic/{} org/{}",
410            env!("CARGO_PKG_VERSION"),
411            org_id
412        ));
413    }
414
415    configuration
416}
417
418/// Helper function to save audio data to file
419fn save_audio_file(audio_data: &[u8], filename: &str) -> Result<(), Error> {
420    let path = PathBuf::from(filename);
421    let mut file = std::fs::File::create(&path).map_err(Error::File)?;
422    file.write_all(audio_data).map_err(Error::File)?;
423    Ok(())
424}
425
426/// Comprehensive error handling helper
427fn handle_api_error(error: &Error) {
428    match error {
429        Error::Api {
430            status,
431            message,
432            error_type,
433            error_code,
434        } => {
435            eprintln!("🚫 API Error [{status}]: {message}");
436            if let Some(error_type) = error_type {
437                eprintln!("   Type: {error_type}");
438            }
439            if let Some(error_code) = error_code {
440                eprintln!("   Code: {error_code}");
441            }
442
443            // Provide specific guidance based on error type
444            match *status {
445                401 => eprintln!("šŸ’” Check your API key: export OPENAI_API_KEY=\"your-key\""),
446                429 => eprintln!("šŸ’” Rate limited - try again in a moment"),
447                500..=599 => eprintln!("šŸ’” Server error - try again later"),
448                _ => {}
449            }
450        }
451        Error::InvalidRequest(msg) => {
452            eprintln!("🚫 Invalid Request: {msg}");
453            eprintln!("šŸ’” Check your request parameters");
454        }
455        Error::Config(msg) => {
456            eprintln!("🚫 Configuration Error: {msg}");
457            eprintln!("šŸ’” Check your client configuration");
458        }
459        Error::Http(err) => {
460            eprintln!("🚫 HTTP Error: {err}");
461            eprintln!("šŸ’” Check your network connection");
462        }
463        Error::Json(err) => {
464            eprintln!("🚫 JSON Error: {err}");
465            eprintln!("šŸ’” Response parsing failed - may be a temporary issue");
466        }
467        Error::Authentication(msg) => {
468            eprintln!("🚫 Authentication Error: {msg}");
469            eprintln!("šŸ’” Check your API key");
470        }
471        Error::RateLimit(msg) => {
472            eprintln!("🚫 Rate Limit Error: {msg}");
473            eprintln!("šŸ’” Try again in a moment");
474        }
475        Error::Stream(msg) => {
476            eprintln!("🚫 Stream Error: {msg}");
477            eprintln!("šŸ’” Connection issue with streaming");
478        }
479        Error::File(err) => {
480            eprintln!("🚫 File Error: {err}");
481            eprintln!("šŸ’” Check file permissions and paths");
482        }
483        Error::Builder(msg) => {
484            eprintln!("🚫 Builder Error: {msg}");
485            eprintln!("šŸ’” Check your request builder configuration");
486        }
487        Error::Internal(msg) => {
488            eprintln!("🚫 Internal Error: {msg}");
489            eprintln!("šŸ’” This may be a bug, please report it");
490        }
491        Error::StreamConnection { message } => {
492            eprintln!("🚫 Stream Connection Error: {message}");
493            eprintln!("šŸ’” Check your network connection");
494        }
495        Error::StreamParsing { message, chunk } => {
496            eprintln!("🚫 Stream Parsing Error: {message}");
497            eprintln!("   Problematic chunk: {chunk}");
498            eprintln!("šŸ’” The response stream may be corrupted");
499        }
500        Error::StreamBuffer { message } => {
501            eprintln!("🚫 Stream Buffer Error: {message}");
502            eprintln!("šŸ’” The stream buffer encountered an issue");
503        }
504    }
505}