audio_speech/
audio_speech.rs

1#![allow(clippy::uninlined_format_args)]
2//! Audio Speech (Text-to-Speech) example for the openai-ergonomic crate.
3//!
4//! This example demonstrates text-to-speech functionality using `OpenAI`'s TTS models.
5//! It shows how to generate audio from text with different voices, formats, and options.
6//!
7//! ## Features Demonstrated
8//!
9//! - Basic text-to-speech conversion
10//! - Different voice options (alloy, echo, fable, onyx, nova, shimmer)
11//! - Multiple audio formats (mp3, opus, aac, flac, wav, pcm)
12//! - Speed control for generated audio
13//! - Streaming audio output
14//! - File handling for audio output
15//! - Model selection (tts-1, tts-1-hd, gpt-4o-mini-tts)
16//!
17//! ## Prerequisites
18//!
19//! Set your `OpenAI` API key:
20//! ```bash
21//! export OPENAI_API_KEY="your-key-here"
22//! ```
23//!
24//! ## Usage
25//!
26//! ```bash
27//! cargo run --example audio_speech
28//! ```
29
30use openai_client_base::{
31    apis::{audio_api, configuration::Configuration},
32    models::{
33        create_speech_request::{ResponseFormat, StreamFormat},
34        CreateSpeechRequest,
35    },
36};
37use openai_ergonomic::{Client, Error};
38use std::io::Write;
39use std::path::PathBuf;
40
41#[tokio::main]
42async fn main() -> Result<(), Box<dyn std::error::Error>> {
43    println!(" OpenAI Ergonomic - Audio Speech (Text-to-Speech) Example\n");
44
45    // Initialize client from environment variables
46    let client = match Client::from_env() {
47        Ok(client_builder) => {
48            println!(" Client initialized successfully");
49            client_builder.build()
50        }
51        Err(e) => {
52            eprintln!(" Failed to initialize client: {e}");
53            eprintln!(" Make sure OPENAI_API_KEY is set in your environment");
54            return Err(e.into());
55        }
56    };
57
58    // Example 1: Basic Text-to-Speech
59    println!("\n Example 1: Basic Text-to-Speech");
60    println!("===================================");
61
62    match basic_text_to_speech(&client).await {
63        Ok(()) => println!(" Basic TTS example completed"),
64        Err(e) => {
65            eprintln!(" Basic TTS example failed: {e}");
66            handle_api_error(&e);
67        }
68    }
69
70    // Example 2: Voice Comparison
71    println!("\n Example 2: Voice Comparison");
72    println!("===============================");
73
74    match voice_comparison_example(&client).await {
75        Ok(()) => println!(" Voice comparison example completed"),
76        Err(e) => {
77            eprintln!(" Voice comparison example failed: {e}");
78            handle_api_error(&e);
79        }
80    }
81
82    // Example 3: Audio Format Options
83    println!("\n Example 3: Audio Format Options");
84    println!("===================================");
85
86    match audio_format_example(&client).await {
87        Ok(()) => println!(" Audio format example completed"),
88        Err(e) => {
89            eprintln!(" Audio format example failed: {e}");
90            handle_api_error(&e);
91        }
92    }
93
94    // Example 4: Speed Control
95    println!("\n Example 4: Speed Control");
96    println!("===========================");
97
98    match speed_control_example(&client).await {
99        Ok(()) => println!(" Speed control example completed"),
100        Err(e) => {
101            eprintln!(" Speed control example failed: {e}");
102            handle_api_error(&e);
103        }
104    }
105
106    // Example 5: Streaming Audio (Note: requires model support)
107    println!("\n Example 5: Streaming Audio");
108    println!("==============================");
109
110    match streaming_audio_example(&client).await {
111        Ok(()) => println!(" Streaming audio example completed"),
112        Err(e) => {
113            eprintln!(" Streaming audio example failed: {e}");
114            handle_api_error(&e);
115        }
116    }
117
118    println!(
119        "\n All audio speech examples completed! Check the output files in the current directory."
120    );
121    Ok(())
122}
123
124/// Example 1: Basic text-to-speech with default settings
125async fn basic_text_to_speech(client: &Client) -> Result<(), Error> {
126    println!("Converting text to speech with default settings...");
127
128    let text = "Hello! This is a demonstration of OpenAI's text-to-speech capabilities using the openai-ergonomic crate.";
129
130    // Create speech request with basic settings
131    let request = CreateSpeechRequest::builder()
132        .model("tts-1".to_string())
133        .input(text.to_string())
134        .voice("alloy".to_string())
135        .response_format(ResponseFormat::Mp3)
136        .build();
137
138    // Note: Once audio builders are implemented, this would look like:
139    // let audio_response = client
140    //     .audio()
141    //     .speech()
142    //     .model("tts-1")
143    //     .input(text)
144    //     .voice("alloy")
145    //     .format("mp3")
146    //     .execute()
147    //     .await?;
148
149    // For now, we'll use the base client directly
150    let configuration = create_configuration(client);
151    let response = audio_api::create_speech()
152        .configuration(&configuration)
153        .create_speech_request(request)
154        .call()
155        .await
156        .map_err(|e| Error::Api {
157            status: 0,
158            message: e.to_string(),
159            error_type: None,
160            error_code: None,
161        })?;
162
163    // Save the audio data to file
164    let audio_data = response.bytes().await.map_err(Error::Http)?;
165    let filename = "basic_speech.mp3";
166    save_audio_file(&audio_data, filename)?;
167
168    println!(" Generated speech saved to: {filename}");
169    println!("   Text: \"{text}\"");
170    println!("   Voice: alloy");
171    println!("   Format: mp3");
172    println!("   Size: {} bytes", audio_data.len());
173
174    Ok(())
175}
176
177/// Example 2: Demonstrate different voice options
178async fn voice_comparison_example(client: &Client) -> Result<(), Error> {
179    println!("Generating speech with different voices...");
180
181    let text = "The quick brown fox jumps over the lazy dog.";
182    let voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
183
184    let configuration = create_configuration(client);
185
186    for voice in &voices {
187        println!("   Generating with voice: {voice}");
188
189        let request = CreateSpeechRequest::builder()
190            .model("tts-1".to_string())
191            .input(text.to_string())
192            .voice((*voice).to_string())
193            .response_format(ResponseFormat::Mp3)
194            .build();
195
196        match audio_api::create_speech()
197            .configuration(&configuration)
198            .create_speech_request(request)
199            .call()
200            .await
201        {
202            Ok(response) => {
203                let audio_data = response.bytes().await.map_err(Error::Http)?;
204                let filename = format!("voice_{voice}.mp3");
205                save_audio_file(&audio_data, &filename)?;
206                println!("      Saved to: {filename} ({} bytes)", audio_data.len());
207            }
208            Err(e) => {
209                eprintln!("      Failed to generate audio for voice {voice}: {e}");
210            }
211        }
212    }
213
214    println!("\n Note: Listen to the generated files to compare different voice characteristics");
215
216    Ok(())
217}
218
219/// Example 3: Demonstrate different audio formats
220async fn audio_format_example(client: &Client) -> Result<(), Error> {
221    println!("Generating speech in different audio formats...");
222
223    let text = "This demonstrates various audio format options.";
224    let formats = [
225        (ResponseFormat::Mp3, "mp3"),
226        (ResponseFormat::Opus, "opus"),
227        (ResponseFormat::Aac, "aac"),
228        (ResponseFormat::Flac, "flac"),
229        (ResponseFormat::Wav, "wav"),
230        (ResponseFormat::Pcm, "pcm"),
231    ];
232
233    let configuration = create_configuration(client);
234
235    for (format, extension) in &formats {
236        println!("   Generating in format: {extension}");
237
238        let request = CreateSpeechRequest::builder()
239            .model("tts-1".to_string())
240            .input(text.to_string())
241            .voice("nova".to_string())
242            .response_format(*format)
243            .build();
244
245        match audio_api::create_speech()
246            .configuration(&configuration)
247            .create_speech_request(request)
248            .call()
249            .await
250        {
251            Ok(response) => {
252                let audio_data = response.bytes().await.map_err(Error::Http)?;
253                let filename = format!("format_example.{extension}");
254                save_audio_file(&audio_data, &filename)?;
255                println!("      Saved to: {filename} ({} bytes)", audio_data.len());
256            }
257            Err(e) => {
258                eprintln!("      Failed to generate audio in format {extension}: {e}");
259            }
260        }
261    }
262
263    println!("\n Note: Different formats have different quality/compression trade-offs:");
264    println!("   - MP3: Good compression, widely supported");
265    println!("   - OPUS: Excellent compression for voice, modern codec");
266    println!("   - AAC: Good compression, Apple ecosystem friendly");
267    println!("   - FLAC: Lossless compression, larger files");
268    println!("   - WAV: Uncompressed, largest files, universal support");
269    println!("   - PCM: Raw audio data, suitable for further processing");
270
271    Ok(())
272}
273
274/// Example 4: Demonstrate speed control
275async fn speed_control_example(client: &Client) -> Result<(), Error> {
276    println!("Generating speech at different speeds...");
277
278    let text = "This sentence will be spoken at different speeds to demonstrate the speed control feature.";
279    let speeds = [0.25, 0.5, 1.0, 1.5, 2.0, 4.0];
280
281    let configuration = create_configuration(client);
282
283    for &speed in &speeds {
284        println!("   Generating at speed: {speed}x");
285
286        let request = CreateSpeechRequest::builder()
287            .model("tts-1".to_string())
288            .input(text.to_string())
289            .voice("echo".to_string())
290            .response_format(ResponseFormat::Mp3)
291            .speed(speed)
292            .build();
293
294        match audio_api::create_speech()
295            .configuration(&configuration)
296            .create_speech_request(request)
297            .call()
298            .await
299        {
300            Ok(response) => {
301                let audio_data = response.bytes().await.map_err(Error::Http)?;
302                let filename = format!("speed_{speed}.mp3");
303                save_audio_file(&audio_data, &filename)?;
304                println!("      Saved to: {filename} ({} bytes)", audio_data.len());
305            }
306            Err(e) => {
307                eprintln!("      Failed to generate audio at speed {speed}x: {e}");
308            }
309        }
310    }
311
312    println!("\n Note: Speed range is 0.25x to 4.0x normal speed");
313    println!("   - 0.25x: Very slow, good for learning pronunciation");
314    println!("   - 1.0x: Normal speed");
315    println!("   - 4.0x: Very fast, good for quick content consumption");
316
317    Ok(())
318}
319
320/// Example 5: Demonstrate streaming audio (where supported)
321async fn streaming_audio_example(client: &Client) -> Result<(), Error> {
322    println!("Attempting to generate streaming audio...");
323
324    let text = "This is a longer text that demonstrates streaming audio capabilities. Streaming allows you to start playing audio before the entire generation is complete, which is useful for real-time applications and longer content.";
325
326    let configuration = create_configuration(client);
327
328    // Try with gpt-4o-mini-tts which supports streaming
329    let request = CreateSpeechRequest::builder()
330        .model("gpt-4o-mini-tts".to_string())
331        .input(text.to_string())
332        .voice("shimmer".to_string())
333        .response_format(ResponseFormat::Mp3)
334        .stream_format(StreamFormat::Audio)
335        .build();
336
337    println!("   Attempting streaming generation...");
338
339    match audio_api::create_speech()
340        .configuration(&configuration)
341        .create_speech_request(request.clone())
342        .call()
343        .await
344    {
345        Ok(response) => {
346            let audio_data = response.bytes().await.map_err(Error::Http)?;
347            let filename = "streaming_example.mp3";
348            save_audio_file(&audio_data, filename)?;
349            println!(
350                "      Streaming audio saved to: {filename} ({} bytes)",
351                audio_data.len()
352            );
353
354            println!("\n Note: In a real streaming implementation, you would:");
355            println!("   - Process audio chunks as they arrive");
356            println!("   - Start playback before full generation is complete");
357            println!("   - Handle streaming format appropriately");
358        }
359        Err(e) => {
360            eprintln!("      Streaming with gpt-4o-mini-tts failed, trying fallback: {e}");
361
362            // Fallback to regular generation
363            let fallback_request = CreateSpeechRequest::builder()
364                .model("tts-1-hd".to_string())
365                .input(text.to_string())
366                .voice("shimmer".to_string())
367                .response_format(ResponseFormat::Mp3)
368                .build();
369
370            match audio_api::create_speech()
371                .configuration(&configuration)
372                .create_speech_request(fallback_request)
373                .call()
374                .await
375            {
376                Ok(response) => {
377                    let audio_data = response.bytes().await.map_err(Error::Http)?;
378                    let filename = "fallback_example.mp3";
379                    save_audio_file(&audio_data, filename)?;
380                    println!(
381                        "      Fallback audio saved to: {filename} ({} bytes)",
382                        audio_data.len()
383                    );
384                }
385                Err(e) => {
386                    eprintln!("      Fallback generation also failed: {e}");
387                }
388            }
389        }
390    }
391
392    println!("\n Note: Streaming support varies by model:");
393    println!("   - gpt-4o-mini-tts: Supports streaming");
394    println!("   - tts-1, tts-1-hd: No streaming support");
395    println!("   - Stream formats: 'sse' (Server-Sent Events) or 'audio' (raw audio chunks)");
396
397    Ok(())
398}
399
400/// Helper function to create configuration from client
401fn create_configuration(client: &Client) -> Configuration {
402    let mut configuration = Configuration::new();
403    configuration.bearer_access_token = Some(client.config().api_key().to_string());
404
405    if let Some(base_url) = client.config().base_url() {
406        configuration.base_path = base_url.to_string();
407    }
408
409    if let Some(org_id) = client.config().organization_id() {
410        configuration.user_agent = Some(format!(
411            "openai-ergonomic/{} org/{}",
412            env!("CARGO_PKG_VERSION"),
413            org_id
414        ));
415    }
416
417    configuration
418}
419
420/// Helper function to save audio data to file
421fn save_audio_file(audio_data: &[u8], filename: &str) -> Result<(), Error> {
422    let path = PathBuf::from(filename);
423    let mut file = std::fs::File::create(&path).map_err(Error::File)?;
424    file.write_all(audio_data).map_err(Error::File)?;
425    Ok(())
426}
427
428/// Comprehensive error handling helper
429fn handle_api_error(error: &Error) {
430    match error {
431        Error::Api {
432            status,
433            message,
434            error_type,
435            error_code,
436        } => {
437            eprintln!(" API Error [{status}]: {message}");
438            if let Some(error_type) = error_type {
439                eprintln!("   Type: {error_type}");
440            }
441            if let Some(error_code) = error_code {
442                eprintln!("   Code: {error_code}");
443            }
444
445            // Provide specific guidance based on error type
446            match *status {
447                401 => eprintln!(" Check your API key: export OPENAI_API_KEY=\"your-key\""),
448                429 => eprintln!(" Rate limited - try again in a moment"),
449                500..=599 => eprintln!(" Server error - try again later"),
450                _ => {}
451            }
452        }
453        Error::InvalidRequest(msg) => {
454            eprintln!(" Invalid Request: {msg}");
455            eprintln!(" Check your request parameters");
456        }
457        Error::Config(msg) => {
458            eprintln!(" Configuration Error: {msg}");
459            eprintln!(" Check your client configuration");
460        }
461        Error::Http(err) => {
462            eprintln!(" HTTP Error: {err}");
463            eprintln!(" Check your network connection");
464        }
465        Error::HttpMiddleware(err) => {
466            eprintln!(" HTTP Middleware Error: {err}");
467            eprintln!(" Check your network connection and middleware configuration");
468        }
469        Error::Json(err) => {
470            eprintln!(" JSON Error: {err}");
471            eprintln!(" Response parsing failed - may be a temporary issue");
472        }
473        Error::Authentication(msg) => {
474            eprintln!(" Authentication Error: {msg}");
475            eprintln!(" Check your API key");
476        }
477        Error::RateLimit(msg) => {
478            eprintln!(" Rate Limit Error: {msg}");
479            eprintln!(" Try again in a moment");
480        }
481        Error::Stream(msg) => {
482            eprintln!(" Stream Error: {msg}");
483            eprintln!(" Connection issue with streaming");
484        }
485        Error::File(err) => {
486            eprintln!(" File Error: {err}");
487            eprintln!(" Check file permissions and paths");
488        }
489        Error::Builder(msg) => {
490            eprintln!(" Builder Error: {msg}");
491            eprintln!(" Check your request builder configuration");
492        }
493        Error::Internal(msg) => {
494            eprintln!(" Internal Error: {msg}");
495            eprintln!(" This may be a bug, please report it");
496        }
497        Error::StreamConnection { message } => {
498            eprintln!(" Stream Connection Error: {message}");
499            eprintln!(" Check your network connection");
500        }
501        Error::StreamParsing { message, chunk } => {
502            eprintln!(" Stream Parsing Error: {message}");
503            eprintln!("   Problematic chunk: {chunk}");
504            eprintln!(" The response stream may be corrupted");
505        }
506        Error::StreamBuffer { message } => {
507            eprintln!(" Stream Buffer Error: {message}");
508            eprintln!(" The stream buffer encountered an issue");
509        }
510    }
511}