audio_speech/
audio_speech.rs1#![allow(clippy::uninlined_format_args)]
2use openai_client_base::{
31 apis::{audio_api, configuration::Configuration},
32 models::{
33 create_speech_request::{ResponseFormat, StreamFormat},
34 CreateSpeechRequest,
35 },
36};
37use openai_ergonomic::{Client, Error};
38use std::io::Write;
39use std::path::PathBuf;
40
41#[tokio::main]
42async fn main() -> Result<(), Box<dyn std::error::Error>> {
43 println!(" OpenAI Ergonomic - Audio Speech (Text-to-Speech) Example\n");
44
45 let client = match Client::from_env() {
47 Ok(client_builder) => {
48 println!(" Client initialized successfully");
49 client_builder.build()
50 }
51 Err(e) => {
52 eprintln!(" Failed to initialize client: {e}");
53 eprintln!(" Make sure OPENAI_API_KEY is set in your environment");
54 return Err(e.into());
55 }
56 };
57
58 println!("\n Example 1: Basic Text-to-Speech");
60 println!("===================================");
61
62 match basic_text_to_speech(&client).await {
63 Ok(()) => println!(" Basic TTS example completed"),
64 Err(e) => {
65 eprintln!(" Basic TTS example failed: {e}");
66 handle_api_error(&e);
67 }
68 }
69
70 println!("\n Example 2: Voice Comparison");
72 println!("===============================");
73
74 match voice_comparison_example(&client).await {
75 Ok(()) => println!(" Voice comparison example completed"),
76 Err(e) => {
77 eprintln!(" Voice comparison example failed: {e}");
78 handle_api_error(&e);
79 }
80 }
81
82 println!("\n Example 3: Audio Format Options");
84 println!("===================================");
85
86 match audio_format_example(&client).await {
87 Ok(()) => println!(" Audio format example completed"),
88 Err(e) => {
89 eprintln!(" Audio format example failed: {e}");
90 handle_api_error(&e);
91 }
92 }
93
94 println!("\n Example 4: Speed Control");
96 println!("===========================");
97
98 match speed_control_example(&client).await {
99 Ok(()) => println!(" Speed control example completed"),
100 Err(e) => {
101 eprintln!(" Speed control example failed: {e}");
102 handle_api_error(&e);
103 }
104 }
105
106 println!("\n Example 5: Streaming Audio");
108 println!("==============================");
109
110 match streaming_audio_example(&client).await {
111 Ok(()) => println!(" Streaming audio example completed"),
112 Err(e) => {
113 eprintln!(" Streaming audio example failed: {e}");
114 handle_api_error(&e);
115 }
116 }
117
118 println!(
119 "\n All audio speech examples completed! Check the output files in the current directory."
120 );
121 Ok(())
122}
123
124async fn basic_text_to_speech(client: &Client) -> Result<(), Error> {
126 println!("Converting text to speech with default settings...");
127
128 let text = "Hello! This is a demonstration of OpenAI's text-to-speech capabilities using the openai-ergonomic crate.";
129
130 let request = CreateSpeechRequest::builder()
132 .model("tts-1".to_string())
133 .input(text.to_string())
134 .voice("alloy".to_string())
135 .response_format(ResponseFormat::Mp3)
136 .build();
137
138 let configuration = create_configuration(client);
151 let response = audio_api::create_speech()
152 .configuration(&configuration)
153 .create_speech_request(request)
154 .call()
155 .await
156 .map_err(|e| Error::Api {
157 status: 0,
158 message: e.to_string(),
159 error_type: None,
160 error_code: None,
161 })?;
162
163 let audio_data = response.bytes().await.map_err(Error::Http)?;
165 let filename = "basic_speech.mp3";
166 save_audio_file(&audio_data, filename)?;
167
168 println!(" Generated speech saved to: {filename}");
169 println!(" Text: \"{text}\"");
170 println!(" Voice: alloy");
171 println!(" Format: mp3");
172 println!(" Size: {} bytes", audio_data.len());
173
174 Ok(())
175}
176
177async fn voice_comparison_example(client: &Client) -> Result<(), Error> {
179 println!("Generating speech with different voices...");
180
181 let text = "The quick brown fox jumps over the lazy dog.";
182 let voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
183
184 let configuration = create_configuration(client);
185
186 for voice in &voices {
187 println!(" Generating with voice: {voice}");
188
189 let request = CreateSpeechRequest::builder()
190 .model("tts-1".to_string())
191 .input(text.to_string())
192 .voice((*voice).to_string())
193 .response_format(ResponseFormat::Mp3)
194 .build();
195
196 match audio_api::create_speech()
197 .configuration(&configuration)
198 .create_speech_request(request)
199 .call()
200 .await
201 {
202 Ok(response) => {
203 let audio_data = response.bytes().await.map_err(Error::Http)?;
204 let filename = format!("voice_{voice}.mp3");
205 save_audio_file(&audio_data, &filename)?;
206 println!(" Saved to: {filename} ({} bytes)", audio_data.len());
207 }
208 Err(e) => {
209 eprintln!(" Failed to generate audio for voice {voice}: {e}");
210 }
211 }
212 }
213
214 println!("\n Note: Listen to the generated files to compare different voice characteristics");
215
216 Ok(())
217}
218
219async fn audio_format_example(client: &Client) -> Result<(), Error> {
221 println!("Generating speech in different audio formats...");
222
223 let text = "This demonstrates various audio format options.";
224 let formats = [
225 (ResponseFormat::Mp3, "mp3"),
226 (ResponseFormat::Opus, "opus"),
227 (ResponseFormat::Aac, "aac"),
228 (ResponseFormat::Flac, "flac"),
229 (ResponseFormat::Wav, "wav"),
230 (ResponseFormat::Pcm, "pcm"),
231 ];
232
233 let configuration = create_configuration(client);
234
235 for (format, extension) in &formats {
236 println!(" Generating in format: {extension}");
237
238 let request = CreateSpeechRequest::builder()
239 .model("tts-1".to_string())
240 .input(text.to_string())
241 .voice("nova".to_string())
242 .response_format(*format)
243 .build();
244
245 match audio_api::create_speech()
246 .configuration(&configuration)
247 .create_speech_request(request)
248 .call()
249 .await
250 {
251 Ok(response) => {
252 let audio_data = response.bytes().await.map_err(Error::Http)?;
253 let filename = format!("format_example.{extension}");
254 save_audio_file(&audio_data, &filename)?;
255 println!(" Saved to: {filename} ({} bytes)", audio_data.len());
256 }
257 Err(e) => {
258 eprintln!(" Failed to generate audio in format {extension}: {e}");
259 }
260 }
261 }
262
263 println!("\n Note: Different formats have different quality/compression trade-offs:");
264 println!(" - MP3: Good compression, widely supported");
265 println!(" - OPUS: Excellent compression for voice, modern codec");
266 println!(" - AAC: Good compression, Apple ecosystem friendly");
267 println!(" - FLAC: Lossless compression, larger files");
268 println!(" - WAV: Uncompressed, largest files, universal support");
269 println!(" - PCM: Raw audio data, suitable for further processing");
270
271 Ok(())
272}
273
274async fn speed_control_example(client: &Client) -> Result<(), Error> {
276 println!("Generating speech at different speeds...");
277
278 let text = "This sentence will be spoken at different speeds to demonstrate the speed control feature.";
279 let speeds = [0.25, 0.5, 1.0, 1.5, 2.0, 4.0];
280
281 let configuration = create_configuration(client);
282
283 for &speed in &speeds {
284 println!(" Generating at speed: {speed}x");
285
286 let request = CreateSpeechRequest::builder()
287 .model("tts-1".to_string())
288 .input(text.to_string())
289 .voice("echo".to_string())
290 .response_format(ResponseFormat::Mp3)
291 .speed(speed)
292 .build();
293
294 match audio_api::create_speech()
295 .configuration(&configuration)
296 .create_speech_request(request)
297 .call()
298 .await
299 {
300 Ok(response) => {
301 let audio_data = response.bytes().await.map_err(Error::Http)?;
302 let filename = format!("speed_{speed}.mp3");
303 save_audio_file(&audio_data, &filename)?;
304 println!(" Saved to: {filename} ({} bytes)", audio_data.len());
305 }
306 Err(e) => {
307 eprintln!(" Failed to generate audio at speed {speed}x: {e}");
308 }
309 }
310 }
311
312 println!("\n Note: Speed range is 0.25x to 4.0x normal speed");
313 println!(" - 0.25x: Very slow, good for learning pronunciation");
314 println!(" - 1.0x: Normal speed");
315 println!(" - 4.0x: Very fast, good for quick content consumption");
316
317 Ok(())
318}
319
320async fn streaming_audio_example(client: &Client) -> Result<(), Error> {
322 println!("Attempting to generate streaming audio...");
323
324 let text = "This is a longer text that demonstrates streaming audio capabilities. Streaming allows you to start playing audio before the entire generation is complete, which is useful for real-time applications and longer content.";
325
326 let configuration = create_configuration(client);
327
328 let request = CreateSpeechRequest::builder()
330 .model("gpt-4o-mini-tts".to_string())
331 .input(text.to_string())
332 .voice("shimmer".to_string())
333 .response_format(ResponseFormat::Mp3)
334 .stream_format(StreamFormat::Audio)
335 .build();
336
337 println!(" Attempting streaming generation...");
338
339 match audio_api::create_speech()
340 .configuration(&configuration)
341 .create_speech_request(request.clone())
342 .call()
343 .await
344 {
345 Ok(response) => {
346 let audio_data = response.bytes().await.map_err(Error::Http)?;
347 let filename = "streaming_example.mp3";
348 save_audio_file(&audio_data, filename)?;
349 println!(
350 " Streaming audio saved to: {filename} ({} bytes)",
351 audio_data.len()
352 );
353
354 println!("\n Note: In a real streaming implementation, you would:");
355 println!(" - Process audio chunks as they arrive");
356 println!(" - Start playback before full generation is complete");
357 println!(" - Handle streaming format appropriately");
358 }
359 Err(e) => {
360 eprintln!(" Streaming with gpt-4o-mini-tts failed, trying fallback: {e}");
361
362 let fallback_request = CreateSpeechRequest::builder()
364 .model("tts-1-hd".to_string())
365 .input(text.to_string())
366 .voice("shimmer".to_string())
367 .response_format(ResponseFormat::Mp3)
368 .build();
369
370 match audio_api::create_speech()
371 .configuration(&configuration)
372 .create_speech_request(fallback_request)
373 .call()
374 .await
375 {
376 Ok(response) => {
377 let audio_data = response.bytes().await.map_err(Error::Http)?;
378 let filename = "fallback_example.mp3";
379 save_audio_file(&audio_data, filename)?;
380 println!(
381 " Fallback audio saved to: {filename} ({} bytes)",
382 audio_data.len()
383 );
384 }
385 Err(e) => {
386 eprintln!(" Fallback generation also failed: {e}");
387 }
388 }
389 }
390 }
391
392 println!("\n Note: Streaming support varies by model:");
393 println!(" - gpt-4o-mini-tts: Supports streaming");
394 println!(" - tts-1, tts-1-hd: No streaming support");
395 println!(" - Stream formats: 'sse' (Server-Sent Events) or 'audio' (raw audio chunks)");
396
397 Ok(())
398}
399
400fn create_configuration(client: &Client) -> Configuration {
402 let mut configuration = Configuration::new();
403 configuration.bearer_access_token = Some(client.config().api_key().to_string());
404
405 if let Some(base_url) = client.config().base_url() {
406 configuration.base_path = base_url.to_string();
407 }
408
409 if let Some(org_id) = client.config().organization_id() {
410 configuration.user_agent = Some(format!(
411 "openai-ergonomic/{} org/{}",
412 env!("CARGO_PKG_VERSION"),
413 org_id
414 ));
415 }
416
417 configuration
418}
419
420fn save_audio_file(audio_data: &[u8], filename: &str) -> Result<(), Error> {
422 let path = PathBuf::from(filename);
423 let mut file = std::fs::File::create(&path).map_err(Error::File)?;
424 file.write_all(audio_data).map_err(Error::File)?;
425 Ok(())
426}
427
428fn handle_api_error(error: &Error) {
430 match error {
431 Error::Api {
432 status,
433 message,
434 error_type,
435 error_code,
436 } => {
437 eprintln!(" API Error [{status}]: {message}");
438 if let Some(error_type) = error_type {
439 eprintln!(" Type: {error_type}");
440 }
441 if let Some(error_code) = error_code {
442 eprintln!(" Code: {error_code}");
443 }
444
445 match *status {
447 401 => eprintln!(" Check your API key: export OPENAI_API_KEY=\"your-key\""),
448 429 => eprintln!(" Rate limited - try again in a moment"),
449 500..=599 => eprintln!(" Server error - try again later"),
450 _ => {}
451 }
452 }
453 Error::InvalidRequest(msg) => {
454 eprintln!(" Invalid Request: {msg}");
455 eprintln!(" Check your request parameters");
456 }
457 Error::Config(msg) => {
458 eprintln!(" Configuration Error: {msg}");
459 eprintln!(" Check your client configuration");
460 }
461 Error::Http(err) => {
462 eprintln!(" HTTP Error: {err}");
463 eprintln!(" Check your network connection");
464 }
465 Error::HttpMiddleware(err) => {
466 eprintln!(" HTTP Middleware Error: {err}");
467 eprintln!(" Check your network connection and middleware configuration");
468 }
469 Error::Json(err) => {
470 eprintln!(" JSON Error: {err}");
471 eprintln!(" Response parsing failed - may be a temporary issue");
472 }
473 Error::Authentication(msg) => {
474 eprintln!(" Authentication Error: {msg}");
475 eprintln!(" Check your API key");
476 }
477 Error::RateLimit(msg) => {
478 eprintln!(" Rate Limit Error: {msg}");
479 eprintln!(" Try again in a moment");
480 }
481 Error::Stream(msg) => {
482 eprintln!(" Stream Error: {msg}");
483 eprintln!(" Connection issue with streaming");
484 }
485 Error::File(err) => {
486 eprintln!(" File Error: {err}");
487 eprintln!(" Check file permissions and paths");
488 }
489 Error::Builder(msg) => {
490 eprintln!(" Builder Error: {msg}");
491 eprintln!(" Check your request builder configuration");
492 }
493 Error::Internal(msg) => {
494 eprintln!(" Internal Error: {msg}");
495 eprintln!(" This may be a bug, please report it");
496 }
497 Error::StreamConnection { message } => {
498 eprintln!(" Stream Connection Error: {message}");
499 eprintln!(" Check your network connection");
500 }
501 Error::StreamParsing { message, chunk } => {
502 eprintln!(" Stream Parsing Error: {message}");
503 eprintln!(" Problematic chunk: {chunk}");
504 eprintln!(" The response stream may be corrupted");
505 }
506 Error::StreamBuffer { message } => {
507 eprintln!(" Stream Buffer Error: {message}");
508 eprintln!(" The stream buffer encountered an issue");
509 }
510 }
511}