1#![allow(clippy::uninlined_format_args)]
2use openai_client_base::{
36 apis::{audio_api, configuration::Configuration},
37 models::{
38 create_speech_request::ResponseFormat as SpeechResponseFormat,
39 AudioResponseFormat,
40 CreateSpeechRequest,
41 TranscriptionInclude,
43 },
44};
45use openai_ergonomic::{Client, Error};
46use std::io::Write;
47use std::path::PathBuf;
48
49#[tokio::main]
50async fn main() -> Result<(), Box<dyn std::error::Error>> {
51 println!(" OpenAI Ergonomic - Audio Transcription (Speech-to-Text) Example\n");
52
53 let client = match Client::from_env() {
55 Ok(client_builder) => {
56 println!(" Client initialized successfully");
57 client_builder.build()
58 }
59 Err(e) => {
60 eprintln!(" Failed to initialize client: {e}");
61 eprintln!(" Make sure OPENAI_API_KEY is set in your environment");
62 return Err(e.into());
63 }
64 };
65
66 println!("\n Preparing sample audio files...");
68 match create_sample_audio_files(&client).await {
69 Ok(()) => println!(" Sample audio files ready"),
70 Err(e) => {
71 eprintln!(" Failed to create sample audio files: {e}");
72 eprintln!(" You may need to provide your own audio files");
73 }
74 }
75
76 println!("\n Example 1: Basic Speech-to-Text Transcription");
78 println!("===============================================");
79
80 match basic_transcription_example(&client).await {
81 Ok(()) => println!(" Basic transcription example completed"),
82 Err(e) => {
83 eprintln!(" Basic transcription example failed: {e}");
84 handle_api_error(&e);
85 }
86 }
87
88 println!("\n Example 2: Response Format Comparison");
90 println!("==========================================");
91
92 match response_format_example(&client).await {
93 Ok(()) => println!(" Response format example completed"),
94 Err(e) => {
95 eprintln!(" Response format example failed: {e}");
96 handle_api_error(&e);
97 }
98 }
99
100 println!("\n⏰ Example 3: Detailed Transcription with Timestamps");
102 println!("==================================================");
103
104 match detailed_transcription_example(&client).await {
105 Ok(()) => println!(" Detailed transcription example completed"),
106 Err(e) => {
107 eprintln!(" Detailed transcription example failed: {e}");
108 handle_api_error(&e);
109 }
110 }
111
112 println!("\n Example 4: Audio Translation to English");
114 println!("===========================================");
115
116 match translation_example(&client).await {
117 Ok(()) => println!(" Translation example completed"),
118 Err(e) => {
119 eprintln!(" Translation example failed: {e}");
120 handle_api_error(&e);
121 }
122 }
123
124 println!("\n Example 5: Advanced Transcription Options");
126 println!("============================================");
127
128 match advanced_options_example(&client).await {
129 Ok(()) => println!(" Advanced options example completed"),
130 Err(e) => {
131 eprintln!(" Advanced options example failed: {e}");
132 handle_api_error(&e);
133 }
134 }
135
136 println!("\n All audio transcription examples completed! Check the console output above for results.");
137 Ok(())
138}
139
140async fn create_sample_audio_files(client: &Client) -> Result<(), Error> {
142 let sample_files = [
143 ("sample_english.mp3", "Hello, this is a sample English audio for transcription testing. The quick brown fox jumps over the lazy dog."),
144 ("sample_long.mp3", "This is a longer audio sample that will be used to demonstrate timestamp extraction and segmentation features. It contains multiple sentences with pauses between them. The purpose is to show how the transcription API can break down longer audio into meaningful segments with accurate timing information."),
145 ("sample_numbers.mp3", "Here are some numbers for testing: one, two, three, four, five. The year is twenty twenty-four. My phone number is five five five, one two three four."),
146 ];
147
148 let configuration = create_configuration(client);
149
150 for (filename, text) in &sample_files {
151 let path = PathBuf::from(filename);
152 if path.exists() {
153 println!(" Sample audio already exists: {filename}");
154 } else {
155 println!(" Creating sample audio: {filename}");
156
157 let request = CreateSpeechRequest::builder()
158 .model("tts-1".to_string())
159 .input((*text).to_string())
160 .voice("alloy".to_string())
161 .response_format(SpeechResponseFormat::Mp3)
162 .speed(0.9) .build();
164
165 match audio_api::create_speech()
166 .configuration(&configuration)
167 .create_speech_request(request)
168 .call()
169 .await
170 {
171 Ok(response) => {
172 let audio_data = response.bytes().await.map_err(Error::Http)?;
173 save_audio_file(&audio_data, filename)?;
174 println!(" Created: {filename}");
175 }
176 Err(e) => {
177 eprintln!(" Failed to create {filename}: {e}");
178 }
179 }
180 }
181 }
182
183 Ok(())
184}
185
186async fn basic_transcription_example(client: &Client) -> Result<(), Error> {
188 println!("Performing basic speech-to-text transcription...");
189
190 let audio_file = PathBuf::from("sample_english.mp3");
191 if !audio_file.exists() {
192 eprintln!(" Audio file not found: {}", audio_file.display());
193 eprintln!(" Run the audio creation step first or provide your own audio file");
194 return Ok(());
195 }
196
197 let configuration = create_configuration(client);
208
209 println!(" Transcribing: {}", audio_file.display());
210
211 match audio_api::create_transcription()
212 .configuration(&configuration)
213 .file(audio_file.clone())
214 .model("whisper-1")
215 .response_format(AudioResponseFormat::Json)
216 .call()
217 .await
218 {
219 Ok(response) => {
220 println!(" Transcription Results:");
221 println!(" Text: \"{}\"", response.text);
222 println!(" Language: {}", response.language);
223 println!(" Duration: {:.2} seconds", response.duration);
224
225 if let Some(usage) = &response.usage {
226 println!(" Usage: {} seconds", usage.seconds);
227 }
228
229 let output_file = "basic_transcription.txt";
231 save_text_file(&response.text, output_file)?;
232 println!(" Saved transcription to: {output_file}");
233 }
234 Err(e) => {
235 eprintln!(" Transcription failed: {e}");
236 return Err(Error::Api {
237 status: 0,
238 message: e.to_string(),
239 error_type: None,
240 error_code: None,
241 });
242 }
243 }
244
245 Ok(())
246}
247
248async fn response_format_example(client: &Client) -> Result<(), Error> {
250 println!("Comparing different response formats...");
251
252 let audio_file = PathBuf::from("sample_english.mp3");
253 if !audio_file.exists() {
254 eprintln!(" Audio file not found: {}", audio_file.display());
255 return Ok(());
256 }
257
258 let formats = [
259 (
260 AudioResponseFormat::Json,
261 "json",
262 "JSON format with metadata",
263 ),
264 (AudioResponseFormat::Text, "text", "Plain text only"),
265 (AudioResponseFormat::Srt, "srt", "SubRip subtitle format"),
266 (
267 AudioResponseFormat::VerboseJson,
268 "verbose_json",
269 "JSON with detailed timing",
270 ),
271 (AudioResponseFormat::Vtt, "vtt", "WebVTT subtitle format"),
272 ];
273
274 let configuration = create_configuration(client);
275
276 for (format, extension, description) in &formats {
277 println!(" Testing format: {description}");
278
279 match audio_api::create_transcription()
280 .configuration(&configuration)
281 .file(audio_file.clone())
282 .model("whisper-1")
283 .response_format(*format)
284 .call()
285 .await
286 {
287 Ok(response) => {
288 let filename = format!("transcription_sample.{extension}");
289
290 match format {
291 AudioResponseFormat::Text => {
292 save_text_file(&response.text, &filename)?;
294 println!(" Saved as: {filename}");
295 }
296 AudioResponseFormat::Json | AudioResponseFormat::VerboseJson => {
297 let json_output = serde_json::to_string_pretty(&response)
299 .unwrap_or_else(|_| response.text.clone());
300 save_text_file(&json_output, &filename)?;
301 println!(" Saved as: {filename}");
302
303 if *format == AudioResponseFormat::VerboseJson {
304 if let Some(segments) = &response.segments {
305 println!(" Found {} segments", segments.len());
306 }
307 if let Some(words) = &response.words {
308 println!(" Found {} words with timestamps", words.len());
309 }
310 }
311 }
312 AudioResponseFormat::Srt | AudioResponseFormat::Vtt => {
313 save_text_file(&response.text, &filename)?;
315 println!(" Saved as: {filename}");
316 }
317 }
318 }
319 Err(e) => {
320 eprintln!(" Failed to transcribe in format {extension}: {e}");
321 }
322 }
323 }
324
325 println!("\n Note: Different formats serve different purposes:");
326 println!(" - JSON: Basic transcription with metadata");
327 println!(" - Text: Just the transcribed text, no metadata");
328 println!(" - SRT: SubRip subtitle format for video");
329 println!(" - Verbose JSON: Includes word-level timestamps and confidence");
330 println!(" - VTT: WebVTT format for web-based subtitles");
331
332 Ok(())
333}
334
335async fn detailed_transcription_example(client: &Client) -> Result<(), Error> {
337 println!("Performing detailed transcription with timestamps...");
338
339 let audio_file = PathBuf::from("sample_long.mp3");
340 if !audio_file.exists() {
341 eprintln!(" Audio file not found: {}", audio_file.display());
342 return Ok(());
343 }
344
345 let configuration = create_configuration(client);
346
347 println!(" Transcribing with detailed timing information...");
348
349 match audio_api::create_transcription()
351 .configuration(&configuration)
352 .file(audio_file.clone())
353 .model("whisper-1")
354 .response_format(AudioResponseFormat::VerboseJson)
355 .timestamp_granularities(vec!["word".to_string(), "segment".to_string()])
356 .include(vec![TranscriptionInclude::Logprobs])
357 .temperature(0.0) .call()
359 .await
360 {
361 Ok(response) => {
362 println!(" Detailed Transcription Results:");
363 println!(" Text: \"{}\"", response.text);
364 println!(" Language: {}", response.language);
365 println!(" Duration: {:.2} seconds", response.duration);
366
367 if let Some(segments) = &response.segments {
369 println!("\n Segment Analysis ({} segments):", segments.len());
370 for (i, segment) in segments.iter().enumerate() {
371 println!(
372 " Segment {}: [{:.2}s - {:.2}s] \"{}\"",
373 i + 1,
374 segment.start,
375 segment.end,
376 segment.text
377 );
378 let avg_logprob = segment.avg_logprob;
379 if avg_logprob != 0.0 {
380 println!(" Confidence: {avg_logprob:.3}");
381 }
382 }
383 }
384
385 if let Some(words) = &response.words {
387 println!("\n Word-level Timestamps (first 10 words):");
388 for (i, word) in words.iter().take(10).enumerate() {
389 println!(
390 " {}: [{:.2}s - {:.2}s] \"{}\"",
391 i + 1,
392 word.start,
393 word.end,
394 word.word
395 );
396 }
397 if words.len() > 10 {
398 println!(" ... and {} more words", words.len() - 10);
399 }
400 }
401
402 let json_output =
404 serde_json::to_string_pretty(&response).unwrap_or_else(|_| response.text.clone());
405 save_text_file(&json_output, "detailed_transcription.json")?;
406 println!(" Saved detailed results to: detailed_transcription.json");
407 }
408 Err(e) => {
409 eprintln!(" Detailed transcription failed: {e}");
410 return Err(Error::Api {
411 status: 0,
412 message: e.to_string(),
413 error_type: None,
414 error_code: None,
415 });
416 }
417 }
418
419 Ok(())
420}
421
422async fn translation_example(client: &Client) -> Result<(), Error> {
424 println!("Demonstrating audio translation to English...");
425
426 let audio_file = PathBuf::from("sample_english.mp3");
429 if !audio_file.exists() {
430 eprintln!(" Audio file not found: {}", audio_file.display());
431 return Ok(());
432 }
433
434 let configuration = create_configuration(client);
435
436 println!(" Translating audio to English...");
437 println!(" Note: This example uses English audio, but translation works with any language");
438
439 match audio_api::create_translation()
440 .configuration(&configuration)
441 .file(audio_file.clone())
442 .model("whisper-1")
443 .response_format("json")
444 .temperature(0.2)
445 .call()
446 .await
447 {
448 Ok(response) => {
449 println!(" Translation Results:");
450 println!(" Translated Text: \"{}\"", response.text);
451
452 save_text_file(&response.text, "translation_result.txt")?;
454 println!(" Saved translation to: translation_result.txt");
455
456 println!("\n Translation Notes:");
457 println!(" - Translation always outputs English text regardless of input language");
458 println!(
459 " - It's different from transcription which preserves the original language"
460 );
461 println!(" - Useful for creating English subtitles from foreign language audio");
462 println!(" - Works with the same audio formats as transcription");
463 }
464 Err(e) => {
465 eprintln!(" Translation failed: {e}");
466 return Err(Error::Api {
467 status: 0,
468 message: e.to_string(),
469 error_type: None,
470 error_code: None,
471 });
472 }
473 }
474
475 Ok(())
476}
477
478async fn advanced_options_example(client: &Client) -> Result<(), Error> {
480 println!("Demonstrating advanced transcription options...");
481
482 let audio_file = PathBuf::from("sample_numbers.mp3");
483 if !audio_file.exists() {
484 eprintln!(" Audio file not found: {}", audio_file.display());
485 return Ok(());
486 }
487
488 let configuration = create_configuration(client);
489
490 println!(" Advanced transcription with language and prompt...");
492
493 let prompt = "This audio contains numbers and phone numbers. Please transcribe them accurately as digits where appropriate.";
494
495 match audio_api::create_transcription()
496 .configuration(&configuration)
497 .file(audio_file.clone())
498 .model("whisper-1")
499 .language("en") .prompt(prompt) .response_format(AudioResponseFormat::VerboseJson)
502 .temperature(0.0) .include(vec![TranscriptionInclude::Logprobs])
505 .call()
506 .await
507 {
508 Ok(response) => {
509 println!(" Advanced Transcription Results:");
510 println!(" Text: \"{}\"", response.text);
511 println!(" Language: {}", response.language);
512 println!(" Duration: {:.2} seconds", response.duration);
513
514 if let Some(logprobs) = &response.logprobs {
516 println!(
517 " Log Probabilities: {} tokens with confidence scores",
518 logprobs.len()
519 );
520 }
521
522 if let Some(segments) = &response.segments {
524 println!("\n Number Detection Analysis:");
525 for (i, segment) in segments.iter().enumerate() {
526 let contains_numbers = segment.text.chars().any(|c| c.is_ascii_digit());
527 if contains_numbers {
528 println!(
529 " Segment {} (contains numbers): \"{}\"",
530 i + 1,
531 segment.text
532 );
533 let confidence = segment.avg_logprob;
534 if confidence != 0.0 {
535 println!(" Confidence: {confidence:.3}");
536 }
537 }
538 }
539 }
540
541 let json_output =
543 serde_json::to_string_pretty(&response).unwrap_or_else(|_| response.text.clone());
544 save_text_file(&json_output, "advanced_transcription.json")?;
545 println!(" Saved advanced results to: advanced_transcription.json");
546 }
547 Err(e) => {
548 eprintln!(" Advanced transcription failed: {e}");
549 return Err(Error::Api {
550 status: 0,
551 message: e.to_string(),
552 error_type: None,
553 error_code: None,
554 });
555 }
556 }
557
558 println!("\n Advanced Options Summary:");
559 println!(" - Language: Specify input language for better accuracy");
560 println!(" - Prompt: Provide context to guide transcription style");
561 println!(" - Temperature: Control randomness (0.0 = deterministic)");
562 println!(" - Chunking Strategy: How to split long audio (auto/hierarchical)");
563 println!(" - Include: Additional data like log probabilities");
564 println!(" - Timestamp Granularities: Word-level or segment-level timing");
565
566 Ok(())
567}
568
569fn create_configuration(client: &Client) -> Configuration {
571 let mut configuration = Configuration::new();
572 configuration.bearer_access_token = Some(client.config().api_key().to_string());
573
574 if let Some(base_url) = client.config().base_url() {
575 configuration.base_path = base_url.to_string();
576 }
577
578 if let Some(org_id) = client.config().organization_id() {
579 configuration.user_agent = Some(format!(
580 "openai-ergonomic/{} org/{}",
581 env!("CARGO_PKG_VERSION"),
582 org_id
583 ));
584 }
585
586 configuration
587}
588
589fn save_audio_file(audio_data: &[u8], filename: &str) -> Result<(), Error> {
591 let path = PathBuf::from(filename);
592 let mut file = std::fs::File::create(&path).map_err(Error::File)?;
593 file.write_all(audio_data).map_err(Error::File)?;
594 Ok(())
595}
596
597fn save_text_file(text: &str, filename: &str) -> Result<(), Error> {
599 let path = PathBuf::from(filename);
600 let mut file = std::fs::File::create(&path).map_err(Error::File)?;
601 file.write_all(text.as_bytes()).map_err(Error::File)?;
602 Ok(())
603}
604
605fn handle_api_error(error: &Error) {
607 match error {
608 Error::Api {
609 status,
610 message,
611 error_type,
612 error_code,
613 } => {
614 eprintln!(" API Error [{status}]: {message}");
615 if let Some(error_type) = error_type {
616 eprintln!(" Type: {error_type}");
617 }
618 if let Some(error_code) = error_code {
619 eprintln!(" Code: {error_code}");
620 }
621
622 match *status {
624 401 => eprintln!(" Check your API key: export OPENAI_API_KEY=\"your-key\""),
625 429 => eprintln!(" Rate limited - try again in a moment"),
626 500..=599 => eprintln!(" Server error - try again later"),
627 _ => {}
628 }
629 }
630 Error::InvalidRequest(msg) => {
631 eprintln!(" Invalid Request: {msg}");
632 eprintln!(" Check your request parameters and audio file format");
633 }
634 Error::Config(msg) => {
635 eprintln!(" Configuration Error: {msg}");
636 eprintln!(" Check your client configuration");
637 }
638 Error::Http(err) => {
639 eprintln!(" HTTP Error: {err}");
640 eprintln!(" Check your network connection");
641 }
642 Error::HttpMiddleware(err) => {
643 eprintln!(" HTTP Middleware Error: {err}");
644 eprintln!(" Check your network connection and middleware configuration");
645 }
646 Error::Json(err) => {
647 eprintln!(" JSON Error: {err}");
648 eprintln!(" Response parsing failed - may be a temporary issue");
649 }
650 Error::Authentication(msg) => {
651 eprintln!(" Authentication Error: {msg}");
652 eprintln!(" Check your API key");
653 }
654 Error::RateLimit(msg) => {
655 eprintln!(" Rate Limit Error: {msg}");
656 eprintln!(" Try again in a moment");
657 }
658 Error::Stream(msg) => {
659 eprintln!(" Stream Error: {msg}");
660 eprintln!(" Connection issue with streaming");
661 }
662 Error::File(err) => {
663 eprintln!(" File Error: {err}");
664 eprintln!(" Check file permissions and paths, ensure audio file exists");
665 }
666 Error::Builder(msg) => {
667 eprintln!(" Builder Error: {msg}");
668 eprintln!(" Check your request builder configuration");
669 }
670 Error::Internal(msg) => {
671 eprintln!(" Internal Error: {msg}");
672 eprintln!(" This may be a bug, please report it");
673 }
674 Error::StreamConnection { message } => {
675 eprintln!(" Stream Connection Error: {message}");
676 eprintln!(" Check your network connection");
677 }
678 Error::StreamParsing { message, chunk } => {
679 eprintln!(" Stream Parsing Error: {message}");
680 eprintln!(" Problematic chunk: {chunk}");
681 eprintln!(" The response stream may be corrupted");
682 }
683 Error::StreamBuffer { message } => {
684 eprintln!(" Stream Buffer Error: {message}");
685 eprintln!(" The stream buffer encountered an issue");
686 }
687 }
688}