use parakeet_rs::{Parakeet, ParakeetTDT, TimestampMode, Transcriber};
use std::env;
use std::time::Instant;
/// Transcribes a WAV file and prints the recognized text with timestamps.
///
/// Command line: `<program> [audio_path] [tdt]`
///
/// - `audio_path` defaults to `6_speakers.wav` when omitted.
/// - Passing `tdt` as the second argument selects `ParakeetTDT` (loaded from
///   `./tdt`, requesting sentence timestamps and printing every returned
///   token). Otherwise `Parakeet` is loaded from `.`, word timestamps are
///   requested, and only the first 10 tokens are printed.
///
/// # Errors
///
/// Propagates any error from opening or decoding the WAV file, loading the
/// model, or running the transcription.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Started before any I/O, so the reported duration also covers WAV
    // decoding and model loading, not just inference.
    let start_time = Instant::now();

    let args: Vec<String> = env::args().collect();
    // `get` avoids manual length checks followed by panicking index access.
    let audio_path = args.get(1).map(String::as_str).unwrap_or("6_speakers.wav");
    let use_tdt = args.get(2).map(String::as_str) == Some("tdt");

    let mut reader = hound::WavReader::open(audio_path)?;
    let spec = reader.spec();
    println!(
        "Audio info: {}Hz, {} channel(s)",
        spec.sample_rate, spec.channels
    );

    // Decode every sample to f32 normalized to roughly [-1.0, 1.0).
    let audio: Vec<f32> = match spec.sample_format {
        hound::SampleFormat::Float => reader.samples::<f32>().collect::<Result<Vec<_>, _>>()?,
        hound::SampleFormat::Int => {
            // Scale by the full-scale magnitude of the file's actual bit depth
            // (2^(bits-1): 128 for 8-bit, 32768 for 16-bit, ...). Reading as
            // i32 accepts every integer width, whereas reading as i16 fails on
            // 24/32-bit files and a fixed 32768 divisor mis-scales 8-bit audio.
            let full_scale = 2f32.powi(i32::from(spec.bits_per_sample) - 1);
            reader
                .samples::<i32>()
                // The i32 -> f32 cast may round very large 32-bit samples;
                // that precision loss is acceptable for normalized audio.
                .map(|sample| sample.map(|value| value as f32 / full_scale))
                .collect::<Result<Vec<_>, _>>()?
        }
    };

    if use_tdt {
        println!("Loading TDT model...");
        let mut parakeet = ParakeetTDT::from_pretrained("./tdt", None)?;
        let result = parakeet.transcribe_samples(
            audio,
            spec.sample_rate,
            spec.channels,
            Some(TimestampMode::Sentences),
        )?;
        println!("{}", result.text);
        println!("\nSentences:");
        for segment in result.tokens.iter() {
            println!(
                "[{:.2}s - {:.2}s]: {}",
                segment.start, segment.end, segment.text
            );
        }
    } else {
        println!("Loading CTC model...");
        let mut parakeet = Parakeet::from_pretrained(".", None)?;
        let result = parakeet.transcribe_samples(
            audio,
            spec.sample_rate,
            spec.channels,
            Some(TimestampMode::Words),
        )?;
        println!("{}", result.text);
        println!("\nWords (first 10):");
        for word in result.tokens.iter().take(10) {
            println!("[{:.2}s - {:.2}s]: {}", word.start, word.end, word.text);
        }
    }

    let elapsed = start_time.elapsed();
    println!(
        "\n✓ Transcription completed in {:.2}s",
        elapsed.as_secs_f32()
    );
    Ok(())
}