use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
/// Sample rate, in Hz, that the audio handed to the model must have.
const EXPECTED_SAMPLE_RATE: u32 = 16_000;

/// Reads the WAV file at `wav_path` and returns its audio as mono `f32` samples.
///
/// The WAV header is inspected so that the stereo-to-mono downmix is applied
/// only to files that actually contain two interleaved channels; mono files
/// are passed through untouched.
///
/// # Panics
///
/// Panics if the file cannot be opened or decoded as 16-bit integer samples,
/// if its sample rate is not [`EXPECTED_SAMPLE_RATE`], or if it has a channel
/// count other than 1 or 2.
fn load_mono_samples(wav_path: &str) -> Vec<f32> {
    let reader = hound::WavReader::open(wav_path).expect("failed to open wav file");

    // Grab the header fields before `into_samples` consumes the reader.
    let spec = reader.spec();
    let (channels, sample_rate) = (spec.channels, spec.sample_rate);

    // No resampling is performed here, so reject anything that is not already
    // at the expected rate instead of silently producing a bad transcription.
    if sample_rate != EXPECTED_SAMPLE_RATE {
        panic!(
            "wav file must have a sample rate of {} Hz, got {} Hz",
            EXPECTED_SAMPLE_RATE, sample_rate
        );
    }

    let int_samples: Vec<i16> = reader
        .into_samples::<i16>()
        .collect::<Result<_, _>>()
        .expect("failed to read wav samples");

    let mut float_samples = vec![0.0f32; int_samples.len()];
    whisper_rs::convert_integer_to_float_audio(&int_samples, &mut float_samples)
        .expect("failed to convert audio data");

    match channels {
        // Already mono: downmixing would average unrelated neighbouring
        // samples and halve the length of the audio.
        1 => float_samples,
        2 => whisper_rs::convert_stereo_to_mono_audio(&float_samples)
            .expect("failed to convert audio data"),
        other => panic!(
            "only mono and stereo wav files are supported, got {} channels",
            other
        ),
    }
}

/// Transcribes a WAV file with a Whisper model and prints every segment.
///
/// Usage: `<program> <model-path> <wav-path>`
///
/// Each recognised segment is written to stdout as `[start - end]: text`,
/// where `start` and `end` are the values reported by the model state for
/// that segment. The transcription language is fixed to English.
///
/// # Panics
///
/// Panics with a descriptive message if an argument is missing, the audio
/// cannot be loaded (see [`load_mono_samples`]), the model cannot be loaded,
/// or any inference/query call on the model state fails.
fn main() {
    // Skip argv[0]; the next two positional arguments are required.
    let mut args = std::env::args().skip(1);
    let model_path = args.next().expect("Please specify path to model");
    let wav_path = args.next().expect("Please specify path to wav file");
    let language = "en";

    // Validate and prepare the audio first: it is cheap compared to loading
    // the model, so bad input is reported before the expensive step.
    let samples = load_mono_samples(&wav_path);

    let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default())
        .expect("failed to load model");
    let mut state = ctx.create_state().expect("failed to create state");

    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
    params.set_language(Some(language));
    // Silence the library's own console output; segments are printed below.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    state
        .full(params, &samples[..])
        .expect("failed to run model");

    let num_segments = state
        .full_n_segments()
        .expect("failed to get number of segments");
    for i in 0..num_segments {
        let segment = state
            .full_get_segment_text(i)
            .expect("failed to get segment");
        let start_timestamp = state
            .full_get_segment_t0(i)
            .expect("failed to get segment start timestamp");
        let end_timestamp = state
            .full_get_segment_t1(i)
            .expect("failed to get segment end timestamp");
        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
    }
}