use std::path::Path;
use whisper_rs::{
FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters,
};
use crate::error::Result;
use crate::types::{centis_to_ms, Caption};
use crate::whisper::audio::load_wav;
/// Settings that control a single call to [`transcribe`].
///
/// All borrowed data lives for `'a`; the struct itself owns nothing.
#[derive(Debug, Clone)]
pub struct TranscribeOptions<'a> {
    /// Location of the model file handed to `WhisperContext::new_with_params`.
    /// Must be valid UTF-8, otherwise [`transcribe`] returns an error.
    pub model_path: &'a Path,
    /// Language forwarded to `FullParams::set_language`. `None` and
    /// `Some("auto")` are both forwarded as `None`.
    pub language: Option<&'a str>,
    /// Value forwarded to `FullParams::set_translate`.
    pub translate: bool,
}
pub fn transcribe(wav_path: &Path, options: &TranscribeOptions<'_>) -> Result<Vec<Caption>> {
let audio = load_wav(wav_path)?;
let ctx = WhisperContext::new_with_params(
options.model_path.to_str().ok_or_else(|| {
crate::error::AppError::InvalidPath(options.model_path.to_path_buf())
})?,
WhisperContextParameters::default(),
)?;
let mut state = ctx.create_state()?;
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
params.set_n_threads(num_cpus());
params.set_translate(options.translate);
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
match options.language {
Some("auto") | None => params.set_language(None),
Some(lang) => params.set_language(Some(lang)),
}
state.full(params, &audio)?;
let mut captions = Vec::new();
for segment in state.as_iter() {
let text = segment.to_str_lossy()?.trim().to_string();
if text.is_empty() {
continue;
}
let start_ms = centis_to_ms(segment.start_timestamp());
let end_ms = centis_to_ms(segment.end_timestamp());
if end_ms <= start_ms {
continue;
}
captions.push(Caption::new(start_ms, end_ms, text));
}
Ok(captions)
}
/// Returns the thread count used for transcription.
///
/// Takes the value reported by [`std::thread::available_parallelism`], capped
/// at 16, and falls back to 4 when the parallelism cannot be determined.
fn num_cpus() -> i32 {
    const FALLBACK_THREADS: i32 = 4;
    const MAX_THREADS: usize = 16;

    // Clamp while the value is still a `usize`: after that the narrowing cast
    // is lossless, whereas casting first could wrap an oversized value.
    std::thread::available_parallelism()
        .map_or(FALLBACK_THREADS, |n| n.get().min(MAX_THREADS) as i32)
}