mod energy_adaptive_chunked;
mod merge;
#[cfg(test)]
pub(crate) mod test_helpers;
mod vad_chunked;
pub use energy_adaptive_chunked::{EnergyAdaptiveChunked, EnergyAdaptiveConfig};
pub use merge::{merge_sequential, merge_sequential_with_separator, DEFAULT_MERGE_SEPARATOR};
pub use vad_chunked::{VadChunked, VadChunkedConfig};
/// Audio sample rate in samples per second, kept as `f32` so it can be
/// multiplied directly with durations expressed in seconds.
pub(crate) const SAMPLE_RATE: f32 = 16000.0;
/// Returns the root-mean-square amplitude of `frame`.
///
/// An empty frame has nothing to average, so it yields `0.0` instead of
/// dividing by zero.
pub(crate) fn rms_energy(frame: &[f32]) -> f32 {
    match frame.len() {
        0 => 0.0,
        count => {
            let sum_of_squares = frame
                .iter()
                .fold(0.0_f32, |acc, &sample| acc + sample * sample);
            (sum_of_squares / count as f32).sqrt()
        }
    }
}
use std::path::Path;
use crate::{audio, SpeechModel, TranscribeError, TranscribeOptions, TranscriptionResult};
/// Transcribes one chunk of audio, zero-filling chunks that are too short and
/// requesting symmetric silence padding through the transcription options.
///
/// `padding_secs` is converted to whole milliseconds (the cast truncates) and
/// written to both `leading_silence_ms` and `trailing_silence_ms` on a clone
/// of `options`; the caller's `options` value is never modified.
///
/// `min_duration_secs` is the minimum total length, padding included. When
/// `samples` is shorter than that minimum minus the two padding spans, zeros
/// are appended to make up the difference. Chunks that are already long enough
/// are passed to the model as-is, without being copied.
///
/// When `chunk_start_secs` is positive, the result is shifted by that amount
/// via `offset_timestamps` before being returned.
///
/// # Errors
///
/// Propagates any error returned by `model.transcribe`.
pub(crate) fn transcribe_padded(
    model: &mut dyn SpeechModel,
    samples: &[f32],
    padding_secs: f32,
    min_duration_secs: f32,
    chunk_start_secs: f32,
    options: &TranscribeOptions,
) -> Result<TranscriptionResult, TranscribeError> {
    let padding_ms = (padding_secs * 1000.0) as u32;
    // Padding is requested on both sides, hence the factor of two.
    let pad_total = 2 * padding_ms as usize * audio::SAMPLES_PER_MS;
    let min_total = (min_duration_secs * SAMPLE_RATE) as usize;
    // The padding counts towards the minimum, so the content itself only has
    // to cover whatever remains (never negative).
    let min_content = min_total.saturating_sub(pad_total);

    // Borrow the caller's samples unless zero-fill is actually required; this
    // avoids copying every chunk that is already long enough.
    let content: std::borrow::Cow<'_, [f32]> = if samples.len() < min_content {
        let mut zero_filled = Vec::with_capacity(min_content);
        zero_filled.extend_from_slice(samples);
        zero_filled.resize(min_content, 0.0);
        std::borrow::Cow::Owned(zero_filled)
    } else {
        std::borrow::Cow::Borrowed(samples)
    };

    let mut opts = options.clone();
    opts.leading_silence_ms = Some(padding_ms);
    opts.trailing_silence_ms = Some(padding_ms);

    let mut result = model.transcribe(&content, &opts)?;
    // A chunk starting at zero needs no shift, so skip the call entirely.
    if chunk_start_secs > 0.0 {
        result.offset_timestamps(chunk_start_secs);
    }
    Ok(result)
}
/// A transcription strategy that consumes audio samples and drives a
/// [`SpeechModel`] to produce transcription results.
///
/// Implementors supply [`feed`](Transcriber::feed) and
/// [`finish`](Transcriber::finish); the one-shot helpers
/// [`transcribe`](Transcriber::transcribe) and
/// [`transcribe_file`](Transcriber::transcribe_file) are built on top of them.
pub trait Transcriber: Send {
    /// Hands `samples` to the transcriber, using `model` for any
    /// transcription work, and returns the results produced by this call.
    ///
    /// NOTE(review): presumably the returned vector holds results for
    /// segments completed during this call and may be empty; confirm against
    /// the implementors.
    fn feed(
        &mut self,
        model: &mut dyn SpeechModel,
        samples: &[f32],
    ) -> Result<Vec<TranscriptionResult>, TranscribeError>;

    /// Ends the current input and returns a single result.
    ///
    /// NOTE(review): presumably this flushes whatever audio is still
    /// buffered; confirm against the implementors.
    fn finish(
        &mut self,
        model: &mut dyn SpeechModel,
    ) -> Result<TranscriptionResult, TranscribeError>;

    /// One-shot convenience: feeds all of `samples`, then finishes.
    ///
    /// The results returned by `feed` are discarded; only the value returned
    /// by `finish` is handed back to the caller. An error from `feed` is
    /// returned immediately without calling `finish`.
    ///
    /// NOTE(review): this relies on `finish` reporting everything the caller
    /// needs, since the intermediate results are dropped; confirm.
    fn transcribe(
        &mut self,
        model: &mut dyn SpeechModel,
        samples: &[f32],
    ) -> Result<TranscriptionResult, TranscribeError> {
        let _ = self.feed(model, samples)?;
        self.finish(model)
    }

    /// Reads samples from `path` with `audio::read_wav_samples` and
    /// transcribes them in one shot via [`transcribe`](Transcriber::transcribe).
    ///
    /// # Errors
    ///
    /// Returns an error if the samples cannot be read or if transcription
    /// fails.
    fn transcribe_file(
        &mut self,
        model: &mut dyn SpeechModel,
        path: &Path,
    ) -> Result<TranscriptionResult, TranscribeError> {
        let pcm = audio::read_wav_samples(path)?;
        self.transcribe(model, &pcm)
    }
}