mod energy_adaptive_chunked;
mod merge;
#[cfg(test)]
pub(crate) mod test_helpers;
mod vad_chunked;
pub use energy_adaptive_chunked::{EnergyAdaptiveChunked, EnergyAdaptiveConfig};
pub use merge::{merge_sequential, merge_sequential_with_separator, DEFAULT_MERGE_SEPARATOR};
pub use vad_chunked::{VadChunked, VadChunkedConfig};
/// Sample rate, in samples per second, used in this module to convert
/// durations expressed in seconds into sample counts.
// NOTE(review): callers presumably supply audio already at this rate — confirm.
pub(crate) const SAMPLE_RATE: f32 = 16000.0;
/// Computes the root-mean-square amplitude of `frame`.
///
/// Returns `0.0` for an empty frame so the mean is never taken over zero
/// samples.
pub(crate) fn rms_energy(frame: &[f32]) -> f32 {
    match frame.len() {
        0 => 0.0,
        count => {
            // Accumulate left to right in `f32`, squaring each sample.
            let mut sum_of_squares = 0.0_f32;
            for &sample in frame {
                sum_of_squares += sample * sample;
            }
            (sum_of_squares / count as f32).sqrt()
        }
    }
}
use std::path::Path;
use crate::{audio, SpeechModel, TranscribeError, TranscribeOptions, TranscriptionResult};
/// Transcribes `samples` after surrounding them with zero-valued padding, then
/// shifts the resulting segment timestamps onto the caller's timeline.
///
/// The buffer handed to `model` is laid out as
/// `[padding | samples | padding]` and, if still shorter than
/// `min_duration_secs`, is extended with trailing zeros up to that duration.
/// Durations are converted to sample counts using [`SAMPLE_RATE`].
///
/// # Arguments
///
/// * `model` - model used to transcribe the padded buffer.
/// * `samples` - audio samples of the chunk.
/// * `padding_secs` - zero padding added on each side, in seconds. Negative
///   or NaN values are treated as no padding.
/// * `min_duration_secs` - minimum length of the buffer passed to the model,
///   in seconds.
/// * `chunk_start_secs` - offset added to every segment timestamp.
/// * `options` - forwarded unchanged to `model.transcribe`.
///
/// # Errors
///
/// Propagates any error returned by `model.transcribe`.
pub(crate) fn transcribe_padded(
    model: &mut dyn SpeechModel,
    samples: &[f32],
    padding_secs: f32,
    min_duration_secs: f32,
    chunk_start_secs: f32,
    options: &TranscribeOptions,
) -> Result<TranscriptionResult, TranscribeError> {
    // A negative or NaN padding inserts no samples (the float-to-usize cast
    // saturates to 0), so it must not shift the timestamps below either.
    let padding_secs = padding_secs.max(0.0);
    let pad_samples = (padding_secs * SAMPLE_RATE) as usize;
    let min_samples = (min_duration_secs * SAMPLE_RATE) as usize;

    // Size the buffer for its final length up front so that short chunks,
    // which must be stretched to `min_samples`, do not trigger a reallocation.
    let speech_end = pad_samples + samples.len();
    let total_len = (speech_end + pad_samples).max(min_samples);
    let mut padded = vec![0.0_f32; total_len];
    padded[pad_samples..speech_end].copy_from_slice(samples);

    let mut result = model.transcribe(&padded, options)?;

    // Remove the leading padding from each timestamp and re-base it on the
    // chunk's start; values that would become negative are clamped to zero.
    if let Some(segments) = &mut result.segments {
        for seg in segments.iter_mut() {
            seg.start = (seg.start - padding_secs + chunk_start_secs).max(0.0);
            seg.end = (seg.end - padding_secs + chunk_start_secs).max(0.0);
        }
    }

    Ok(result)
}
/// A transcription strategy that receives audio through [`feed`](Self::feed)
/// and is completed with [`finish`](Self::finish).
///
/// Implementors must be [`Send`].
pub trait Transcriber: Send {
    /// Passes `samples` to the transcriber, using `model` for any
    /// transcription work, and returns the results produced by this call.
    ///
    /// # Errors
    ///
    /// Returns a [`TranscribeError`] if the implementation fails.
    fn feed(
        &mut self,
        model: &mut dyn SpeechModel,
        samples: &[f32],
    ) -> Result<Vec<TranscriptionResult>, TranscribeError>;

    /// Completes the transcription and returns a single result.
    ///
    /// # Errors
    ///
    /// Returns a [`TranscribeError`] if the implementation fails.
    fn finish(
        &mut self,
        model: &mut dyn SpeechModel,
    ) -> Result<TranscriptionResult, TranscribeError>;

    /// Convenience wrapper: feeds all of `samples` in one call, then finishes.
    ///
    /// # Errors
    ///
    /// Returns the first error raised by `feed` or `finish`.
    fn transcribe(
        &mut self,
        model: &mut dyn SpeechModel,
        samples: &[f32],
    ) -> Result<TranscriptionResult, TranscribeError> {
        // The per-feed results are intentionally discarded here.
        // NOTE(review): presumably `finish` returns the complete result — confirm.
        drop(self.feed(model, samples)?);
        self.finish(model)
    }

    /// Reads samples from the WAV file at `path` via `audio::read_wav_samples`
    /// and transcribes them with [`transcribe`](Self::transcribe).
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or if transcription fails.
    fn transcribe_file(
        &mut self,
        model: &mut dyn SpeechModel,
        path: &Path,
    ) -> Result<TranscriptionResult, TranscribeError> {
        let decoded = audio::read_wav_samples(path)?;
        self.transcribe(model, &decoded)
    }
}