use crate::error::{Error, Result};
use crate::time::AudioTimestamp;
use std::time::{Duration, Instant};
use crate::SpeechChunk;
mod analysis;
mod config;
mod overlap;
mod planner;
mod segments;
mod types;
pub use config::ChunkerConfig;
use overlap::apply_overlaps;
pub use types::{ChunkBoundary, ProcessedChunk};
/// Splits an audio buffer into [`ProcessedChunk`]s, using VAD speech
/// segments to separate speech regions from silence.
///
/// Behaviour (target chunk duration, overlap duration) is driven by the
/// [`ChunkerConfig`] supplied at construction.
#[derive(Debug, Clone, Copy)]
pub struct Chunker {
    // Chunking parameters; `target_duration` and `overlap_duration` are
    // read during `chunk_with_stream_start`.
    config: ChunkerConfig,
}
#[allow(clippy::multiple_inherent_impl)]
impl Chunker {
#[must_use]
pub fn new(config: ChunkerConfig) -> Self {
Self { config }
}
#[must_use]
#[allow(clippy::should_implement_trait)]
pub fn default() -> Self {
Self::new(ChunkerConfig::default())
}
pub fn chunk(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
) -> Result<Vec<ProcessedChunk>> {
self.chunk_with_stream_start(audio, sample_rate, vad_segments, AudioTimestamp::EPOCH)
}
pub fn chunk_with_stream_start(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
stream_start_time: AudioTimestamp,
) -> Result<Vec<ProcessedChunk>> {
if sample_rate == 0 {
return Err(Error::InvalidInput("sample_rate must be > 0".into()));
}
if audio.is_empty() {
return Err(Error::InvalidInput("audio buffer is empty".into()));
}
for segment in vad_segments {
if segment.end_time < segment.start_time {
return Err(Error::InvalidInput(
"VAD segment has end_time < start_time".into(),
));
}
}
let processing_start = Instant::now();
let total_samples = audio.len();
let total_duration_secs = total_samples as f64 / f64::from(sample_rate);
let total_duration = Duration::from_secs_f64(total_duration_secs);
let earliest_segment_start = vad_segments.iter().map(|seg| seg.start_time).min();
let audio_start = earliest_segment_start.map_or(stream_start_time, |start| {
std::cmp::min(start, stream_start_time)
});
let noise_baseline =
Self::compute_noise_baseline(audio, sample_rate, vad_segments, audio_start);
let estimated_chunks =
(total_duration.as_millis() / self.config.target_duration.as_millis()).max(1) as usize
+ 1;
let mut chunks = Vec::with_capacity(estimated_chunks);
if vad_segments.is_empty() {
chunks.push(Self::create_silence_chunk(
audio,
sample_rate,
audio_start,
total_duration,
audio_start,
)?);
} else {
let mut current_time = audio_start;
for segment in vad_segments {
if segment.start_time > current_time {
let silence_end = segment.start_time;
let silence_duration =
silence_end.duration_since(current_time).ok_or_else(|| {
Error::Processing("VAD segment start_time < current_time".into())
})?;
chunks.push(Self::create_silence_chunk(
audio,
sample_rate,
current_time,
silence_duration,
audio_start,
)?);
}
let segment_chunks = self.process_speech_segment(
audio,
sample_rate,
segment,
noise_baseline,
audio_start,
)?;
chunks.extend(segment_chunks);
current_time = segment.end_time;
}
let total_end_time = audio_start.add_duration(total_duration);
if total_end_time > current_time {
let trailing_duration = total_end_time
.duration_since(current_time)
.ok_or_else(|| Error::Processing("total_end_time < current_time".into()))?;
chunks.push(Self::create_silence_chunk(
audio,
sample_rate,
current_time,
trailing_duration,
audio_start,
)?);
}
}
let overlap_samples = Self::duration_to_samples(self.config.overlap_duration, sample_rate);
apply_overlaps(&mut chunks, overlap_samples, sample_rate);
let latency = processing_start.elapsed();
let chunk_count = chunks.len().max(1);
let _per_chunk = Duration::from_secs_f64(latency.as_secs_f64() / chunk_count as f64);
for _ in 0..chunk_count {}
Ok(chunks)
}
}
#[cfg(test)]
mod tests;