pub struct Chunker { /* private fields */ }
Audio chunker for segmenting streams into processing units.
Combines VAD boundaries with duration heuristics to produce chunks optimized for downstream consumers.
Implementations§
Source§impl Chunker
impl Chunker
Sourcepub fn new(config: ChunkerConfig) -> Self
pub fn new(config: ChunkerConfig) -> Self
Create a new chunker with the given configuration.
Sourcepub fn default() -> Self
pub fn default() -> Self
Create a chunker with default configuration (500ms chunks).
Alias for Chunker::new(ChunkerConfig::default()).
Sourcepub fn chunk(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
) -> Result<Vec<ProcessedChunk>>
pub fn chunk( &self, audio: &[f32], sample_rate: u32, vad_segments: &[SpeechChunk], ) -> Result<Vec<ProcessedChunk>>
Segment audio into processing chunks aligned to VAD boundaries.
This variant assumes that VAD timestamps are relative to the Unix epoch
(e.g., tests that build times off AudioTimestamp::EPOCH). For streaming
scenarios where VAD emits wall-clock timestamps (AudioTimestamp::now()),
prefer Chunker::chunk_with_stream_start so the chunker can normalize
against the actual stream start.
§Arguments
- audio: Raw PCM samples (f32, normalized to [-1.0, 1.0])
- sample_rate: Audio sample rate in Hz (must be > 0)
- vad_segments: Speech boundaries from VAD analysis
§Returns
Vector of ProcessedChunk covering the entire input duration with no
gaps.
§Errors
Returns Error::InvalidInput if:
- sample_rate is zero
- audio is empty
- VAD segments have invalid timestamps (end < start)
§Performance
Target: <15ms total processing time per chunk generated.
Sourcepub fn chunk_with_stream_start(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
stream_start_time: AudioTimestamp,
) -> Result<Vec<ProcessedChunk>>
pub fn chunk_with_stream_start( &self, audio: &[f32], sample_rate: u32, vad_segments: &[SpeechChunk], stream_start_time: AudioTimestamp, ) -> Result<Vec<ProcessedChunk>>
Segment audio into processing chunks with an explicit stream start time.
Use this variant when the VAD timestamps are absolute (e.g., wall-clock) rather than relative to the Unix epoch.
use speech_prep::{Chunker, ChunkerConfig, SpeechChunk};
use speech_prep::time::{AudioDuration, AudioTimestamp};
let chunker = Chunker::new(ChunkerConfig::streaming());
let stream_start = AudioTimestamp::EPOCH;
// VAD emits wall-clock timestamps relative to the live stream
let segments = vec![SpeechChunk {
start_time: stream_start,
end_time: stream_start.add_duration(AudioDuration::from_millis(240)),
confidence: 0.92,
avg_energy: 0.4,
frame_count: 48,
}];
let audio = vec![0.0; 3840]; // 240ms @ 16kHz
let chunks = chunker.chunk_with_stream_start(&audio, 16_000, &segments, stream_start)?;
assert_eq!(chunks.len(), 1);