pub struct Chunker { /* private fields */ }
Audio chunker for segmenting streams into processing units.
Combines VAD boundaries with duration heuristics to produce processing-size chunks.
Implementations§
Source§impl Chunker
impl Chunker
Sourcepub fn new(config: ChunkerConfig) -> Self
pub fn new(config: ChunkerConfig) -> Self
Create a new chunker with the given configuration.
Sourcepub fn default() -> Self
pub fn default() -> Self
Create a chunker with default configuration (500ms chunks).
Alias for Chunker::new(ChunkerConfig::default()).
Sourcepub fn chunk(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
) -> Result<Vec<ProcessedChunk>>
pub fn chunk( &self, audio: &[f32], sample_rate: u32, vad_segments: &[SpeechChunk], ) -> Result<Vec<ProcessedChunk>>
Segment audio into processing chunks aligned to VAD boundaries.
This variant assumes the VAD timestamps use the zero-based origin from
AudioTimestamp::EPOCH. If the timestamps already include a stream
offset, prefer Chunker::chunk_with_stream_start.
§Arguments
- audio: Raw PCM samples (f32, normalized to [-1.0, 1.0])
- sample_rate: Audio sample rate in Hz (must be > 0)
- vad_segments: Speech boundaries from VAD analysis
§Returns
Vector of ProcessedChunk covering the entire input duration with no
gaps.
§Errors
Returns Error::InvalidInput if:
- sample_rate is zero
- audio is empty
- VAD segments have invalid timestamps (end < start)
§Performance
Target: <15ms total processing time per chunk generated.
Sourcepub fn chunk_with_stream_start(
&self,
audio: &[f32],
sample_rate: u32,
vad_segments: &[SpeechChunk],
stream_start_time: AudioTimestamp,
) -> Result<Vec<ProcessedChunk>>
pub fn chunk_with_stream_start( &self, audio: &[f32], sample_rate: u32, vad_segments: &[SpeechChunk], stream_start_time: AudioTimestamp, ) -> Result<Vec<ProcessedChunk>>
Segment audio into processing chunks with an explicit stream start time.
Use this variant when the VAD timestamps should be interpreted relative to a known stream start rather than zero-based.
use speech_prep::{Chunker, ChunkerConfig, SpeechChunk};
use speech_prep::time::{AudioDuration, AudioTimestamp};
let chunker = Chunker::new(ChunkerConfig::streaming());
let stream_start = AudioTimestamp::EPOCH;
// VAD emits wall-clock timestamps relative to the live stream
let segments = vec![SpeechChunk {
start_time: stream_start,
end_time: stream_start.add_duration(AudioDuration::from_millis(240)),
confidence: 0.92,
avg_energy: 0.4,
frame_count: 48,
}];
let audio = vec![0.0; 3840]; // 240ms @ 16kHz
let chunks = chunker.chunk_with_stream_start(&audio, 16_000, &segments, stream_start)?;
assert_eq!(chunks.len(), 1);