speech_prep/chunker/
types.rs

1use crate::error::{Error, Result};
2use crate::time::{AudioDuration, AudioTimestamp};
3
/// Type of boundary at a chunk edge.
///
/// Describes the speech context at one edge of a chunk. A chunk records one
/// value for its start and one for its end.
///
/// The enum is a plain, field-less `Copy` type that is also `Eq + Hash`, so it
/// can be used directly as a `HashMap` / `HashSet` key (for example to tally
/// chunks per boundary kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChunkBoundary {
    /// Chunk starts at beginning of speech segment.
    SpeechStart,

    /// Chunk ends at end of speech segment.
    SpeechEnd,

    /// Chunk is mid-speech (continuation of longer segment).
    Continuation,

    /// Chunk contains only silence (no speech detected by VAD).
    Silence,
}
21
22/// A processed audio chunk with temporal and quality metadata.
23///
24/// Represents a segment of audio aligned to speech boundaries.
25#[derive(Debug, Clone)]
26pub struct ProcessedChunk {
27    /// Audio samples in the chunk (normalized f32, range [-1.0, 1.0]).
28    pub samples: Vec<f32>,
29
30    /// Type of boundary at chunk start.
31    pub start_boundary: ChunkBoundary,
32
33    /// Type of boundary at chunk end.
34    pub end_boundary: ChunkBoundary,
35
36    /// Absolute start time of chunk in audio stream.
37    pub start_time: AudioTimestamp,
38
39    /// Absolute end time of chunk in audio stream.
40    pub end_time: AudioTimestamp,
41
42    /// Ratio of speech frames in chunk (0.0 = all silence, 1.0 = all speech).
43    ///
44    /// Derived from VAD analysis.
45    pub speech_ratio: f32,
46
47    /// RMS energy of the chunk (computed during generation).
48    ///
49    /// Useful for quality assessment and thresholding.
50    pub energy: f32,
51
52    /// Signal-to-noise ratio in decibels (dB).
53    ///
54    /// Computed as `20 * log10(signal_rms / noise_rms)`, where `noise_rms` is
55    /// estimated from silence regions. `None` if no noise baseline is available
56    /// (e.g., first chunk with no silence).
57    ///
58    /// Higher SNR values indicate cleaner audio:
59    /// - >30 dB: Excellent quality
60    /// - 20-30 dB: Good quality
61    /// - 10-20 dB: Acceptable quality
62    /// - <10 dB: Poor quality (high noise)
63    pub snr_db: Option<f32>,
64
65    /// Indicates whether the chunk contains clipping artifacts.
66    ///
67    /// Clipping occurs when sample values exceed the normalized range
68    /// [-1.0, 1.0], typically manifesting as |sample| >= 0.999.
69    /// Clipped audio may cause audible distortion.
70    ///
71    /// `true` if any sample in the chunk is clipped, `false` otherwise.
72    pub has_clipping: bool,
73
74    /// Overlap samples from the previous chunk (for context).
75    ///
76    /// Contains the trailing `overlap_duration` samples from the previous
77    /// chunk, preserving acoustic context across the boundary.
78    /// `None` for the first chunk in the stream.
79    pub overlap_prev: Option<Vec<f32>>,
80
81    /// Overlap samples for the next chunk (for context).
82    ///
83    /// Contains the trailing `overlap_duration` samples from this chunk, to be
84    /// prepended to the next chunk for context. `None` for the last chunk in
85    /// the stream.
86    pub overlap_next: Option<Vec<f32>>,
87
88    /// Actual overlap duration in milliseconds.
89    ///
90    /// The duration of samples in `overlap_prev` and `overlap_next`. Typically
91    /// matches `ChunkerConfig::overlap_duration` (default 50ms), but may be
92    /// shorter for chunks at stream boundaries.
93    pub overlap_ms: u32,
94}
95
96impl ProcessedChunk {
97    /// Get the duration of this chunk.
98    ///
99    /// # Errors
100    ///
101    /// Returns `Error::Processing` if `end_time` < `start_time` (indicates
102    /// invalid chunk).
103    pub fn duration(&self) -> Result<AudioDuration> {
104        self.end_time
105            .duration_since(self.start_time)
106            .ok_or_else(|| {
107                Error::Processing("invalid chunk times: end_time precedes start_time".into())
108            })
109    }
110
111    /// Check if this chunk contains primarily speech.
112    #[must_use]
113    pub fn is_speech(&self) -> bool {
114        self.speech_ratio > 0.5
115    }
116
117    /// Check if this chunk is silence.
118    #[must_use]
119    pub fn is_silence(&self) -> bool {
120        self.start_boundary == ChunkBoundary::Silence && self.end_boundary == ChunkBoundary::Silence
121    }
122
123    /// Get samples without overlap (deduplicated core content).
124    ///
125    /// Returns the chunk's primary samples, excluding any overlap regions that
126    /// would be duplicated when processing sequential chunks. Useful for
127    /// callers that want to avoid processing overlap regions twice.
128    pub fn samples_without_overlap(&self) -> &[f32] {
129        &self.samples
130    }
131
132    /// Returns total sample count including overlap regions.
133    ///
134    /// Useful for buffer allocation when reconstructing the full audio data
135    /// with prepended/appended overlaps.
136    #[must_use]
137    pub fn total_samples_with_overlap(&self) -> usize {
138        let prev_overlap = self.overlap_prev.as_ref().map_or(0, Vec::len);
139        let next_overlap = self.overlap_next.as_ref().map_or(0, Vec::len);
140
141        self.samples.len() + prev_overlap + next_overlap
142    }
143}
speech_prep/chunker/types.rs

speech_prep/chunker/
types.rs