speech_prep/chunker/types.rs
1use crate::error::{Error, Result};
2use crate::time::{AudioDuration, AudioTimestamp};
3
/// Classification of a chunk edge.
///
/// Describes the speech context found where a chunk begins or ends.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChunkBoundary {
    /// The chunk begins where a speech segment begins.
    SpeechStart,

    /// The chunk ends where a speech segment ends.
    SpeechEnd,

    /// The edge falls mid-speech: the chunk continues a longer segment.
    Continuation,

    /// The chunk holds nothing but silence (VAD detected no speech).
    Silence,
}
21
22/// A processed audio chunk with temporal and quality metadata.
23///
24/// Represents a segment of audio aligned to speech boundaries.
25#[derive(Debug, Clone)]
26pub struct ProcessedChunk {
27 /// Audio samples in the chunk (normalized f32, range [-1.0, 1.0]).
28 pub samples: Vec<f32>,
29
30 /// Type of boundary at chunk start.
31 pub start_boundary: ChunkBoundary,
32
33 /// Type of boundary at chunk end.
34 pub end_boundary: ChunkBoundary,
35
36 /// Absolute start time of chunk in audio stream.
37 pub start_time: AudioTimestamp,
38
39 /// Absolute end time of chunk in audio stream.
40 pub end_time: AudioTimestamp,
41
42 /// Ratio of speech frames in chunk (0.0 = all silence, 1.0 = all speech).
43 ///
44 /// Derived from VAD analysis.
45 pub speech_ratio: f32,
46
47 /// RMS energy of the chunk (computed during generation).
48 ///
49 /// Useful for quality assessment and thresholding.
50 pub energy: f32,
51
52 /// Signal-to-noise ratio in decibels (dB).
53 ///
54 /// Computed as `20 * log10(signal_rms / noise_rms)`, where `noise_rms` is
55 /// estimated from silence regions. `None` if no noise baseline is available
56 /// (e.g., first chunk with no silence).
57 ///
58 /// Higher SNR values indicate cleaner audio:
59 /// - >30 dB: Excellent quality
60 /// - 20-30 dB: Good quality
61 /// - 10-20 dB: Acceptable quality
62 /// - <10 dB: Poor quality (high noise)
63 pub snr_db: Option<f32>,
64
65 /// Indicates whether the chunk contains clipping artifacts.
66 ///
67 /// Clipping occurs when sample values exceed the normalized range
68 /// [-1.0, 1.0], typically manifesting as |sample| >= 0.999.
69 /// Clipped audio may cause audible distortion.
70 ///
71 /// `true` if any sample in the chunk is clipped, `false` otherwise.
72 pub has_clipping: bool,
73
74 /// Overlap samples from the previous chunk (for context).
75 ///
76 /// Contains the trailing `overlap_duration` samples from the previous
77 /// chunk, preserving acoustic context across the boundary.
78 /// `None` for the first chunk in the stream.
79 pub overlap_prev: Option<Vec<f32>>,
80
81 /// Overlap samples for the next chunk (for context).
82 ///
83 /// Contains the trailing `overlap_duration` samples from this chunk, to be
84 /// prepended to the next chunk for context. `None` for the last chunk in
85 /// the stream.
86 pub overlap_next: Option<Vec<f32>>,
87
88 /// Actual overlap duration in milliseconds.
89 ///
90 /// The duration of samples in `overlap_prev` and `overlap_next`. Typically
91 /// matches `ChunkerConfig::overlap_duration` (default 50ms), but may be
92 /// shorter for chunks at stream boundaries.
93 pub overlap_ms: u32,
94}
95
96impl ProcessedChunk {
97 /// Get the duration of this chunk.
98 ///
99 /// # Errors
100 ///
101 /// Returns `Error::Processing` if `end_time` < `start_time` (indicates
102 /// invalid chunk).
103 pub fn duration(&self) -> Result<AudioDuration> {
104 self.end_time
105 .duration_since(self.start_time)
106 .ok_or_else(|| {
107 Error::Processing("invalid chunk times: end_time precedes start_time".into())
108 })
109 }
110
111 /// Check if this chunk contains primarily speech.
112 #[must_use]
113 pub fn is_speech(&self) -> bool {
114 self.speech_ratio > 0.5
115 }
116
117 /// Check if this chunk is silence.
118 #[must_use]
119 pub fn is_silence(&self) -> bool {
120 self.start_boundary == ChunkBoundary::Silence && self.end_boundary == ChunkBoundary::Silence
121 }
122
123 /// Get samples without overlap (deduplicated core content).
124 ///
125 /// Returns the chunk's primary samples, excluding any overlap regions that
126 /// would be duplicated when processing sequential chunks. Useful for
127 /// callers that want to avoid processing overlap regions twice.
128 pub fn samples_without_overlap(&self) -> &[f32] {
129 &self.samples
130 }
131
132 /// Returns total sample count including overlap regions.
133 ///
134 /// Useful for buffer allocation when reconstructing the full audio data
135 /// with prepended/appended overlaps.
136 #[must_use]
137 pub fn total_samples_with_overlap(&self) -> usize {
138 let prev_overlap = self.overlap_prev.as_ref().map_or(0, Vec::len);
139 let next_overlap = self.overlap_next.as_ref().map_or(0, Vec::len);
140
141 self.samples.len() + prev_overlap + next_overlap
142 }
143}