Skip to main content

omni_dev/voice/
idle.rs

//! Idle (silence) detection and trailing-silence trimming.
//!
//! Operates on the post-resample 16 kHz mono stream. Uses RMS over fixed
//! windows; classifies each window as voiced or silent at a -40 dBFS
//! threshold and tracks how many consecutive silent windows have arrived.
//! The capture loop terminates when that streak reaches the
//! `idle_after_secs` budget. A trim pass then drops the trailing silence
//! before the WAV header is finalised — Whisper hallucinates badly on
//! silence at the end of an input clip.
10
11use super::wav::TARGET_SAMPLE_RATE;
12
/// Number of samples in a single RMS analysis window.
///
/// At 16 kHz, 1600 samples is 100 ms of audio.
pub const WINDOW_SAMPLES: usize = 1_600;
15
/// RMS level that separates silent windows from voiced ones.
///
/// Expressed as a linear `f32` amplitude: -40 dBFS is
/// `10^(-40/20) = 0.01`.
pub const SILENCE_THRESHOLD_RMS: f32 = 1.0e-2;
19
/// Outcome of comparing one RMS window against the silence threshold.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowClass {
    /// The window's RMS did not exceed the silence threshold (at or below it).
    Silent,
    /// The window's RMS was strictly above the silence threshold.
    Voiced,
}
28
/// Streaming idle detector over 16 kHz mono samples.
///
/// Feed samples via [`IdleDetector::push`]; it buffers partial windows
/// internally and only classifies a window once it is fully filled.
/// [`IdleDetector::is_idle`] returns true once enough consecutive silent
/// windows have accumulated. The detector also remembers whether *any*
/// voiced window has ever been seen — the orchestrator uses that flag to
/// fail loudly on all-silence recordings (muted mic, etc.) rather than
/// emit a near-empty WAV that downstream Whisper would crash on.
#[derive(Debug)]
pub struct IdleDetector {
    /// Seconds of uninterrupted silence required before the detector
    /// reports idle; `0` disables auto-stop.
    idle_after_secs: u32,
    /// Samples received but not yet classified because they do not fill a
    /// complete window.
    pending: Vec<f32>,
    /// Length, in windows, of the current run of silent windows.
    consecutive_silent: u32,
    /// Set once any window has been classified as voiced; never cleared.
    any_voiced: bool,
}
44
45impl IdleDetector {
46    /// Builds a detector that fires after `idle_after_secs` seconds of
47    /// uninterrupted silence on the 16 kHz stream.
48    #[must_use]
49    pub fn new(idle_after_secs: u32) -> Self {
50        Self {
51            idle_after_secs,
52            pending: Vec::with_capacity(WINDOW_SAMPLES * 2),
53            consecutive_silent: 0,
54            any_voiced: false,
55        }
56    }
57
58    /// Returns the configured idle-after threshold in seconds.
59    #[must_use]
60    pub fn idle_after_secs(&self) -> u32 {
61        self.idle_after_secs
62    }
63
64    /// Feeds new samples; processes whatever windows are complete and
65    /// updates internal state. Returns the classifications of windows
66    /// completed by this call (oldest first) so callers can log or
67    /// instrument them.
68    pub fn push(&mut self, samples: &[f32]) -> Vec<WindowClass> {
69        self.pending.extend_from_slice(samples);
70        let mut classifications = Vec::new();
71        while self.pending.len() >= WINDOW_SAMPLES {
72            let class = classify_window(&self.pending[..WINDOW_SAMPLES]);
73            classifications.push(class);
74            match class {
75                WindowClass::Silent => self.consecutive_silent += 1,
76                WindowClass::Voiced => {
77                    self.consecutive_silent = 0;
78                    self.any_voiced = true;
79                }
80            }
81            self.pending.drain(..WINDOW_SAMPLES);
82        }
83        classifications
84    }
85
86    /// Returns true once `idle_after_secs` consecutive silent windows have
87    /// arrived. (At 100 ms per window, that's `10 * idle_after_secs`
88    /// windows.) Always false before `idle_after_secs == 0` — a zero
89    /// threshold disables auto-stop.
90    #[must_use]
91    pub fn is_idle(&self) -> bool {
92        if self.idle_after_secs == 0 {
93            return false;
94        }
95        let needed = u64::from(self.idle_after_secs) * windows_per_second_u64();
96        u64::from(self.consecutive_silent) >= needed
97    }
98
99    /// Returns true if at least one voiced window has been observed since
100    /// construction.
101    #[must_use]
102    pub fn has_any_voice(&self) -> bool {
103        self.any_voiced
104    }
105
106    /// Number of samples to drop from the tail of the stream when the
107    /// detector has fired — exactly the silent window streak that caused
108    /// `is_idle` to flip true.
109    #[must_use]
110    pub fn trailing_silence_samples(&self) -> usize {
111        self.consecutive_silent as usize * WINDOW_SAMPLES
112    }
113}
114
115fn classify_window(window: &[f32]) -> WindowClass {
116    if rms(window) > SILENCE_THRESHOLD_RMS {
117        WindowClass::Voiced
118    } else {
119        WindowClass::Silent
120    }
121}
122
/// Root-mean-square level of `samples`.
///
/// An empty slice yields `0.0` instead of dividing by zero.
fn rms(samples: &[f32]) -> f32 {
    match samples.len() {
        0 => 0.0,
        count => {
            // Accumulate the squares front to back in `f32`.
            let mut energy = 0.0_f32;
            for &sample in samples {
                energy += sample * sample;
            }
            (energy / count as f32).sqrt()
        }
    }
}
130
131const fn windows_per_second_u64() -> u64 {
132    TARGET_SAMPLE_RATE as u64 / WINDOW_SAMPLES as u64
133}
134
/// Returns `samples` with its last `tail_samples` elements removed.
///
/// The result borrows from the input; call `.to_vec()` on it if an owned
/// buffer is needed. Asking to trim more than the buffer holds yields the
/// empty slice rather than panicking — the orchestrator catches that case
/// upstream by checking [`IdleDetector::has_any_voice`].
#[must_use]
pub fn trim_trailing_silence(samples: &[f32], tail_samples: usize) -> &[f32] {
    // Clamp at zero so an oversized tail simply keeps nothing.
    let keep = samples.len().saturating_sub(tail_samples);
    let (head, _dropped) = samples.split_at(keep);
    head
}
146
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// One full window of digital silence.
    fn silent_window() -> Vec<f32> {
        vec![0.0_f32; WINDOW_SAMPLES]
    }

    /// One full window at constant amplitude 0.5 (RMS = 0.5 ≫ threshold).
    fn loud_window() -> Vec<f32> {
        vec![0.5_f32; WINDOW_SAMPLES]
    }

    /// Pushes `window` into `det` `times` times, ignoring the returned
    /// classifications.
    fn feed(det: &mut IdleDetector, window: &[f32], times: usize) {
        for _ in 0..times {
            det.push(window);
        }
    }

    #[test]
    fn silence_only_input_fires_at_exact_window_budget() {
        // 2 s of budget is 20 windows.
        let mut det = IdleDetector::new(2);
        let silence = silent_window();

        feed(&mut det, &silence, 19);
        assert!(!det.is_idle(), "should not be idle at 19 silent windows");

        feed(&mut det, &silence, 1);
        assert!(det.is_idle(), "should be idle at 20 silent windows");
        assert!(
            !det.has_any_voice(),
            "no voiced window should have been seen"
        );
        assert_eq!(det.trailing_silence_samples(), 20 * WINDOW_SAMPLES);
    }

    #[test]
    fn voiced_window_resets_silent_streak() {
        // 1 s of budget is 10 windows.
        let mut det = IdleDetector::new(1);
        let silence = silent_window();
        let speech = loud_window();

        feed(&mut det, &silence, 9);
        assert!(!det.is_idle());

        // A single voiced window wipes out the streak.
        feed(&mut det, &speech, 1);
        assert!(det.has_any_voice());

        feed(&mut det, &silence, 9);
        assert!(!det.is_idle(), "9 < 10 silent windows after the reset");

        feed(&mut det, &silence, 1);
        assert!(det.is_idle(), "10 silent windows after the reset");
    }

    #[test]
    fn voiced_only_input_never_goes_idle() {
        let mut det = IdleDetector::new(1);
        feed(&mut det, &loud_window(), 50);

        assert!(!det.is_idle());
        assert!(det.has_any_voice());
        assert_eq!(det.trailing_silence_samples(), 0);
    }

    #[test]
    fn partial_window_is_buffered_until_full() {
        let mut det = IdleDetector::new(1);
        let half_window = vec![0.0_f32; WINDOW_SAMPLES / 2];

        // The first half is only buffered; the second half completes the window.
        let after_first = det.push(&half_window);
        assert!(
            after_first.is_empty(),
            "first half does not complete a window"
        );

        let after_second = det.push(&half_window);
        assert_eq!(after_second, vec![WindowClass::Silent]);
    }

    #[test]
    fn rms_boundary_classification() {
        // A constant signal of value v has RMS = v, so these two windows sit
        // just below and just above the 0.01 threshold.
        let just_below = vec![0.0099_f32; WINDOW_SAMPLES];
        let just_above = vec![0.0101_f32; WINDOW_SAMPLES];

        assert_eq!(classify_window(&just_below), WindowClass::Silent);
        assert_eq!(classify_window(&just_above), WindowClass::Voiced);
    }

    #[test]
    fn idle_after_zero_disables_autostop() {
        let mut det = IdleDetector::new(0);
        feed(&mut det, &silent_window(), 100);

        assert!(
            !det.is_idle(),
            "idle_after_secs=0 should never trigger auto-stop"
        );
    }

    #[test]
    fn trim_trailing_silence_drops_exactly_the_tail() {
        let samples: Vec<f32> = (0..1000).map(|i| i as f32).collect();
        let trimmed = trim_trailing_silence(&samples, 300);
        assert_eq!(trimmed.len(), 700);

        // Float comparisons are safe here: every sample is `i as f32`
        // for small integers, which round-trips exactly.
        for idx in [0_usize, 1, 2, 699] {
            assert!((trimmed[idx] - idx as f32).abs() < f32::EPSILON);
        }
    }

    #[test]
    fn trim_trailing_silence_clamps_to_empty_on_overshoot() {
        let samples = vec![1.0, 2.0, 3.0];
        assert!(trim_trailing_silence(&samples, 99).is_empty());
    }

    #[test]
    fn rms_empty_samples_returns_zero() {
        let level = rms(&[]);
        assert!((level - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn push_consumes_multiple_windows_in_one_call() {
        let mut det = IdleDetector::new(1);
        // 2.5 windows of silence in one push: two windows are classified
        // right away and the remaining half window stays buffered.
        let chunk = vec![0.0_f32; WINDOW_SAMPLES * 5 / 2];
        let classifications = det.push(&chunk);

        assert_eq!(classifications.len(), 2);
        assert_eq!(classifications[0], WindowClass::Silent);
        assert_eq!(classifications[1], WindowClass::Silent);
    }
}