Skip to main content

wavekat_vad/preprocessing/
mod.rs

1//! Audio preprocessing pipeline for improving VAD accuracy.
2//!
3//! This module provides configurable preprocessing stages that clean audio
4//! before voice activity detection. Each stage is optional and can be
5//! enabled/disabled via [`PreprocessorConfig`].
6//!
7//! # Example
8//!
9//! ```
10//! use wavekat_vad::preprocessing::{Preprocessor, PreprocessorConfig};
11//!
12//! let config = PreprocessorConfig {
13//!     high_pass_hz: Some(80.0),
14//!     ..Default::default()
15//! };
16//!
17//! let mut preprocessor = Preprocessor::new(&config, 16000);
18//! let samples: Vec<i16> = vec![0; 320]; // 20ms at 16kHz
19//! let cleaned = preprocessor.process(&samples);
20//! ```
21
22mod biquad;
23mod normalize;
24
25mod resample;
26
27#[cfg(feature = "denoise")]
28mod denoise;
29
30pub use biquad::BiquadFilter;
31pub use normalize::Normalizer;
32
33pub use resample::AudioResampler;
34
35#[cfg(feature = "denoise")]
36pub use denoise::{Denoiser, DENOISE_SAMPLE_RATE};
37
/// Settings that select which preprocessing stages run ahead of VAD.
///
/// Every stage is opt-in: leave an `Option` field as `None` (or the `denoise`
/// flag as `false`) to skip it. The derived [`Default`] turns everything off.
#[derive(Debug, Clone, Default, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PreprocessorConfig {
    /// Cutoff frequency of the high-pass filter, in Hz.
    ///
    /// Strips low-frequency noise (HVAC, rumble) that can trigger false
    /// positives. Suggested values: 80Hz for raw mic input, 200Hz for
    /// telephony. `None` disables the filter.
    #[cfg_attr(feature = "serde", serde(default))]
    pub high_pass_hz: Option<f32>,

    /// Turn on RNNoise-based noise suppression.
    ///
    /// Reduces stationary background noise while keeping speech intact.
    /// Needs the `denoise` feature flag; the audio may be at any sample rate
    /// (it is resampled to 48kHz internally when required).
    #[cfg_attr(feature = "serde", serde(default))]
    pub denoise: bool,

    /// RMS normalization target, in dBFS.
    ///
    /// Evens out signal amplitude so VAD thresholds behave consistently.
    /// Suggested value: -20.0 dBFS. `None` disables normalization.
    #[cfg_attr(feature = "serde", serde(default))]
    pub normalize_dbfs: Option<f32>,
}

impl PreprocessorConfig {
    /// Configuration with every stage switched off; audio passes through untouched.
    pub fn none() -> Self {
        Default::default()
    }

    /// Preset tuned for raw microphone input.
    ///
    /// Uses an 80Hz high-pass filter and normalizes to -20 dBFS. Noise
    /// suppression is switched on as well when the crate is compiled with
    /// the `denoise` feature.
    pub fn raw_mic() -> Self {
        Self {
            high_pass_hz: Some(80.0),
            // Compile-time constant: `true` only in builds with the `denoise` feature.
            denoise: cfg!(feature = "denoise"),
            normalize_dbfs: Some(-20.0),
        }
    }

    /// Preset tuned for telephony audio.
    ///
    /// Only a light 200Hz high-pass is applied, because telephony audio is
    /// already bandpass filtered.
    pub fn telephony() -> Self {
        Self {
            high_pass_hz: Some(200.0),
            ..Self::none()
        }
    }

    /// Reports whether at least one preprocessing stage is requested.
    pub fn is_enabled(&self) -> bool {
        // Only the all-off combination counts as disabled.
        !matches!(
            (self.high_pass_hz, self.denoise, self.normalize_dbfs),
            (None, false, None)
        )
    }
}
104
105/// Audio preprocessor that applies configured processing stages.
106///
107/// Each instance maintains its own filter state, so you should create
108/// one `Preprocessor` per audio stream (or per VAD config in vad-lab).
109///
110/// # Processing Order
111///
112/// 1. High-pass filter (removes low-frequency noise)
113/// 2. Noise suppression (RNNoise, resamples internally if not 48kHz)
114/// 3. Normalization (RMS-based amplitude adjustment)
115pub struct Preprocessor {
116    high_pass: Option<BiquadFilter>,
117    #[cfg(feature = "denoise")]
118    denoiser: Option<Denoiser>,
119    normalizer: Option<Normalizer>,
120    sample_rate: u32,
121    enabled: bool,
122}
123
124impl std::fmt::Debug for Preprocessor {
125    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126        let mut s = f.debug_struct("Preprocessor");
127        s.field("high_pass", &self.high_pass.is_some());
128        #[cfg(feature = "denoise")]
129        s.field("denoiser", &self.denoiser.is_some());
130        s.field("normalizer", &self.normalizer.is_some());
131        s.field("sample_rate", &self.sample_rate);
132        s.field("enabled", &self.enabled);
133        s.finish()
134    }
135}
136
137impl Preprocessor {
138    /// Create a new preprocessor with the given configuration.
139    ///
140    /// # Arguments
141    /// * `config` - Preprocessing configuration
142    /// * `sample_rate` - Audio sample rate in Hz
143    pub fn new(config: &PreprocessorConfig, sample_rate: u32) -> Self {
144        let high_pass = config
145            .high_pass_hz
146            .map(|cutoff| BiquadFilter::highpass_butterworth(cutoff, sample_rate));
147
148        #[cfg(feature = "denoise")]
149        let denoiser = if config.denoise {
150            Some(Denoiser::new(sample_rate))
151        } else {
152            None
153        };
154
155        let normalizer = config.normalize_dbfs.map(Normalizer::new);
156
157        #[cfg(feature = "denoise")]
158        let denoise_enabled = denoiser.is_some();
159        #[cfg(not(feature = "denoise"))]
160        let denoise_enabled = false;
161
162        let enabled = high_pass.is_some() || denoise_enabled || normalizer.is_some();
163
164        Self {
165            high_pass,
166            #[cfg(feature = "denoise")]
167            denoiser,
168            normalizer,
169            sample_rate,
170            enabled,
171        }
172    }
173
174    /// Returns the sample rate this preprocessor was configured for.
175    pub fn sample_rate(&self) -> u32 {
176        self.sample_rate
177    }
178
179    /// Returns true if any preprocessing stages are enabled.
180    pub fn is_enabled(&self) -> bool {
181        self.enabled
182    }
183
184    /// Returns true if noise suppression is active.
185    #[cfg(feature = "denoise")]
186    pub fn is_denoising(&self) -> bool {
187        self.denoiser.is_some()
188    }
189
190    /// Returns true if normalization is active.
191    pub fn is_normalizing(&self) -> bool {
192        self.normalizer.is_some()
193    }
194
195    /// Process audio samples and return the preprocessed result.
196    ///
197    /// Returns a new `Vec<i16>` with the processed samples.
198    /// If no preprocessing is enabled, returns a clone of the input.
199    pub fn process(&mut self, samples: &[i16]) -> Vec<i16> {
200        if !self.enabled {
201            return samples.to_vec();
202        }
203
204        let mut output = samples.to_vec();
205
206        // Stage 1: High-pass filter
207        if let Some(ref mut filter) = self.high_pass {
208            filter.process_i16(&mut output);
209        }
210
211        // Stage 2: Noise suppression
212        #[cfg(feature = "denoise")]
213        if let Some(ref mut denoiser) = self.denoiser {
214            output = denoiser.process(&output);
215        }
216
217        // Stage 3: Normalization
218        if let Some(ref mut normalizer) = self.normalizer {
219            output = normalizer.process(&output);
220        }
221
222        output
223    }
224
225    /// Reset all filter states.
226    ///
227    /// Call this when starting a new audio stream or after a long pause.
228    pub fn reset(&mut self) {
229        if let Some(ref mut filter) = self.high_pass {
230            filter.reset();
231        }
232        #[cfg(feature = "denoise")]
233        if let Some(ref mut denoiser) = self.denoiser {
234            denoiser.reset();
235        }
236        if let Some(ref mut normalizer) = self.normalizer {
237            normalizer.reset();
238        }
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Root-mean-square level of a block of samples.
    fn rms(samples: &[i16]) -> f64 {
        let sum_sq = samples.iter().map(|&s| (s as f64).powi(2)).sum::<f64>();
        (sum_sq / samples.len() as f64).sqrt()
    }

    /// Config with only noise suppression switched on.
    #[cfg(feature = "denoise")]
    fn denoise_only() -> PreprocessorConfig {
        PreprocessorConfig {
            denoise: true,
            ..Default::default()
        }
    }

    /// The derived default must leave every stage off.
    #[test]
    fn test_config_defaults() {
        let defaults = PreprocessorConfig::default();
        assert_eq!(defaults.high_pass_hz, None);
        assert!(!defaults.denoise);
        assert_eq!(defaults.normalize_dbfs, None);
        assert!(!defaults.is_enabled());
    }

    /// Each preset exposes the documented values.
    #[test]
    fn test_config_presets() {
        assert!(!PreprocessorConfig::none().is_enabled());

        let mic = PreprocessorConfig::raw_mic();
        assert!(mic.is_enabled());
        assert_eq!(mic.high_pass_hz, Some(80.0));
        assert_eq!(mic.normalize_dbfs, Some(-20.0));

        let phone = PreprocessorConfig::telephony();
        assert!(phone.is_enabled());
        assert_eq!(phone.high_pass_hz, Some(200.0));
    }

    /// A fully populated config survives a JSON round trip.
    #[cfg(feature = "serde")]
    #[test]
    fn test_config_serde() {
        let original = PreprocessorConfig {
            high_pass_hz: Some(100.0),
            denoise: true,
            normalize_dbfs: Some(-20.0),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: PreprocessorConfig = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// Missing fields fall back to their defaults when deserializing.
    #[cfg(feature = "serde")]
    #[test]
    fn test_config_serde_defaults() {
        // Empty JSON should deserialize to defaults
        let decoded: PreprocessorConfig = serde_json::from_str("{}").unwrap();
        assert_eq!(decoded, PreprocessorConfig::default());
    }

    /// With no stages enabled the samples come back unchanged.
    #[test]
    fn test_preprocessor_disabled() {
        let mut preprocessor = Preprocessor::new(&PreprocessorConfig::none(), 16000);
        assert!(!preprocessor.is_enabled());

        let input: Vec<i16> = vec![100, 200, 300, 400, 500];
        let output = preprocessor.process(&input);
        assert_eq!(input, output);
    }

    /// The high-pass stage should remove a constant (DC) offset.
    #[test]
    fn test_preprocessor_highpass() {
        let config = PreprocessorConfig {
            high_pass_hz: Some(100.0),
            ..Default::default()
        };
        let mut preprocessor = Preprocessor::new(&config, 16000);

        assert!(preprocessor.is_enabled());
        assert_eq!(preprocessor.sample_rate(), 16000);

        // DC offset should be removed
        let dc_input: Vec<i16> = vec![5000; 500];
        let output = preprocessor.process(&dc_input);

        // After settling, output should be near zero
        let settled = &output[400..];
        let last_avg: i32 = settled.iter().map(|&s| s.abs() as i32).sum::<i32>() / 100;
        assert!(
            last_avg < 500,
            "DC should be attenuated, got avg: {last_avg}"
        );
    }

    /// Normalizing towards -20 dBFS should raise the level of a quiet signal.
    #[test]
    fn test_preprocessor_normalize() {
        let config = PreprocessorConfig {
            normalize_dbfs: Some(-20.0),
            ..Default::default()
        };
        let mut preprocessor = Preprocessor::new(&config, 16000);

        assert!(preprocessor.is_enabled());
        assert!(preprocessor.is_normalizing());

        // Quiet signal should be amplified
        let quiet: Vec<i16> = vec![100; 320];
        let output = preprocessor.process(&quiet);

        let input_rms: f64 = rms(&quiet);
        let output_rms: f64 = rms(&output);

        assert!(
            output_rms > input_rms,
            "Quiet signal should be amplified: {output_rms} > {input_rms}"
        );
    }

    /// Resetting after processing must be safe with the `raw_mic` preset.
    #[test]
    fn test_preprocessor_reset() {
        let mut preprocessor = Preprocessor::new(&PreprocessorConfig::raw_mic(), 16000);

        // Process some audio
        let samples: Vec<i16> = vec![1000; 100];
        preprocessor.process(&samples);

        // Reset should not panic
        preprocessor.reset();
    }

    /// The denoiser is constructed regardless of the stream's sample rate.
    #[cfg(feature = "denoise")]
    #[test]
    fn test_preprocessor_denoise_works_at_any_rate() {
        let config = denoise_only();

        // At 16kHz, denoising should work (with internal resampling)
        assert!(Preprocessor::new(&config, 16000).is_denoising());

        // At 48kHz, denoising should work (no resampling needed)
        assert!(Preprocessor::new(&config, 48000).is_denoising());
    }

    /// Denoising at 48kHz produces output for a 20ms frame.
    #[cfg(feature = "denoise")]
    #[test]
    fn test_preprocessor_denoise_48k() {
        let mut preprocessor = Preprocessor::new(&denoise_only(), 48000);

        assert!(preprocessor.is_enabled());
        assert!(preprocessor.is_denoising());

        // Process some audio (silence)
        let silence: Vec<i16> = vec![0; 960]; // 20ms at 48kHz
        let output = preprocessor.process(&silence);

        // Output length may differ slightly due to frame buffering
        assert!(!output.is_empty());
    }

    /// Denoising at 16kHz produces output once enough input has been fed.
    #[cfg(feature = "denoise")]
    #[test]
    fn test_preprocessor_denoise_16k() {
        let mut preprocessor = Preprocessor::new(&denoise_only(), 16000);

        assert!(preprocessor.is_enabled());
        assert!(preprocessor.is_denoising());

        // Process enough audio to fill resampling buffers
        let first_chunk: Vec<i16> = vec![0; 2048];
        let first_out = preprocessor.process(&first_chunk);

        // Due to resampling buffering, we may not get output on first call
        // but subsequent calls should produce output
        let second_chunk: Vec<i16> = vec![0; 2048];
        let second_out = preprocessor.process(&second_chunk);

        assert!(
            !first_out.is_empty() || !second_out.is_empty(),
            "Should produce output after enough input"
        );
    }
}