speech_prep/preprocessing/dc_highpass.rs
1//! DC offset removal and high-pass filtering.
2//!
3//! Removes DC bias and attenuates low-frequency rumble (<80 Hz) to prepare
4//! audio for spectral analysis and noise reduction.
5
6use crate::error::{Error, Result};
7use crate::time::{AudioDuration, AudioInstant};
8use tracing::{info, warn};
9
10/// Configuration for DC offset removal and high-pass filtering.
11///
12/// # Examples
13///
14/// ```rust,no_run
15/// use speech_prep::preprocessing::PreprocessingConfig;
16///
17/// // Default: 80 Hz high-pass, 16kHz sample rate, EMA α=0.95
18/// let config = PreprocessingConfig::default();
19///
20/// // Custom configuration for noisy environment
21/// let config = PreprocessingConfig {
22/// highpass_cutoff_hz: 120.0, // More aggressive low-frequency removal
23/// dc_bias_alpha: 0.98, // Slower DC adaptation
24/// ..Default::default()
25/// };
26/// # Ok::<(), speech_prep::error::Error>(())
27/// ```
28#[derive(Debug, Clone, Copy)]
29pub struct PreprocessingConfig {
30 /// High-pass filter cutoff frequency in Hz.
31 ///
32 /// **Range**: 60.0 - 120.0
33 /// **Default**: 80.0
34 ///
35 /// **Effect**: Frequencies below this cutoff are attenuated (≥20 dB at
36 /// fc/2). Higher cutoffs remove more low-frequency content but may
37 /// affect speech naturalness.
38 ///
39 /// **Recommendation**:
40 /// - 60-80 Hz: Standard speech (default)
41 /// - 80-100 Hz: Noisy environments with HVAC/rumble
42 /// - 100-120 Hz: Extreme low-frequency noise
43 pub highpass_cutoff_hz: f32,
44
45 /// Audio sample rate in Hz.
46 ///
47 /// **Typical Values**: 16000, 44100, 48000
48 /// **Default**: 16000
49 ///
50 /// **Effect**: Determines filter coefficient calculation.
51 /// Must match the actual sample rate of input audio.
52 pub sample_rate_hz: u32,
53
54 /// EMA smoothing factor for DC bias estimation.
55 ///
56 /// **Range**: 0.9 - 0.99
57 /// **Default**: 0.95
58 ///
59 /// **Effect**: Controls adaptation speed of DC bias tracking.
60 /// - Higher (0.95-0.99): Slower adaptation, smoother (recommended)
61 /// - Lower (0.90-0.94): Faster adaptation, less smooth
62 ///
63 /// **Formula**: `bias_new = α × bias_old + (1-α) × sample_mean`
64 pub dc_bias_alpha: f32,
65
66 /// Enable DC offset removal stage.
67 ///
68 /// **Default**: true
69 ///
70 /// **Effect**: When false, DC removal is skipped (filter-only mode).
71 /// Useful if audio is already DC-free (rare).
72 pub enable_dc_removal: bool,
73
74 /// Enable high-pass filtering stage.
75 ///
76 /// **Default**: true
77 ///
78 /// **Effect**: When false, high-pass filter is skipped (DC-only mode).
79 /// Useful for testing or if audio already high-pass filtered.
80 pub enable_highpass: bool,
81
82 /// Order of the high-pass filter.
83 ///
84 /// **Default**: `FourthOrder` (two cascaded biquads)
85 ///
86 /// **Effect**: Higher order increases low-frequency attenuation at the cost
87 /// of additional computation. `FourthOrder` meets the ≥20 dB @ 40 Hz
88 /// target.
89 pub highpass_order: HighpassOrder,
90}
91
92impl Default for PreprocessingConfig {
93 fn default() -> Self {
94 Self {
95 highpass_cutoff_hz: 80.0,
96 sample_rate_hz: 16_000,
97 dc_bias_alpha: 0.95,
98 enable_dc_removal: true,
99 enable_highpass: true,
100 highpass_order: HighpassOrder::FourthOrder,
101 }
102 }
103}
104
105impl PreprocessingConfig {
106 /// Validate configuration parameters.
107 ///
108 /// # Errors
109 ///
110 /// Returns `Error::Configuration` if:
111 /// - `highpass_cutoff_hz` < 20 Hz (too low, ineffective)
112 /// - `highpass_cutoff_hz` >= Nyquist frequency (fs/2)
113 /// - `dc_bias_alpha` not in (0.0, 1.0)
114 /// - `sample_rate_hz` is zero
115 #[allow(clippy::trivially_copy_pass_by_ref)]
116 pub fn validate(&self) -> Result<()> {
117 if self.sample_rate_hz == 0 {
118 return Err(Error::Configuration(
119 "sample_rate_hz must be greater than zero".into(),
120 ));
121 }
122
123 if self.highpass_cutoff_hz < 20.0 {
124 return Err(Error::Configuration(format!(
125 "Cutoff {:.1} Hz too low (minimum 20 Hz)",
126 self.highpass_cutoff_hz
127 )));
128 }
129
130 let nyquist = self.sample_rate_hz as f32 / 2.0;
131 if self.highpass_cutoff_hz >= nyquist {
132 return Err(Error::Configuration(format!(
133 "Cutoff {:.1} Hz exceeds Nyquist {:.1} Hz",
134 self.highpass_cutoff_hz, nyquist
135 )));
136 }
137
138 if self.dc_bias_alpha <= 0.0 || self.dc_bias_alpha >= 1.0 {
139 return Err(Error::Configuration(format!(
140 "Invalid EMA alpha: {:.3} (must be in range 0.0 < α < 1.0)",
141 self.dc_bias_alpha
142 )));
143 }
144
145 Ok(())
146 }
147}
148
/// Selectable steepness of the high-pass filter.
///
/// Every section is a 2nd-order Butterworth biquad and all sections share
/// the same coefficients; the order only decides how many of them a sample
/// passes through.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighpassOrder {
    /// One biquad section (2nd-order Butterworth).
    SecondOrder,
    /// Two identical biquad sections in series (4th-order roll-off).
    ///
    /// Because both sections use the same 2nd-order Butterworth coefficients,
    /// the cascade is about 6 dB down at the cutoff rather than the 3 dB of a
    /// true 4th-order Butterworth design.
    #[default]
    FourthOrder,
}
158
159impl HighpassOrder {
160 #[must_use]
161 fn stage_count(self) -> usize {
162 match self {
163 Self::SecondOrder => 1,
164 Self::FourthOrder => 2,
165 }
166 }
167}
168
/// Voice-activity hint that gates DC bias tracking.
///
/// Passing a context to [`DcHighPassFilter::process`] restricts bias updates
/// to chunks flagged as silence, so speech energy does not drag the estimate
/// around. Passing `None` updates the bias on every chunk. The filter only
/// reads this one flag, which keeps it decoupled from the VAD engine that
/// produces it.
///
/// # Example
///
/// ```rust,no_run
/// use speech_prep::preprocessing::{DcHighPassFilter, PreprocessingConfig, VadContext};
///
/// # fn main() -> speech_prep::error::Result<()> {
/// let mut filter = DcHighPassFilter::new(PreprocessingConfig::default())?;
/// let samples = vec![0.0; 1600];
///
/// // No VAD information: the DC bias is refreshed on every call.
/// let output1 = filter.process(&samples, None)?;
///
/// // VAD says "silence": the DC bias is refreshed for this chunk.
/// let vad_ctx = VadContext { is_silence: true };
/// let output2 = filter.process(&samples, Some(&vad_ctx))?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, Copy)]
pub struct VadContext {
    /// Whether the chunk being processed was classified as silence.
    ///
    /// - `true`: the DC bias estimate is updated from this chunk.
    /// - `false`: the estimate is frozen at its previous value.
    pub is_silence: bool,
}
201
202/// DC offset removal and high-pass filtering with streaming state.
203///
204/// Implements the DC offset removal specification:
205/// - Removes DC offset using exponential moving average (EMA)
206/// - Applies cascaded Butterworth high-pass filtering (defaults to 4th order @
207/// 80 Hz)
208/// - Maintains filter state across chunks for streaming continuity
209/// - Achieves <2ms latency target per 500ms chunk (8000 samples @ 16kHz)
210///
211/// # Performance
212///
213/// - **Target**: <2ms per 500ms chunk
214/// - **Expected**: ~0.16ms (10x headroom)
215/// - **Optimization**: Precomputed coefficients, preallocated buffers
216///
217/// # Example
218///
219/// ```rust,no_run
220/// use speech_prep::preprocessing::{DcHighPassFilter, PreprocessingConfig};
221///
222/// # fn main() -> speech_prep::error::Result<()> {
223/// let mut filter = DcHighPassFilter::new(PreprocessingConfig::default())?;
224/// let audio_stream = vec![vec![0.0; 8000], vec![0.1; 8000]];
225///
226/// // Process streaming chunks with state continuity
227/// for chunk in audio_stream {
228/// let clean = filter.process(&chunk, None)?;
229/// // No discontinuities at boundaries!
230/// }
231/// # Ok(())
232/// # }
233/// ```
234#[allow(missing_copy_implementations)]
235#[derive(Debug, Clone)]
236pub struct DcHighPassFilter {
237 config: PreprocessingConfig,
238 coeffs: BiquadCoefficients,
239 stages: Vec<BiquadState>,
240 dc_bias: f32,
241}
242
/// Normalized coefficients of one biquad section.
///
/// They describe the transfer function
/// H(z) = (b0 + b1·z⁻¹ + b2·z⁻²) / (1 + a1·z⁻¹ + a2·z⁻²),
/// i.e. the leading denominator coefficient has already been divided out.
#[derive(Debug, Clone, Copy)]
struct BiquadCoefficients {
    /// Feed-forward weight of the current input sample.
    b0: f32,
    /// Feed-forward weight of the previous input sample.
    b1: f32,
    /// Feed-forward weight of the input sample two steps back.
    b2: f32,
    /// Feedback weight of the previous output sample.
    a1: f32,
    /// Feedback weight of the output sample two steps back.
    a2: f32,
}
251
/// Delay-line memory of one biquad section.
///
/// Holds the two most recent inputs and outputs; the default value (all
/// zeros) is the "fresh stream" state.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
struct BiquadState {
    /// Previous input sample.
    x1: f32,
    /// Input sample two steps back.
    x2: f32,
    /// Previous output sample.
    y1: f32,
    /// Output sample two steps back.
    y2: f32,
}
259
260impl BiquadState {
261 #[inline]
262 fn process(&mut self, coeffs: &BiquadCoefficients, input: f32) -> f32 {
263 let acc = coeffs
264 .b0
265 .mul_add(input, coeffs.b1.mul_add(self.x1, coeffs.b2 * self.x2));
266 let output = acc - coeffs.a1.mul_add(self.y1, coeffs.a2 * self.y2);
267
268 self.x2 = self.x1;
269 self.x1 = input;
270 self.y2 = self.y1;
271 self.y1 = output;
272
273 output
274 }
275
276 fn reset(&mut self) {
277 *self = Self::default();
278 }
279
280 #[cfg(test)]
281 fn is_reset(self) -> bool {
282 self == Self::default()
283 }
284}
285
286impl DcHighPassFilter {
287 /// Create a new DC offset removal and high-pass filter.
288 ///
289 /// # Arguments
290 ///
291 /// * `config` - Configuration parameters (cutoff frequency, sample rate,
292 /// EMA alpha)
293 ///
294 /// # Errors
295 ///
296 /// Returns `Error::Configuration` if configuration is invalid.
297 ///
298 /// # Example
299 ///
300 /// ```rust,no_run
301 /// use speech_prep::preprocessing::{DcHighPassFilter, PreprocessingConfig};
302 ///
303 /// let config = PreprocessingConfig {
304 /// highpass_cutoff_hz: 100.0, // More aggressive
305 /// ..Default::default()
306 /// };
307 /// let filter = DcHighPassFilter::new(config)?;
308 /// # Ok::<(), speech_prep::error::Error>(())
309 /// ```
310 pub fn new(config: PreprocessingConfig) -> Result<Self> {
311 config.validate()?;
312
313 let (b0, b1, b2, a1, a2) = compute_butterworth_highpass_coefficients(
314 config.highpass_cutoff_hz,
315 config.sample_rate_hz,
316 )?;
317
318 let coeffs = BiquadCoefficients { b0, b1, b2, a1, a2 };
319 let stage_count = config.highpass_order.stage_count();
320 let stages = vec![BiquadState::default(); stage_count];
321
322 Ok(Self {
323 config,
324 coeffs,
325 stages,
326 dc_bias: 0.0,
327 })
328 }
329
330 /// Process audio samples with DC removal and high-pass filtering.
331 ///
332 /// # Arguments
333 ///
334 /// * `samples` - Input audio samples (typically 500ms chunk = 8000 samples
335 /// @ 16kHz)
336 /// * `vad_context` - Optional VAD state for intelligent DC bias updates
337 ///
338 /// # Returns
339 ///
340 /// Processed audio with DC removed and low frequencies attenuated.
341 ///
342 /// # Performance
343 ///
344 /// - Expected: ~0.16ms for 8000 samples (10x better than <2ms target)
345 /// - Complexity: O(n) where n = `samples.len()`
346 ///
347 /// # Example
348 ///
349 /// ```rust,no_run
350 /// use speech_prep::preprocessing::{DcHighPassFilter, PreprocessingConfig, VadContext};
351 ///
352 /// let mut filter = DcHighPassFilter::new(PreprocessingConfig::default())?;
353 ///
354 /// // Chunk 1
355 /// let chunk1 = vec![0.1, 0.2, -0.1, 0.15];
356 /// let output1 = filter.process(&chunk1, None)?;
357 ///
358 /// // Chunk 2 (state preserved from chunk1 - no discontinuity!)
359 /// let chunk2 = vec![0.2, 0.1, 0.3, 0.0];
360 /// let output2 = filter.process(&chunk2, None)?;
361 /// # Ok::<(), speech_prep::error::Error>(())
362 /// ```
363 #[allow(clippy::unnecessary_wraps)]
364 #[allow(clippy::trivially_copy_pass_by_ref)]
365 pub fn process(
366 &mut self,
367 samples: &[f32],
368 vad_context: Option<&VadContext>,
369 ) -> Result<Vec<f32>> {
370 let processing_start = AudioInstant::now();
371
372 if samples.is_empty() {
373 return Ok(Vec::new());
374 }
375
376 let should_update_bias = vad_context.is_none_or(|ctx| ctx.is_silence);
377 if self.config.enable_dc_removal && should_update_bias {
378 self.update_dc_bias(samples);
379 }
380
381 let output = self.process_samples(samples);
382
383 let elapsed = elapsed_duration(processing_start);
384 let latency_ms = elapsed.as_secs_f64() * 1000.0;
385 self.record_performance_metrics(samples.len(), latency_ms);
386
387 Ok(output)
388 }
389
390 #[inline]
391 fn process_samples(&mut self, samples: &[f32]) -> Vec<f32> {
392 let mut output = Vec::with_capacity(samples.len());
393
394 for &sample in samples {
395 let mut next = if self.config.enable_dc_removal {
396 sample - self.dc_bias
397 } else {
398 sample
399 };
400
401 if self.config.enable_highpass {
402 for stage in &mut self.stages {
403 next = stage.process(&self.coeffs, next);
404 }
405 }
406
407 output.push(next);
408 }
409
410 output
411 }
412
413 fn record_performance_metrics(&self, sample_count: usize, latency_ms: f64) {
414 if sample_count < 8000 {
415 return;
416 }
417
418 if latency_ms > 2.0 {
419 warn!(
420 target: "audio.preprocess.highpass",
421 latency_ms,
422 samples = sample_count,
423 cutoff_hz = self.config.highpass_cutoff_hz,
424 order = ?self.config.highpass_order,
425 "high-pass latency exceeded target"
426 );
427 }
428
429 info!(
430 target: "audio.preprocess.highpass",
431 dc_bias = self.dc_bias,
432 latency_ms,
433 samples = sample_count,
434 cutoff_hz = self.config.highpass_cutoff_hz,
435 order = ?self.config.highpass_order,
436 "audio preprocess high-pass metrics"
437 );
438 }
439
440 /// Reset filter state for new audio stream.
441 ///
442 /// Clears filter history (x1, x2, y1, y2) and DC bias estimate.
443 /// Use this when starting a new, independent audio stream.
444 ///
445 /// # Example
446 ///
447 /// ```rust,no_run
448 /// use speech_prep::preprocessing::{DcHighPassFilter, PreprocessingConfig};
449 ///
450 /// # fn main() -> speech_prep::error::Result<()> {
451 /// let mut filter = DcHighPassFilter::new(PreprocessingConfig::default())?;
452 /// let audio_stream_1 = vec![0.0; 8000];
453 /// let audio_stream_2 = vec![0.2; 8000];
454 ///
455 /// // Process stream 1
456 /// filter.process(&audio_stream_1, None)?;
457 ///
458 /// // Switch to unrelated stream 2 - reset state
459 /// filter.reset();
460 /// filter.process(&audio_stream_2, None)?;
461 /// # Ok(())
462 /// # }
463 /// ```
464 pub fn reset(&mut self) {
465 for stage in &mut self.stages {
466 stage.reset();
467 }
468 self.dc_bias = 0.0;
469 }
470
471 /// Get current DC bias estimate.
472 ///
473 /// Useful for debugging or observability.
474 #[must_use]
475 pub fn dc_bias(&self) -> f32 {
476 self.dc_bias
477 }
478
479 /// Get current configuration.
480 #[must_use]
481 pub fn config(&self) -> &PreprocessingConfig {
482 &self.config
483 }
484
485 fn update_dc_bias(&mut self, samples: &[f32]) {
486 if samples.is_empty() {
487 return;
488 }
489
490 let sum: f32 = samples.iter().sum();
491 let current_mean = sum / samples.len() as f32;
492
493 let alpha = self.config.dc_bias_alpha;
494 self.dc_bias = alpha.mul_add(self.dc_bias, (1.0 - alpha) * current_mean);
495 }
496}
497
498/// Compute 2nd-order Butterworth high-pass filter coefficients.
499///
500/// Implements the standard biquad coefficient formulas for Butterworth
501/// response.
502///
503/// # Arguments
504///
505/// * `cutoff_hz` - Cutoff frequency in Hz (e.g., 80.0)
506/// * `sample_rate_hz` - Sample rate in Hz (e.g., 16000)
507///
508/// # Returns
509///
510/// Tuple of normalized coefficients: (b0, b1, b2, a1, a2)
511/// where the transfer function is H(z) = (b0 + b1·z⁻¹ + b2·z⁻²) / (1 + a1·z⁻¹ +
512/// a2·z⁻²)
513///
514/// # Errors
515///
516/// Returns error if coefficients cannot be computed (invalid parameters).
517fn compute_butterworth_highpass_coefficients(
518 cutoff_hz: f32,
519 sample_rate_hz: u32,
520) -> Result<(f32, f32, f32, f32, f32)> {
521 use std::f32::consts::PI;
522
523 let w0 = 2.0 * PI * cutoff_hz / sample_rate_hz as f32;
524 let q = 0.707; // 1/sqrt(2) — Butterworth
525 let alpha = w0.sin() / (2.0 * q);
526 let cos_w0 = w0.cos();
527
528 let b0_unnorm = f32::midpoint(1.0, cos_w0);
529 let b1_unnorm = -(1.0 + cos_w0);
530 let b2_unnorm = f32::midpoint(1.0, cos_w0);
531 let a0 = 1.0 + alpha;
532 let a1_unnorm = -2.0 * cos_w0;
533 let a2_unnorm = 1.0 - alpha;
534
535 let b0 = b0_unnorm / a0;
536 let b1 = b1_unnorm / a0;
537 let b2 = b2_unnorm / a0;
538 let a1 = a1_unnorm / a0;
539 let a2 = a2_unnorm / a0;
540
541 if !b0.is_finite() || !b1.is_finite() || !b2.is_finite() || !a1.is_finite() || !a2.is_finite() {
542 return Err(Error::Processing(format!(
543 "Invalid filter coefficients for fc={cutoff_hz:.1}Hz, fs={sample_rate_hz}: \
544 b0={b0:.6}, b1={b1:.6}, b2={b2:.6}, a1={a1:.6}, a2={a2:.6}"
545 )));
546 }
547
548 Ok((b0, b1, b2, a1, a2))
549}
550
551fn elapsed_duration(start: AudioInstant) -> AudioDuration {
552 AudioInstant::now().duration_since(start)
553}
554
#[cfg(test)]
mod tests {
    use super::*;

    /// Result type for tests: errors are carried as plain strings so `?` can
    /// be used after `map_err(|e| e.to_string())`.
    type TestResult<T> = std::result::Result<T, String>;

    /// Generate `duration_secs` of a sine tone at `frequency` Hz with the
    /// given peak `amplitude`, sampled at `sample_rate` Hz.
    fn generate_sine_wave(
        frequency: f32,
        sample_rate: u32,
        duration_secs: f32,
        amplitude: f32,
    ) -> Vec<f32> {
        use std::f32::consts::PI;
        let samples = (sample_rate as f32 * duration_secs).round() as usize;
        (0..samples)
            .map(|i| {
                let t = i as f32 / sample_rate as f32;
                (2.0 * PI * frequency * t).sin() * amplitude
            })
            .collect()
    }

    /// Root mean square of `samples`; 0.0 for an empty slice.
    fn calculate_rms(samples: &[f32]) -> f32 {
        if samples.is_empty() {
            return 0.0;
        }
        let sum_sq: f32 = samples.iter().map(|&s| s * s).sum();
        (sum_sq / samples.len() as f32).sqrt()
    }

    /// Level change from `input` to `output` in dB (negative = attenuation).
    ///
    /// Returns 0.0 when either signal has zero RMS, because the ratio is
    /// undefined in that case.
    fn calculate_attenuation_db(input: &[f32], output: &[f32]) -> f32 {
        let rms_in = calculate_rms(input);
        let rms_out = calculate_rms(output);

        if rms_in == 0.0 || rms_out == 0.0 {
            return 0.0;
        }

        20.0 * (rms_out / rms_in).log10()
    }

    #[test]
    fn test_dc_offset_removal_synthetic_bias() -> TestResult<()> {
        // Create realistic audio with DC offset: sine wave + DC bias
        // This simulates real-world scenario better than constant DC
        let dc_offset = 0.5;
        let mut samples_with_dc: Vec<f32> = generate_sine_wave(440.0, 16000, 0.5, 0.3);
        for sample in &mut samples_with_dc {
            *sample += dc_offset;
        }

        let config = PreprocessingConfig {
            enable_dc_removal: true,
            enable_highpass: false, // DC removal only for this test
            dc_bias_alpha: 0.5,     // Fast convergence for test (vs default 0.95)
            ..Default::default()
        };
        let mut filter = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;

        // Simulate streaming: process 10 chunks to allow EMA convergence
        // With α=0.5, 10 iterations gives (1 - 0.5^10) ≈ 99.9% convergence
        let mut final_output = Vec::new();
        for _ in 0..10 {
            final_output = filter
                .process(&samples_with_dc, None)
                .map_err(|e| e.to_string())?;
        }

        // After convergence (10 chunks with α=0.5), residual DC should be < 0.001
        let mean: f32 = final_output.iter().sum::<f32>() / final_output.len() as f32;
        assert!(
            mean.abs() < 0.001,
            "DC residual too high after convergence: {:.6} (expected < 0.001)",
            mean
        );

        // DC bias estimate should be close to 0.5 (within 1%)
        assert!(
            (filter.dc_bias() - dc_offset).abs() < 0.005,
            "DC bias estimate {:.6} not converged to {:.6}",
            filter.dc_bias(),
            dc_offset
        );

        Ok(())
    }

    #[test]
    fn test_highpass_frequency_response() -> TestResult<()> {
        let config = PreprocessingConfig {
            highpass_cutoff_hz: 80.0,
            enable_dc_removal: false, // Filter only for this test
            ..Default::default()
        };
        let mut filter = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;

        // Test at 20 Hz (fc/4). Fourth-order should attenuate ≥30 dB
        let input_20hz = generate_sine_wave(20.0, 16000, 1.0, 1.0);
        let output_20hz = filter
            .process(&input_20hz, None)
            .map_err(|e| e.to_string())?;
        filter.reset(); // Reset state for independent test

        let attenuation_20hz = calculate_attenuation_db(&input_20hz, &output_20hz);
        assert!(
            attenuation_20hz <= -30.0,
            "Insufficient attenuation at 20Hz: {:.1} dB (expected ≤ -30 dB)",
            attenuation_20hz
        );

        // Test at 40 Hz (fc/2). Fourth-order should attenuate ≥20 dB
        let input_40hz = generate_sine_wave(40.0, 16000, 1.0, 1.0);
        let output_40hz = filter
            .process(&input_40hz, None)
            .map_err(|e| e.to_string())?;
        filter.reset(); // Reset state for independent test

        let attenuation_40hz = calculate_attenuation_db(&input_40hz, &output_40hz);
        assert!(
            attenuation_40hz <= -20.0,
            "Insufficient attenuation at 40Hz: {:.1} dB (expected ≤ -20 dB)",
            attenuation_40hz
        );

        // Test at 150 Hz (should pass with <1 dB loss)
        let input_150hz = generate_sine_wave(150.0, 16000, 1.0, 1.0);
        let output_150hz = filter
            .process(&input_150hz, None)
            .map_err(|e| e.to_string())?;

        let loss_150hz = calculate_attenuation_db(&input_150hz, &output_150hz);
        assert!(
            loss_150hz > -1.0,
            "Excessive loss at 150Hz: {:.1} dB (expected > -1 dB)",
            loss_150hz
        );

        Ok(())
    }

    #[test]
    fn test_chunk_boundary_continuity() -> TestResult<()> {
        // Process as single buffer
        let long_signal = generate_sine_wave(440.0, 16000, 1.0, 0.5); // 1 second
        let config = PreprocessingConfig::default();
        let mut filter1 = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;
        let output_single = filter1
            .process(&long_signal, None)
            .map_err(|e| e.to_string())?;

        // Process as two chunks
        let mut filter2 = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;
        let mid = long_signal.len() / 2;
        let chunk1 = &long_signal[0..mid];
        let chunk2 = &long_signal[mid..];
        let output_chunk1 = filter2.process(chunk1, None).map_err(|e| e.to_string())?;
        let output_chunk2 = filter2.process(chunk2, None).map_err(|e| e.to_string())?;

        // Concatenate chunked output
        let output_chunked: Vec<f32> = output_chunk1.into_iter().chain(output_chunk2).collect();

        // Verify outputs match (within numerical precision)
        // Note: Using 5e-5 tolerance because EMA DC bias updates accumulate
        // floating-point drift at chunk boundaries — the filter carries state
        // across chunks via f32 accumulators, and intermediate rounding differs
        // between single-pass and chunked paths. 5e-5 still catches real
        // discontinuities while allowing normal f32 accumulation drift.
        for (i, (single, chunked)) in output_single.iter().zip(output_chunked.iter()).enumerate() {
            let diff = (single - chunked).abs();
            assert!(
                diff < 5e-5,
                "Discontinuity at sample {}: diff={:.9} (single={:.9}, chunked={:.9})",
                i,
                diff,
                single,
                chunked
            );
        }

        Ok(())
    }

    #[test]
    fn test_vad_informed_dc_update() -> TestResult<()> {
        let config = PreprocessingConfig::default();
        let mut filter = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;

        // Speech chunk (don't update DC)
        let speech_samples = vec![0.1, 0.2, -0.1, 0.3];
        let speech_ctx = VadContext { is_silence: false };
        let initial_bias = filter.dc_bias();
        filter
            .process(&speech_samples, Some(&speech_ctx))
            .map_err(|e| e.to_string())?;

        // DC bias should NOT change during speech
        assert_eq!(
            filter.dc_bias(),
            initial_bias,
            "DC bias changed during speech"
        );

        // Silence chunk (update DC)
        let silence_samples = vec![0.5; 1000];
        let silence_ctx = VadContext { is_silence: true };
        filter
            .process(&silence_samples, Some(&silence_ctx))
            .map_err(|e| e.to_string())?;

        // DC bias should adapt toward 0.5
        assert!(
            filter.dc_bias() > initial_bias,
            "DC bias did not adapt during silence (initial={:.6}, after={:.6})",
            initial_bias,
            filter.dc_bias()
        );

        Ok(())
    }

    #[test]
    fn test_configuration_validation() {
        // Valid configuration should pass
        let valid_config = PreprocessingConfig::default();
        assert!(valid_config.validate().is_ok());

        // Cutoff too low
        let config_low = PreprocessingConfig {
            highpass_cutoff_hz: 10.0,
            ..Default::default()
        };
        assert!(config_low.validate().is_err());

        // Cutoff above Nyquist
        let config_high = PreprocessingConfig {
            highpass_cutoff_hz: 9000.0, // > 8000 Hz (Nyquist for 16kHz)
            ..Default::default()
        };
        assert!(config_high.validate().is_err());

        // Invalid EMA alpha
        let config_alpha = PreprocessingConfig {
            dc_bias_alpha: 1.0, // Must be < 1.0
            ..Default::default()
        };
        assert!(config_alpha.validate().is_err());

        // Zero sample rate
        let config_zero_sr = PreprocessingConfig {
            sample_rate_hz: 0,
            ..Default::default()
        };
        assert!(config_zero_sr.validate().is_err());
    }

    #[test]
    fn test_reset_clears_state() -> TestResult<()> {
        let config = PreprocessingConfig::default();
        let mut filter = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;

        // Process some audio that carries a real DC offset. A bare sine that
        // spans whole periods has (near-)zero mean, so the "bias is non-zero"
        // assertion below would pass only thanks to rounding residue.
        let mut samples = generate_sine_wave(440.0, 16000, 0.5, 0.8);
        for sample in &mut samples {
            *sample += 0.1;
        }
        filter.process(&samples, None).map_err(|e| e.to_string())?;

        // Verify state is non-zero
        assert_ne!(
            filter.dc_bias(),
            0.0,
            "DC bias should be non-zero after processing"
        );
        assert!(
            filter.stages.iter().copied().any(|stage| !stage.is_reset()),
            "Filter stages should accumulate state after processing"
        );

        // Reset
        filter.reset();

        // Verify state cleared
        assert_eq!(filter.dc_bias(), 0.0, "DC bias should be zero after reset");
        assert!(
            filter.stages.iter().copied().all(BiquadState::is_reset),
            "Filter stages should be reset to zero state"
        );

        Ok(())
    }

    #[test]
    fn test_empty_input() -> TestResult<()> {
        let config = PreprocessingConfig::default();
        let mut filter = DcHighPassFilter::new(config).map_err(|e| e.to_string())?;

        let output = filter.process(&[], None).map_err(|e| e.to_string())?;
        assert!(output.is_empty());

        Ok(())
    }
}