Skip to main content

speech_prep/preprocessing/
quality.rs

1//! Audio quality assessment with SNR estimation and spectral analysis.
2//!
3//! This module provides multi-dimensional quality metrics for audio
4//! preprocessing validation.
5//!
6//! # Metrics
7//!
8//! - **SNR (Signal-to-Noise Ratio)**: Measures signal power vs noise floor (dB)
9//! - **RMS Energy**: Root-mean-square energy as baseline quality indicator
10//! - **Spectral Centroid**: Weighted average frequency (brightness measure)
11//! - **Quality Score**: Unified score in [0.0, 1.0] combining all metrics
12//!
13//! # Performance
14//!
15//! - **Target**: <10ms per second of 16 kHz audio
//! - **Memory**: Small, short-lived allocations (one `f32` energy value per 256-sample frame)
17//!
18//! # Example
19//!
20//! ```rust,no_run
21//! use speech_prep::preprocessing::QualityAssessor;
22//!
23//! # fn main() -> speech_prep::error::Result<()> {
24//! let assessor = QualityAssessor::new(16000);
25//! let audio_samples = vec![0.5f32; 16000]; // 1 second at 16 kHz
26//!
27//! let metrics = assessor.assess(&audio_samples)?;
28//! assert!(metrics.snr_db.is_finite());
29//! # Ok(())
30//! # }
31//! ```
32
33use crate::error::{Error, Result};
34use crate::time::{AudioDuration, AudioInstant};
35use tracing::{debug, trace};
36
/// Quality metrics for audio assessment.
///
/// All metrics are computed for a single audio chunk by
/// `QualityAssessor::assess`. The struct is a plain `Copy` value type with
/// public fields.
#[derive(Debug, Clone, Copy)]
pub struct QualityMetrics {
    /// Signal-to-noise ratio in decibels; the assessor clamps it to [0.0, 60.0].
    pub snr_db: f32,
    /// RMS energy level of the whole chunk ([0.0, 1.0] for audio normalized
    /// to [-1.0, 1.0]; not clamped).
    pub energy: f32,
    /// Spectral centroid estimate in Hz, clamped to [0.0, `sample_rate/2`].
    pub spectral_centroid: f32,
    /// Unified quality score in [0.0, 1.0] (higher is better).
    pub quality_score: f32,
}
51
/// Audio quality assessor with configurable sample rate.
///
/// Computes multi-dimensional quality metrics for audio chunks,
/// providing objective measures for quality gates and filtering.
///
/// The assessor is a small `Copy` value; its methods take `self` by value.
#[derive(Debug, Clone, Copy)]
pub struct QualityAssessor {
    // Sample rate in Hz. Used to scale the spectral centroid and its score
    // against the Nyquist frequency (`sample_rate / 2`). Not validated here.
    sample_rate: u32,
}
60
61impl QualityAssessor {
62    /// Creates a new quality assessor for the given sample rate.
63    ///
64    /// # Arguments
65    ///
66    /// - `sample_rate`: Audio sample rate in Hz (e.g., 16000)
67    ///
68    /// # Example
69    ///
70    /// ```rust
71    /// use speech_prep::preprocessing::QualityAssessor;
72    ///
73    /// let assessor = QualityAssessor::new(16000);
74    /// ```
75    pub fn new(sample_rate: u32) -> Self {
76        Self { sample_rate }
77    }
78
79    /// Assesses audio quality for the given samples.
80    ///
81    /// Computes SNR, energy, spectral centroid, and unified quality score.
82    ///
83    /// # Arguments
84    ///
85    /// - `samples`: Audio samples to assess (must not be empty)
86    ///
87    /// # Returns
88    ///
89    /// Quality metrics including SNR (dB), energy, spectral centroid (Hz),
90    /// and unified quality score [0.0, 1.0].
91    ///
92    /// # Errors
93    ///
94    /// Returns `Error::InvalidInput` if samples are empty.
95    ///
96    /// # Example
97    ///
98    /// ```rust,no_run
99    /// use speech_prep::preprocessing::QualityAssessor;
100    ///
101    /// # fn main() -> speech_prep::error::Result<()> {
102    /// let assessor = QualityAssessor::new(16000);
103    /// let audio = vec![0.5f32; 16000];
104    /// let metrics = assessor.assess(&audio)?;
105    /// assert!((0.0..=1.0).contains(&metrics.quality_score));
106    /// # Ok(())
107    /// # }
108    /// ```
109    pub fn assess(self, samples: &[f32]) -> Result<QualityMetrics> {
110        trace!(sample_count = samples.len(), "Assessing audio quality");
111
112        if samples.is_empty() {
113            return Err(Error::InvalidInput("Cannot assess empty audio".into()));
114        }
115
116        let processing_start = AudioInstant::now();
117        let energy = Self::calculate_rms(samples);
118        let snr_db = Self::calculate_snr(samples, energy)?;
119        let spectral_centroid = self.calculate_spectral_centroid(samples)?;
120        let quality_score = self.aggregate_score(snr_db, energy, spectral_centroid);
121
122        debug!(
123            snr_db,
124            energy, spectral_centroid, quality_score, "Audio quality metrics computed"
125        );
126
127        let metrics = QualityMetrics {
128            snr_db,
129            energy,
130            spectral_centroid,
131            quality_score,
132        };
133        let _latency = elapsed_duration(processing_start);
134
135        Ok(metrics)
136    }
137
138    /// Calculates RMS (root-mean-square) energy of audio samples.
139    ///
140    /// This is a static method that can be called without an assessor instance.
141    ///
142    /// # Arguments
143    ///
144    /// - `samples`: Audio samples (must not be empty)
145    ///
146    /// # Returns
147    ///
148    /// RMS energy in range [0.0, 1.0] for normalized audio
149    fn calculate_rms(samples: &[f32]) -> f32 {
150        let sum_squares: f32 = samples.iter().map(|&s| s * s).sum();
151        let mean_square = sum_squares / samples.len() as f32;
152        mean_square.sqrt()
153    }
154
155    /// Calculates signal-to-noise ratio (SNR) in decibels.
156    ///
157    /// Estimates noise floor from the quietest 10% of frames,
158    /// then computes dB ratio between signal RMS and noise floor.
159    ///
160    /// # Arguments
161    ///
162    /// - `samples`: Audio samples
163    /// - `signal_rms`: Pre-computed RMS energy of the signal
164    ///
165    /// # Returns
166    ///
167    /// SNR in dB, clamped to [0.0, 60.0] for practical purposes
168    ///
169    /// # Errors
170    ///
171    /// Returns `Error::AudioProcessing` if insufficient frames for estimation
172    fn calculate_snr(samples: &[f32], signal_rms: f32) -> Result<f32> {
173        // Compute frame energies (256 samples per frame)
174        let frame_energies = Self::frame_energy(samples);
175
176        let mut valid_energies: Vec<f32> =
177            frame_energies.into_iter().filter(|x| !x.is_nan()).collect();
178
179        if valid_energies.is_empty() {
180            return Err(Error::Processing(
181                "All frame energies are NaN; cannot estimate noise floor".into(),
182            ));
183        }
184
185        valid_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
186
187        // Quietest 10% of frames as noise floor estimate
188        let noise_frame_count = (valid_energies.len() / 10).max(1);
189        let noise_frames = valid_energies
190            .get(0..noise_frame_count)
191            .ok_or_else(|| Error::Processing("Insufficient frames for noise estimation".into()))?;
192
193        let noise_floor = noise_frames.iter().sum::<f32>() / noise_frames.len() as f32;
194
195        if signal_rms < 1e-6 {
196            return Ok(0.0);
197        }
198
199        if noise_floor < 1e-10 {
200            return Ok(60.0);
201        }
202
203        let snr = 20.0 * (signal_rms / noise_floor).log10();
204        Ok(snr.clamp(0.0, 60.0))
205    }
206
207    /// Computes RMS energy for each frame of audio.
208    ///
209    /// Divides audio into fixed-size frames and computes RMS for each.
210    ///
211    /// # Arguments
212    ///
213    /// - `samples`: Audio samples
214    ///
215    /// # Returns
216    ///
217    /// Vector of RMS energies, one per frame
218    fn frame_energy(samples: &[f32]) -> Vec<f32> {
219        const FRAME_SIZE: usize = 256;
220        samples
221            .chunks(FRAME_SIZE)
222            .map(|frame| {
223                let sum_sq: f32 = frame.iter().map(|&s| s * s).sum();
224                (sum_sq / frame.len() as f32).sqrt()
225            })
226            .collect()
227    }
228
229    /// Calculates spectral centroid (brightness measure) in Hz.
230    ///
231    /// Computes weighted average frequency from magnitude spectrum.
232    /// Uses simplified time-domain approximation (not full FFT).
233    ///
234    /// # Arguments
235    ///
236    /// - `samples`: Audio samples (should be ≥512 for meaningful result)
237    ///
238    /// # Returns
239    ///
240    /// Spectral centroid in Hz, clamped to [0.0, `sample_rate/2`]
241    ///
242    /// # Errors
243    ///
244    /// Returns `Error::AudioProcessing` if samples are too short
245    ///
246    /// # Note
247    ///
248    /// This is a simplified implementation. Full FFT-based spectral
249    /// centroid can be added in the future for more accurate results.
250    fn calculate_spectral_centroid(self, samples: &[f32]) -> Result<f32> {
251        // For very short audio, return midpoint frequency
252        if samples.len() < 512 {
253            return Ok(self.sample_rate as f32 / 4.0);
254        }
255
256        // Use first 512 samples for spectral analysis
257        let window = samples.get(0..512).ok_or_else(|| {
258            Error::Processing("Insufficient samples for spectral analysis".into())
259        })?;
260
261        // Time-domain approximation of spectral centroid
262        let (magnitude_sum, weighted_sum) =
263            window
264                .iter()
265                .enumerate()
266                .fold((0.0f32, 0.0f32), |(mag_acc, weighted_acc), (i, &s)| {
267                    let magnitude = s.abs();
268                    (
269                        mag_acc + magnitude,
270                        magnitude.mul_add(i as f32, weighted_acc),
271                    )
272                });
273
274        if magnitude_sum < 1e-10 {
275            return Ok(self.sample_rate as f32 / 4.0);
276        }
277
278        let centroid_bin = weighted_sum / magnitude_sum;
279        let centroid_hz = (centroid_bin / 512.0) * (self.sample_rate as f32 / 2.0);
280        Ok(centroid_hz.clamp(0.0, self.sample_rate as f32 / 2.0))
281    }
282
283    /// Aggregates individual metrics into unified quality score [0.0, 1.0].
284    ///
285    /// Uses weighted combination:
286    /// - 50% SNR (signal clarity)
287    /// - 30% Energy (signal strength)
288    /// - 20% Spectral centroid (frequency content)
289    ///
290    /// # Arguments
291    ///
292    /// - `snr_db`: Signal-to-noise ratio in dB
293    /// - `energy`: RMS energy
294    /// - `spectral_centroid`: Spectral centroid in Hz
295    ///
296    /// # Returns
297    ///
298    /// Quality score in [0.0, 1.0], where 1.0 is perfect quality
299    fn aggregate_score(self, snr_db: f32, energy: f32, spectral_centroid: f32) -> f32 {
300        let snr_score = (snr_db / 60.0).clamp(0.0, 1.0);
301        let energy_score = (energy / 0.5).clamp(0.0, 1.0);
302        let centroid_score = (spectral_centroid / (self.sample_rate as f32 / 2.0)).clamp(0.0, 1.0);
303
304        // 50% SNR, 30% energy, 20% spectral
305        let score = 0.5f32.mul_add(
306            snr_score,
307            0.3f32.mul_add(energy_score, 0.2 * centroid_score),
308        );
309
310        score.clamp(0.0, 1.0)
311    }
312}
313
314fn elapsed_duration(start: AudioInstant) -> AudioDuration {
315    AudioInstant::now().duration_since(start)
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for approximate float comparisons.
    const EPSILON: f32 = 0.01;

    /// Builds the 16 kHz assessor shared by every test.
    fn assessor_16k() -> QualityAssessor {
        QualityAssessor::new(16000)
    }

    /// A 440 Hz burst surrounded by silence should score a high SNR.
    #[test]
    fn test_high_quality_audio() {
        // Silent first and last 25% (noise floor); strong sine in the middle
        // 50% (samples 4000..12000).
        let samples: Vec<f32> = (0..16000usize)
            .map(|i| {
                if (4000..12000).contains(&i) {
                    (2.0 * std::f32::consts::PI * 440.0 * i as f32 / 16000.0).sin() * 0.5
                } else {
                    0.0
                }
            })
            .collect();

        let metrics = assessor_16k().assess(&samples).unwrap();

        assert!(
            metrics.snr_db > 20.0,
            "Expected SNR > 20 dB, got {:.1}",
            metrics.snr_db
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(
            metrics.quality_score > 0.5,
            "Expected quality > 0.5, got {:.2}",
            metrics.quality_score
        );
    }

    /// A tone mixed with deterministic pseudo-noise should not reach a high SNR.
    #[test]
    fn test_noisy_audio() {
        let noisy: Vec<f32> = (0..16000usize)
            .map(|i| {
                let signal =
                    (2.0 * std::f32::consts::PI * 440.0 * i as f32 / 16000.0).sin() * 0.2;
                let noise = (i as f32 * 0.1).sin().mul_add(0.1, (i % 7) as f32 * 0.01);
                signal + noise
            })
            .collect();

        let metrics = assessor_16k().assess(&noisy).unwrap();

        assert!(
            metrics.snr_db < 40.0,
            "Expected SNR < 40 dB for noisy audio"
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    /// The RMS of a constant 0.5 signal is 0.5.
    #[test]
    fn test_energy_calculation() {
        let audio = vec![0.5f32; 1000];

        let metrics = assessor_16k().assess(&audio).unwrap();

        let deviation = (metrics.energy - 0.5).abs();
        assert!(
            deviation < EPSILON,
            "Expected energy ~0.5, got {:.3}",
            metrics.energy
        );
    }

    /// Quality score and SNR must stay inside their documented ranges.
    #[test]
    fn test_quality_score_bounds() {
        let audio = vec![0.3f32; 5000];

        let metrics = assessor_16k().assess(&audio).unwrap();

        assert!(
            (0.0..=1.0).contains(&metrics.quality_score),
            "Quality score {:.2} out of bounds [0.0, 1.0]",
            metrics.quality_score
        );
        assert!(
            (0.0..=60.0).contains(&metrics.snr_db),
            "SNR {:.1} dB out of bounds [0.0, 60.0]",
            metrics.snr_db
        );
    }

    /// The centroid must land between 0 Hz and the Nyquist frequency.
    #[test]
    fn test_spectral_centroid_computed() {
        let audio = vec![0.2f32; 1024];

        let metrics = assessor_16k().assess(&audio).unwrap();

        assert!(metrics.spectral_centroid >= 0.0);
        assert!(
            metrics.spectral_centroid <= 8000.0, // Nyquist frequency
            "Spectral centroid {:.1} Hz exceeds Nyquist (8000 Hz)",
            metrics.spectral_centroid
        );
    }

    /// Empty input is rejected with `Error::InvalidInput`.
    #[test]
    fn test_empty_audio() {
        match assessor_16k().assess(&[]) {
            Ok(_) => panic!("Should reject empty audio"),
            Err(Error::InvalidInput(msg)) => {
                assert!(
                    msg.contains("empty"),
                    "Expected 'empty' error, got: {}",
                    msg
                );
            }
            Err(other) => panic!("Expected InvalidInput error, got: {:?}", other),
        }
    }

    /// Pure silence must score low on every metric, not maximal SNR.
    #[test]
    fn test_silence_handling() {
        let silence = vec![0.0f32; 16000];

        let metrics = assessor_16k().assess(&silence).unwrap();

        assert!(
            metrics.energy < EPSILON,
            "Expected near-zero energy for silence, got {:.6}",
            metrics.energy
        );
        assert!(
            metrics.snr_db < 1.0,
            "Expected SNR ~0 dB for silence, got {:.1} dB",
            metrics.snr_db
        );
        assert!(
            metrics.quality_score < 0.2,
            "Expected quality <0.2 for silence, got {:.2}",
            metrics.quality_score
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    /// Inputs shorter than the 512-sample analysis window still produce metrics.
    #[test]
    fn test_short_audio() {
        let short_audio = vec![0.5f32; 256];

        let metrics = assessor_16k().assess(&short_audio).unwrap();

        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(metrics.spectral_centroid > 0.0);
    }

    /// Audio below the signal threshold (but non-zero) is treated like silence.
    #[test]
    fn test_very_quiet_audio() {
        let very_quiet = vec![1e-7f32; 16000];

        let metrics = assessor_16k().assess(&very_quiet).unwrap();

        assert!(
            metrics.energy < 1e-6,
            "Expected near-zero energy for very quiet audio, got {:.9}",
            metrics.energy
        );
        assert!(
            metrics.snr_db < 5.0,
            "Expected low SNR for very quiet audio, got {:.1} dB",
            metrics.snr_db
        );
        assert!(
            metrics.quality_score < 0.3,
            "Expected low quality for very quiet audio, got {:.2}",
            metrics.quality_score
        );
    }
}
512}