speech_prep/preprocessing/quality.rs
1//! Audio quality assessment with SNR estimation and spectral analysis.
2//!
//! This module provides multi-dimensional quality metrics for validating
//! preprocessed audio, enabling quality gates ahead of downstream and ML
//! processing.
6//!
7//! # Metrics
8//!
9//! - **SNR (Signal-to-Noise Ratio)**: Measures signal power vs noise floor (dB)
10//! - **RMS Energy**: Root-mean-square energy as baseline quality indicator
11//! - **Spectral Centroid**: Weighted average frequency (brightness measure)
12//! - **Quality Score**: Unified score in [0.0, 1.0] combining all metrics
13//!
14//! # Performance
15//!
16//! - **Target**: <10ms per second of 16 kHz audio
//! - **Memory**: Small per-call allocations (one `f32` energy value per 256-sample frame); no buffers are reused across calls
18//!
19//! # Example
20//!
21//! ```rust,no_run
22//! use speech_prep::preprocessing::QualityAssessor;
23//!
24//! # fn main() -> speech_prep::error::Result<()> {
25//! let assessor = QualityAssessor::new(16000);
26//! let audio_samples = vec![0.5f32; 16000]; // 1 second at 16 kHz
27//!
28//! let metrics = assessor.assess(&audio_samples)?;
29//! println!("SNR: {:.1} dB, Quality: {:.2}", metrics.snr_db, metrics.quality_score);
30//! # Ok(())
31//! # }
32//! ```
33
34use crate::error::{Error, Result};
35use crate::time::{AudioDuration, AudioInstant};
36use tracing::{debug, trace};
37
/// Quality metrics for audio assessment.
///
/// All metrics are computed for a single audio chunk and provide
/// objective measures of audio quality for downstream processing decisions.
///
/// The type is a plain value: it is `Copy`, and two instances compare equal
/// when all four fields are equal (standard `f32` equality, so a NaN field
/// never compares equal).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct QualityMetrics {
    /// Signal-to-noise ratio in decibels [0.0, 60.0]
    pub snr_db: f32,
    /// RMS energy level; [0.0, 1.0] for audio normalized to [-1.0, 1.0]
    pub energy: f32,
    /// Spectral centroid in Hz [0.0, `sample_rate/2`]
    pub spectral_centroid: f32,
    /// Unified quality score [0.0, 1.0] (higher is better)
    pub quality_score: f32,
}
53
/// Audio quality assessor with configurable sample rate.
///
/// Computes multi-dimensional quality metrics for audio chunks,
/// providing objective measures for quality gates and filtering.
///
/// The assessor holds only the sample rate, so it is cheap to copy and can
/// be compared, hashed, and shared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct QualityAssessor {
    /// Sample rate in Hz of the audio this assessor will be given.
    sample_rate: u32,
}
62
63impl QualityAssessor {
64 /// Creates a new quality assessor for the given sample rate.
65 ///
66 /// # Arguments
67 ///
68 /// - `sample_rate`: Audio sample rate in Hz (e.g., 16000)
69 ///
70 /// # Example
71 ///
72 /// ```rust
73 /// use speech_prep::preprocessing::QualityAssessor;
74 ///
75 /// let assessor = QualityAssessor::new(16000);
76 /// ```
77 pub fn new(sample_rate: u32) -> Self {
78 Self { sample_rate }
79 }
80
81 /// Assesses audio quality for the given samples.
82 ///
83 /// Computes SNR, energy, spectral centroid, and unified quality score.
84 ///
85 /// # Arguments
86 ///
87 /// - `samples`: Audio samples to assess (must not be empty)
88 ///
89 /// # Returns
90 ///
91 /// Quality metrics including SNR (dB), energy, spectral centroid (Hz),
92 /// and unified quality score [0.0, 1.0].
93 ///
94 /// # Errors
95 ///
96 /// Returns `Error::InvalidInput` if samples are empty.
97 ///
98 /// # Example
99 ///
100 /// ```rust,no_run
101 /// use speech_prep::preprocessing::QualityAssessor;
102 ///
103 /// # fn main() -> speech_prep::error::Result<()> {
104 /// let assessor = QualityAssessor::new(16000);
105 /// let audio = vec![0.5f32; 16000];
106 /// let metrics = assessor.assess(&audio)?;
107 /// assert!((0.0..=1.0).contains(&metrics.quality_score));
108 /// # Ok(())
109 /// # }
110 /// ```
111 pub fn assess(self, samples: &[f32]) -> Result<QualityMetrics> {
112 trace!(sample_count = samples.len(), "Assessing audio quality");
113
114 if samples.is_empty() {
115 return Err(Error::InvalidInput("Cannot assess empty audio".into()));
116 }
117
118 let processing_start = AudioInstant::now();
119 let energy = Self::calculate_rms(samples);
120 let snr_db = Self::calculate_snr(samples, energy)?;
121 let spectral_centroid = self.calculate_spectral_centroid(samples)?;
122 let quality_score = self.aggregate_score(snr_db, energy, spectral_centroid);
123
124 debug!(
125 snr_db,
126 energy, spectral_centroid, quality_score, "Audio quality metrics computed"
127 );
128
129 let metrics = QualityMetrics {
130 snr_db,
131 energy,
132 spectral_centroid,
133 quality_score,
134 };
135 let _latency = elapsed_duration(processing_start);
136
137 Ok(metrics)
138 }
139
140 /// Calculates RMS (root-mean-square) energy of audio samples.
141 ///
142 /// This is a static method that can be called without an assessor instance.
143 ///
144 /// # Arguments
145 ///
146 /// - `samples`: Audio samples (must not be empty)
147 ///
148 /// # Returns
149 ///
150 /// RMS energy in range [0.0, 1.0] for normalized audio
151 fn calculate_rms(samples: &[f32]) -> f32 {
152 let sum_squares: f32 = samples.iter().map(|&s| s * s).sum();
153 let mean_square = sum_squares / samples.len() as f32;
154 mean_square.sqrt()
155 }
156
157 /// Calculates signal-to-noise ratio (SNR) in decibels.
158 ///
159 /// Estimates noise floor from the quietest 10% of frames,
160 /// then computes dB ratio between signal RMS and noise floor.
161 ///
162 /// # Arguments
163 ///
164 /// - `samples`: Audio samples
165 /// - `signal_rms`: Pre-computed RMS energy of the signal
166 ///
167 /// # Returns
168 ///
169 /// SNR in dB, clamped to [0.0, 60.0] for practical purposes
170 ///
171 /// # Errors
172 ///
173 /// Returns `Error::AudioProcessing` if insufficient frames for estimation
174 fn calculate_snr(samples: &[f32], signal_rms: f32) -> Result<f32> {
175 // Compute frame energies (256 samples per frame)
176 let frame_energies = Self::frame_energy(samples);
177
178 let mut valid_energies: Vec<f32> =
179 frame_energies.into_iter().filter(|x| !x.is_nan()).collect();
180
181 if valid_energies.is_empty() {
182 return Err(Error::Processing(
183 "All frame energies are NaN; cannot estimate noise floor".into(),
184 ));
185 }
186
187 valid_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
188
189 // Quietest 10% of frames as noise floor estimate
190 let noise_frame_count = (valid_energies.len() / 10).max(1);
191 let noise_frames = valid_energies
192 .get(0..noise_frame_count)
193 .ok_or_else(|| Error::Processing("Insufficient frames for noise estimation".into()))?;
194
195 let noise_floor = noise_frames.iter().sum::<f32>() / noise_frames.len() as f32;
196
197 if signal_rms < 1e-6 {
198 return Ok(0.0);
199 }
200
201 if noise_floor < 1e-10 {
202 return Ok(60.0);
203 }
204
205 let snr = 20.0 * (signal_rms / noise_floor).log10();
206 Ok(snr.clamp(0.0, 60.0))
207 }
208
209 /// Computes RMS energy for each frame of audio.
210 ///
211 /// Divides audio into fixed-size frames and computes RMS for each.
212 ///
213 /// # Arguments
214 ///
215 /// - `samples`: Audio samples
216 ///
217 /// # Returns
218 ///
219 /// Vector of RMS energies, one per frame
220 fn frame_energy(samples: &[f32]) -> Vec<f32> {
221 const FRAME_SIZE: usize = 256;
222 samples
223 .chunks(FRAME_SIZE)
224 .map(|frame| {
225 let sum_sq: f32 = frame.iter().map(|&s| s * s).sum();
226 (sum_sq / frame.len() as f32).sqrt()
227 })
228 .collect()
229 }
230
231 /// Calculates spectral centroid (brightness measure) in Hz.
232 ///
233 /// Computes weighted average frequency from magnitude spectrum.
234 /// Uses simplified time-domain approximation (not full FFT).
235 ///
236 /// # Arguments
237 ///
238 /// - `samples`: Audio samples (should be ≥512 for meaningful result)
239 ///
240 /// # Returns
241 ///
242 /// Spectral centroid in Hz, clamped to [0.0, `sample_rate/2`]
243 ///
244 /// # Errors
245 ///
246 /// Returns `Error::AudioProcessing` if samples are too short
247 ///
248 /// # Note
249 ///
250 /// This is a simplified implementation. Full FFT-based spectral
251 /// centroid can be added in the future for more accurate results.
252 fn calculate_spectral_centroid(self, samples: &[f32]) -> Result<f32> {
253 // For very short audio, return midpoint frequency
254 if samples.len() < 512 {
255 return Ok(self.sample_rate as f32 / 4.0);
256 }
257
258 // Use first 512 samples for spectral analysis
259 let window = samples.get(0..512).ok_or_else(|| {
260 Error::Processing("Insufficient samples for spectral analysis".into())
261 })?;
262
263 // Time-domain approximation of spectral centroid
264 let (magnitude_sum, weighted_sum) =
265 window
266 .iter()
267 .enumerate()
268 .fold((0.0f32, 0.0f32), |(mag_acc, weighted_acc), (i, &s)| {
269 let magnitude = s.abs();
270 (
271 mag_acc + magnitude,
272 magnitude.mul_add(i as f32, weighted_acc),
273 )
274 });
275
276 if magnitude_sum < 1e-10 {
277 return Ok(self.sample_rate as f32 / 4.0);
278 }
279
280 let centroid_bin = weighted_sum / magnitude_sum;
281 let centroid_hz = (centroid_bin / 512.0) * (self.sample_rate as f32 / 2.0);
282 Ok(centroid_hz.clamp(0.0, self.sample_rate as f32 / 2.0))
283 }
284
285 /// Aggregates individual metrics into unified quality score [0.0, 1.0].
286 ///
287 /// Uses weighted combination:
288 /// - 50% SNR (signal clarity)
289 /// - 30% Energy (signal strength)
290 /// - 20% Spectral centroid (frequency content)
291 ///
292 /// # Arguments
293 ///
294 /// - `snr_db`: Signal-to-noise ratio in dB
295 /// - `energy`: RMS energy
296 /// - `spectral_centroid`: Spectral centroid in Hz
297 ///
298 /// # Returns
299 ///
300 /// Quality score in [0.0, 1.0], where 1.0 is perfect quality
301 fn aggregate_score(self, snr_db: f32, energy: f32, spectral_centroid: f32) -> f32 {
302 let snr_score = (snr_db / 60.0).clamp(0.0, 1.0);
303 let energy_score = (energy / 0.5).clamp(0.0, 1.0);
304 let centroid_score = (spectral_centroid / (self.sample_rate as f32 / 2.0)).clamp(0.0, 1.0);
305
306 // 50% SNR, 30% energy, 20% spectral
307 let score = 0.5f32.mul_add(
308 snr_score,
309 0.3f32.mul_add(energy_score, 0.2 * centroid_score),
310 );
311
312 score.clamp(0.0, 1.0)
313 }
314}
315
316fn elapsed_duration(start: AudioInstant) -> AudioDuration {
317 AudioInstant::now().duration_since(start)
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use std::f32::consts::PI;

    /// Tolerance used for approximate float comparisons.
    const EPSILON: f32 = 0.01;

    /// Runs a 16 kHz assessor over `samples`, panicking if assessment fails.
    fn assess_16k(samples: &[f32]) -> QualityMetrics {
        QualityAssessor::new(16000).assess(samples).unwrap()
    }

    /// Value of a 440 Hz sine at sample `index` (16 kHz), scaled by `amplitude`.
    fn tone_440(index: usize, amplitude: f32) -> f32 {
        (2.0 * PI * 440.0 * index as f32 / 16000.0).sin() * amplitude
    }

    #[test]
    fn test_high_quality_audio() {
        // One second of audio: silent first and last quarters (the noise
        // floor) around a strong sine occupying the middle 8000 samples.
        let mut samples = vec![0.0f32; 16000];
        for (index, sample) in samples.iter_mut().enumerate().skip(4000).take(8000) {
            *sample = tone_440(index, 0.5);
        }

        let metrics = assess_16k(&samples);

        // Clear signal/noise separation should yield a high SNR.
        assert!(
            metrics.snr_db > 20.0,
            "Expected SNR > 20 dB, got {:.1}",
            metrics.snr_db
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(
            metrics.quality_score > 0.5,
            "Expected quality > 0.5, got {:.2}",
            metrics.quality_score
        );
    }

    #[test]
    fn test_noisy_audio() {
        // A quieter tone with deterministic pseudo-noise mixed in.
        let noisy: Vec<f32> = (0..16000usize)
            .map(|index| {
                let noise = (index as f32 * 0.1)
                    .sin()
                    .mul_add(0.1, (index % 7) as f32 * 0.01);
                tone_440(index, 0.2) + noise
            })
            .collect();

        let metrics = assess_16k(&noisy);

        // Noisy audio should not reach a very high SNR.
        assert!(
            metrics.snr_db < 40.0,
            "Expected SNR < 40 dB for noisy audio"
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    #[test]
    fn test_energy_calculation() {
        // The RMS of a constant 0.5 signal is 0.5.
        let metrics = assess_16k(&[0.5f32; 1000]);

        let deviation = (metrics.energy - 0.5).abs();
        assert!(
            deviation < EPSILON,
            "Expected energy ~0.5, got {:.3}",
            metrics.energy
        );
    }

    #[test]
    fn test_quality_score_bounds() {
        let metrics = assess_16k(&[0.3f32; 5000]);

        // Both the score and the SNR must stay inside their documented ranges.
        assert!(
            (0.0..=1.0).contains(&metrics.quality_score),
            "Quality score {:.2} out of bounds [0.0, 1.0]",
            metrics.quality_score
        );
        assert!(
            (0.0..=60.0).contains(&metrics.snr_db),
            "SNR {:.1} dB out of bounds [0.0, 60.0]",
            metrics.snr_db
        );
    }

    #[test]
    fn test_spectral_centroid_computed() {
        let metrics = assess_16k(&[0.2f32; 1024]);

        // The centroid must land between 0 Hz and Nyquist (8000 Hz).
        assert!(metrics.spectral_centroid >= 0.0);
        assert!(
            metrics.spectral_centroid <= 8000.0,
            "Spectral centroid {:.1} Hz exceeds Nyquist (8000 Hz)",
            metrics.spectral_centroid
        );
    }

    #[test]
    fn test_empty_audio() {
        let outcome = QualityAssessor::new(16000).assess(&[]);

        match outcome {
            Ok(_) => panic!("Should reject empty audio"),
            Err(Error::InvalidInput(msg)) => {
                assert!(
                    msg.contains("empty"),
                    "Expected 'empty' error, got: {}",
                    msg
                );
            }
            Err(other) => panic!("Expected InvalidInput error, got: {:?}", other),
        }
    }

    #[test]
    fn test_silence_handling() {
        // Pure digital silence.
        let metrics = assess_16k(&[0.0f32; 16000]);

        // No energy at all.
        assert!(
            metrics.energy < EPSILON,
            "Expected near-zero energy for silence, got {:.6}",
            metrics.energy
        );
        // Silence must report ~0 dB, not the 60 dB ceiling.
        assert!(
            metrics.snr_db < 1.0,
            "Expected SNR ~0 dB for silence, got {:.1} dB",
            metrics.snr_db
        );
        // ...and therefore a low overall score.
        assert!(
            metrics.quality_score < 0.2,
            "Expected quality <0.2 for silence, got {:.2}",
            metrics.quality_score
        );
        // The score must still be within bounds.
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    #[test]
    fn test_short_audio() {
        // Fewer than 512 samples: must not panic and must give valid metrics.
        let metrics = assess_16k(&[0.5f32; 256]);

        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(metrics.spectral_centroid > 0.0);
    }

    #[test]
    fn test_very_quiet_audio() {
        // Non-zero samples that sit below the signal threshold.
        let metrics = assess_16k(&[1e-7f32; 16000]);

        // Should be handled much like silence.
        assert!(
            metrics.energy < 1e-6,
            "Expected near-zero energy for very quiet audio, got {:.9}",
            metrics.energy
        );
        assert!(
            metrics.snr_db < 5.0,
            "Expected low SNR for very quiet audio, got {:.1} dB",
            metrics.snr_db
        );
        assert!(
            metrics.quality_score < 0.3,
            "Expected low quality for very quiet audio, got {:.2}",
            metrics.quality_score
        );
    }
}