speech_prep/preprocessing/quality.rs
1//! Audio quality assessment with SNR estimation and spectral analysis.
2//!
3//! This module provides multi-dimensional quality metrics for audio
4//! preprocessing validation.
5//!
6//! # Metrics
7//!
8//! - **SNR (Signal-to-Noise Ratio)**: Measures signal power vs noise floor (dB)
9//! - **RMS Energy**: Root-mean-square energy as baseline quality indicator
10//! - **Spectral Centroid**: Weighted average frequency (brightness measure)
11//! - **Quality Score**: Unified score in [0.0, 1.0] combining all metrics
12//!
13//! # Performance
14//!
15//! - **Target**: <10ms per second of 16 kHz audio
//! - **Memory**: Minimal allocations (only small per-frame energy vectors per call)
17//!
18//! # Example
19//!
20//! ```rust,no_run
21//! use speech_prep::preprocessing::QualityAssessor;
22//!
23//! # fn main() -> speech_prep::error::Result<()> {
24//! let assessor = QualityAssessor::new(16000);
25//! let audio_samples = vec![0.5f32; 16000]; // 1 second at 16 kHz
26//!
27//! let metrics = assessor.assess(&audio_samples)?;
28//! assert!(metrics.snr_db.is_finite());
29//! # Ok(())
30//! # }
31//! ```
32
33use crate::error::{Error, Result};
34use crate::time::{AudioDuration, AudioInstant};
35use tracing::{debug, trace};
36
/// Quality metrics for audio assessment.
///
/// All metrics are computed for a single audio chunk by
/// `QualityAssessor::assess`. The struct is plain `Copy` data with no
/// invariants of its own; the ranges below are what the assessor produces.
#[derive(Debug, Clone, Copy)]
pub struct QualityMetrics {
    /// Signal-to-noise ratio in decibels, clamped by the assessor to [0.0, 60.0].
    pub snr_db: f32,
    /// RMS energy of the chunk; lies in [0.0, 1.0] for normalized audio.
    pub energy: f32,
    /// Spectral centroid estimate in Hz, clamped to [0.0, `sample_rate/2`].
    pub spectral_centroid: f32,
    /// Unified quality score in [0.0, 1.0] (higher is better), a weighted
    /// combination of the three metrics above.
    pub quality_score: f32,
}
51
/// Audio quality assessor with configurable sample rate.
///
/// Computes multi-dimensional quality metrics for audio chunks,
/// providing objective measures for quality gates and filtering.
///
/// The assessor stores only the sample rate, so it is `Copy` and its
/// methods take `self` by value.
#[derive(Debug, Clone, Copy)]
pub struct QualityAssessor {
    /// Sample rate in Hz; used to express frequencies relative to Nyquist
    /// (`sample_rate / 2`).
    sample_rate: u32,
}
60
61impl QualityAssessor {
    /// Creates a new quality assessor for the given sample rate.
    ///
    /// The rate is stored as given; no validation is performed here.
    ///
    /// # Arguments
    ///
    /// - `sample_rate`: Audio sample rate in Hz (e.g., 16000)
    ///
    /// # Example
    ///
    /// ```rust
    /// use speech_prep::preprocessing::QualityAssessor;
    ///
    /// let assessor = QualityAssessor::new(16000);
    /// ```
    pub fn new(sample_rate: u32) -> Self {
        Self { sample_rate }
    }
78
79 /// Assesses audio quality for the given samples.
80 ///
81 /// Computes SNR, energy, spectral centroid, and unified quality score.
82 ///
83 /// # Arguments
84 ///
85 /// - `samples`: Audio samples to assess (must not be empty)
86 ///
87 /// # Returns
88 ///
89 /// Quality metrics including SNR (dB), energy, spectral centroid (Hz),
90 /// and unified quality score [0.0, 1.0].
91 ///
92 /// # Errors
93 ///
94 /// Returns `Error::InvalidInput` if samples are empty.
95 ///
96 /// # Example
97 ///
98 /// ```rust,no_run
99 /// use speech_prep::preprocessing::QualityAssessor;
100 ///
101 /// # fn main() -> speech_prep::error::Result<()> {
102 /// let assessor = QualityAssessor::new(16000);
103 /// let audio = vec![0.5f32; 16000];
104 /// let metrics = assessor.assess(&audio)?;
105 /// assert!((0.0..=1.0).contains(&metrics.quality_score));
106 /// # Ok(())
107 /// # }
108 /// ```
109 pub fn assess(self, samples: &[f32]) -> Result<QualityMetrics> {
110 trace!(sample_count = samples.len(), "Assessing audio quality");
111
112 if samples.is_empty() {
113 return Err(Error::InvalidInput("Cannot assess empty audio".into()));
114 }
115
116 let processing_start = AudioInstant::now();
117 let energy = Self::calculate_rms(samples);
118 let snr_db = Self::calculate_snr(samples, energy)?;
119 let spectral_centroid = self.calculate_spectral_centroid(samples)?;
120 let quality_score = self.aggregate_score(snr_db, energy, spectral_centroid);
121
122 debug!(
123 snr_db,
124 energy, spectral_centroid, quality_score, "Audio quality metrics computed"
125 );
126
127 let metrics = QualityMetrics {
128 snr_db,
129 energy,
130 spectral_centroid,
131 quality_score,
132 };
133 let _latency = elapsed_duration(processing_start);
134
135 Ok(metrics)
136 }
137
138 /// Calculates RMS (root-mean-square) energy of audio samples.
139 ///
140 /// This is a static method that can be called without an assessor instance.
141 ///
142 /// # Arguments
143 ///
144 /// - `samples`: Audio samples (must not be empty)
145 ///
146 /// # Returns
147 ///
148 /// RMS energy in range [0.0, 1.0] for normalized audio
149 fn calculate_rms(samples: &[f32]) -> f32 {
150 let sum_squares: f32 = samples.iter().map(|&s| s * s).sum();
151 let mean_square = sum_squares / samples.len() as f32;
152 mean_square.sqrt()
153 }
154
155 /// Calculates signal-to-noise ratio (SNR) in decibels.
156 ///
157 /// Estimates noise floor from the quietest 10% of frames,
158 /// then computes dB ratio between signal RMS and noise floor.
159 ///
160 /// # Arguments
161 ///
162 /// - `samples`: Audio samples
163 /// - `signal_rms`: Pre-computed RMS energy of the signal
164 ///
165 /// # Returns
166 ///
167 /// SNR in dB, clamped to [0.0, 60.0] for practical purposes
168 ///
169 /// # Errors
170 ///
171 /// Returns `Error::AudioProcessing` if insufficient frames for estimation
172 fn calculate_snr(samples: &[f32], signal_rms: f32) -> Result<f32> {
173 // Compute frame energies (256 samples per frame)
174 let frame_energies = Self::frame_energy(samples);
175
176 let mut valid_energies: Vec<f32> =
177 frame_energies.into_iter().filter(|x| !x.is_nan()).collect();
178
179 if valid_energies.is_empty() {
180 return Err(Error::Processing(
181 "All frame energies are NaN; cannot estimate noise floor".into(),
182 ));
183 }
184
185 valid_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
186
187 // Quietest 10% of frames as noise floor estimate
188 let noise_frame_count = (valid_energies.len() / 10).max(1);
189 let noise_frames = valid_energies
190 .get(0..noise_frame_count)
191 .ok_or_else(|| Error::Processing("Insufficient frames for noise estimation".into()))?;
192
193 let noise_floor = noise_frames.iter().sum::<f32>() / noise_frames.len() as f32;
194
195 if signal_rms < 1e-6 {
196 return Ok(0.0);
197 }
198
199 if noise_floor < 1e-10 {
200 return Ok(60.0);
201 }
202
203 let snr = 20.0 * (signal_rms / noise_floor).log10();
204 Ok(snr.clamp(0.0, 60.0))
205 }
206
207 /// Computes RMS energy for each frame of audio.
208 ///
209 /// Divides audio into fixed-size frames and computes RMS for each.
210 ///
211 /// # Arguments
212 ///
213 /// - `samples`: Audio samples
214 ///
215 /// # Returns
216 ///
217 /// Vector of RMS energies, one per frame
218 fn frame_energy(samples: &[f32]) -> Vec<f32> {
219 const FRAME_SIZE: usize = 256;
220 samples
221 .chunks(FRAME_SIZE)
222 .map(|frame| {
223 let sum_sq: f32 = frame.iter().map(|&s| s * s).sum();
224 (sum_sq / frame.len() as f32).sqrt()
225 })
226 .collect()
227 }
228
229 /// Calculates spectral centroid (brightness measure) in Hz.
230 ///
231 /// Computes weighted average frequency from magnitude spectrum.
232 /// Uses simplified time-domain approximation (not full FFT).
233 ///
234 /// # Arguments
235 ///
236 /// - `samples`: Audio samples (should be ≥512 for meaningful result)
237 ///
238 /// # Returns
239 ///
240 /// Spectral centroid in Hz, clamped to [0.0, `sample_rate/2`]
241 ///
242 /// # Errors
243 ///
244 /// Returns `Error::AudioProcessing` if samples are too short
245 ///
246 /// # Note
247 ///
248 /// This is a simplified implementation. Full FFT-based spectral
249 /// centroid can be added in the future for more accurate results.
250 fn calculate_spectral_centroid(self, samples: &[f32]) -> Result<f32> {
251 // For very short audio, return midpoint frequency
252 if samples.len() < 512 {
253 return Ok(self.sample_rate as f32 / 4.0);
254 }
255
256 // Use first 512 samples for spectral analysis
257 let window = samples.get(0..512).ok_or_else(|| {
258 Error::Processing("Insufficient samples for spectral analysis".into())
259 })?;
260
261 // Time-domain approximation of spectral centroid
262 let (magnitude_sum, weighted_sum) =
263 window
264 .iter()
265 .enumerate()
266 .fold((0.0f32, 0.0f32), |(mag_acc, weighted_acc), (i, &s)| {
267 let magnitude = s.abs();
268 (
269 mag_acc + magnitude,
270 magnitude.mul_add(i as f32, weighted_acc),
271 )
272 });
273
274 if magnitude_sum < 1e-10 {
275 return Ok(self.sample_rate as f32 / 4.0);
276 }
277
278 let centroid_bin = weighted_sum / magnitude_sum;
279 let centroid_hz = (centroid_bin / 512.0) * (self.sample_rate as f32 / 2.0);
280 Ok(centroid_hz.clamp(0.0, self.sample_rate as f32 / 2.0))
281 }
282
283 /// Aggregates individual metrics into unified quality score [0.0, 1.0].
284 ///
285 /// Uses weighted combination:
286 /// - 50% SNR (signal clarity)
287 /// - 30% Energy (signal strength)
288 /// - 20% Spectral centroid (frequency content)
289 ///
290 /// # Arguments
291 ///
292 /// - `snr_db`: Signal-to-noise ratio in dB
293 /// - `energy`: RMS energy
294 /// - `spectral_centroid`: Spectral centroid in Hz
295 ///
296 /// # Returns
297 ///
298 /// Quality score in [0.0, 1.0], where 1.0 is perfect quality
299 fn aggregate_score(self, snr_db: f32, energy: f32, spectral_centroid: f32) -> f32 {
300 let snr_score = (snr_db / 60.0).clamp(0.0, 1.0);
301 let energy_score = (energy / 0.5).clamp(0.0, 1.0);
302 let centroid_score = (spectral_centroid / (self.sample_rate as f32 / 2.0)).clamp(0.0, 1.0);
303
304 // 50% SNR, 30% energy, 20% spectral
305 let score = 0.5f32.mul_add(
306 snr_score,
307 0.3f32.mul_add(energy_score, 0.2 * centroid_score),
308 );
309
310 score.clamp(0.0, 1.0)
311 }
312}
313
314fn elapsed_duration(start: AudioInstant) -> AudioDuration {
315 AudioInstant::now().duration_since(start)
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for approximate floating-point comparisons.
    const EPSILON: f32 = 0.01;

    /// Runs a 16 kHz assessor over `samples`, panicking if assessment fails.
    fn assess_16k(samples: &[f32]) -> QualityMetrics {
        QualityAssessor::new(16000).assess(samples).unwrap()
    }

    /// Sample `index` of a 440 Hz sine at 16 kHz, scaled by `amplitude`.
    fn tone_440(index: usize, amplitude: f32) -> f32 {
        (2.0 * std::f32::consts::PI * 440.0 * index as f32 / 16000.0).sin() * amplitude
    }

    #[test]
    fn test_high_quality_audio() {
        // The first and last 25% stay silent (noise floor); the middle 50%
        // (8000 samples) carries a strong, clean sine wave.
        let samples: Vec<f32> = (0..16000usize)
            .map(|i| {
                if (4000..12000).contains(&i) {
                    tone_440(i, 0.5)
                } else {
                    0.0
                }
            })
            .collect();

        let metrics = assess_16k(&samples);

        // Clear signal/noise separation should produce a high SNR.
        assert!(
            metrics.snr_db > 20.0,
            "Expected SNR > 20 dB, got {:.1}",
            metrics.snr_db
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(
            metrics.quality_score > 0.5,
            "Expected quality > 0.5, got {:.2}",
            metrics.quality_score
        );
    }

    #[test]
    fn test_noisy_audio() {
        // Weaker tone with a deterministic pseudo-noise component on top.
        let noisy: Vec<f32> = (0..16000usize)
            .map(|i| {
                let signal = tone_440(i, 0.2);
                let noise = (i as f32 * 0.1).sin().mul_add(0.1, (i % 7) as f32 * 0.01);
                signal + noise
            })
            .collect();

        let metrics = assess_16k(&noisy);

        // Noisy audio should not reach the top of the SNR range.
        assert!(
            metrics.snr_db < 40.0,
            "Expected SNR < 40 dB for noisy audio"
        );
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    #[test]
    fn test_energy_calculation() {
        // The RMS of a constant 0.5 signal is 0.5.
        let constant = vec![0.5f32; 1000];

        let metrics = assess_16k(&constant);

        assert!(
            (metrics.energy - 0.5).abs() < EPSILON,
            "Expected energy ~0.5, got {:.3}",
            metrics.energy
        );
    }

    #[test]
    fn test_quality_score_bounds() {
        let constant = vec![0.3f32; 5000];

        let metrics = assess_16k(&constant);

        // Both bounded metrics must stay inside their documented ranges.
        assert!(
            (0.0..=1.0).contains(&metrics.quality_score),
            "Quality score {:.2} out of bounds [0.0, 1.0]",
            metrics.quality_score
        );
        assert!(
            (0.0..=60.0).contains(&metrics.snr_db),
            "SNR {:.1} dB out of bounds [0.0, 60.0]",
            metrics.snr_db
        );
    }

    #[test]
    fn test_spectral_centroid_computed() {
        let constant = vec![0.2f32; 1024];

        let metrics = assess_16k(&constant);

        // The centroid must lie between DC and the Nyquist frequency.
        assert!(metrics.spectral_centroid >= 0.0);
        assert!(
            metrics.spectral_centroid <= 8000.0, // Nyquist frequency
            "Spectral centroid {:.1} Hz exceeds Nyquist (8000 Hz)",
            metrics.spectral_centroid
        );
    }

    #[test]
    fn test_empty_audio() {
        match QualityAssessor::new(16000).assess(&[]) {
            Ok(_) => panic!("Should reject empty audio"),
            Err(Error::InvalidInput(msg)) => {
                assert!(
                    msg.contains("empty"),
                    "Expected 'empty' error, got: {}",
                    msg
                );
            }
            Err(other) => panic!("Expected InvalidInput error, got: {:?}", other),
        }
    }

    #[test]
    fn test_silence_handling() {
        // Pure silence (all zeros).
        let silence = vec![0.0f32; 16000];

        let metrics = assess_16k(&silence);

        // Silence carries no energy...
        assert!(
            metrics.energy < EPSILON,
            "Expected near-zero energy for silence, got {:.6}",
            metrics.energy
        );
        // ...must report 0 dB SNR rather than the maximum...
        assert!(
            metrics.snr_db < 1.0,
            "Expected SNR ~0 dB for silence, got {:.1} dB",
            metrics.snr_db
        );
        // ...and must score low, not high.
        assert!(
            metrics.quality_score < 0.2,
            "Expected quality <0.2 for silence, got {:.2}",
            metrics.quality_score
        );
        // The score still has to be within bounds.
        assert!((0.0..=1.0).contains(&metrics.quality_score));
    }

    #[test]
    fn test_short_audio() {
        // Fewer than 512 samples: exercises the short-input fallback.
        let short_audio = vec![0.5f32; 256];

        let metrics = assess_16k(&short_audio);

        // Must not panic and must still return valid metrics.
        assert!((0.0..=1.0).contains(&metrics.quality_score));
        assert!(metrics.spectral_centroid > 0.0);
    }

    #[test]
    fn test_very_quiet_audio() {
        // Below the signal threshold but not exactly zero.
        let very_quiet = vec![1e-7f32; 16000];

        let metrics = assess_16k(&very_quiet);

        // Very quiet audio should be treated much like silence.
        assert!(
            metrics.energy < 1e-6,
            "Expected near-zero energy for very quiet audio, got {:.9}",
            metrics.energy
        );
        assert!(
            metrics.snr_db < 5.0,
            "Expected low SNR for very quiet audio, got {:.1} dB",
            metrics.snr_db
        );
        assert!(
            metrics.quality_score < 0.3,
            "Expected low quality for very quiet audio, got {:.2}",
            metrics.quality_score
        );
    }
}
512}