1use bliss_audio_aubio_rs::vec::CVec;
8use bliss_audio_aubio_rs::{PVoc, SpecDesc, SpecShape, bin_to_freq};
9use likely_stable::{LikelyResult, unlikely};
10use ndarray::{Axis, arr1};
11
12use crate::Feature;
13
14use super::SAMPLE_RATE;
15use super::errors::{AnalysisError, AnalysisResult};
16use super::utils::{Normalize, geometric_mean, mean, number_crossings};
17
18pub struct SpectralDesc {
32 fftgrain: Vec<f32>,
34
35 phase_vocoder: PVoc,
36 sample_rate: u32,
37
38 centroid_aubio_desc: SpecDesc,
39 rolloff_aubio_desc: SpecDesc,
40 values_centroid: Vec<f32>,
41 values_rolloff: Vec<f32>,
42 values_flatness: Vec<f32>,
43}
44
45impl SpectralDesc {
46 pub const WINDOW_SIZE: usize = 512;
47 pub const HOP_SIZE: usize = Self::WINDOW_SIZE / 4;
48
49 #[inline]
64 pub fn get_centroid(&mut self) -> [Feature; 2] {
65 [
66 self.normalize(mean(&self.values_centroid)),
67 self.normalize(
68 arr1(&self.values_centroid)
69 .std_axis(Axis(0), 0.)
70 .into_scalar(),
71 ),
72 ]
73 }
74
75 #[inline]
89 pub fn get_rolloff(&mut self) -> [Feature; 2] {
90 [
91 self.normalize(mean(&self.values_rolloff)),
92 self.normalize(
93 arr1(&self.values_rolloff)
94 .std_axis(Axis(0), 0.)
95 .into_scalar(),
96 ),
97 ]
98 }
99
100 #[inline]
117 pub fn get_flatness(&mut self) -> [Feature; 2] {
118 let max_value = 1.;
119 let min_value = 0.;
120 [
123 2. * (mean(&self.values_flatness) - min_value) / (max_value - min_value) - 1.,
124 2. * (arr1(&self.values_flatness)
125 .std_axis(Axis(0), 0.)
126 .into_scalar()
127 - min_value)
128 / (max_value - min_value)
129 - 1.,
130 ]
131 }
132
133 #[inline]
137 pub fn new(sample_rate: u32) -> AnalysisResult<Self> {
138 Ok(Self {
139 fftgrain: vec![0.0; Self::WINDOW_SIZE],
140
141 centroid_aubio_desc: SpecDesc::new(SpecShape::Centroid, Self::WINDOW_SIZE)
142 .map_err_unlikely(|e| {
143 AnalysisError::AnalysisError(format!(
144 "error while loading aubio centroid object: {e}",
145 ))
146 })?,
147 rolloff_aubio_desc: SpecDesc::new(SpecShape::Rolloff, Self::WINDOW_SIZE)
148 .map_err_unlikely(|e| {
149 AnalysisError::AnalysisError(format!(
150 "error while loading aubio rolloff object: {e}",
151 ))
152 })?,
153 phase_vocoder: PVoc::new(Self::WINDOW_SIZE, Self::HOP_SIZE).map_err_unlikely(|e| {
154 AnalysisError::AnalysisError(format!("error while loading aubio pvoc object: {e}",))
155 })?,
156 values_centroid: Vec::new(),
157 values_rolloff: Vec::new(),
158 values_flatness: Vec::new(),
159 sample_rate,
160 })
161 }
162
163 #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
171 #[allow(clippy::missing_inline_in_public_items)]
172 pub fn do_(&mut self, chunk: &[f32]) -> AnalysisResult<()> {
173 self.phase_vocoder
174 .do_(chunk, self.fftgrain.as_mut_slice())
175 .map_err_unlikely(|e| {
176 AnalysisError::AnalysisError(format!("error while processing aubio pv object: {e}"))
177 })?;
178
179 let bin = self
180 .centroid_aubio_desc
181 .do_result(self.fftgrain.as_slice())
182 .map_err_unlikely(|e| {
183 AnalysisError::AnalysisError(format!(
184 "error while processing aubio centroid object: {e}",
185 ))
186 })?;
187
188 #[allow(clippy::cast_precision_loss)]
189 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
190 self.values_centroid.push(freq);
191
192 let mut bin = self
193 .rolloff_aubio_desc
194 .do_result(self.fftgrain.as_slice())
195 .map_err_unlikely(|e| {
196 AnalysisError::AnalysisError(format!(
197 "error while processing aubio rolloff object: {e}",
198 ))
199 })?;
200
201 #[allow(clippy::cast_precision_loss)]
203 if unlikely(bin > Self::WINDOW_SIZE as f32 / 2.) {
204 bin = Self::WINDOW_SIZE as f32 / 2.;
206 }
207
208 #[allow(clippy::cast_precision_loss)]
209 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
210 self.values_rolloff.push(freq);
211
212 let cvec: CVec<'_> = self.fftgrain.as_slice().into();
213 let norm = cvec.norm();
214 let geo_mean = geometric_mean(norm);
215 if unlikely(geo_mean == 0.0) {
216 self.values_flatness.push(0.0);
217 return Ok(());
218 }
219 let flatness = geo_mean / mean(norm);
220 self.values_flatness.push(flatness);
221 Ok(())
222 }
223}
224
225impl Normalize for SpectralDesc {
226 #[allow(clippy::cast_precision_loss)]
227 const MAX_VALUE: Feature = SAMPLE_RATE as Feature / 2.;
228 const MIN_VALUE: Feature = 0.;
229}
230
231#[derive(Default, Clone)]
244pub struct ZeroCrossingRateDesc {
245 crossings_sum: usize,
246 samples_checked: usize,
247}
248
249impl ZeroCrossingRateDesc {
250 #[must_use]
251 #[inline]
252 pub fn new(_sample_rate: u32) -> Self {
253 Self::default()
254 }
255
256 #[inline]
258 pub fn do_(&mut self, chunk: &[f32]) {
259 self.crossings_sum += number_crossings(chunk);
260 self.samples_checked += chunk.len();
261 }
262
263 #[allow(clippy::cast_precision_loss)]
266 #[inline]
267 pub fn get_value(&mut self) -> Feature {
268 self.normalize(self.crossings_sum as Feature / self.samples_checked as Feature)
269 }
270}
271
272impl Normalize for ZeroCrossingRateDesc {
273 const MAX_VALUE: Feature = 1.;
274 const MIN_VALUE: Feature = 0.;
275}
276
277#[cfg(test)]
278mod tests {
279 use super::*;
280 use crate::decoder::{Decoder as DecoderTrait, MecompDecoder as Decoder};
281 use std::path::Path;
282
283 #[test]
284 fn test_zcr_boundaries() {
285 let mut zcr_desc = ZeroCrossingRateDesc::default();
286 let chunk = vec![0.; 1024];
287 zcr_desc.do_(&chunk);
288 let value = zcr_desc.get_value();
289 assert!(f32::EPSILON > (-1. - value).abs(), "{value} !~= -1");
290
291 let one_chunk = [-1., 1.];
292 let chunks = std::iter::repeat_n(one_chunk.iter(), 512)
293 .flatten()
294 .copied()
295 .collect::<Vec<f32>>();
296 let mut zcr_desc = ZeroCrossingRateDesc::default();
297 zcr_desc.do_(&chunks);
298 let value = zcr_desc.get_value();
299 assert!(0.001 > (0.998_046_9 - value).abs(), "{value} !~= 0.9980469");
300 }
301
302 #[test]
303 fn test_zcr() {
304 let song = Decoder::new()
305 .unwrap()
306 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
307 .unwrap();
308 let mut zcr_desc = ZeroCrossingRateDesc::default();
309 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
310 zcr_desc.do_(chunk);
311 }
312 let value = zcr_desc.get_value();
313 assert!(0.001 > (-0.85036 - value).abs(), "{value} !~= -0.85036");
314 }
315
316 #[test]
317 fn test_spectral_flatness_boundaries() {
318 let mut spectral_desc = SpectralDesc::new(10).unwrap();
319 let chunk = vec![0.; 1024];
320
321 let expected_values = [-1., -1.];
322 spectral_desc.do_(&chunk).unwrap();
323 for (expected, actual) in expected_values
324 .iter()
325 .zip(spectral_desc.get_flatness().iter())
326 {
327 assert!(
328 0.000_000_1 > (expected - actual).abs(),
329 "{expected} !~= {actual}"
330 );
331 }
332
333 let song = Decoder::new()
334 .unwrap()
335 .decode(Path::new("data/white_noise.mp3"))
336 .unwrap();
337 let mut spectral_desc = SpectralDesc::new(22050).unwrap();
338 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
339 spectral_desc.do_(chunk).unwrap();
340 }
341 println!("{:?}", spectral_desc.get_flatness());
342 let expected_values = [0.578_530_3, -0.942_630_8];
344 for (expected, actual) in expected_values
345 .iter()
346 .zip(spectral_desc.get_flatness().iter())
347 {
348 let relative_error = (expected - actual).abs() / expected.abs();
351 assert!(
352 relative_error < 0.078,
353 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
354 );
355 }
356 }
357
358 #[test]
359 fn test_spectral_flatness() {
360 let song = Decoder::new()
361 .unwrap()
362 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
363 .unwrap();
364 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
365 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
366 spectral_desc.do_(chunk).unwrap();
367 }
368 let expected_values = [-0.776_100_75, -0.814_817_9];
371 for (expected, actual) in expected_values
372 .iter()
373 .zip(spectral_desc.get_flatness().iter())
374 {
375 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
376 }
377 }
378
379 #[test]
380 fn test_spectral_roll_off_boundaries() {
381 let mut spectral_desc = SpectralDesc::new(10).unwrap();
382 let chunk = vec![0.; 512];
383
384 let expected_values = [-1., -1.];
385 spectral_desc.do_(&chunk).unwrap();
386 for (expected, actual) in expected_values
387 .iter()
388 .zip(spectral_desc.get_rolloff().iter())
389 {
390 assert!(
391 0.000_000_1 > (expected - actual).abs(),
392 "{expected} !~= {actual}"
393 );
394 }
395
396 let song = Decoder::new()
397 .unwrap()
398 .decode(Path::new("data/tone_11080Hz.flac"))
399 .unwrap();
400 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
401 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
402 spectral_desc.do_(chunk).unwrap();
403 }
404 let expected_values = [0.996_768_1, -0.996_151_75];
405 for (expected, actual) in expected_values
406 .iter()
407 .zip(spectral_desc.get_rolloff().iter())
408 {
409 assert!(
410 0.0001 > (expected - actual).abs(),
411 "{expected} !~= {actual}"
412 );
413 }
414 }
415
416 #[test]
417 fn test_spectral_roll_off() {
418 let song = Decoder::new()
419 .unwrap()
420 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
421 .unwrap();
422 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
423 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
424 spectral_desc.do_(chunk).unwrap();
425 }
426 let expected_values = [-0.632_648_6, -0.726_093_3];
427 for (expected, actual) in expected_values
430 .iter()
431 .zip(spectral_desc.get_rolloff().iter())
432 {
433 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
434 }
435 }
436
437 #[test]
438 fn test_spectral_centroid() {
439 let song = Decoder::new()
440 .unwrap()
441 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
442 .unwrap();
443 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
444 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
445 spectral_desc.do_(chunk).unwrap();
446 }
447 let expected_values = [-0.75483, -0.879_168_87];
450 for (expected, actual) in expected_values
451 .iter()
452 .zip(spectral_desc.get_centroid().iter())
453 {
454 assert!(
455 0.0001 > (expected - actual).abs(),
456 "{expected} !~= {actual}"
457 );
458 }
459 }
460
461 #[test]
462 fn test_spectral_centroid_boundaries() {
463 let mut spectral_desc = SpectralDesc::new(10).unwrap();
464 let chunk = vec![0.; 512];
465
466 spectral_desc.do_(&chunk).unwrap();
467 let expected_values = [-1., -1.];
468 for (expected, actual) in expected_values
469 .iter()
470 .zip(spectral_desc.get_centroid().iter())
471 {
472 assert!(
473 0.000_000_1 > (expected - actual).abs(),
474 "{expected} !~= {actual}"
475 );
476 }
477 let song = Decoder::new()
478 .unwrap()
479 .decode(Path::new("data/tone_11080Hz.flac"))
480 .unwrap();
481 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
482 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
483 spectral_desc.do_(chunk).unwrap();
484 }
485 let expected_values = [0.97266, -0.960_992_6];
486 for (expected, actual) in expected_values
487 .iter()
488 .zip(spectral_desc.get_centroid().iter())
489 {
490 let relative_error = (expected - actual).abs() / expected.abs();
493 assert!(
494 relative_error < 0.039,
495 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
496 );
497 }
498 }
499}