1use bliss_audio_aubio_rs::vec::CVec;
8use bliss_audio_aubio_rs::{PVoc, SpecDesc, SpecShape, bin_to_freq};
9use likely_stable::{LikelyResult, unlikely};
10use ndarray::{Axis, arr1};
11
12use crate::Feature;
13
14use super::SAMPLE_RATE;
15use super::errors::{AnalysisError, AnalysisResult};
16use super::utils::{Normalize, geometric_mean, mean, number_crossings};
17
18pub struct SpectralDesc {
32 fftgrain: Vec<f32>,
34
35 phase_vocoder: PVoc,
36 sample_rate: u32,
37
38 centroid_aubio_desc: SpecDesc,
39 rolloff_aubio_desc: SpecDesc,
40 values_centroid: Vec<f32>,
41 values_rolloff: Vec<f32>,
42 values_flatness: Vec<f32>,
43}
44
45impl SpectralDesc {
46 pub const WINDOW_SIZE: usize = 512;
47 pub const HOP_SIZE: usize = Self::WINDOW_SIZE / 4;
48
49 #[inline]
64 pub fn get_centroid(&mut self) -> [Feature; 2] {
65 [
66 self.normalize(mean(&self.values_centroid)),
67 self.normalize(
68 arr1(&self.values_centroid)
69 .std_axis(Axis(0), 0.)
70 .into_scalar(),
71 ),
72 ]
73 }
74
75 #[inline]
89 pub fn get_rolloff(&mut self) -> [Feature; 2] {
90 [
91 self.normalize(mean(&self.values_rolloff)),
92 self.normalize(
93 arr1(&self.values_rolloff)
94 .std_axis(Axis(0), 0.)
95 .into_scalar(),
96 ),
97 ]
98 }
99
100 #[inline]
117 pub fn get_flatness(&mut self) -> [Feature; 2] {
118 let max_value = 1.;
119 let min_value = 0.;
120 [
123 2. * (mean(&self.values_flatness) - min_value) / (max_value - min_value) - 1.,
124 2. * (arr1(&self.values_flatness)
125 .std_axis(Axis(0), 0.)
126 .into_scalar()
127 - min_value)
128 / (max_value - min_value)
129 - 1.,
130 ]
131 }
132
133 #[inline]
137 pub fn new(sample_rate: u32) -> AnalysisResult<Self> {
138 Ok(Self {
139 fftgrain: vec![0.0; Self::WINDOW_SIZE],
140
141 centroid_aubio_desc: SpecDesc::new(SpecShape::Centroid, Self::WINDOW_SIZE)
142 .map_err_unlikely(|e| {
143 AnalysisError::AnalysisError(format!(
144 "error while loading aubio centroid object: {e}",
145 ))
146 })?,
147 rolloff_aubio_desc: SpecDesc::new(SpecShape::Rolloff, Self::WINDOW_SIZE)
148 .map_err_unlikely(|e| {
149 AnalysisError::AnalysisError(format!(
150 "error while loading aubio rolloff object: {e}",
151 ))
152 })?,
153 phase_vocoder: PVoc::new(Self::WINDOW_SIZE, Self::HOP_SIZE).map_err_unlikely(|e| {
154 AnalysisError::AnalysisError(format!("error while loading aubio pvoc object: {e}",))
155 })?,
156 values_centroid: Vec::new(),
157 values_rolloff: Vec::new(),
158 values_flatness: Vec::new(),
159 sample_rate,
160 })
161 }
162
163 #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
171 #[allow(clippy::missing_inline_in_public_items)]
172 pub fn do_(&mut self, chunk: &[f32]) -> AnalysisResult<()> {
173 self.phase_vocoder
174 .do_(chunk, self.fftgrain.as_mut_slice())
175 .map_err_unlikely(|e| {
176 AnalysisError::AnalysisError(format!("error while processing aubio pv object: {e}"))
177 })?;
178
179 let bin = self
180 .centroid_aubio_desc
181 .do_result(self.fftgrain.as_slice())
182 .map_err_unlikely(|e| {
183 AnalysisError::AnalysisError(format!(
184 "error while processing aubio centroid object: {e}",
185 ))
186 })?;
187
188 #[allow(clippy::cast_precision_loss)]
189 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
190 self.values_centroid.push(freq);
191
192 let mut bin = self
193 .rolloff_aubio_desc
194 .do_result(self.fftgrain.as_slice())
195 .map_err_unlikely(|e| {
196 AnalysisError::AnalysisError(format!(
197 "error while processing aubio rolloff object: {e}",
198 ))
199 })?;
200
201 #[allow(clippy::cast_precision_loss)]
203 if unlikely(bin > Self::WINDOW_SIZE as f32 / 2.) {
204 bin = Self::WINDOW_SIZE as f32 / 2.;
206 }
207
208 #[allow(clippy::cast_precision_loss)]
209 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
210 self.values_rolloff.push(freq);
211
212 let cvec: CVec<'_> = self.fftgrain.as_slice().into();
213 let norm = cvec.norm();
214 let geo_mean = geometric_mean(norm);
215 let arithmetic_mean = mean(norm);
216
217 if unlikely(geo_mean == 0.0 || arithmetic_mean == 0.0) {
219 self.values_flatness.push(0.0);
220 return Ok(());
221 }
222 let flatness = geo_mean / arithmetic_mean;
223 self.values_flatness.push(flatness);
224 Ok(())
225 }
226}
227
228impl Normalize for SpectralDesc {
229 #[allow(clippy::cast_precision_loss)]
230 const MAX_VALUE: Feature = SAMPLE_RATE as Feature / 2.;
231 const MIN_VALUE: Feature = 0.;
232}
233
234#[derive(Default, Clone)]
247pub struct ZeroCrossingRateDesc {
248 crossings_sum: usize,
249 samples_checked: usize,
250}
251
252impl ZeroCrossingRateDesc {
253 #[must_use]
254 #[inline]
255 pub fn new(_sample_rate: u32) -> Self {
256 Self::default()
257 }
258
259 #[inline]
261 pub fn do_(&mut self, chunk: &[f32]) {
262 self.crossings_sum += number_crossings(chunk);
263 self.samples_checked += chunk.len();
264 }
265
266 #[allow(clippy::cast_precision_loss)]
269 #[inline]
270 pub fn get_value(&mut self) -> Feature {
271 self.normalize(self.crossings_sum as Feature / self.samples_checked as Feature)
272 }
273}
274
275impl Normalize for ZeroCrossingRateDesc {
276 const MAX_VALUE: Feature = 1.;
277 const MIN_VALUE: Feature = 0.;
278}
279
280#[cfg(test)]
281mod tests {
282 use super::*;
283 use crate::decoder::{Decoder as DecoderTrait, MecompDecoder as Decoder};
284 use std::path::Path;
285
286 #[test]
287 fn test_zcr_boundaries() {
288 let mut zcr_desc = ZeroCrossingRateDesc::default();
289 let chunk = vec![0.; 1024];
290 zcr_desc.do_(&chunk);
291 let value = zcr_desc.get_value();
292 assert!(f32::EPSILON > (-1. - value).abs(), "{value} !~= -1");
293
294 let one_chunk = [-1., 1.];
295 let chunks = std::iter::repeat_n(one_chunk.iter(), 512)
296 .flatten()
297 .copied()
298 .collect::<Vec<f32>>();
299 let mut zcr_desc = ZeroCrossingRateDesc::default();
300 zcr_desc.do_(&chunks);
301 let value = zcr_desc.get_value();
302 assert!(0.001 > (0.998_046_9 - value).abs(), "{value} !~= 0.9980469");
303 }
304
305 #[test]
306 fn test_zcr() {
307 let song = Decoder::new()
308 .unwrap()
309 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
310 .unwrap();
311 let mut zcr_desc = ZeroCrossingRateDesc::default();
312 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
313 zcr_desc.do_(chunk);
314 }
315 let value = zcr_desc.get_value();
316 assert!(0.001 > (-0.85036 - value).abs(), "{value} !~= -0.85036");
317 }
318
319 #[test]
320 fn test_spectral_flatness_boundaries() {
321 let mut spectral_desc = SpectralDesc::new(10).unwrap();
322 let chunk = vec![0.; 1024];
323
324 let expected_values = [-1., -1.];
325 spectral_desc.do_(&chunk).unwrap();
326 for (expected, actual) in expected_values
327 .iter()
328 .zip(spectral_desc.get_flatness().iter())
329 {
330 assert!(
331 0.000_000_1 > (expected - actual).abs(),
332 "{expected} !~= {actual}"
333 );
334 }
335
336 let song = Decoder::new()
337 .unwrap()
338 .decode(Path::new("data/white_noise.mp3"))
339 .unwrap();
340 let mut spectral_desc = SpectralDesc::new(22050).unwrap();
341 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
342 spectral_desc.do_(chunk).unwrap();
343 }
344 println!("{:?}", spectral_desc.get_flatness());
345 let expected_values = [0.578_530_3, -0.942_630_8];
347 for (expected, actual) in expected_values
348 .iter()
349 .zip(spectral_desc.get_flatness().iter())
350 {
351 let relative_error = (expected - actual).abs() / expected.abs();
354 assert!(
355 relative_error < 0.078,
356 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
357 );
358 }
359 }
360
361 #[test]
362 fn test_spectral_flatness() {
363 let song = Decoder::new()
364 .unwrap()
365 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
366 .unwrap();
367 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
368 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
369 spectral_desc.do_(chunk).unwrap();
370 }
371 let expected_values = [-0.776_100_75, -0.814_817_9];
374 for (expected, actual) in expected_values
375 .iter()
376 .zip(spectral_desc.get_flatness().iter())
377 {
378 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
379 }
380 }
381
382 #[test]
383 fn test_spectral_roll_off_boundaries() {
384 let mut spectral_desc = SpectralDesc::new(10).unwrap();
385 let chunk = vec![0.; 512];
386
387 let expected_values = [-1., -1.];
388 spectral_desc.do_(&chunk).unwrap();
389 for (expected, actual) in expected_values
390 .iter()
391 .zip(spectral_desc.get_rolloff().iter())
392 {
393 assert!(
394 0.000_000_1 > (expected - actual).abs(),
395 "{expected} !~= {actual}"
396 );
397 }
398
399 let song = Decoder::new()
400 .unwrap()
401 .decode(Path::new("data/tone_11080Hz.flac"))
402 .unwrap();
403 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
404 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
405 spectral_desc.do_(chunk).unwrap();
406 }
407 let expected_values = [0.996_768_1, -0.996_151_75];
408 for (expected, actual) in expected_values
409 .iter()
410 .zip(spectral_desc.get_rolloff().iter())
411 {
412 assert!(
413 0.0001 > (expected - actual).abs(),
414 "{expected} !~= {actual}"
415 );
416 }
417 }
418
419 #[test]
420 fn test_spectral_roll_off() {
421 let song = Decoder::new()
422 .unwrap()
423 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
424 .unwrap();
425 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
426 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
427 spectral_desc.do_(chunk).unwrap();
428 }
429 let expected_values = [-0.632_648_6, -0.726_093_3];
430 for (expected, actual) in expected_values
433 .iter()
434 .zip(spectral_desc.get_rolloff().iter())
435 {
436 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
437 }
438 }
439
440 #[test]
441 fn test_spectral_centroid() {
442 let song = Decoder::new()
443 .unwrap()
444 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
445 .unwrap();
446 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
447 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
448 spectral_desc.do_(chunk).unwrap();
449 }
450 let expected_values = [-0.75483, -0.879_168_87];
453 for (expected, actual) in expected_values
454 .iter()
455 .zip(spectral_desc.get_centroid().iter())
456 {
457 assert!(
458 0.0001 > (expected - actual).abs(),
459 "{expected} !~= {actual}"
460 );
461 }
462 }
463
464 #[test]
465 fn test_spectral_centroid_boundaries() {
466 let mut spectral_desc = SpectralDesc::new(10).unwrap();
467 let chunk = vec![0.; 512];
468
469 spectral_desc.do_(&chunk).unwrap();
470 let expected_values = [-1., -1.];
471 for (expected, actual) in expected_values
472 .iter()
473 .zip(spectral_desc.get_centroid().iter())
474 {
475 assert!(
476 0.000_000_1 > (expected - actual).abs(),
477 "{expected} !~= {actual}"
478 );
479 }
480 let song = Decoder::new()
481 .unwrap()
482 .decode(Path::new("data/tone_11080Hz.flac"))
483 .unwrap();
484 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
485 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
486 spectral_desc.do_(chunk).unwrap();
487 }
488 let expected_values = [0.97266, -0.960_992_6];
489 for (expected, actual) in expected_values
490 .iter()
491 .zip(spectral_desc.get_centroid().iter())
492 {
493 let relative_error = (expected - actual).abs() / expected.abs();
496 assert!(
497 relative_error < 0.039,
498 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
499 );
500 }
501 }
502}