1use bliss_audio_aubio_rs::vec::CVec;
8use bliss_audio_aubio_rs::{PVoc, SpecDesc, SpecShape, bin_to_freq};
9use ndarray::{Axis, arr1};
10
11use crate::Feature;
12
13use super::SAMPLE_RATE;
14use super::errors::{AnalysisError, AnalysisResult};
15use super::utils::{Normalize, geometric_mean, mean, number_crossings};
16
17pub struct SpectralDesc {
31 phase_vocoder: PVoc,
32 sample_rate: u32,
33
34 centroid_aubio_desc: SpecDesc,
35 rolloff_aubio_desc: SpecDesc,
36 values_centroid: Vec<f32>,
37 values_rolloff: Vec<f32>,
38 values_flatness: Vec<f32>,
39}
40
41impl SpectralDesc {
42 pub const WINDOW_SIZE: usize = 512;
43 pub const HOP_SIZE: usize = Self::WINDOW_SIZE / 4;
44
45 #[inline]
60 pub fn get_centroid(&mut self) -> Vec<Feature> {
61 vec![
62 self.normalize(Feature::from(mean(&self.values_centroid))),
63 self.normalize(Feature::from(
64 arr1(&self.values_centroid)
65 .std_axis(Axis(0), 0.)
66 .into_scalar(),
67 )),
68 ]
69 }
70
71 #[inline]
85 pub fn get_rolloff(&mut self) -> Vec<Feature> {
86 vec![
87 self.normalize(Feature::from(mean(&self.values_rolloff))),
88 self.normalize(Feature::from(
89 arr1(&self.values_rolloff)
90 .std_axis(Axis(0), 0.)
91 .into_scalar(),
92 )),
93 ]
94 }
95
96 #[inline]
113 pub fn get_flatness(&mut self) -> Vec<Feature> {
114 let max_value = 1.;
115 let min_value = 0.;
116 vec![
119 2. * (Feature::from(mean(&self.values_flatness)) - min_value) / (max_value - min_value)
120 - 1.,
121 2. * (Feature::from(
122 arr1(&self.values_flatness)
123 .std_axis(Axis(0), 0.)
124 .into_scalar(),
125 ) - min_value)
126 / (max_value - min_value)
127 - 1.,
128 ]
129 }
130
131 #[inline]
135 pub fn new(sample_rate: u32) -> AnalysisResult<Self> {
136 Ok(Self {
137 centroid_aubio_desc: SpecDesc::new(SpecShape::Centroid, Self::WINDOW_SIZE).map_err(
138 |e| {
139 AnalysisError::AnalysisError(format!(
140 "error while loading aubio centroid object: {e}",
141 ))
142 },
143 )?,
144 rolloff_aubio_desc: SpecDesc::new(SpecShape::Rolloff, Self::WINDOW_SIZE).map_err(
145 |e| {
146 AnalysisError::AnalysisError(format!(
147 "error while loading aubio rolloff object: {e}",
148 ))
149 },
150 )?,
151 phase_vocoder: PVoc::new(Self::WINDOW_SIZE, Self::HOP_SIZE).map_err(|e| {
152 AnalysisError::AnalysisError(format!("error while loading aubio pvoc object: {e}",))
153 })?,
154 values_centroid: Vec::new(),
155 values_rolloff: Vec::new(),
156 values_flatness: Vec::new(),
157 sample_rate,
158 })
159 }
160
161 #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
169 #[allow(clippy::missing_inline_in_public_items)]
170 pub fn do_(&mut self, chunk: &[f32]) -> AnalysisResult<()> {
171 let mut fftgrain: Vec<f32> = vec![0.0; Self::WINDOW_SIZE];
172 self.phase_vocoder
173 .do_(chunk, fftgrain.as_mut_slice())
174 .map_err(|e| {
175 AnalysisError::AnalysisError(format!("error while processing aubio pv object: {e}"))
176 })?;
177
178 let bin = self
179 .centroid_aubio_desc
180 .do_result(fftgrain.as_slice())
181 .map_err(|e| {
182 AnalysisError::AnalysisError(format!(
183 "error while processing aubio centroid object: {e}",
184 ))
185 })?;
186
187 #[allow(clippy::cast_precision_loss)]
188 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
189 self.values_centroid.push(freq);
190
191 let mut bin = self
192 .rolloff_aubio_desc
193 .do_result(fftgrain.as_slice())
194 .unwrap();
195
196 #[allow(clippy::cast_precision_loss)]
198 if bin > Self::WINDOW_SIZE as f32 / 2. {
199 bin = Self::WINDOW_SIZE as f32 / 2.;
200 }
201
202 #[allow(clippy::cast_precision_loss)]
203 let freq = bin_to_freq(bin, self.sample_rate as f32, Self::WINDOW_SIZE as f32);
204 self.values_rolloff.push(freq);
205
206 let cvec: CVec = fftgrain.as_slice().into();
207 let geo_mean = geometric_mean(cvec.norm());
208 if geo_mean == 0.0 {
209 self.values_flatness.push(0.0);
210 return Ok(());
211 }
212 let flatness = geo_mean / mean(cvec.norm());
213 self.values_flatness.push(flatness);
214 Ok(())
215 }
216}
217
218impl Normalize for SpectralDesc {
219 #[allow(clippy::cast_precision_loss)]
220 const MAX_VALUE: Feature = SAMPLE_RATE as Feature / 2.;
221 const MIN_VALUE: Feature = 0.;
222}
223
224#[derive(Default, Clone)]
237pub struct ZeroCrossingRateDesc {
238 crossings_sum: u32,
239 samples_checked: usize,
240}
241
242impl ZeroCrossingRateDesc {
243 #[must_use]
244 #[inline]
245 pub fn new(_sample_rate: u32) -> Self {
246 Self::default()
247 }
248
249 #[inline]
251 pub fn do_(&mut self, chunk: &[f32]) {
252 self.crossings_sum += number_crossings(chunk);
253 self.samples_checked += chunk.len();
254 }
255
256 #[allow(clippy::cast_precision_loss)]
259 #[inline]
260 pub fn get_value(&mut self) -> Feature {
261 self.normalize(Feature::from(self.crossings_sum) / self.samples_checked as Feature)
262 }
263}
264
265impl Normalize for ZeroCrossingRateDesc {
266 const MAX_VALUE: Feature = 1.;
267 const MIN_VALUE: Feature = 0.;
268}
269
270#[cfg(test)]
271mod tests {
272 use super::*;
273 use crate::decoder::{Decoder as DecoderTrait, MecompDecoder as Decoder};
274 use std::path::Path;
275
276 #[test]
277 fn test_zcr_boundaries() {
278 let mut zcr_desc = ZeroCrossingRateDesc::default();
279 let chunk = vec![0.; 1024];
280 zcr_desc.do_(&chunk);
281 let value = zcr_desc.get_value();
282 assert!(f64::EPSILON > (-1. - value).abs(), "{value} !~= -1");
283
284 let one_chunk = [-1., 1.];
285 let chunks = std::iter::repeat_n(one_chunk.iter(), 512)
286 .flatten()
287 .copied()
288 .collect::<Vec<f32>>();
289 let mut zcr_desc = ZeroCrossingRateDesc::default();
290 zcr_desc.do_(&chunks);
291 let value = zcr_desc.get_value();
292 assert!(0.001 > (0.998_046_9 - value).abs(), "{value} !~= 0.9980469");
293 }
294
295 #[test]
296 fn test_zcr() {
297 let song = Decoder::new()
298 .unwrap()
299 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
300 .unwrap();
301 let mut zcr_desc = ZeroCrossingRateDesc::default();
302 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
303 zcr_desc.do_(chunk);
304 }
305 let value = zcr_desc.get_value();
306 assert!(0.001 > (-0.85036 - value).abs(), "{value} !~= -0.85036");
307 }
308
309 #[test]
310 fn test_spectral_flatness_boundaries() {
311 let mut spectral_desc = SpectralDesc::new(10).unwrap();
312 let chunk = vec![0.; 1024];
313
314 let expected_values = [-1., -1.];
315 spectral_desc.do_(&chunk).unwrap();
316 for (expected, actual) in expected_values
317 .iter()
318 .zip(spectral_desc.get_flatness().iter())
319 {
320 assert!(
321 0.000_000_1 > (expected - actual).abs(),
322 "{expected} !~= {actual}"
323 );
324 }
325
326 let song = Decoder::new()
327 .unwrap()
328 .decode(Path::new("data/white_noise.mp3"))
329 .unwrap();
330 let mut spectral_desc = SpectralDesc::new(22050).unwrap();
331 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
332 spectral_desc.do_(chunk).unwrap();
333 }
334 println!("{:?}", spectral_desc.get_flatness());
335 let expected_values = [0.578_530_3, -0.942_630_8];
337 for (expected, actual) in expected_values
338 .iter()
339 .zip(spectral_desc.get_flatness().iter())
340 {
341 let relative_error = (expected - actual).abs() / expected.abs();
344 assert!(
345 relative_error < 0.078,
346 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
347 );
348 }
349 }
350
351 #[test]
352 fn test_spectral_flatness() {
353 let song = Decoder::new()
354 .unwrap()
355 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
356 .unwrap();
357 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
358 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
359 spectral_desc.do_(chunk).unwrap();
360 }
361 let expected_values = [-0.776_100_75, -0.814_817_9];
364 for (expected, actual) in expected_values
365 .iter()
366 .zip(spectral_desc.get_flatness().iter())
367 {
368 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
369 }
370 }
371
372 #[test]
373 fn test_spectral_roll_off_boundaries() {
374 let mut spectral_desc = SpectralDesc::new(10).unwrap();
375 let chunk = vec![0.; 512];
376
377 let expected_values = [-1., -1.];
378 spectral_desc.do_(&chunk).unwrap();
379 for (expected, actual) in expected_values
380 .iter()
381 .zip(spectral_desc.get_rolloff().iter())
382 {
383 assert!(
384 0.000_000_1 > (expected - actual).abs(),
385 "{expected} !~= {actual}"
386 );
387 }
388
389 let song = Decoder::new()
390 .unwrap()
391 .decode(Path::new("data/tone_11080Hz.flac"))
392 .unwrap();
393 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
394 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
395 spectral_desc.do_(chunk).unwrap();
396 }
397 let expected_values = [0.996_768_1, -0.996_151_75];
398 for (expected, actual) in expected_values
399 .iter()
400 .zip(spectral_desc.get_rolloff().iter())
401 {
402 assert!(
403 0.0001 > (expected - actual).abs(),
404 "{expected} !~= {actual}"
405 );
406 }
407 }
408
409 #[test]
410 fn test_spectral_roll_off() {
411 let song = Decoder::new()
412 .unwrap()
413 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
414 .unwrap();
415 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
416 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
417 spectral_desc.do_(chunk).unwrap();
418 }
419 let expected_values = [-0.632_648_6, -0.726_093_3];
420 for (expected, actual) in expected_values
423 .iter()
424 .zip(spectral_desc.get_rolloff().iter())
425 {
426 assert!(0.01 > (expected - actual).abs(), "{expected} !~= {actual}");
427 }
428 }
429
430 #[test]
431 fn test_spectral_centroid() {
432 let song = Decoder::new()
433 .unwrap()
434 .decode(Path::new("data/s16_mono_22_5kHz.flac"))
435 .unwrap();
436 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
437 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
438 spectral_desc.do_(chunk).unwrap();
439 }
440 let expected_values = [-0.75483, -0.879_168_87];
443 for (expected, actual) in expected_values
444 .iter()
445 .zip(spectral_desc.get_centroid().iter())
446 {
447 assert!(
448 0.0001 > (expected - actual).abs(),
449 "{expected} !~= {actual}"
450 );
451 }
452 }
453
454 #[test]
455 fn test_spectral_centroid_boundaries() {
456 let mut spectral_desc = SpectralDesc::new(10).unwrap();
457 let chunk = vec![0.; 512];
458
459 spectral_desc.do_(&chunk).unwrap();
460 let expected_values = [-1., -1.];
461 for (expected, actual) in expected_values
462 .iter()
463 .zip(spectral_desc.get_centroid().iter())
464 {
465 assert!(
466 0.000_000_1 > (expected - actual).abs(),
467 "{expected} !~= {actual}"
468 );
469 }
470 let song = Decoder::new()
471 .unwrap()
472 .decode(Path::new("data/tone_11080Hz.flac"))
473 .unwrap();
474 let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE).unwrap();
475 for chunk in song.samples.chunks_exact(SpectralDesc::HOP_SIZE) {
476 spectral_desc.do_(chunk).unwrap();
477 }
478 let expected_values = [0.97266, -0.960_992_6];
479 for (expected, actual) in expected_values
480 .iter()
481 .zip(spectral_desc.get_centroid().iter())
482 {
483 let relative_error = (expected - actual).abs() / expected.abs();
486 assert!(
487 relative_error < 0.039,
488 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
489 );
490 }
491 }
492}