mecomp_analysis/
temporal.rs

1//! Temporal feature extraction module.
2//!
3//! Contains functions to extract & summarize the temporal aspects
4//! of a given Song.
5
6use crate::Feature;
7
8use super::errors::{AnalysisError, AnalysisResult};
9use super::utils::Normalize;
10use bliss_audio_aubio_rs::{OnsetMode, Tempo};
11use log::warn;
12use ndarray::arr1;
13use ndarray_stats::interpolate::Midpoint;
14use ndarray_stats::Quantile1dExt;
15use noisy_float::prelude::*;
16
17/**
18 * Beats per minutes ([BPM](https://en.wikipedia.org/wiki/Tempo#Measurement))
19 * detection object.
20 *
21 * It indicates the (subjective) "speed" of a music piece. The higher the BPM,
22 * the "quicker" the song will feel.
23 *
24 * It uses `SpecFlux`, a phase-deviation onset detection function to perform
25 * onset detection; it proved to be the best for finding out the BPM of a panel
26 * of songs I had, but it could very well be replaced by something better in the
27 * future.
28 *
29 * Ranges from 0 (theoretically...) to 206 BPM. (Even though aubio apparently
30 * has trouble to identify tempo > 190 BPM - did not investigate too much)
31 *
32 */
33pub struct BPMDesc {
34    aubio_obj: Tempo,
35    bpms: Vec<f32>,
36}
37
38// TODO>1.0 use the confidence value to discard this descriptor if confidence
39// is too low.
40impl BPMDesc {
41    pub const WINDOW_SIZE: usize = 512;
42    pub const HOP_SIZE: usize = Self::WINDOW_SIZE / 2;
43
44    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
45    #[inline]
46    pub fn new(sample_rate: u32) -> AnalysisResult<Self> {
47        Ok(Self {
48            aubio_obj: Tempo::new(
49                OnsetMode::SpecFlux,
50                Self::WINDOW_SIZE,
51                Self::HOP_SIZE,
52                sample_rate,
53            )
54            .map_err(|e| {
55                AnalysisError::AnalysisError(format!("error while loading aubio tempo object: {e}"))
56            })?,
57            bpms: Vec::new(),
58        })
59    }
60
61    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
62    #[inline]
63    pub fn do_(&mut self, chunk: &[f32]) -> AnalysisResult<()> {
64        let result = self.aubio_obj.do_result(chunk).map_err(|e| {
65            AnalysisError::AnalysisError(format!("aubio error while computing tempo {e}"))
66        })?;
67
68        if result > 0. {
69            self.bpms.push(self.aubio_obj.get_bpm());
70        }
71        Ok(())
72    }
73
74    /**
75     * Compute score related to tempo.
76     * Right now, basically returns the song's BPM.
77     *
78     * - `song` Song to compute score from
79     */
80    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
81    #[inline]
82    pub fn get_value(&mut self) -> Feature {
83        if self.bpms.is_empty() {
84            warn!("Set tempo value to zero because no beats were found.");
85            return -1.;
86        }
87        let median = arr1(&self.bpms)
88            .mapv(n32)
89            .quantile_mut(n64(0.5), &Midpoint)
90            .unwrap();
91        self.normalize(median.into())
92    }
93}
94
95impl Normalize for BPMDesc {
96    // See aubio/src/tempo/beattracking.c:387
97    // Should really be 413, needs testing
98    const MAX_VALUE: Feature = 206.;
99    const MIN_VALUE: Feature = 0.;
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        decoder::{Decoder as DecoderTrait, MecompDecoder as Decoder},
        SAMPLE_RATE,
    };
    use std::path::Path;

    /// Builds `periods` back-to-back copies of a period made of `silence_len`
    /// zeros followed by `pulse_len` ones.
    fn pulse_train(silence_len: usize, pulse_len: usize, periods: usize) -> Vec<f32> {
        let mut period = vec![0.0_f32; silence_len];
        period.resize(silence_len + pulse_len, 1.0);
        period.repeat(periods)
    }

    /// Runs `samples` through `desc` one full hop at a time; a trailing
    /// partial hop is dropped.
    fn feed(desc: &mut BPMDesc, samples: &[f32]) {
        samples
            .chunks_exact(BPMDesc::HOP_SIZE)
            .for_each(|hop| desc.do_(hop).unwrap());
    }

    #[test]
    fn test_tempo_real() {
        let song = Decoder::decode(Path::new("data/s16_mono_22_5kHz.flac")).unwrap();
        let mut desc = BPMDesc::new(SAMPLE_RATE).unwrap();
        feed(&mut desc, &song.samples);

        let value = desc.get_value();
        assert!((0.378_605 - value).abs() < 0.01, "{value} !~= 0.378605");
    }

    #[test]
    fn test_tempo_artificial() {
        let mut desc = BPMDesc::new(22050).unwrap();
        // One pulse per period of 22100 samples at 22050 Hz: roughly one beat
        // every second, so 60 BPM.
        let samples = pulse_train(22000, 100, 100);
        feed(&mut desc, &samples);

        // 60 BPM normalized is about -0.41.
        let value = desc.get_value();
        assert!((-0.416_853 - value).abs() < 0.01, "{value} !~= -0.416853");
    }

    #[test]
    fn test_tempo_boundaries() {
        // Lower boundary: pure silence yields no beat at all.
        let mut silent_desc = BPMDesc::new(10).unwrap();
        let silence = [0.0_f32; 1024];
        silent_desc.do_(&silence).unwrap();
        assert_eq!(-1., silent_desc.get_value());

        // Upper boundary: these parameters gave the highest value that could
        // be obtained. The upper bound is apparently 206 BPM, but only about
        // 189 BPM was found here.
        let mut fast_desc = BPMDesc::new(22050).unwrap();
        let samples = pulse_train(6989, 20, 500);
        feed(&mut fast_desc, &samples);

        // 192 BPM normalized is 0.86.
        let value = fast_desc.get_value();
        assert!((0.86 - value).abs() < 0.01, "{value} !~= 0.86");
    }
}