mecomp_analysis/
temporal.rs

1//! Temporal feature extraction module.
2//!
3//! Contains functions to extract & summarize the temporal aspects
4//! of a given Song.
5
6use crate::Feature;
7
8use super::errors::{AnalysisError, AnalysisResult};
9use super::utils::Normalize;
10use bliss_audio_aubio_rs::{OnsetMode, Tempo};
11use log::warn;
12use ndarray::arr1;
13use ndarray_stats::Quantile1dExt;
14use ndarray_stats::interpolate::Midpoint;
15use noisy_float::prelude::*;
16
17/**
18 * Beats per minutes ([BPM](https://en.wikipedia.org/wiki/Tempo#Measurement))
19 * detection object.
20 *
21 * It indicates the (subjective) "speed" of a music piece. The higher the BPM,
22 * the "quicker" the song will feel.
23 *
24 * It uses `SpecFlux`, a phase-deviation onset detection function to perform
25 * onset detection; it proved to be the best for finding out the BPM of a panel
26 * of songs I had, but it could very well be replaced by something better in the
27 * future.
28 *
29 * Ranges from 0 (theoretically...) to 206 BPM. (Even though aubio apparently
30 * has trouble to identify tempo > 190 BPM - did not investigate too much)
31 *
32 */
33pub struct BPMDesc {
34    aubio_obj: Tempo,
35    bpms: Vec<f32>,
36}
37
38// TODO>1.0 use the confidence value to discard this descriptor if confidence
39// is too low.
40impl BPMDesc {
41    pub const WINDOW_SIZE: usize = 512;
42    pub const HOP_SIZE: usize = Self::WINDOW_SIZE / 2;
43
44    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
45    #[inline]
46    pub fn new(sample_rate: u32) -> AnalysisResult<Self> {
47        Ok(Self {
48            aubio_obj: Tempo::new(
49                OnsetMode::SpecFlux,
50                Self::WINDOW_SIZE,
51                Self::HOP_SIZE,
52                sample_rate,
53            )
54            .map_err(|e| {
55                AnalysisError::AnalysisError(format!("error while loading aubio tempo object: {e}"))
56            })?,
57            bpms: Vec::new(),
58        })
59    }
60
61    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
62    #[inline]
63    pub fn do_(&mut self, chunk: &[f32]) -> AnalysisResult<()> {
64        let result = self.aubio_obj.do_result(chunk).map_err(|e| {
65            AnalysisError::AnalysisError(format!("aubio error while computing tempo {e}"))
66        })?;
67
68        if result > 0. {
69            self.bpms.push(self.aubio_obj.get_bpm());
70        }
71        Ok(())
72    }
73
74    /**
75     * Compute score related to tempo.
76     * Right now, basically returns the song's BPM.
77     *
78     * - `song` Song to compute score from
79     */
80    #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
81    #[inline]
82    pub fn get_value(&mut self) -> Feature {
83        if self.bpms.is_empty() {
84            warn!("Set tempo value to zero because no beats were found.");
85            return -1.;
86        }
87        let median = arr1(&self.bpms)
88            .mapv(n32)
89            .quantile_mut(n64(0.5), &Midpoint)
90            .unwrap();
91        self.normalize(median.into())
92    }
93}
94
95impl Normalize for BPMDesc {
96    // See aubio/src/tempo/beattracking.c:387
97    // Should really be 413, needs testing
98    const MAX_VALUE: Feature = 206.;
99    const MIN_VALUE: Feature = 0.;
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        SAMPLE_RATE,
        decoder::{Decoder as DecoderTrait, MecompDecoder as Decoder},
    };
    use std::path::Path;

    /// Runs every full `HOP_SIZE`-sized chunk of `samples` through `desc`.
    /// A trailing partial chunk is dropped, as `chunks_exact` does.
    fn feed(desc: &mut BPMDesc, samples: &[f32]) {
        samples
            .chunks_exact(BPMDesc::HOP_SIZE)
            .for_each(|chunk| desc.do_(chunk).unwrap());
    }

    /// Builds a signal made of `repeats` copies of one period, where a period
    /// is `silence` samples at 0.0 followed by `pulse` samples at 1.0.
    fn pulse_train(silence: usize, pulse: usize, repeats: usize) -> Vec<f32> {
        let mut period = vec![0.0_f32; silence];
        period.resize(silence + pulse, 1.0);
        period.repeat(repeats)
    }

    #[test]
    fn test_tempo_real() {
        let decoder = Decoder::new().unwrap();
        let song = decoder
            .decode(Path::new("data/s16_mono_22_5kHz.flac"))
            .unwrap();

        let mut tempo_desc = BPMDesc::new(SAMPLE_RATE).unwrap();
        feed(&mut tempo_desc, &song.samples);

        assert!(
            (0.378_605 - tempo_desc.get_value()).abs() < 0.01,
            "{} !~= 0.378605",
            tempo_desc.get_value()
        );
    }

    #[test]
    fn test_tempo_artificial() {
        // One period is 22100 samples at 22050 Hz: one beat roughly every
        // second, so about 60 BPM.
        let signal = pulse_train(22000, 100, 100);

        let mut tempo_desc = BPMDesc::new(22050).unwrap();
        feed(&mut tempo_desc, &signal);

        // -0.41 is 60 BPM normalized
        assert!(
            (-0.416_853 - tempo_desc.get_value()).abs() < 0.01,
            "{} !~= -0.416853",
            tempo_desc.get_value()
        );
    }

    #[test]
    fn test_tempo_boundaries() {
        // Lower boundary: pure silence yields no beat, hence the minimum score.
        let silence_chunk = [0.0_f32; 1024];
        let mut silent_desc = BPMDesc::new(10).unwrap();
        silent_desc.do_(&silence_chunk).unwrap();
        let value = silent_desc.get_value();
        assert!((-1. - value).abs() < f64::EPSILON, "{value} !~= -1");

        // Upper boundary: the highest value I could obtain was with these
        // params, even though apparently the higher bound is 206 BPM, but
        // here I found ~189 BPM.
        let signal = pulse_train(6989, 20, 500);
        let mut fast_desc = BPMDesc::new(22050).unwrap();
        feed(&mut fast_desc, &signal);

        // 0.86 is 192BPM normalized
        assert!(
            (0.86 - fast_desc.get_value()).abs() < 0.01,
            "{} !~= 0.86",
            fast_desc.get_value()
        );
    }
}