mecomp_analysis/
lib.rs

//! This library contains tools for song analysis and feature extraction.
//!
//! A lot of the code in this library is inspired by, or directly pulled from, [bliss-rs](https://github.com/Polochon-street/bliss-rs).
//! We don't use bliss-rs directly because we don't want to bring in an ffmpeg dependency,
//! and bliss-rs has a lot of features we don't need
//! (for example, decoding tags and processing playlists, which we already handle ourselves).
//!
//! We use rodio to decode the audio file (overkill, but we already have the dependency for audio playback, so we may as well),
//! and rubato to resample the audio to 22050 Hz.
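//!
//! # Example
//!
//! A minimal sketch of the pipeline, using placeholder silence in place of a real
//! decoded song (in practice the samples come from decoding and resampling an audio
//! file, e.g. via the [`decoder`] module; the path here is illustrative):
//!
//! ```no_run
//! use std::path::PathBuf;
//! use mecomp_analysis::{Analysis, ResampledAudio, SAMPLE_RATE};
//!
//! // One second of mono silence at the 22050 Hz analysis sample rate.
//! let audio = ResampledAudio {
//!     path: PathBuf::from("song.flac"),
//!     samples: vec![0.0_f32; SAMPLE_RATE as usize],
//! };
//! if let Ok(analysis) = Analysis::from_samples(&audio) {
//!     println!("{:?}", analysis);
//! }
//! ```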

#![deny(clippy::missing_inline_in_public_items)]

pub mod chroma;
pub mod clustering;
pub mod decoder;
pub mod errors;
pub mod misc;
pub mod temporal;
pub mod timbral;
pub mod utils;

use std::{ops::Index, path::PathBuf, thread::ScopedJoinHandle};

use likely_stable::LikelyResult;
use misc::LoudnessDesc;
use serde::{Deserialize, Serialize};
use strum::{EnumCount, EnumIter, IntoEnumIterator};

use chroma::ChromaDesc;
use errors::{AnalysisError, AnalysisResult};
use temporal::BPMDesc;
use timbral::{SpectralDesc, ZeroCrossingRateDesc};

/// The resampled audio data used for analysis.
///
/// Must be in mono (1 channel), with a sample rate of 22050 Hz.
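///
/// # Example
///
/// A small sketch of building a `ResampledAudio` and converting it into an
/// [`Analysis`] (placeholder silence and an illustrative path):
///
/// ```no_run
/// use std::convert::TryInto;
/// use std::path::PathBuf;
/// use mecomp_analysis::{Analysis, ResampledAudio, SAMPLE_RATE};
///
/// let audio = ResampledAudio {
///     path: PathBuf::from("song.flac"),
///     samples: vec![0.0_f32; SAMPLE_RATE as usize],
/// };
/// let analysis: Result<Analysis, _> = audio.try_into();
/// println!("analysis succeeded: {}", analysis.is_ok());
/// ```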
#[derive(Debug)]
pub struct ResampledAudio {
    pub path: PathBuf,
    pub samples: Vec<f32>,
}

impl TryInto<Analysis> for ResampledAudio {
    type Error = AnalysisError;

    #[inline]
    fn try_into(self) -> Result<Analysis, Self::Error> {
        Analysis::from_samples(&self)
    }
}

/// The sampling rate used for the analysis.
pub const SAMPLE_RATE: u32 = 22050;

#[derive(Debug, EnumIter, EnumCount)]
/// Indexes the different fields of an `Analysis`.
///
/// Note that this should mostly be used for debugging / distance metric
/// customization purposes.
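///
/// # Example
///
/// A short sketch of indexing into an [`Analysis`] by feature (the all-zero
/// analysis is purely illustrative):
///
/// ```
/// use mecomp_analysis::{Analysis, AnalysisIndex, NUMBER_FEATURES};
///
/// let analysis = Analysis::new([0.0; NUMBER_FEATURES]);
/// // Prints the tempo value of the analysis.
/// println!("{}", analysis[AnalysisIndex::Tempo]);
/// ```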
pub enum AnalysisIndex {
    /// The song's tempo.
    Tempo,
    /// The song's zero-crossing rate.
    Zcr,
    /// The mean of the song's spectral centroid.
    MeanSpectralCentroid,
    /// The standard deviation of the song's spectral centroid.
    StdDeviationSpectralCentroid,
    /// The mean of the song's spectral rolloff.
    MeanSpectralRolloff,
    /// The standard deviation of the song's spectral rolloff.
    StdDeviationSpectralRolloff,
    /// The mean of the song's spectral flatness.
    MeanSpectralFlatness,
    /// The standard deviation of the song's spectral flatness.
    StdDeviationSpectralFlatness,
    /// The mean of the song's loudness.
    MeanLoudness,
    /// The standard deviation of the song's loudness.
    StdDeviationLoudness,
    /// The proportion of pitch class set 1 (IC1) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma1,
    /// The proportion of pitch class set 2 (IC2) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma2,
    /// The proportion of pitch class set 3 (IC3) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma3,
    /// The proportion of pitch class set 4 (IC4) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma4,
    /// The proportion of pitch class set 5 (IC5) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma5,
    /// The proportion of pitch class set 6 (IC6) compared to the 6 other pitch class sets,
    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
    Chroma6,
    /// The proportion of major triads in the song, compared to the other triads.
    Chroma7,
    /// The proportion of minor triads in the song, compared to the other triads.
    Chroma8,
    /// The proportion of diminished triads in the song, compared to the other triads.
    Chroma9,
    /// The proportion of augmented triads in the song, compared to the other triads.
    Chroma10,
    /// The L2-norm of IC1-6 (see above).
    Chroma11,
    /// The L2-norm of IC7-10 (see above).
    Chroma12,
    /// The ratio of the L2-norm of IC7-10 to that of IC1-6 (proportion of triads vs. dyads).
    Chroma13,
}

/// The type of an individual feature.
pub type Feature = f64;
/// The number of features used in an `Analysis`.
pub const NUMBER_FEATURES: usize = AnalysisIndex::COUNT;

#[derive(Default, PartialEq, Clone, Copy, Serialize, Deserialize)]
/// Object holding the results of the song's analysis.
///
/// Only use it if you want to have an in-depth look at what is happening
/// behind the scenes, or to make a distance metric yourself.
///
/// Under the hood, it is just an array of `Feature` (`f64`) values holding
/// the different numeric features.
///
/// For more info on the different features, build the
/// documentation with private items included using
/// `cargo doc --document-private-items`, and / or read
/// [this document](https://lelele.io/thesis.pdf), which describes
/// most of the features, except the chroma ones, which are documented
/// directly in this code.
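///
/// # Example
///
/// A minimal sketch of a hand-rolled distance metric over the raw feature array
/// (plain Euclidean distance between two illustrative analyses):
///
/// ```
/// use mecomp_analysis::{Analysis, NUMBER_FEATURES};
///
/// let a = Analysis::new([0.0; NUMBER_FEATURES]);
/// let b = Analysis::new([1.0; NUMBER_FEATURES]);
/// let distance: f64 = a
///     .inner()
///     .iter()
///     .zip(b.inner())
///     .map(|(x, y)| (x - y).powi(2))
///     .sum::<f64>()
///     .sqrt();
/// assert!(distance > 0.0);
/// ```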
pub struct Analysis {
    pub(crate) internal_analysis: [Feature; NUMBER_FEATURES],
}

impl Index<AnalysisIndex> for Analysis {
    type Output = Feature;

    #[inline]
    fn index(&self, index: AnalysisIndex) -> &Feature {
        &self.internal_analysis[index as usize]
    }
}

impl Index<usize> for Analysis {
    type Output = Feature;

    #[inline]
    fn index(&self, index: usize) -> &Feature {
        &self.internal_analysis[index]
    }
}

impl std::fmt::Debug for Analysis {
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug_struct = f.debug_struct("Analysis");
        for feature in AnalysisIndex::iter() {
            debug_struct.field(&format!("{feature:?}"), &self[feature]);
        }
        debug_struct.finish()?;
        f.write_str(&format!(" /* {:?} */", &self.as_vec()))
    }
}

impl Analysis {
    /// Create a new `Analysis` object.
    ///
    /// Usually not needed, unless you have already computed and stored
    /// features somewhere, and need to recreate the `Analysis` yourself.
    #[must_use]
    #[inline]
    pub const fn new(analysis: [Feature; NUMBER_FEATURES]) -> Self {
        Self {
            internal_analysis: analysis,
        }
    }

    /// Creates a new `Analysis` object from a `Vec<Feature>`.
    ///
    /// Invariant: `features.len() == NUMBER_FEATURES`
    ///
    /// # Errors
    ///
    /// This function will return an error if the length of `features` is not equal to `NUMBER_FEATURES`.
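    ///
    /// # Example
    ///
    /// A small sketch of restoring an analysis from stored features (the all-zero
    /// feature vector is purely illustrative):
    ///
    /// ```
    /// use mecomp_analysis::{Analysis, NUMBER_FEATURES};
    ///
    /// let stored: Vec<f64> = vec![0.0; NUMBER_FEATURES];
    /// assert!(Analysis::from_vec(stored).is_ok());
    ///
    /// // A vector with the wrong length is rejected.
    /// assert!(Analysis::from_vec(vec![0.0; 3]).is_err());
    /// ```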
    #[inline]
    pub fn from_vec(features: Vec<Feature>) -> Result<Self, AnalysisError> {
        features
            .try_into()
            .map_err(|_| AnalysisError::InvalidFeaturesLen)
            .map(Self::new)
    }

    /// Return the inner array of the analysis.
    ///
    /// This is mostly useful if you want to store the features somewhere.
    #[must_use]
    #[inline]
    pub const fn inner(&self) -> &[Feature; NUMBER_FEATURES] {
        &self.internal_analysis
    }

    /// Return a `Vec<Feature>` representing the analysis' features.
    ///
    /// Particularly useful if you want to iterate through the values to store
    /// them somewhere.
    #[must_use]
    #[inline]
    pub fn as_vec(&self) -> Vec<Feature> {
        self.internal_analysis.to_vec()
    }

    /// Create an `Analysis` object from a `ResampledAudio`.
    ///
    /// This is the main function you should use to create an `Analysis` object;
    /// it computes all the features from the audio samples.
    /// You can get a `ResampledAudio` object by using a `Decoder` to decode an audio file.
    ///
    /// # Errors
    ///
    /// This function will return an error if the samples are empty or too short,
    /// or if there is an error during the analysis.
    ///
    /// # Panics
    ///
    /// This function will panic if it cannot join the analysis threads.
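    ///
    /// # Example
    ///
    /// A minimal sketch, using placeholder silence in place of a real decoded song:
    ///
    /// ```no_run
    /// use std::path::PathBuf;
    /// use mecomp_analysis::{Analysis, ResampledAudio, SAMPLE_RATE};
    ///
    /// let audio = ResampledAudio {
    ///     path: PathBuf::from("song.flac"),
    ///     samples: vec![0.0_f32; 10 * SAMPLE_RATE as usize],
    /// };
    /// match Analysis::from_samples(&audio) {
    ///     Ok(analysis) => println!("{:?}", analysis),
    ///     Err(_) => eprintln!("analysis failed"),
    /// }
    /// ```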
    #[allow(clippy::missing_inline_in_public_items)]
    pub fn from_samples(audio: &ResampledAudio) -> AnalysisResult<Self> {
        let largest_window = vec![
            BPMDesc::WINDOW_SIZE,
            ChromaDesc::WINDOW_SIZE,
            SpectralDesc::WINDOW_SIZE,
            LoudnessDesc::WINDOW_SIZE,
        ]
        .into_iter()
        .max()
        .unwrap();
        if audio.samples.len() < largest_window {
            return Err(AnalysisError::EmptySamples);
        }

        std::thread::scope(|s| -> AnalysisResult<Self> {
            // Chroma takes a non-streaming approach: it processes the whole sample buffer at once.
            let child_chroma: ScopedJoinHandle<'_, AnalysisResult<Vec<Feature>>> = s.spawn(|| {
                let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
                chroma_desc.do_(&audio.samples)?;
                Ok(chroma_desc.get_value())
            });

            #[allow(clippy::type_complexity)]
            let child_timbral: ScopedJoinHandle<
                '_,
                AnalysisResult<([Feature; 2], [Feature; 2], [Feature; 2])>,
            > = s.spawn(|| {
                let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE)?;
                let windows = audio
                    .samples
                    .windows(SpectralDesc::WINDOW_SIZE)
                    .step_by(SpectralDesc::HOP_SIZE);
                for window in windows {
                    spectral_desc.do_(window)?;
                }
                let centroid = spectral_desc.get_centroid();
                let rolloff = spectral_desc.get_rolloff();
                let flatness = spectral_desc.get_flatness();
                Ok((centroid, rolloff, flatness))
            });

            // We do BPM, ZCR, and loudness on the same thread since they are so much faster than the others.
            let child_temp_zcr_loudness: ScopedJoinHandle<
                '_,
                AnalysisResult<(Feature, Feature, Vec<Feature>)>,
            > = s.spawn(|| {
                // BPM
                let mut tempo_desc = BPMDesc::new(SAMPLE_RATE)?;
                let windows = audio
                    .samples
                    .windows(BPMDesc::WINDOW_SIZE)
                    .step_by(BPMDesc::HOP_SIZE);
                for window in windows {
                    tempo_desc.do_(window)?;
                }
                let tempo = tempo_desc.get_value();

                // ZCR
                let mut zcr_desc = ZeroCrossingRateDesc::default();
                zcr_desc.do_(&audio.samples);
                let zcr = zcr_desc.get_value();

                // Loudness
                let mut loudness_desc = LoudnessDesc::default();
                let windows = audio.samples.chunks(LoudnessDesc::WINDOW_SIZE);
                for window in windows {
                    loudness_desc.do_(window);
                }
                let loudness = loudness_desc.get_value();

                Ok((tempo, zcr, loudness))
            });

            // Join the worker threads and propagate any analysis errors.
            let chroma = child_chroma.join().unwrap()?;
            let (centroid, rolloff, flatness) = child_timbral.join().unwrap()?;
            let (tempo, zcr, loudness) = child_temp_zcr_loudness.join().unwrap()?;

            // Assemble the features in the order defined by `AnalysisIndex`.
            let mut result = vec![tempo, zcr];
            result.extend_from_slice(&centroid);
            result.extend_from_slice(&rolloff);
            result.extend_from_slice(&flatness);
            result.extend_from_slice(&loudness);
            result.extend_from_slice(&chroma);
            let array: [Feature; NUMBER_FEATURES] = result
                .try_into()
                .map_err_unlikely(|_| AnalysisError::InvalidFeaturesLen)?;
            Ok(Self::new(array))
        })
    }
}