mecomp_analysis/
lib.rs

1//! This library contains stuff for song analysis and feature extraction.
2//!
3//! A lot of the code in this library is inspired by, or directly pulled from, [bliss-rs](https://github.com/Polochon-street/bliss-rs).
4//! We don't simply use bliss-rs because I don't want to bring in an ffmpeg dependency, and bliss-rs also has a lot of features that I don't need.
5//! (for example, I don't need to decode tags, process playlists, etc. etc., I'm doing all of that myself already)
6//!
//! We use rodio to decode the audio file (overkill, but we already have the dependency for audio playback, so we may as well),
//! and we use rubato to resample the audio to 22050 Hz.
9
10#![deny(clippy::missing_inline_in_public_items)]
11
12pub mod chroma;
13pub mod clustering;
14pub mod decoder;
15pub mod embeddings;
16pub mod errors;
17pub mod misc;
18pub mod temporal;
19pub mod timbral;
20pub mod utils;
21
22use std::{ops::Index, path::PathBuf, thread::ScopedJoinHandle};
23
24use likely_stable::LikelyResult;
25use misc::LoudnessDesc;
26use strum::{EnumCount, EnumIter, IntoEnumIterator};
27
28use chroma::ChromaDesc;
29use errors::{AnalysisError, AnalysisResult};
30use temporal::BPMDesc;
31use timbral::{SpectralDesc, ZeroCrossingRateDesc};
32
33pub use crate::embeddings::DIM_EMBEDDING;
34
/// Decoded audio that has already been resampled and is ready to be analysed.
///
/// The samples must be mono (a single channel) at a sample rate of 22050 Hz.
#[derive(Clone, Debug)]
pub struct ResampledAudio {
    /// Path associated with these samples.
    pub path: PathBuf,
    /// The resampled audio samples.
    pub samples: Vec<f32>,
}
43
44impl TryInto<Analysis> for ResampledAudio {
45    type Error = AnalysisError;
46
47    #[inline]
48    fn try_into(self) -> Result<Analysis, Self::Error> {
49        Analysis::from_samples(&self)
50    }
51}
52
/// Sample rate, in Hz, that the analysis works at.
pub const SAMPLE_RATE: u32 = 22_050;
55
56#[derive(Debug, EnumIter, EnumCount)]
57/// Indexes different fields of an Analysis.
58///
59/// Prints the tempo value of an analysis.
60///
61/// Note that this should mostly be used for debugging / distance metric
62/// customization purposes.
63pub enum AnalysisIndex {
64    /// The song's tempo.
65    Tempo,
66    /// The song's zero-crossing rate.
67    Zcr,
68    /// The mean of the song's spectral centroid.
69    MeanSpectralCentroid,
70    /// The standard deviation of the song's spectral centroid.
71    StdDeviationSpectralCentroid,
72    /// The mean of the song's spectral rolloff.
73    MeanSpectralRolloff,
74    /// The standard deviation of the song's spectral rolloff.
75    StdDeviationSpectralRolloff,
76    /// The mean of the song's spectral flatness.
77    MeanSpectralFlatness,
78    /// The standard deviation of the song's spectral flatness.
79    StdDeviationSpectralFlatness,
80    /// The mean of the song's loudness.
81    MeanLoudness,
82    /// The standard deviation of the song's loudness.
83    StdDeviationLoudness,
84    /// The proportion of pitch class set 1 (IC1) compared to the 6 other pitch class sets,
85    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
86    Chroma1,
87    /// The proportion of pitch class set 2 (IC2) compared to the 6 other pitch class sets,
88    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
89    Chroma2,
90    /// The proportion of pitch class set 3 (IC3) compared to the 6 other pitch class sets,
91    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
92    Chroma3,
93    /// The proportion of pitch class set 4 (IC4) compared to the 6 other pitch class sets,
94    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
95    Chroma4,
96    /// The proportion of pitch class set 5 (IC5) compared to the 6 other pitch class sets,
97    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
98    Chroma5,
99    /// The proportion of pitch class set 6 (IC6) compared to the 6 other pitch class sets,
100    /// per this paper <https://speech.di.uoa.gr/ICMC-SMC-2014/images/VOL_2/1461.pdf>
101    Chroma6,
102    /// The proportion of major triads in the song, compared to the other triads.
103    Chroma7,
104    /// The proportion of minor triads in the song, compared to the other triads.
105    Chroma8,
106    /// The proportion of diminished triads in the song, compared to the other triads.
107    Chroma9,
108    /// The proportion of augmented triads in the song, compared to the other triads.
109    Chroma10,
110    /// The L2-norm of the IC1-6 (see above).
111    Chroma11,
112    /// The L2-norm of the IC7-10 (see above).
113    Chroma12,
114    /// The ratio of the L2-norm of IC7-10 and IC1-6 (proportion of triads vs dyads).
115    Chroma13,
116}
117
118/// The Type of individual features
119pub type Feature = f32;
120/// The number of features used in `Analysis`
121pub const NUMBER_FEATURES: usize = AnalysisIndex::COUNT;
122
123#[derive(Default, PartialEq, Clone, Copy)]
124/// Object holding the results of the song's analysis.
125///
126/// Only use it if you want to have an in-depth look of what is
127/// happening behind the scene, or make a distance metric yourself.
128///
129/// Under the hood, it is just an array of f32 holding different numeric
130/// features.
131///
132/// For more info on the different features, build the
133/// documentation with private items included using
134/// `cargo doc --document-private-items`, and / or read up
135/// [this document](https://lelele.io/thesis.pdf), that contains a description
136/// on most of the features, except the chroma ones, which are documented
137/// directly in this code.
138pub struct Analysis {
139    pub(crate) internal_analysis: [Feature; NUMBER_FEATURES],
140}
141
142impl Index<AnalysisIndex> for Analysis {
143    type Output = Feature;
144
145    #[inline]
146    fn index(&self, index: AnalysisIndex) -> &Feature {
147        &self.internal_analysis[index as usize]
148    }
149}
150
151impl Index<usize> for Analysis {
152    type Output = Feature;
153
154    #[inline]
155    fn index(&self, index: usize) -> &Feature {
156        &self.internal_analysis[index]
157    }
158}
159
160impl std::fmt::Debug for Analysis {
161    #[inline]
162    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
163        let mut debug_struct = f.debug_struct("Analysis");
164        for feature in AnalysisIndex::iter() {
165            debug_struct.field(&format!("{feature:?}"), &self[feature]);
166        }
167        debug_struct.finish()?;
168        f.write_str(&format!(" /* {:?} */", &self.as_vec()))
169    }
170}
171
172impl Analysis {
173    /// Create a new Analysis object.
174    ///
175    /// Usually not needed, unless you have already computed and stored
176    /// features somewhere, and need to recreate a Song with an already
177    /// existing Analysis yourself.
178    #[must_use]
179    #[inline]
180    pub const fn new(analysis: [Feature; NUMBER_FEATURES]) -> Self {
181        Self {
182            internal_analysis: analysis,
183        }
184    }
185
186    /// Creates a new `Analysis` object from a `Vec<Feature>`.
187    ///
188    /// invariant: `features.len() == NUMBER_FEATURES`
189    ///
190    /// # Errors
191    ///
192    /// This function will return an error if the length of the features is not equal to `NUMBER_FEATURES`.
193    #[inline]
194    pub fn from_vec(features: Vec<Feature>) -> Result<Self, AnalysisError> {
195        features
196            .try_into()
197            .map_err(|_| AnalysisError::InvalidFeaturesLen)
198            .map(Self::new)
199    }
200
201    /// Return the inner array of the analysis.
202    /// This is mostly useful if you want to store the features somewhere.
203    #[must_use]
204    #[inline]
205    pub const fn inner(&self) -> &[Feature; NUMBER_FEATURES] {
206        &self.internal_analysis
207    }
208
209    /// Return a `Vec<f32>` representing the analysis' features.
210    ///
211    /// Particularly useful if you want iterate through the values to store
212    /// them somewhere.
213    #[must_use]
214    #[inline]
215    pub fn as_vec(&self) -> Vec<Feature> {
216        self.internal_analysis.to_vec()
217    }
218
219    /// Create an `Analysis` object from a `ResampledAudio`.
220    /// This is the main function you should use to create an `Analysis` object.
221    /// It will compute all the features from the audio samples.
222    /// You can get a `ResampledAudio` object by using a `Decoder` to decode an audio file.
223    ///
224    /// # Errors
225    ///
226    /// This function will return an error if the samples are empty or too short.
227    /// Or if there is an error during the analysis.
228    ///
229    /// # Panics
230    ///
231    /// This function will panic it cannot join the threads.
232    #[allow(clippy::missing_inline_in_public_items)]
233    pub fn from_samples(audio: &ResampledAudio) -> AnalysisResult<Self> {
234        let largest_window = [
235            BPMDesc::WINDOW_SIZE,
236            ChromaDesc::WINDOW_SIZE,
237            SpectralDesc::WINDOW_SIZE,
238            LoudnessDesc::WINDOW_SIZE,
239        ]
240        .into_iter()
241        .max()
242        .unwrap();
243        if audio.samples.len() < largest_window {
244            return Err(AnalysisError::EmptySamples);
245        }
246
247        std::thread::scope(|s| -> AnalysisResult<Self> {
248            let child_chroma: ScopedJoinHandle<'_, AnalysisResult<Vec<Feature>>> = s.spawn(|| {
249                let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
250                chroma_desc.do_(&audio.samples)?;
251                Ok(chroma_desc.get_value())
252            });
253
254            #[allow(clippy::type_complexity)]
255            let child_timbral: ScopedJoinHandle<
256                '_,
257                AnalysisResult<([Feature; 2], [Feature; 2], [Feature; 2])>,
258            > = s.spawn(|| {
259                let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE)?;
260                let windows = audio
261                    .samples
262                    .windows(SpectralDesc::WINDOW_SIZE)
263                    .step_by(SpectralDesc::HOP_SIZE);
264                for window in windows {
265                    spectral_desc.do_(window)?;
266                }
267                let centroid = spectral_desc.get_centroid();
268                let rolloff = spectral_desc.get_rolloff();
269                let flatness = spectral_desc.get_flatness();
270                Ok((centroid, rolloff, flatness))
271            });
272
273            // we do BPM, ZCR, and Loudness at the same time since they are so much faster than the others
274            let child_temp_zcr_loudness: ScopedJoinHandle<
275                '_,
276                AnalysisResult<(Feature, Feature, Vec<Feature>)>,
277            > = s.spawn(|| {
278                // BPM
279                let mut tempo_desc = BPMDesc::new(SAMPLE_RATE)?;
280                let windows = audio
281                    .samples
282                    .windows(BPMDesc::WINDOW_SIZE)
283                    .step_by(BPMDesc::HOP_SIZE);
284                for window in windows {
285                    tempo_desc.do_(window)?;
286                }
287                let tempo = tempo_desc.get_value();
288
289                // ZCR
290                let mut zcr_desc = ZeroCrossingRateDesc::default();
291                zcr_desc.do_(&audio.samples);
292                let zcr = zcr_desc.get_value();
293
294                // Loudness
295                let mut loudness_desc = LoudnessDesc::default();
296                let windows = audio.samples.chunks(LoudnessDesc::WINDOW_SIZE);
297                for window in windows {
298                    loudness_desc.do_(window);
299                }
300                let loudness = loudness_desc.get_value();
301
302                Ok((tempo, zcr, loudness))
303            });
304
305            // Non-streaming approach for that one
306            let chroma = child_chroma.join().unwrap()?;
307            let (centroid, rolloff, flatness) = child_timbral.join().unwrap()?;
308            let (tempo, zcr, loudness) = child_temp_zcr_loudness.join().unwrap()?;
309
310            let mut result = vec![tempo, zcr];
311            result.extend_from_slice(&centroid);
312            result.extend_from_slice(&rolloff);
313            result.extend_from_slice(&flatness);
314            result.extend_from_slice(&loudness);
315            result.extend_from_slice(&chroma);
316            let array: [Feature; NUMBER_FEATURES] = result
317                .try_into()
318                .map_err_unlikely(|_| AnalysisError::InvalidFeaturesLen)?;
319            Ok(Self::new(array))
320        })
321    }
322}
mecomp_analysis/lib.rs

mecomp_analysis/
lib.rs