mecomp_analysis/
lib.rs

//! This library provides song analysis and feature extraction.
//!
//! A lot of the code in this library is inspired by, or directly pulled from, [bliss-rs](https://github.com/Polochon-street/bliss-rs).
//! We don't use bliss-rs directly because I don't want to bring in an ffmpeg dependency, and bliss-rs also has a lot of features that I don't need
//! (for example, I don't need to decode tags or process playlists, since I'm already doing all of that myself).
//!
//! We use rodio to decode the audio file (overkill, but we already have the dependency for audio playback, so we may as well),
//! and rubato to resample it to 22050 Hz.
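//!
//! A minimal usage sketch (assuming the crate is imported as `mecomp_analysis`; the zeroed
//! samples below are placeholders for real decoded audio, which must already be mono at 22050 Hz):
//!
//! ```no_run
//! use std::path::PathBuf;
//!
//! use mecomp_analysis::{Analysis, AnalysisIndex, ResampledAudio};
//!
//! // ~30 seconds of placeholder silence standing in for decoded, resampled audio.
//! let audio = ResampledAudio {
//!     path: PathBuf::from("song.flac"),
//!     samples: vec![0.0_f32; 22050 * 30],
//! };
//! if let Ok(analysis) = Analysis::from_samples(&audio) {
//!     println!("tempo: {}", analysis[AnalysisIndex::Tempo]);
//! }
//! ```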

#![deny(clippy::missing_inline_in_public_items)]

pub mod chroma;
pub mod clustering;
pub mod decoder;
pub mod errors;
pub mod misc;
pub mod temporal;
pub mod timbral;
pub mod utils;

use std::{ops::Index, path::PathBuf};

use misc::LoudnessDesc;
use serde::{Deserialize, Serialize};
use strum::{EnumCount, EnumIter, IntoEnumIterator};

use chroma::ChromaDesc;
use errors::{AnalysisError, AnalysisResult};
use temporal::BPMDesc;
use timbral::{SpectralDesc, ZeroCrossingRateDesc};

/// The resampled audio data used for analysis.
///
/// Must be in mono (1 channel), with a sample rate of 22050 Hz.
#[derive(Debug)]
pub struct ResampledAudio {
    pub path: PathBuf,
    pub samples: Vec<f32>,
}

impl TryInto<Analysis> for ResampledAudio {
    type Error = AnalysisError;

    #[inline]
    fn try_into(self) -> Result<Analysis, Self::Error> {
        Analysis::from_samples(&self)
    }
}

/// The sampling rate used for the analysis.
pub const SAMPLE_RATE: u32 = 22050;

#[derive(Debug, EnumIter, EnumCount)]
/// Indexes the different fields of an `Analysis`.
///
/// For example, `analysis[AnalysisIndex::Tempo]` retrieves the tempo value of an analysis.
///
/// Note that this should mostly be used for debugging / distance metric
/// customization purposes.
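///
/// A runnable sketch (placeholder values; `mecomp_analysis` is assumed to be the crate name):
///
/// ```
/// use mecomp_analysis::{Analysis, AnalysisIndex, NUMBER_FEATURES};
///
/// // Index a (placeholder, zeroed) Analysis by variant instead of by raw position.
/// let analysis = Analysis::new([0.0; NUMBER_FEATURES]);
/// assert_eq!(analysis[AnalysisIndex::Tempo], 0.0);
/// ```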
#[allow(missing_docs, clippy::module_name_repetitions)]
pub enum AnalysisIndex {
    Tempo,
    Zcr,
    MeanSpectralCentroid,
    StdDeviationSpectralCentroid,
    MeanSpectralRolloff,
    StdDeviationSpectralRolloff,
    MeanSpectralFlatness,
    StdDeviationSpectralFlatness,
    MeanLoudness,
    StdDeviationLoudness,
    Chroma1,
    Chroma2,
    Chroma3,
    Chroma4,
    Chroma5,
    Chroma6,
    Chroma7,
    Chroma8,
    Chroma9,
    Chroma10,
}

/// The type of individual features.
pub type Feature = f64;
/// The number of features used in `Analysis`.
pub const NUMBER_FEATURES: usize = AnalysisIndex::COUNT;

#[derive(Default, PartialEq, Clone, Copy, Serialize, Deserialize)]
/// Object holding the results of the song's analysis.
///
/// Only use it if you want to have an in-depth look at what is
/// happening behind the scenes, or to build a distance metric yourself.
///
/// Under the hood, it is just an array of `Feature` (`f64`) values holding
/// different numeric features.
///
/// For more info on the different features, build the
/// documentation with private items included using
/// `cargo doc --document-private-items`, and / or read
/// [this document](https://lelele.io/thesis.pdf), which describes
/// most of the features, except the chroma ones, which are documented
/// directly in this code.
pub struct Analysis {
    pub(crate) internal_analysis: [Feature; NUMBER_FEATURES],
}

impl Index<AnalysisIndex> for Analysis {
    type Output = Feature;

    #[inline]
    fn index(&self, index: AnalysisIndex) -> &Feature {
        &self.internal_analysis[index as usize]
    }
}

impl Index<usize> for Analysis {
    type Output = Feature;

    #[inline]
    fn index(&self, index: usize) -> &Feature {
        &self.internal_analysis[index]
    }
}

impl std::fmt::Debug for Analysis {
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug_struct = f.debug_struct("Analysis");
        for feature in AnalysisIndex::iter() {
            debug_struct.field(&format!("{feature:?}"), &self[feature]);
        }
        debug_struct.finish()?;
        f.write_str(&format!(" /* {:?} */", &self.as_vec()))
    }
}

impl Analysis {
    /// Create a new `Analysis` object.
    ///
    /// Usually not needed, unless you have already computed and stored
    /// features somewhere, and need to recreate an `Analysis` from them yourself.
    #[must_use]
    #[inline]
    pub const fn new(analysis: [Feature; NUMBER_FEATURES]) -> Self {
        Self {
            internal_analysis: analysis,
        }
    }

    /// Creates a new `Analysis` object from a `Vec<Feature>`.
    ///
    /// invariant: `features.len() == NUMBER_FEATURES`
    ///
    /// # Errors
    ///
    /// This function will return an error if the length of the features is not equal to `NUMBER_FEATURES`.
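    ///
    /// A runnable sketch (placeholder feature values; `mecomp_analysis` is assumed to be the crate name):
    ///
    /// ```
    /// use mecomp_analysis::{Analysis, Feature, NUMBER_FEATURES};
    ///
    /// // A vector of the right length is accepted...
    /// let stored: Vec<Feature> = vec![0.0; NUMBER_FEATURES];
    /// assert!(Analysis::from_vec(stored).is_ok());
    ///
    /// // ...while a wrong-length vector is rejected.
    /// assert!(Analysis::from_vec(vec![0.0; 3]).is_err());
    /// ```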
    #[inline]
    pub fn from_vec(features: Vec<Feature>) -> Result<Self, AnalysisError> {
        features
            .try_into()
            .map_err(|_| AnalysisError::InvalidFeaturesLen)
            .map(Self::new)
    }

    /// Return the inner array of the analysis.
    /// This is mostly useful if you want to store the features somewhere.
    #[must_use]
    #[inline]
    pub const fn inner(&self) -> &[Feature; NUMBER_FEATURES] {
        &self.internal_analysis
    }

    /// Return a `Vec<Feature>` representing the analysis' features.
    ///
    /// Particularly useful if you want to iterate through the values to store
    /// them somewhere.
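    ///
    /// A runnable sketch of the store-and-restore round trip (placeholder values):
    ///
    /// ```
    /// use mecomp_analysis::{Analysis, NUMBER_FEATURES};
    ///
    /// // Flatten the features for storage, then rebuild an identical Analysis.
    /// let original = Analysis::new([0.5; NUMBER_FEATURES]);
    /// let restored = Analysis::from_vec(original.as_vec());
    /// assert_eq!(restored.ok(), Some(original));
    /// ```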
    #[must_use]
    #[inline]
    pub fn as_vec(&self) -> Vec<Feature> {
        self.internal_analysis.to_vec()
    }

    /// Create an `Analysis` object from a `ResampledAudio`.
    ///
    /// This is the main function you should use to create an `Analysis` object.
    /// It will compute all the features from the audio samples.
    /// You can get a `ResampledAudio` object by using a `Decoder` to decode an audio file.
    ///
    /// # Errors
    ///
    /// This function will return an error if the samples are empty or too short,
    /// or if there is an error during the analysis.
    ///
    /// # Panics
    ///
    /// This function will panic if it cannot join the analysis threads.
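    ///
    /// For example, empty samples are rejected (a runnable sketch; `mecomp_analysis` is assumed to be the crate name):
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mecomp_analysis::{Analysis, ResampledAudio};
    ///
    /// // No samples at all, so there is nothing to analyze.
    /// let empty = ResampledAudio {
    ///     path: PathBuf::from("song.flac"),
    ///     samples: Vec::new(),
    /// };
    /// assert!(Analysis::from_samples(&empty).is_err());
    /// ```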
    #[allow(clippy::missing_inline_in_public_items)]
    pub fn from_samples(audio: &ResampledAudio) -> AnalysisResult<Self> {
        let largest_window = vec![
            BPMDesc::WINDOW_SIZE,
            ChromaDesc::WINDOW_SIZE,
            SpectralDesc::WINDOW_SIZE,
            LoudnessDesc::WINDOW_SIZE,
        ]
        .into_iter()
        .max()
        .unwrap();
        if audio.samples.len() < largest_window {
            return Err(AnalysisError::EmptySamples);
        }

        std::thread::scope(|s| -> AnalysisResult<Self> {
            let child_chroma: std::thread::ScopedJoinHandle<AnalysisResult<Vec<Feature>>> = s
                .spawn(|| {
                    let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
                    chroma_desc.do_(&audio.samples)?;
                    Ok(chroma_desc.get_value())
                });

            #[allow(clippy::type_complexity)]
            let child_timbral: std::thread::ScopedJoinHandle<
                AnalysisResult<(Vec<Feature>, Vec<Feature>, Vec<Feature>)>,
            > = s.spawn(|| {
                let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE)?;
                let windows = audio
                    .samples
                    .windows(SpectralDesc::WINDOW_SIZE)
                    .step_by(SpectralDesc::HOP_SIZE);
                for window in windows {
                    spectral_desc.do_(window)?;
                }
                let centroid = spectral_desc.get_centroid();
                let rolloff = spectral_desc.get_rolloff();
                let flatness = spectral_desc.get_flatness();
                Ok((centroid, rolloff, flatness))
            });

            // We do BPM, ZCR, and loudness in the same thread since they are so much faster than the others.
            let child_temp_zcr_loudness: std::thread::ScopedJoinHandle<
                AnalysisResult<(Feature, Feature, Vec<Feature>)>,
            > = s.spawn(|| {
                // BPM
                let mut tempo_desc = BPMDesc::new(SAMPLE_RATE)?;
                let windows = audio
                    .samples
                    .windows(BPMDesc::WINDOW_SIZE)
                    .step_by(BPMDesc::HOP_SIZE);
                for window in windows {
                    tempo_desc.do_(window)?;
                }
                let tempo = tempo_desc.get_value();

                // ZCR
                let mut zcr_desc = ZeroCrossingRateDesc::default();
                zcr_desc.do_(&audio.samples);
                let zcr = zcr_desc.get_value();

                // Loudness
                let mut loudness_desc = LoudnessDesc::default();
                let windows = audio.samples.chunks(LoudnessDesc::WINDOW_SIZE);
                for window in windows {
                    loudness_desc.do_(window);
                }
                let loudness = loudness_desc.get_value();

                Ok((tempo, zcr, loudness))
            });

            // Join the worker threads and collect their results
            // (chroma was computed in a single, non-streaming pass over all the samples).
            let chroma = child_chroma.join().unwrap()?;
            let (centroid, rolloff, flatness) = child_timbral.join().unwrap()?;
            let (tempo, zcr, loudness) = child_temp_zcr_loudness.join().unwrap()?;

            let mut result = vec![tempo, zcr];
            result.extend_from_slice(&centroid);
            result.extend_from_slice(&rolloff);
            result.extend_from_slice(&flatness);
            result.extend_from_slice(&loudness);
            result.extend_from_slice(&chroma);
            let array: [Feature; NUMBER_FEATURES] = result
                .try_into()
                .map_err(|_| AnalysisError::InvalidFeaturesLen)?;
            Ok(Self::new(array))
        })
    }
}