mecomp_analysis/
lib.rs

1//! This library contains stuff for song analysis and feature extraction.
2//!
3//! A lot of the code in this library is inspired by, or directly pulled from, [bliss-rs](https://github.com/Polochon-street/bliss-rs).
4//! We don't simply use bliss-rs because I don't want to bring in an ffmpeg dependency, and bliss-rs also has a lot of features that I don't need.
5//! (for example, I don't need to decode tags, process playlists, etc. etc., I'm doing all of that myself already)
6//!
//! We use rodio to decode the audio file (overkill, but we already have the dependency for audio playback so may as well),
//! and rubato to resample the audio file to 22050 Hz.
9
10pub mod chroma;
11pub mod clustering;
12pub mod decoder;
13pub mod errors;
14pub mod misc;
15pub mod temporal;
16pub mod timbral;
17pub mod utils;
18
19use std::{ops::Index, path::PathBuf};
20
21use misc::LoudnessDesc;
22use serde::{Deserialize, Serialize};
23use strum::{EnumCount, EnumIter, IntoEnumIterator};
24
25use chroma::ChromaDesc;
26use errors::{AnalysisError, AnalysisResult};
27use temporal::BPMDesc;
28use timbral::{SpectralDesc, ZeroCrossingRateDesc};
29
/// Audio samples that have been resampled and are ready to be analyzed.
///
/// The samples must be mono (1 channel) at a sample rate of 22050 Hz.
#[derive(Debug)]
pub struct ResampledAudio {
    /// Path associated with these samples (presumably the file they were
    /// decoded from; confirm against the decoder).
    pub path: PathBuf,
    /// The audio samples themselves.
    pub samples: Vec<f32>,
}
38
39impl TryInto<Analysis> for ResampledAudio {
40    type Error = AnalysisError;
41
42    fn try_into(self) -> Result<Analysis, Self::Error> {
43        Analysis::from_samples(&self)
44    }
45}
46
/// Sample rate, in Hz, that the analysis operates on.
pub const SAMPLE_RATE: u32 = 22_050;
49
50#[derive(Debug, EnumIter, EnumCount)]
51/// Indexes different fields of an Analysis.
52///
53/// Prints the tempo value of an analysis.
54///
55/// Note that this should mostly be used for debugging / distance metric
56/// customization purposes.
57#[allow(missing_docs, clippy::module_name_repetitions)]
58pub enum AnalysisIndex {
59    Tempo,
60    Zcr,
61    MeanSpectralCentroid,
62    StdDeviationSpectralCentroid,
63    MeanSpectralRolloff,
64    StdDeviationSpectralRolloff,
65    MeanSpectralFlatness,
66    StdDeviationSpectralFlatness,
67    MeanLoudness,
68    StdDeviationLoudness,
69    Chroma1,
70    Chroma2,
71    Chroma3,
72    Chroma4,
73    Chroma5,
74    Chroma6,
75    Chroma7,
76    Chroma8,
77    Chroma9,
78    Chroma10,
79}
80
81/// The Type of individual features
82pub type Feature = f64;
83/// The number of features used in `Analysis`
84pub const NUMBER_FEATURES: usize = AnalysisIndex::COUNT;
85
86#[derive(Default, PartialEq, Clone, Copy, Serialize, Deserialize)]
87/// Object holding the results of the song's analysis.
88///
89/// Only use it if you want to have an in-depth look of what is
90/// happening behind the scene, or make a distance metric yourself.
91///
92/// Under the hood, it is just an array of f32 holding different numeric
93/// features.
94///
95/// For more info on the different features, build the
96/// documentation with private items included using
97/// `cargo doc --document-private-items`, and / or read up
98/// [this document](https://lelele.io/thesis.pdf), that contains a description
99/// on most of the features, except the chroma ones, which are documented
100/// directly in this code.
101pub struct Analysis {
102    pub(crate) internal_analysis: [Feature; NUMBER_FEATURES],
103}
104
105impl Index<AnalysisIndex> for Analysis {
106    type Output = Feature;
107
108    fn index(&self, index: AnalysisIndex) -> &Feature {
109        &self.internal_analysis[index as usize]
110    }
111}
112
113impl Index<usize> for Analysis {
114    type Output = Feature;
115
116    fn index(&self, index: usize) -> &Feature {
117        &self.internal_analysis[index]
118    }
119}
120
121impl std::fmt::Debug for Analysis {
122    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
123        let mut debug_struct = f.debug_struct("Analysis");
124        for feature in AnalysisIndex::iter() {
125            debug_struct.field(&format!("{feature:?}"), &self[feature]);
126        }
127        debug_struct.finish()?;
128        f.write_str(&format!(" /* {:?} */", &self.as_vec()))
129    }
130}
131
132impl Analysis {
133    /// Create a new Analysis object.
134    ///
135    /// Usually not needed, unless you have already computed and stored
136    /// features somewhere, and need to recreate a Song with an already
137    /// existing Analysis yourself.
138    #[must_use]
139    pub const fn new(analysis: [Feature; NUMBER_FEATURES]) -> Self {
140        Self {
141            internal_analysis: analysis,
142        }
143    }
144
145    /// Creates a new `Analysis` object from a `Vec<Feature>`.
146    ///
147    /// invariant: `features.len() == NUMBER_FEATURES`
148    ///
149    /// # Errors
150    ///
151    /// This function will return an error if the length of the features is not equal to `NUMBER_FEATURES`.
152    pub fn from_vec(features: Vec<Feature>) -> Result<Self, AnalysisError> {
153        features
154            .try_into()
155            .map_err(|_| AnalysisError::InvalidFeaturesLen)
156            .map(Self::new)
157    }
158
159    /// Return the inner array of the analysis.
160    /// This is mostly useful if you want to store the features somewhere.
161    #[must_use]
162    pub const fn inner(&self) -> &[Feature; NUMBER_FEATURES] {
163        &self.internal_analysis
164    }
165
166    /// Return a `Vec<f32>` representing the analysis' features.
167    ///
168    /// Particularly useful if you want iterate through the values to store
169    /// them somewhere.
170    #[must_use]
171    pub fn as_vec(&self) -> Vec<Feature> {
172        self.internal_analysis.to_vec()
173    }
174
175    /// Create an `Analysis` object from a `ResampledAudio`.
176    /// This is the main function you should use to create an `Analysis` object.
177    /// It will compute all the features from the audio samples.
178    /// You can get a `ResampledAudio` object by using a `Decoder` to decode an audio file.
179    ///
180    /// # Errors
181    ///
182    /// This function will return an error if the samples are empty or too short.
183    /// Or if there is an error during the analysis.
184    ///
185    /// # Panics
186    ///
187    /// This function will panic it cannot join the threads.
188    pub fn from_samples(audio: &ResampledAudio) -> AnalysisResult<Self> {
189        let largest_window = vec![
190            BPMDesc::WINDOW_SIZE,
191            ChromaDesc::WINDOW_SIZE,
192            SpectralDesc::WINDOW_SIZE,
193            LoudnessDesc::WINDOW_SIZE,
194        ]
195        .into_iter()
196        .max()
197        .unwrap();
198        if audio.samples.len() < largest_window {
199            return Err(AnalysisError::EmptySamples);
200        }
201
202        std::thread::scope(|s| -> AnalysisResult<Self> {
203            let child_tempo: std::thread::ScopedJoinHandle<AnalysisResult<Feature>> =
204                s.spawn(|| {
205                    let mut tempo_desc = BPMDesc::new(SAMPLE_RATE)?;
206                    let windows = audio
207                        .samples
208                        .windows(BPMDesc::WINDOW_SIZE)
209                        .step_by(BPMDesc::HOP_SIZE);
210
211                    for window in windows {
212                        tempo_desc.do_(window)?;
213                    }
214                    Ok(tempo_desc.get_value())
215                });
216
217            let child_chroma: std::thread::ScopedJoinHandle<AnalysisResult<Vec<Feature>>> = s
218                .spawn(|| {
219                    let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
220                    chroma_desc.do_(&audio.samples)?;
221                    Ok(chroma_desc.get_value())
222                });
223
224            #[allow(clippy::type_complexity)]
225            let child_timbral: std::thread::ScopedJoinHandle<
226                AnalysisResult<(Vec<Feature>, Vec<Feature>, Vec<Feature>)>,
227            > = s.spawn(|| {
228                let mut spectral_desc = SpectralDesc::new(SAMPLE_RATE)?;
229                let windows = audio
230                    .samples
231                    .windows(SpectralDesc::WINDOW_SIZE)
232                    .step_by(SpectralDesc::HOP_SIZE);
233                for window in windows {
234                    spectral_desc.do_(window)?;
235                }
236                let centroid = spectral_desc.get_centroid();
237                let rolloff = spectral_desc.get_rolloff();
238                let flatness = spectral_desc.get_flatness();
239                Ok((centroid, rolloff, flatness))
240            });
241
242            let child_zcr: std::thread::ScopedJoinHandle<AnalysisResult<Feature>> = s.spawn(|| {
243                let mut zcr_desc = ZeroCrossingRateDesc::default();
244                zcr_desc.do_(&audio.samples);
245                Ok(zcr_desc.get_value())
246            });
247
248            let child_loudness: std::thread::ScopedJoinHandle<AnalysisResult<Vec<Feature>>> = s
249                .spawn(|| {
250                    let mut loudness_desc = LoudnessDesc::default();
251                    let windows = audio.samples.chunks(LoudnessDesc::WINDOW_SIZE);
252
253                    for window in windows {
254                        loudness_desc.do_(window);
255                    }
256                    Ok(loudness_desc.get_value())
257                });
258
259            // Non-streaming approach for that one
260            let tempo = child_tempo.join().unwrap()?;
261            let chroma = child_chroma.join().unwrap()?;
262            let (centroid, rolloff, flatness) = child_timbral.join().unwrap()?;
263            let loudness = child_loudness.join().unwrap()?;
264            let zcr = child_zcr.join().unwrap()?;
265
266            let mut result = vec![tempo, zcr];
267            result.extend_from_slice(&centroid);
268            result.extend_from_slice(&rolloff);
269            result.extend_from_slice(&flatness);
270            result.extend_from_slice(&loudness);
271            result.extend_from_slice(&chroma);
272            let array: [Feature; NUMBER_FEATURES] = result
273                .try_into()
274                .map_err(|_| AnalysisError::InvalidFeaturesLen)?;
275            Ok(Self::new(array))
276        })
277    }
278}