mecomp_analysis/decoder/
mecomp.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
//! Implementation of the mecomp decoder, which is rodio/rubato based.

use std::{fs::File, io::BufReader};

use rodio::Source;
use rubato::{FastFixedIn, PolynomialDegree, Resampler};

use crate::{errors::AnalysisError, errors::AnalysisResult, ResampledAudio, SAMPLE_RATE};

use super::Decoder;

#[allow(clippy::module_name_repetitions)]
pub struct MecompDecoder();

impl Decoder for MecompDecoder {
    /// A function that should decode and resample a song, optionally
    /// extracting the song's metadata such as the artist, the album, etc.
    ///
    /// The output sample array should be resampled to f32le, one channel, with a sampling rate
    /// of 22050 Hz. Anything other than that will yield wrong results.
    fn decode(path: &std::path::Path) -> AnalysisResult<ResampledAudio> {
        let file = BufReader::new(File::open(path)?);
        let source = rodio::Decoder::new(file)?.convert_samples::<f32>();

        // we need to collapse the audio source into one channel
        // channels are interleaved, so if we have 2 channels, `[1, 2, 3, 4]` and `[5, 6, 7, 8]`,
        // they will be stored as `[1, 5, 2, 6, 3, 7, 4, 8]`
        //
        // we can make this mono by averaging the channels
        //
        // TODO: Figure out how ffmpeg does it, and do it the same way
        let num_channels = source.channels() as usize;
        let sample_rate = source.sample_rate();
        let Some(total_duration) = source.total_duration() else {
            return Err(AnalysisError::InfiniteAudioSource);
        };
        let mut mono_sample_array = if num_channels == 1 {
            source.into_iter().collect()
        } else {
            source.into_iter().enumerate().fold(
                // pre-allocate the right capacity
                #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
                Vec::with_capacity((total_duration.as_secs() as usize + 1) * sample_rate as usize),
                // collapse the channels into one channel
                |mut acc, (i, sample)| {
                    let channel = i % num_channels;
                    #[allow(clippy::cast_precision_loss)]
                    if channel == 0 {
                        acc.push(sample / num_channels as f32);
                    } else {
                        let last_index = acc.len() - 1;
                        acc[last_index] = sample.mul_add(1. / num_channels as f32, acc[last_index]);
                    }
                    acc
                },
            )
        };

        // then we need to resample the audio source into 22050 Hz
        let resampled_array = if sample_rate == SAMPLE_RATE {
            mono_sample_array.shrink_to_fit();
            mono_sample_array
        } else {
            let mut resampler = FastFixedIn::new(
                f64::from(SAMPLE_RATE) / f64::from(sample_rate),
                1.0,
                PolynomialDegree::Cubic,
                mono_sample_array.len(),
                1,
            )?;
            resampler.process(&[&mono_sample_array], None)?[0].clone()
        };

        Ok(ResampledAudio {
            path: path.to_owned(),
            samples: resampled_array,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::{Decoder as DecoderTrait, MecompDecoder as Decoder};
    use adler32::RollingAdler32;
    use pretty_assertions::assert_eq;
    use rstest::rstest;
    use std::path::Path;

    fn _test_decode(path: &Path, expected_hash: u32) {
        let song = Decoder::decode(path).unwrap();
        let mut hasher = RollingAdler32::new();
        for sample in &song.samples {
            hasher.update_buffer(&sample.to_le_bytes());
        }

        assert_eq!(expected_hash, hasher.hash());
    }

    // expected hash Obtained through
    // ffmpeg -i data/s16_stereo_22_5kHz.flac -ar 22050 -ac 1 -c:a pcm_f32le -f hash -hash adler32 -
    #[rstest]
    #[ignore = "fails when asked to convert stereo to mono, ig ffmpeg does it differently, but I'm not sure what the difference actually is"]
    #[case::resample_multi(Path::new("data/s32_stereo_44_1_kHz.flac"), 0xbbcb_a1cf)]
    #[ignore = "fails when asked to convert stereo to mono, ig ffmpeg does it differently, but I'm not sure what the difference actually is"]
    #[case::resample_stereo(Path::new("data/s16_stereo_22_5kHz.flac"), 0x1d7b_2d6d)]
    #[case::decode_mono(Path::new("data/s16_mono_22_5kHz.flac"), 0x5e01_930b)]
    #[ignore = "fails when asked to convert stereo to mono, ig ffmpeg does it differently, but I'm not sure what the difference actually is"]
    #[case::decode_mp3(Path::new("data/s32_stereo_44_1_kHz.mp3"), 0x69ca_6906)]
    #[case::decode_wav(Path::new("data/piano.wav"), 0xde83_1e82)]
    fn test_decode(#[case] path: &Path, #[case] expected_hash: u32) {
        _test_decode(path, expected_hash);
    }

    #[test]
    fn test_dont_panic_no_channel_layout() {
        let path = Path::new("data/no_channel.wav");
        Decoder::decode(path).unwrap();
    }

    #[test]
    fn test_decode_right_capacity_vec() {
        let path = Path::new("data/s16_mono_22_5kHz.flac");
        let song = Decoder::decode(path).unwrap();
        let sample_array = song.samples;
        assert_eq!(
            sample_array.len(), // + SAMPLE_RATE as usize, // The + SAMPLE_RATE is because bliss-rs would add an extra second as a buffer, we don't need to because we know the exact length of the song
            sample_array.capacity()
        );

        let path = Path::new("data/s32_stereo_44_1_kHz.flac");
        let song = Decoder::decode(path).unwrap();
        let sample_array = song.samples;
        assert_eq!(
            sample_array.len(), // + SAMPLE_RATE as usize,
            sample_array.capacity()
        );

        // NOTE: originally used the .ogg file, but it was failing to decode with `DecodeError(IoError("end of stream"))`
        let path = Path::new("data/capacity_fix.wav");
        let song = Decoder::decode(path).unwrap();
        let sample_array = song.samples;
        assert_eq!(
            sample_array.len(), // + SAMPLE_RATE as usize,
            sample_array.capacity()
        );
    }
}