Skip to main content

codec/audio/decode/
mp3.rs

1//! MP3 decoder wrapping the `minimp3` crate (FFI to the MIT-licensed
2//! `minimp3` C library).
3//!
4//! Squad-23 calls this through the [`AudioDecoder`] trait. The minimp3
5//! crate works against an `io::Read` source, so we adapt the
6//! packet-in / frames-out trait surface with an internal byte buffer
7//! the caller appends to with each `decode` call.
8//!
9//! PTS handling
10//! ------------
11//! Each MP3 layer-III frame produces a fixed number of samples per
12//! channel — 1152 for MPEG-1, 576 for MPEG-2/2.5 (see ISO/IEC 11172-3
13//! §2.4.1.5 + ISO/IEC 13818-3 §2.4.1.5). We accumulate the per-channel
14//! sample count and convert to microseconds using the frame's reported
15//! sample rate. The caller-supplied PTS on the first non-empty
//! `decode` call seeds the per-stream clock; each subsequent frame steps
//! it forward by `frame_samples * 1_000_000 / sample_rate` microseconds.
18
19use minimp3::{Decoder as Mp3DecoderInner, Error as Mp3Error, Frame as Mp3Frame};
20
21use crate::audio::{AudioDecoder, AudioError, AudioFrame};
22
/// Maximum number of samples per channel in any MPEG audio layer-III
/// frame (MPEG-1 = 1152; MPEG-2/2.5 frames carry 576, per the module
/// docs above). The decoder compares each frame's per-channel sample
/// count (`data.len() / channels`) against this value and rejects the
/// frame with a decode error when it is zero or exceeds the bound.
const MP3_FRAME_SAMPLES_MAX_PER_CHANNEL: usize = 1152;
27
/// Growable in-memory byte source handed to minimp3 as its reader.
///
/// New bytes can be appended between `next_frame` calls while the
/// read position into the already-buffered data is preserved.
struct ByteCursor {
    /// Buffered bytes; everything before `pos` has already been read.
    inner: Vec<u8>,
    /// Index of the next unread byte in `inner`.
    pos: usize,
}

impl ByteCursor {
    /// Creates an empty cursor with nothing buffered.
    fn new() -> Self {
        Self {
            inner: Vec::new(),
            pos: 0,
        }
    }

    /// Appends `bytes` to the unread tail of the buffer.
    ///
    /// Before appending, the already-read prefix is discarded once it
    /// makes up at least half of the buffer, so memory stays bounded
    /// on arbitrarily long streams.
    fn extend(&mut self, bytes: &[u8]) {
        let consumed = self.pos;
        let worth_compacting = consumed > 0 && consumed >= self.inner.len() / 2;
        if worth_compacting {
            self.inner.drain(..consumed);
            self.pos = 0;
        }
        self.inner.extend_from_slice(bytes);
    }
}

impl std::io::Read for ByteCursor {
    /// Copies as many unread bytes as fit into `buf` and advances the
    /// read position past them.
    ///
    /// Returns `Ok(0)` when nothing is buffered (or `buf` is empty).
    /// minimp3 interprets a 0-byte read as EOF; because the cursor
    /// keeps its position, a later `extend` makes reads productive
    /// again on the next decode attempt.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let unread = self.inner.get(self.pos..).unwrap_or_default();
        let n = unread.len().min(buf.len());
        buf[..n].copy_from_slice(&unread[..n]);
        self.pos += n;
        Ok(n)
    }
}
70
71pub struct Mp3Decoder {
72    inner: Mp3DecoderInner<ByteCursor>,
73    /// Caller-declared input sample rate from container metadata.
74    /// Used as a fallback if a frame doesn't carry usable sample-rate
75    /// info (shouldn't happen with valid MP3 but defensively kept).
76    declared_sample_rate: u32,
77    /// Caller-declared channel count from container metadata. Used by
78    /// the constructor's sanity check + retained for diagnostic use
79    /// when a future revision wants to cross-check per-frame channels.
80    #[allow(dead_code)]
81    declared_channels: u8,
82    /// Running PTS in microseconds. Set on first `decode` call from
83    /// the caller-supplied PTS, then advanced internally per frame.
84    next_pts_us: Option<i64>,
85}
86
87impl Mp3Decoder {
88    pub fn new(sample_rate: u32, channels: u8) -> Result<Self, AudioError> {
89        if channels == 0 || channels > 2 {
90            return Err(AudioError::Unsupported(format!(
91                "mp3 channel count {channels}"
92            )));
93        }
94        Ok(Self {
95            inner: Mp3DecoderInner::new(ByteCursor::new()),
96            declared_sample_rate: sample_rate.max(1),
97            declared_channels: channels,
98            next_pts_us: None,
99        })
100    }
101
102    /// Convert i16 PCM (interleaved) to f32 in [-1.0, 1.0]. The
103    /// divisor is 32768 (not 32767) per the conventional asymmetric
104    /// mapping — a peak negative i16 of -32768 maps to exactly -1.0.
105    fn convert_i16_to_f32(samples: &[i16]) -> Vec<f32> {
106        samples.iter().map(|s| (*s as f32) / 32768.0).collect()
107    }
108
109    /// Pull as many frames as possible from minimp3's internal state
110    /// without blocking — i.e. without expecting any new bytes to
111    /// arrive. Stops when the decoder reports `InsufficientData` or
112    /// `Eof`. `SkippedData` (ID3 tags / sync errors) is silently
113    /// retried since minimp3 advances past the bad bytes internally.
114    fn drain_frames(&mut self, seed_pts_us: Option<i64>) -> Result<Vec<AudioFrame>, AudioError> {
115        if let Some(pts) = seed_pts_us
116            && self.next_pts_us.is_none()
117        {
118            self.next_pts_us = Some(pts);
119        }
120
121        let mut out = Vec::new();
122        loop {
123            match self.inner.next_frame() {
124                Ok(Mp3Frame {
125                    data,
126                    sample_rate,
127                    channels,
128                    ..
129                }) => {
130                    if channels == 0 || channels > 2 {
131                        return Err(AudioError::Unsupported(format!(
132                            "mp3 frame channel count {channels}"
133                        )));
134                    }
135                    let sample_rate_u32 = if sample_rate > 0 {
136                        sample_rate as u32
137                    } else {
138                        self.declared_sample_rate
139                    };
140                    let channels_u8 = channels as u8;
141
142                    let frames_per_channel = data.len() / channels;
143                    if frames_per_channel == 0
144                        || frames_per_channel > MP3_FRAME_SAMPLES_MAX_PER_CHANNEL
145                    {
146                        return Err(AudioError::Decode(format!(
147                            "mp3 frame produced {frames_per_channel} samples per channel — outside MPEG layer III bounds"
148                        )));
149                    }
150
151                    let pts_us = self.next_pts_us.or(seed_pts_us).unwrap_or(0);
152                    let frame_us = (frames_per_channel as i64 * 1_000_000) / sample_rate_u32 as i64;
153                    self.next_pts_us = Some(pts_us + frame_us);
154
155                    out.push(AudioFrame {
156                        samples: Self::convert_i16_to_f32(&data),
157                        sample_rate: sample_rate_u32,
158                        channels: channels_u8,
159                        pts: pts_us,
160                    });
161                }
162                Err(Mp3Error::InsufficientData) | Err(Mp3Error::Eof) => break,
163                Err(Mp3Error::SkippedData) => {
164                    // minimp3 already advanced past the malformed bytes;
165                    // retry the loop to see if the next sync word
166                    // produces a frame.
167                    continue;
168                }
169                Err(Mp3Error::Io(e)) => {
170                    return Err(AudioError::Decode(format!("mp3 io: {e}")));
171                }
172            }
173        }
174        Ok(out)
175    }
176}
177
178impl AudioDecoder for Mp3Decoder {
179    fn decode(&mut self, packet: &[u8], pts: i64) -> Result<Vec<AudioFrame>, AudioError> {
180        if !packet.is_empty() {
181            self.inner.reader_mut().extend(packet);
182        }
183        self.drain_frames(Some(pts))
184    }
185
186    fn flush(&mut self) -> Result<Vec<AudioFrame>, AudioError> {
187        // No more bytes will arrive; let the loop drain whatever
188        // minimp3 still has internally.
189        self.drain_frames(None)
190    }
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal hand-assembled byte fixture: an empty ID3v2.3 tag header
    /// (10 bytes) followed by a bare MPEG-1 Layer III frame header
    /// (4 bytes) with no frame body. It is far too short to decode to
    /// audio; it only exercises the "tag prefix + truncated frame must
    /// not error" path.
    ///
    /// Real decode coverage comes from an MP3 under `test_media/` (see
    /// `find_test_mp3`). The original notes record these commands for
    /// producing a short silent sample (not re-verified here):
    ///
    /// ```text
    /// sox -n -t raw -r 44100 -c 2 -b 16 -e signed silence.raw trim 0 0.05
    /// lame -r -s 44100 --bitwidth 16 --signed --little-endian silence.raw out.mp3
    /// ```
    ///
    /// Squad-24 note: we don't ship LAME or a Rust MP3 encoder in the
    /// dependency set, so a fully decodable fixture cannot be generated
    /// at test time.
    const MP3_SILENCE_FIXTURE: &[u8] = &[
        // ID3v2 header: "ID3" + version 3 + flags 0 + size 0
        0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // Frame header only: 0xFF 0xFB 0x90 0x64 — MPEG-1 Layer III,
        // 128 kbps, 44.1 kHz, joint stereo, no padding. A complete frame
        // would be 144 * 128000 / 44100 = 417 bytes (418 with padding);
        // the body is intentionally absent.
        0xFF, 0xFB, 0x90, 0x64,
    ];

    /// Check whether `test_media/` contains an MP3 sample we can use
    /// for integration decoding. Returns the first candidate path that
    /// exists, if any.
    fn find_test_mp3() -> Option<std::path::PathBuf> {
        [
            "test_media/sample.mp3",
            "test_media/silence.mp3",
            "../../test_media/sample.mp3",
            "../../../test_media/sample.mp3",
        ]
        .into_iter()
        .map(std::path::PathBuf::from)
        .find(|p| p.exists())
    }

    #[test]
    fn mp3_decoder_constructs_for_stereo_44100() {
        let dec = Mp3Decoder::new(44100, 2).expect("constructs");
        assert_eq!(dec.declared_sample_rate, 44100);
        assert_eq!(dec.declared_channels, 2);
        assert!(dec.next_pts_us.is_none());
    }

    #[test]
    fn mp3_decoder_rejects_zero_or_too_many_channels() {
        assert!(Mp3Decoder::new(44100, 0).is_err());
        assert!(Mp3Decoder::new(44100, 6).is_err());
    }

    #[test]
    fn mp3_i16_to_f32_uses_asymmetric_32768_scale() {
        // All expected values are exactly representable in f32.
        let converted = Mp3Decoder::convert_i16_to_f32(&[-32768, 0, 16384, -16384]);
        assert_eq!(converted, vec![-1.0, 0.0, 0.5, -0.5]);
        assert!(Mp3Decoder::convert_i16_to_f32(&[]).is_empty());
    }

    #[test]
    fn byte_cursor_resumes_reading_after_append() {
        use std::io::Read;

        let mut cur = ByteCursor::new();
        let mut buf = [0u8; 4];
        assert_eq!(cur.read(&mut buf).expect("read"), 0, "empty cursor reads 0");

        cur.extend(&[1, 2, 3, 4, 5, 6]);
        assert_eq!(cur.read(&mut buf).expect("read"), 4);
        assert_eq!(buf, [1, 2, 3, 4]);

        // Appending after a partial read must not lose the unread tail.
        cur.extend(&[7, 8]);
        assert_eq!(cur.read(&mut buf).expect("read"), 4);
        assert_eq!(buf, [5, 6, 7, 8]);
        assert_eq!(cur.read(&mut buf).expect("read"), 0);
    }

    #[test]
    fn mp3_decode_handles_garbage_input_gracefully() {
        // Garbage bytes — no valid sync words — should not crash or
        // error; minimp3 silently skips them and we return 0 frames.
        let mut dec = Mp3Decoder::new(44100, 2).expect("constructs");
        let garbage = vec![0u8; 4096];
        let frames = dec.decode(&garbage, 0).expect("no error on garbage");
        assert!(
            frames.is_empty(),
            "no valid MP3 frames should decode from zeros"
        );
    }

    #[test]
    fn mp3_decode_returns_empty_on_empty_packet() {
        let mut dec = Mp3Decoder::new(44100, 2).expect("constructs");
        let frames = dec.decode(&[], 12345).expect("no error on empty");
        assert!(frames.is_empty());
    }

    #[test]
    fn mp3_pts_seeded_on_first_nonempty_decode() {
        let mut dec = Mp3Decoder::new(44100, 2).expect("constructs");
        // Even without valid frames decoded, the clock is seeded by the
        // first non-empty packet's PTS. No frame is produced from the
        // garbage, so the clock must still sit exactly at the seed.
        let frames = dec.decode(&[0u8; 1024], 42_000).expect("no error");
        assert!(frames.is_empty());
        assert_eq!(dec.next_pts_us, Some(42_000));

        // A later packet's PTS must not re-seed the clock.
        let frames = dec.decode(&[0u8; 1024], 99_000).expect("no error");
        assert!(frames.is_empty());
        assert_eq!(dec.next_pts_us, Some(42_000));
    }

    #[test]
    fn mp3_integration_decodes_real_mp3_if_fixture_present() {
        // Gracefully skips if test_media isn't available (CI without
        // media mount, fresh checkout). The hermetic tests above
        // cover the error paths + constructor; this test covers the
        // actual decode pipeline end-to-end.
        let Some(path) = find_test_mp3() else {
            eprintln!("mp3_integration: test_media sample.mp3 absent — skipping");
            return;
        };
        let bytes = std::fs::read(&path).expect("read sample.mp3");
        let mut dec = Mp3Decoder::new(44100, 2).expect("constructs");
        let frames = dec.decode(&bytes, 0).expect("decode real mp3");
        assert!(
            !frames.is_empty(),
            "real mp3 fixture should yield >0 frames"
        );
        let f = &frames[0];
        // MPEG-1 Layer III = 1152 samples per channel, MPEG-2 = 576.
        let per_channel = f.samples.len() / f.channels as usize;
        assert!(
            per_channel == 1152 || per_channel == 576,
            "unexpected mp3 frame size {per_channel} samples/channel"
        );
        assert!(matches!(f.channels, 1 | 2));
        assert!(f.sample_rate > 0);
        assert_eq!(f.pts, 0, "first frame seeds at caller-supplied pts");
        // PTS monotonicity across every adjacent pair of frames.
        for pair in frames.windows(2) {
            assert!(pair[1].pts > pair[0].pts, "pts must strictly increase");
        }
        for frame in &frames {
            for s in &frame.samples {
                assert!(
                    (-1.0..=1.0).contains(s),
                    "sample {s} out of [-1, 1] after i16→f32 divide by 32768"
                );
            }
        }
    }

    /// Smoke test: the static fixture bytes are an ID3 stub plus a
    /// frame header with no body; decoding them must not error. The
    /// number of frames returned is deliberately not asserted.
    #[test]
    fn mp3_decode_handles_id3_prefix_without_error() {
        let mut dec = Mp3Decoder::new(44100, 2).expect("constructs");
        let _ = dec.decode(MP3_SILENCE_FIXTURE, 0).expect("no error");
    }
}