mpeg_audio_header/
lib.rs

1// SPDX-FileCopyrightText: The mpeg-audio-header authors
2// SPDX-License-Identifier: MPL-2.0
3
4//! mpeg-audio-header
5//!
6//! Parse metadata of an MPEG audio stream from VBR (XING/VBRI) and MPEG frame headers.
7
8// rustflags
9#![warn(rust_2018_idioms)]
10#![warn(rust_2021_compatibility)]
11#![warn(missing_debug_implementations)]
12#![warn(unreachable_pub)]
13#![warn(unsafe_code)]
14#![warn(clippy::pedantic)]
15#![allow(clippy::module_name_repetitions)]
16#![allow(clippy::cast_possible_truncation)] // TODO: Revisit all occurrences
17#![warn(rustdoc::broken_intra_doc_links)]
18#![cfg_attr(not(test), deny(clippy::panic_in_result_fn))]
19#![cfg_attr(not(debug_assertions), deny(clippy::used_underscore_binding))]
20
21use std::{
22    fs::File,
23    io::{BufReader, Read},
24    path::Path,
25    time::Duration,
26};
27
28mod error;
29mod frame;
30mod reader;
31
32pub use self::frame::{Layer, Mode, Version};
33
34use self::frame::{FrameHeader, XING_HEADER_MIN_SIZE, XING_VBRI_HEADER_MIN_SIZE};
35
36use self::reader::Reader;
37
38pub use self::{
39    error::{Error, PositionalError},
40    reader::ReadPosition,
41};
42
43/// Result type for [`PositionalError`]
44pub type PositionalResult<T> = std::result::Result<T, PositionalError>;
45
46#[derive(Debug, Clone)]
47/// Properties of an MPEG audio stream
48///
49/// A virtual MPEG audio header, built from both the XING header and
50/// optionally aggregated from all valid MPEG frame headers.
51pub struct Header {
52    /// Source of the metadata in this header
53    pub source: HeaderSource,
54
55    /// MPEG version
56    ///
57    /// The common MPEG version in all frames or `None` if either unknown or inconsistent.
58    pub version: Option<Version>,
59
60    /// MPEG layer
61    ///
62    /// The common MPEG layer in all frames or `None` if either unknown or inconsistent.
63    pub layer: Option<Layer>,
64
65    /// MPEG mode
66    ///
67    /// The common MPEG mode in all frames or `None` if either unknown or inconsistent.
68    pub mode: Option<Mode>,
69
70    /// Minimum number of channels
71    pub min_channel_count: u8,
72
73    /// Maximum number of channels
74    pub max_channel_count: u8,
75
76    /// Minimum sample rate in Hz
77    pub min_sample_rate_hz: u16,
78
79    /// Maximum sample rate in Hz
80    pub max_sample_rate_hz: u16,
81
82    /// Total number of samples per channel
83    pub total_sample_count: u64,
84
85    /// Total duration
86    pub total_duration: Duration,
87
88    /// Average sample rate in Hz
89    pub avg_sample_rate_hz: Option<u16>,
90
91    /// Average bitrate in bits/sec
92    pub avg_bitrate_bps: Option<u32>,
93}
94
/// Parse mode
///
/// Selects which sources of metadata the parser takes into account.
#[derive(Clone, Copy, Debug)]
pub enum ParseMode {
    /// Prefer the first VBR header
    ///
    /// When a valid XING/VBRI header is found, its metadata is returned
    /// right away and reading stops. If no such header exists, reading
    /// continues and the metadata is aggregated from all MPEG audio frames.
    ///
    /// This is the faster option. The results may be less accurate when
    /// the VBR header disagrees with the actual MPEG audio frames.
    PreferVbrHeaders,

    /// Skip and ignore all VBR headers
    ///
    /// XING/VBRI headers are skipped and the metadata is always aggregated
    /// from all MPEG audio frames.
    ///
    /// This is the slower option. The results may be more accurate,
    /// depending on how and when the redundant information in the VBR
    /// headers has been calculated.
    IgnoreVbrHeaders,
}
121
/// Source of the parsed metadata
///
/// Tells which part of the stream the values of a header were taken from.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HeaderSource {
    /// Metadata taken from a XING header
    XingHeader,

    /// Metadata taken from a VBRI header
    VbriHeader,

    /// Metadata aggregated from the MPEG audio frame headers
    MpegFrameHeaders,
}
134
/// Number of nanoseconds per second, used when converting sample counts
/// and sample rates into [`Duration`] values.
const NANOS_PER_SECOND: u32 = 1_000_000_000;
136
137impl Header {
138    /// Read from a `source` that implements `Read`
139    ///
140    /// # Errors
141    ///
142    /// Returns a [`PositionalError`] on any kind of failure.
143    ///
144    /// # Examples
145    ///
146    /// ```no_run
147    /// use std::{path::Path, fs::File, io::BufReader};
148    /// use mpeg_audio_header::{Header, ParseMode};
149    ///
150    /// let path = Path::new("test/source.mp3");
151    /// let file = File::open(path).unwrap();
152    /// let mut source = BufReader::new(file);
153    /// let header = Header::read_from_source(&mut source, ParseMode::IgnoreVbrHeaders).unwrap();
154    /// println!("MPEG audio header: {:?}", header);
155    /// ```
156    #[allow(clippy::too_many_lines)]
157    pub fn read_from_source(
158        source: &mut impl Read,
159        parse_mode: ParseMode,
160    ) -> PositionalResult<Self> {
161        let mut reader = Reader::new(source);
162
163        let mut version = None;
164        let mut version_consistent = true;
165
166        let mut layer = None;
167        let mut layer_consistent = true;
168
169        let mut mode = None;
170        let mut mode_consistent = true;
171
172        let mut min_channel_count = 0;
173        let mut max_channel_count = 0;
174
175        let mut sum_sample_count = 0u64;
176
177        let mut min_sample_rate_hz = 0;
178        let mut max_sample_rate_hz = 0;
179        let mut accmul_sample_rate_hz = 0u64;
180
181        let mut min_bitrate_bps = 0;
182        let mut max_bitrate_bps = 0;
183        let mut accmul_bitrate_bps = 0u64;
184
185        loop {
186            let next_read_res = match FrameHeader::try_read(&mut reader) {
187                Ok(res) => res,
188                Err(err) => {
189                    if err.is_unexpected_eof() && sum_sample_count > 0 {
190                        // Silently ignore all unrecognized data after at least one
191                        // non-empty MPEG frame has been parsed.
192                        break;
193                    }
194                    return Err(err);
195                }
196            };
197            match next_read_res {
198                Ok(Some(frame_header)) => {
199                    // MPEG frame
200                    let mut num_bytes_consumed = u32::from(frame::FRAME_HEADER_SIZE);
201                    if !reader
202                        .try_skip_exact_until_eof(u64::from(frame_header.side_information_size()))?
203                    {
204                        break;
205                    }
206                    num_bytes_consumed += u32::from(frame_header.side_information_size());
207                    if !frame_header.check_payload_size(num_bytes_consumed as u16) {
208                        return Err(reader.positional_error(Error::FrameError(
209                            "invalid payload size".to_string(),
210                        )));
211                    }
212
213                    let mut is_audio_frame = true;
214
215                    // XING header frames may only appear at the start of the file before
216                    // the first MPEG frame with audio data.
217                    if sum_sample_count == 0
218                        && frame_header.check_payload_size(
219                            num_bytes_consumed as u16 + u16::from(XING_HEADER_MIN_SIZE),
220                        )
221                    {
222                        let mut xing_header = [0; XING_HEADER_MIN_SIZE as usize];
223                        if !reader.try_read_exact_until_eof(&mut xing_header)? {
224                            break;
225                        }
226                        num_bytes_consumed += u32::from(XING_HEADER_MIN_SIZE);
227
228                        let mut vbr_total_frames: Option<(HeaderSource, u32)> = None;
229                        match &xing_header[..4] {
230                            // XING header starts with either "Xing" or "Info"
231                            // https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
232                            b"Xing" | b"Info" => {
233                                // No audio data in these special frames!
234                                is_audio_frame = false;
235
236                                // The XING header must precede all MPEG frames
237                                debug_assert!(version.is_none());
238                                debug_assert!(layer.is_none());
239                                debug_assert!(mode.is_none());
240
241                                if xing_header[7] & 0b0001 != 0 {
242                                    // 4 Bytes
243                                    let mut total_frames_bytes = [0; 4];
244                                    if !reader.try_read_exact_until_eof(&mut total_frames_bytes)? {
245                                        break;
246                                    }
247                                    let total_frames = u32::from_be_bytes(total_frames_bytes);
248                                    if total_frames > 0 {
249                                        vbr_total_frames =
250                                            Some((HeaderSource::XingHeader, total_frames));
251                                    }
252                                }
253                                let mut skip_size = 0u32;
254                                if xing_header[7] & 0b0010 != 0 {
255                                    // Size
256                                    skip_size += 4;
257                                }
258                                if xing_header[7] & 0b0100 != 0 {
259                                    // TOC
260                                    skip_size += 100;
261                                }
262                                if xing_header[7] & 0b1000 != 0 {
263                                    // Audio quality
264                                    skip_size += 4;
265                                }
266                                if !reader.try_skip_exact_until_eof(u64::from(skip_size))? {
267                                    break;
268                                }
269                                // Finally finish this frame by pretending that we have consumed all bytes
270                                num_bytes_consumed = frame_header
271                                    .frame_size
272                                    .map_or(num_bytes_consumed, Into::into);
273                            }
274                            // https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#VBRIHeader
275                            b"VBRI"
276                                if frame_header.check_payload_size(
277                                    num_bytes_consumed as u16
278                                        + u16::from(XING_VBRI_HEADER_MIN_SIZE),
279                                ) =>
280                            {
281                                // No audio data in these special frames!
282                                is_audio_frame = false;
283
284                                // We only read total_frames and skip the rest. The words containing version (2 bytes)
285                                // and delay (2 bytes) have already been read into the XING header:
286                                // | 4 ("VBRI") + 2 (version) + 2 (delay) + 2 (quality) + 4 (size/bytes) + 4 (total_frames) + ...
287                                // |<-         XING Header              ->|<-                 XING/VBRI Header...
288                                let mut xing_vbri_header = [0; XING_VBRI_HEADER_MIN_SIZE as usize];
289                                if !reader.try_read_exact_until_eof(&mut xing_vbri_header)? {
290                                    break;
291                                }
292
293                                let total_frames = u32::from_be_bytes(
294                                    xing_vbri_header[6..10].try_into().expect("4 bytes"),
295                                );
296                                if total_frames > 0 {
297                                    vbr_total_frames =
298                                        Some((HeaderSource::VbriHeader, total_frames));
299                                }
300
301                                let toc_entries_count = u16::from_be_bytes(
302                                    xing_vbri_header[12..14].try_into().expect("2 bytes"),
303                                );
304
305                                let toc_entry_size = u16::from_be_bytes(
306                                    xing_vbri_header[16..18].try_into().expect("2 bytes"),
307                                );
308
309                                // Skip all trailing TOC entries
310                                let toc_size =
311                                    u32::from(toc_entries_count) * u32::from(toc_entry_size);
312                                if !reader.try_skip_exact_until_eof(u64::from(toc_size))? {
313                                    break;
314                                }
315
316                                // Finally finish this frame by pretending that we have consumed all bytes
317                                num_bytes_consumed = frame_header
318                                    .frame_size
319                                    .map_or(num_bytes_consumed, Into::into);
320                            }
321                            _ => {
322                                // Ordinary audio frame
323                                debug_assert!(is_audio_frame);
324                            }
325                        }
326                        if let Some((source, total_frames)) = vbr_total_frames {
327                            let total_sample_count =
328                                u64::from(total_frames) * u64::from(frame_header.sample_count);
329                            let seconds =
330                                total_sample_count / u64::from(frame_header.sample_rate_hz);
331                            let nanoseconds = (total_sample_count * u64::from(NANOS_PER_SECOND))
332                                / u64::from(frame_header.sample_rate_hz)
333                                - u64::from(NANOS_PER_SECOND) * seconds;
334                            debug_assert!(nanoseconds < NANOS_PER_SECOND.into());
335                            let total_duration = Duration::new(seconds, nanoseconds as u32);
336                            match parse_mode {
337                                ParseMode::PreferVbrHeaders => {
338                                    return Ok(Self {
339                                        source,
340                                        version: Some(frame_header.version),
341                                        layer: Some(frame_header.layer),
342                                        mode: Some(frame_header.mode),
343                                        min_channel_count: frame_header.channel_count(),
344                                        max_channel_count: frame_header.channel_count(),
345                                        min_sample_rate_hz: frame_header.sample_rate_hz,
346                                        max_sample_rate_hz: frame_header.sample_rate_hz,
347                                        total_sample_count,
348                                        total_duration,
349                                        avg_sample_rate_hz: Some(frame_header.sample_rate_hz),
350                                        avg_bitrate_bps: frame_header.bitrate_bps,
351                                    });
352                                }
353                                ParseMode::IgnoreVbrHeaders => {
354                                    // Just skip the VBR headers
355                                }
356                            }
357                        }
358                    }
359                    if let Some(frame_size) = frame_header.frame_size {
360                        debug_assert!(u32::from(frame_size) >= num_bytes_consumed);
361                        if !reader.try_skip_exact_until_eof(u64::from(
362                            u32::from(frame_size) - num_bytes_consumed,
363                        ))? {
364                            break;
365                        }
366                    }
367
368                    if is_audio_frame {
369                        if version_consistent {
370                            if let Some(some_version) = version {
371                                version_consistent = some_version == frame_header.version;
372                                if !version_consistent {
373                                    version = None;
374                                }
375                            } else {
376                                version = Some(frame_header.version);
377                            }
378                        }
379
380                        if !layer_consistent {
381                            if let Some(some_layer) = layer {
382                                layer_consistent = some_layer == frame_header.layer;
383                                if !layer_consistent {
384                                    layer = None;
385                                }
386                            } else {
387                                layer = Some(frame_header.layer);
388                            }
389                        }
390
391                        if mode_consistent {
392                            if let Some(some_mode) = mode {
393                                mode_consistent = some_mode == frame_header.mode;
394                                if !mode_consistent {
395                                    mode = None;
396                                }
397                            } else {
398                                mode = Some(frame_header.mode);
399                            }
400                        }
401
402                        let frame_samples = u64::from(frame_header.sample_count);
403                        debug_assert!(frame_samples > 0);
404                        sum_sample_count += frame_samples;
405
406                        let channel_count = frame_header.channel_count();
407                        debug_assert!(channel_count > 0);
408                        if min_channel_count == 0 {
409                            min_channel_count = channel_count;
410                        } else {
411                            min_channel_count = min_channel_count.min(channel_count);
412                        }
413                        if max_channel_count == 0 {
414                            max_channel_count = channel_count;
415                        } else {
416                            max_channel_count = max_channel_count.max(channel_count);
417                        }
418
419                        // Free bitrate = 0 bps
420                        if let Some(bitrate_bps) = frame_header.bitrate_bps {
421                            if min_bitrate_bps == 0 {
422                                min_bitrate_bps = bitrate_bps;
423                            } else {
424                                min_bitrate_bps = min_bitrate_bps.min(bitrate_bps);
425                            }
426                            if max_bitrate_bps == 0 {
427                                max_bitrate_bps = bitrate_bps;
428                            } else {
429                                max_bitrate_bps = max_bitrate_bps.max(bitrate_bps);
430                            }
431                            accmul_bitrate_bps += u64::from(bitrate_bps) * frame_samples;
432                        }
433
434                        debug_assert!(frame_header.sample_rate_hz > 0);
435                        if min_sample_rate_hz == 0 {
436                            min_sample_rate_hz = frame_header.sample_rate_hz;
437                        } else {
438                            min_sample_rate_hz =
439                                min_sample_rate_hz.min(frame_header.sample_rate_hz);
440                        }
441                        if max_sample_rate_hz == 0 {
442                            max_sample_rate_hz = frame_header.sample_rate_hz;
443                        } else {
444                            max_sample_rate_hz =
445                                max_sample_rate_hz.max(frame_header.sample_rate_hz);
446                        }
447                        accmul_sample_rate_hz +=
448                            u64::from(frame_header.sample_rate_hz) * frame_samples;
449
450                        let frame_duration_nanos: u64 = (frame_samples
451                            * u64::from(NANOS_PER_SECOND))
452                            / u64::from(frame_header.sample_rate_hz);
453                        debug_assert!(frame_duration_nanos < NANOS_PER_SECOND.into());
454                        reader.add_duration(Duration::new(0, frame_duration_nanos as u32));
455                    }
456                }
457                Ok(None) => break,
458                Err((frame_header_bytes, header_err)) => {
459                    if frame::skip_metadata(&mut reader, frame_header_bytes)? {
460                        if sum_sample_count > 0 {
461                            // No more MPEG frames after a trailing metadata frame expected
462                            break;
463                        }
464                    } else {
465                        return Err(header_err);
466                    }
467                }
468            }
469        }
470
471        let total_sample_count = sum_sample_count;
472        let total_duration = reader.position().duration;
473
474        let avg_sample_rate_hz = if total_sample_count > 0 {
475            let avg_sample_rate_hz = accmul_sample_rate_hz / total_sample_count;
476            debug_assert!(avg_sample_rate_hz <= u16::MAX.into());
477            Some(avg_sample_rate_hz as u16)
478        } else {
479            None
480        };
481
482        let avg_bitrate_bps = if total_sample_count > 0 {
483            let avg_bitrate_bps = accmul_bitrate_bps / total_sample_count;
484            debug_assert!(avg_bitrate_bps <= u32::MAX.into());
485            Some(avg_bitrate_bps as u32)
486        } else {
487            None
488        };
489
490        Ok(Self {
491            source: HeaderSource::MpegFrameHeaders,
492            version,
493            layer,
494            mode,
495            min_channel_count,
496            max_channel_count,
497            min_sample_rate_hz,
498            max_sample_rate_hz,
499            total_sample_count,
500            total_duration,
501            avg_sample_rate_hz,
502            avg_bitrate_bps,
503        })
504    }
505
506    /// Read from a file
507    ///
508    /// # Errors
509    ///
510    /// Returns a [`PositionalError`] on any kind of failure.
511    ///
512    /// # Examples
513    ///
514    /// ```no_run
515    /// use std::{path::Path, fs::File};
516    /// use mpeg_audio_header::{Header, ParseMode};
517    ///
518    /// let path = Path::new("test/source.mp3");
519    /// let file = File::open(path).unwrap();
520    /// let header = Header::read_from_file(&file, ParseMode::PreferVbrHeaders).unwrap();
521    /// println!("MPEG audio header: {:?}", header);
522    /// ```
523    pub fn read_from_file(file: &File, parse_mode: ParseMode) -> PositionalResult<Self> {
524        let mut source = BufReader::new(file);
525        Self::read_from_source(&mut source, parse_mode)
526    }
527
528    /// Read from a file path
529    ///
530    /// # Errors
531    ///
532    /// Returns a [`PositionalError`] on any kind of failure.
533    ///
534    /// # Examples
535    ///
536    /// ```no_run
537    /// use std::path::Path;
538    /// use mpeg_audio_header::{Header, ParseMode};
539    ///
540    /// let path = Path::new("test/source.mp3");
541    /// let header = Header::read_from_path(&path, ParseMode::PreferVbrHeaders).unwrap();
542    /// println!("MPEG audio header: {:?}", header);
543    /// ```
544    pub fn read_from_path(path: impl AsRef<Path>, parse_mode: ParseMode) -> PositionalResult<Self> {
545        File::open(path)
546            .map_err(|e| PositionalError {
547                source: e.into(),
548                position: ReadPosition::new(),
549            })
550            .and_then(|file| Self::read_from_file(&file, parse_mode))
551    }
552}
553
554#[cfg(test)]
555mod tests;