avif_parse/
lib.rs

1#![allow(clippy::missing_safety_doc)]
2//! Module for parsing ISO Base Media Format aka video/mp4 streams.
3
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8use arrayvec::ArrayVec;
9use log::{debug, warn};
10
11use bitreader::BitReader;
12use byteorder::ReadBytesExt;
13use fallible_collections::{TryClone, TryReserveError};
14use std::convert::{TryFrom, TryInto as _};
15
16use std::io::{Read, Take};
17use std::num::NonZeroU32;
18use std::ops::{Range, RangeFrom};
19
20mod obu;
21
22mod boxes;
23use crate::boxes::{BoxType, FourCC};
24
25/// This crate can be used from C.
26pub mod c_api;
27
28// Arbitrary buffer size limit used for raw read_bufs on a box.
29// const BUF_SIZE_LIMIT: u64 = 10 * 1024 * 1024;
30
/// Lossless widening of an unsigned integer into a `u64`.
///
/// Only implement this for types whose every value is representable as a `u64`,
/// i.e. unsigned types that are at most 64 bits wide.
trait ToU64 {
    fn to_u64(self) -> u64;
}

/// `usize` is statically checked to be no wider than `u64`.
///
/// A platform whose `usize` does not fit in a `u64` fails to compile here, so the
/// `TryInto<u64>`-style conversion below can never reach its panicking branch.
impl ToU64 for usize {
    fn to_u64(self) -> u64 {
        // Compile-time guard on the relative widths of `usize` and `u64`.
        const _: () = assert!(std::mem::size_of::<usize>() <= std::mem::size_of::<u64>());
        u64::try_from(self).ok().unwrap()
    }
}
46
/// Lossless conversion of a small unsigned integer into a `usize`.
///
/// Only implement this for unsigned types that always fit in the platform `usize`.
pub(crate) trait ToUsize {
    fn to_usize(self) -> usize;
}

/// Generates a [`ToUsize`] impl for the given source type.
///
/// Every expansion embeds a compile-time size comparison, so a source type wider
/// than the platform `usize` is rejected at build time and the fallible
/// conversion in the generated body cannot actually fail.
macro_rules! impl_to_usize_from {
    ( $source:ty ) => {
        impl ToUsize for $source {
            fn to_usize(self) -> usize {
                // Compile-time guard on the relative widths of the source type and `usize`.
                const _: () = assert!(std::mem::size_of::<$source>() <= std::mem::size_of::<usize>());
                usize::try_from(self).ok().unwrap()
            }
        }
    };
}

impl_to_usize_from!(u8);
impl_to_usize_from!(u16);
impl_to_usize_from!(u32);
70
/// Indicate the current offset (i.e., bytes already read) in a reader
trait Offset {
    /// Number of bytes consumed from the reader so far.
    fn offset(&self) -> u64;
}
75
/// Reader adaptor that keeps count of how many bytes have passed through it.
struct OffsetReader<'a, T> {
    /// Underlying reader the data is pulled from.
    reader: &'a mut T,
    /// Running total of bytes read through this wrapper.
    offset: u64,
}

impl<'a, T> OffsetReader<'a, T> {
    /// Wraps `reader`, starting the byte count at zero.
    fn new(reader: &'a mut T) -> Self {
        let offset = 0;
        OffsetReader { reader, offset }
    }
}
87
/// Exposes the byte count maintained by the wrapper's `Read` implementation.
impl<T> Offset for OffsetReader<'_, T> {
    fn offset(&self) -> u64 {
        self.offset
    }
}
93
94impl<T: Read> Read for OffsetReader<'_, T> {
95    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
96        let bytes_read = self.reader.read(buf)?;
97        self.offset = self
98            .offset
99            .checked_add(bytes_read.to_u64())
100            .ok_or(Error::Unsupported("total bytes read too large for offset type"))?;
101        Ok(bytes_read)
102    }
103}
104
// Collection aliases used throughout the parser; hidden from the public docs.

// Growable vector from `fallible_collections`.
#[doc(hidden)]
pub type TryVec<T> = fallible_collections::TryVec<T>;
// Byte string, stored as a `TryVec<u8>`.
#[doc(hidden)]
pub type TryString = fallible_collections::TryVec<u8>;
// NOTE: despite the `Try` prefix this is the plain standard-library `HashMap`.
#[doc(hidden)]
pub type TryHashMap<K, V> = std::collections::HashMap<K, V>;
// Box from `fallible_collections`.
#[doc(hidden)]
pub type TryBox<T> = fallible_collections::TryBox<T>;
113
// To ensure we don't use stdlib allocating types by accident:
// these unit structs take over the bare names `Vec`, `Box`, `HashMap` and `String`
// in this module, so the standard types have to be spelled with their full path
// (e.g. `std::vec::Vec`) wherever they are really wanted.
#[allow(dead_code)]
struct Vec;
#[allow(dead_code)]
struct Box;
#[allow(dead_code)]
struct HashMap;
#[allow(dead_code)]
struct String;
123
/// Describes parser failures.
///
/// This enum wraps the standard `io::Error` type, unified with
/// our own parser error states and those of crates we use.
///
/// The `&'static str` payloads are the human-readable messages printed by the
/// `Display` implementation.
#[derive(Debug)]
pub enum Error {
    /// Parse error caused by corrupt or malformed data.
    InvalidData(&'static str),
    /// Parse error caused by limited parser support rather than invalid data.
    Unsupported(&'static str),
    /// Reflect `std::io::ErrorKind::UnexpectedEof` for short data.
    UnexpectedEOF,
    /// Propagate underlying errors from `std::io`.
    Io(std::io::Error),
    /// `read_mp4` terminated without detecting a moov box.
    NoMoov,
    /// Out of memory
    OutOfMemory,
}
143
144impl std::fmt::Display for Error {
145    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146        let msg = match self {
147            Self::InvalidData(s) | Self::Unsupported(s) => s,
148            Self::UnexpectedEOF => "EOF",
149            Self::Io(err) => return err.fmt(f),
150            Self::NoMoov => "Missing Moov box",
151            Self::OutOfMemory => "OOM",
152        };
153        f.write_str(msg)
154    }
155}
156
// Marker impl with no overridden methods; relies on the `Debug` and `Display` impls above.
impl std::error::Error for Error {}
158
/// Maps every bit-reader failure to `InvalidData("truncated bits")`.
///
/// The original error is only logged. In debug builds a `TooManyBitsForType`
/// error additionally trips an assertion, because it is treated as a bug in the
/// calling code rather than a problem with the input.
impl From<bitreader::BitReaderError> for Error {
    #[cold]
    #[cfg_attr(debug_assertions, track_caller)]
    fn from(err: bitreader::BitReaderError) -> Self {
        log::warn!("bitreader: {err}");
        debug_assert!(!matches!(err, bitreader::BitReaderError::TooManyBitsForType { .. })); // bug
        Self::InvalidData("truncated bits")
    }
}
168
169impl From<std::io::Error> for Error {
170    fn from(err: std::io::Error) -> Self {
171        match err.kind() {
172            std::io::ErrorKind::UnexpectedEof => Self::UnexpectedEOF,
173            _ => Self::Io(err),
174        }
175    }
176}
177
/// Any UTF-8 decoding failure is reported as `InvalidData("invalid utf8")`;
/// the details of the original error are discarded.
impl From<std::string::FromUtf8Error> for Error {
    fn from(_: std::string::FromUtf8Error) -> Self {
        Self::InvalidData("invalid utf8")
    }
}
183
/// A failed integer conversion is reported as `Unsupported` (a parser/platform
/// limitation) rather than as invalid input.
impl From<std::num::TryFromIntError> for Error {
    fn from(_: std::num::TryFromIntError) -> Self {
        Self::Unsupported("integer conversion failed")
    }
}
189
190impl From<Error> for std::io::Error {
191    fn from(err: Error) -> Self {
192        let kind = match err {
193            Error::InvalidData(_) => std::io::ErrorKind::InvalidData,
194            Error::UnexpectedEOF => std::io::ErrorKind::UnexpectedEof,
195            Error::Io(io_err) => return io_err,
196            _ => std::io::ErrorKind::Other,
197        };
198        Self::new(kind, err)
199    }
200}
201
/// A failed fallible allocation is reported as `OutOfMemory`.
impl From<TryReserveError> for Error {
    fn from(_: TryReserveError) -> Self {
        Self::OutOfMemory
    }
}
207
/// Result shorthand using our Error enum.
///
/// The error type defaults to [`Error`] but can still be overridden.
pub type Result<T, E = Error> = std::result::Result<T, E>;
210
/// Basic ISO box structure.
///
/// mp4 files are a sequence of possibly-nested 'box' structures.  Each box
/// begins with a header describing the length of the box's data and a
/// four-byte box type which identifies the type of the box. Together these
/// are enough to interpret the contents of that section of the file.
///
/// See ISO 14496-12:2015 § 4.2
#[derive(Debug, Clone, Copy)]
struct BoxHeader {
    /// Box type.
    name: BoxType,
    /// Size of the box in bytes.
    /// The payload length is computed as `size - offset`, so this includes the header.
    size: u64,
    /// Offset to the start of the contained data (or header size).
    offset: u64,
    /// Uuid for extended type.
    /// `None` for non-uuid boxes and when the 16 uuid bytes could not be read.
    #[allow(unused)]
    uuid: Option<[u8; 16]>,
}
231
impl BoxHeader {
    /// 4-byte size + 4-byte type
    const MIN_SIZE: u64 = 8;
    /// 4-byte size + 4-byte type + 8-byte extended (64-bit) size
    const MIN_LARGE_SIZE: u64 = 16;
}
238
/// File type box 'ftyp'.
#[derive(Debug)]
#[allow(unused)]
struct FileTypeBox {
    /// Major brand; `read_avif` requires this to be `avif`.
    major_brand: FourCC,
    /// Minor version of the major brand.
    minor_version: u32,
    /// Additional brands listed in the box.
    compatible_brands: TryVec<FourCC>,
}
247
// Handler reference box 'hdlr'
#[derive(Debug)]
#[allow(unused)]
struct HandlerBox {
    // Four-character code carried by the box.
    handler_type: FourCC,
}
254
// Parsed fields of an AV1 configuration record.
// NOTE(review): presumably the payload of an 'av1C' box — confirm against the parser
// that fills this in; it is not part of this section of the file.
#[derive(Debug)]
#[allow(unused)]
pub(crate) struct AV1ConfigBox {
    pub(crate) profile: u8,
    pub(crate) level: u8,
    pub(crate) tier: u8,
    pub(crate) bit_depth: u8,
    pub(crate) monochrome: bool,
    pub(crate) chroma_subsampling_x: u8,
    pub(crate) chroma_subsampling_y: u8,
    pub(crate) chroma_sample_position: u8,
    pub(crate) initial_presentation_delay_present: bool,
    pub(crate) initial_presentation_delay_minus_one: u8,
    // Raw bytes kept alongside the parsed fields.
    pub(crate) config_obus: TryVec<u8>,
}
270
/// Payloads extracted from an AVIF file by [`read_avif`].
#[derive(Debug, Default)]
pub struct AvifData {
    /// AV1 data for the color channels.
    ///
    /// The collected data indicated by the `pitm` box, See ISO 14496-12:2015 § 8.11.4
    pub primary_item: TryVec<u8>,
    /// AV1 data for alpha channel.
    ///
    /// Associated alpha channel for the primary item, if any
    pub alpha_item: Option<TryVec<u8>>,
    /// If true, divide RGB values by the alpha value.
    ///
    /// See `prem` in MIAF § 7.3.5.2
    pub premultiplied_alpha: bool,
}
286
287impl AvifData {
288    pub fn from_reader<R: Read>(reader: &mut R) -> Result<Self> {
289        read_avif(reader)
290    }
291
292    /// Parses AV1 data to get basic properties of the opaque channel
293    pub fn primary_item_metadata(&self) -> Result<AV1Metadata> {
294        AV1Metadata::parse_av1_bitstream(&self.primary_item)
295    }
296
297    /// Parses AV1 data to get basic properties about the alpha channel, if any
298    pub fn alpha_item_metadata(&self) -> Result<Option<AV1Metadata>> {
299        self.alpha_item.as_deref().map(AV1Metadata::parse_av1_bitstream).transpose()
300    }
301}
302
/// Basic properties read from an AV1 payload.
///
/// See [`AvifData::primary_item_metadata()`]
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct AV1Metadata {
    /// Should be true for non-animated AVIF
    pub still_picture: bool,
    /// Maximum frame width; never zero.
    pub max_frame_width: NonZeroU32,
    /// Maximum frame height; never zero.
    pub max_frame_height: NonZeroU32,
    /// 8, 10, or 12
    pub bit_depth: u8,
    /// 0, 1 or 2 for the level of complexity
    pub seq_profile: u8,
    /// Horizontal and vertical. `false` is full-res.
    pub chroma_subsampling: (bool, bool),
    /// Monochrome flag copied from the parsed color configuration.
    pub monochrome: bool,
}
319
320impl AV1Metadata {
321    /// Parses raw AV1 bitstream (OBU sequence header) only.
322    ///
323    /// This is for the bare image payload from an encoder, not an AVIF/HEIF file.
324    /// To parse AVIF files, see [`AvifData::from_reader()`].
325    #[inline(never)]
326    pub fn parse_av1_bitstream(obu_bitstream: &[u8]) -> Result<Self> {
327        let h = obu::parse_obu(obu_bitstream)?;
328        Ok(Self {
329            still_picture: h.still_picture,
330            max_frame_width: h.max_frame_width,
331            max_frame_height: h.max_frame_height,
332            bit_depth: h.color.bit_depth,
333            seq_profile: h.seq_profile,
334            chroma_subsampling: h.color.chroma_subsampling,
335            monochrome: h.color.monochrome,
336        })
337    }
338}
339
/// What `read_avif_meta` extracts from the 'meta' box for use by `read_avif`.
struct AvifInternalMeta {
    /// References collected from the item reference box(es).
    item_references: TryVec<SingleItemTypeReferenceBox>,
    /// Item properties; searched by `read_avif` for the alpha auxiliary type.
    properties: TryVec<AssociatedProperty>,
    /// Item id taken from the 'pitm' box.
    primary_item_id: u32,
    /// Item locations taken from the 'iloc' box.
    iloc_items: TryVec<ItemLocationBoxItem>,
}
346
/// A Media Data Box
/// See ISO 14496-12:2015 § 8.1.1
struct MediaDataBox {
    /// Offset of `data` from the beginning of the file. See `ConstructionMethod::File`
    offset: u64,
    /// The box payload, fully read into memory.
    data: TryVec<u8>,
}
354
355impl MediaDataBox {
356    /// Check whether the beginning of `extent` is within the bounds of the `MediaDataBox`.
357    /// We assume extents to not cross box boundaries. If so, this will cause an error
358    /// in `read_extent`.
359    fn contains_extent(&self, extent: &ExtentRange) -> bool {
360        if self.offset <= extent.start() {
361            let start_offset = extent.start() - self.offset;
362            start_offset < self.data.len().to_u64()
363        } else {
364            false
365        }
366    }
367
368    /// Check whether `extent` covers the `MediaDataBox` exactly.
369    fn matches_extent(&self, extent: &ExtentRange) -> bool {
370        if self.offset == extent.start() {
371            match extent {
372                ExtentRange::WithLength(range) => {
373                    if let Some(end) = self.offset.checked_add(self.data.len().to_u64()) {
374                        end == range.end
375                    } else {
376                        false
377                    }
378                },
379                ExtentRange::ToEnd(_) => true,
380            }
381        } else {
382            false
383        }
384    }
385
386    /// Copy the range specified by `extent` to the end of `buf` or return an error if the range
387    /// is not fully contained within `MediaDataBox`.
388    fn read_extent(&self, extent: &ExtentRange, buf: &mut TryVec<u8>) -> Result<()> {
389        let start_offset = extent
390            .start()
391            .checked_sub(self.offset)
392            .ok_or(Error::InvalidData("mdat does not contain extent"))?;
393        let slice = match extent {
394            ExtentRange::WithLength(range) => {
395                let range_len = range
396                    .end
397                    .checked_sub(range.start)
398                    .ok_or(Error::InvalidData("range start > end"))?;
399                let end = start_offset
400                    .checked_add(range_len)
401                    .ok_or(Error::InvalidData("extent end overflow"))?;
402                self.data.get(start_offset.try_into()?..end.try_into()?)
403            },
404            ExtentRange::ToEnd(_) => self.data.get(start_offset.try_into()?..),
405        };
406        let slice = slice.ok_or(Error::InvalidData("extent crosses box boundary"))?;
407        buf.extend_from_slice(slice)?;
408        Ok(())
409    }
410}
411
/// Used for 'infe' boxes within 'iinf' boxes
/// See ISO 14496-12:2015 § 8.11.6
/// Only versions {2, 3} are supported
#[derive(Debug)]
struct ItemInfoEntry {
    /// Item id; 16 bits wide in version 2 and 32 bits wide in version 3.
    item_id: u32,
    /// Four-character item type, e.g. `av01`.
    item_type: FourCC,
}
420
/// One reference between two items.
/// See ISO 14496-12:2015 § 8.11.12
#[derive(Debug)]
struct SingleItemTypeReferenceBox {
    /// Reference type; `read_avif` looks for `auxl` and `prem`.
    item_type: FourCC,
    /// Item the reference originates from.
    from_item_id: u32,
    /// Item the reference points to.
    to_item_id: u32,
}
428
/// Widths, in bytes, that the variable-sized fields of an 'iloc' box may have.
/// See ISO 14496-12:2015 § 8.11.3
#[derive(Debug)]
enum IlocFieldSize {
    /// The field is absent (0 bytes).
    Zero,
    /// The field is 4 bytes wide.
    Four,
    /// The field is 8 bytes wide.
    Eight,
}

impl IlocFieldSize {
    /// Width of the field in bits (0, 32 or 64).
    const fn to_bits(&self) -> u8 {
        let bytes: u8 = match self {
            Self::Zero => 0,
            Self::Four => 4,
            Self::Eight => 8,
        };
        bytes * 8
    }
}
447
448impl TryFrom<u8> for IlocFieldSize {
449    type Error = Error;
450
451    fn try_from(value: u8) -> Result<Self> {
452        match value {
453            0 => Ok(Self::Zero),
454            4 => Ok(Self::Four),
455            8 => Ok(Self::Eight),
456            _ => Err(Error::InvalidData("value must be in the set {0, 4, 8}")),
457        }
458    }
459}
460
/// Versions of the 'iloc' box that the parser accepts (0, 1 and 2).
#[derive(PartialEq)]
enum IlocVersion {
    Zero,
    One,
    Two,
}
467
468impl TryFrom<u8> for IlocVersion {
469    type Error = Error;
470
471    fn try_from(value: u8) -> Result<Self> {
472        match value {
473            0 => Ok(Self::Zero),
474            1 => Ok(Self::One),
475            2 => Ok(Self::Two),
476            _ => Err(Error::Unsupported("unsupported version in 'iloc' box")),
477        }
478    }
479}
480
/// Used for 'iloc' boxes
/// See ISO 14496-12:2015 § 8.11.3
/// `base_offset` is omitted since it is integrated into the ranges in `extents`
/// `data_reference_index` is omitted, since only 0 (i.e., this file) is supported
#[derive(Debug)]
struct ItemLocationBoxItem {
    /// Id of the item these extents belong to.
    item_id: u32,
    /// Where the extents are read from; `read_avif` only accepts `File`.
    construction_method: ConstructionMethod,
    /// Unused for `ConstructionMethod::Idat`
    extents: TryVec<ItemLocationBoxExtent>,
}
492
/// Construction method of an 'iloc' item.
/// `read_avif` rejects every method other than `File` as unsupported.
#[derive(Clone, Copy, Debug, PartialEq)]
enum ConstructionMethod {
    File,
    Idat,
    #[allow(dead_code)] // TODO: see https://github.com/mozilla/mp4parse-rust/issues/196
    Item,
}
500
/// A single extent of an 'iloc' item.
///
/// `extent_index` is omitted since it's only used for `ConstructionMethod::Item` which
/// is currently not implemented.
#[derive(Clone, Debug)]
struct ItemLocationBoxExtent {
    /// Byte range covered by the extent.
    extent_range: ExtentRange,
}
507
/// Byte range of an extent, either bounded or running to the end of its container.
#[derive(Clone, Debug)]
enum ExtentRange {
    /// Extent with a known end position (exclusive).
    WithLength(Range<u64>),
    /// Extent with only a start position.
    ToEnd(RangeFrom<u64>),
}

impl ExtentRange {
    /// First byte position covered by the extent, whichever variant it is.
    const fn start(&self) -> u64 {
        match self {
            Self::WithLength(Range { start, .. }) | Self::ToEnd(RangeFrom { start }) => *start,
        }
    }
}
522
/// A box whose header has been parsed and whose payload is still to be read.
/// See ISO 14496-12:2015 § 4.2
struct BMFFBox<'a, T> {
    /// The parsed header.
    head: BoxHeader,
    /// Reader over the payload, limited to the bytes that belong to this box.
    content: Take<&'a mut T>,
}
528
529impl<T: Read> BMFFBox<'_, T> {
530    fn read_into_try_vec(&mut self) -> std::io::Result<TryVec<u8>> {
531        let mut vec = std::vec::Vec::new();
532        vec.try_reserve_exact(self.content.limit() as usize)
533            .map_err(|_| std::io::ErrorKind::OutOfMemory)?;
534        self.content.read_to_end(&mut vec)?; // The default impl
535        Ok(vec.into())
536    }
537}
538
// Reading a box limited to 5 bytes must yield exactly the first 5 bytes of the source.
#[test]
fn box_read_to_end() {
    let tmp = &mut b"1234567890".as_slice();
    let mut src = BMFFBox {
        head: BoxHeader { name: BoxType::FileTypeBox, size: 5, offset: 0, uuid: None },
        content: <_ as Read>::take(tmp, 5),
    };
    let buf = src.read_into_try_vec().unwrap();
    assert_eq!(buf.len(), 5);
    assert_eq!(buf, b"12345".as_ref());
}
550
// A box whose limit is `usize::MAX` must produce an error from the up-front reservation.
#[test]
fn box_read_to_end_oom() {
    let tmp = &mut b"1234567890".as_slice();
    let mut src = BMFFBox {
        head: BoxHeader { name: BoxType::FileTypeBox, size: 5, offset: 0, uuid: None },
        content: <_ as Read>::take(tmp, usize::MAX.try_into().expect("usize < u64")),
    };
    assert!(src.read_into_try_vec().is_err());
}
560
/// Walks the sequence of boxes found in a reader, one header at a time.
struct BoxIter<'a, T> {
    /// Reader positioned at the start of the next box header.
    src: &'a mut T,
}
564
565impl<T: Read> BoxIter<'_, T> {
566    fn new(src: &mut T) -> BoxIter<'_, T> {
567        BoxIter { src }
568    }
569
570    fn next_box(&mut self) -> Result<Option<BMFFBox<'_, T>>> {
571        let r = read_box_header(self.src);
572        match r {
573            Ok(h) => Ok(Some(BMFFBox {
574                head: h,
575                content: self.src.take(h.size - h.offset),
576            })),
577            Err(Error::UnexpectedEOF) => Ok(None),
578            Err(e) => Err(e),
579        }
580    }
581}
582
/// Reads from the box payload; the limited inner reader stops at the end of the box.
impl<T: Read> Read for BMFFBox<'_, T> {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.content.read(buf)
    }
}
588
/// Reports the offset of the reader underneath the box's limited payload reader.
impl<T: Offset> Offset for BMFFBox<'_, T> {
    fn offset(&self) -> u64 {
        self.content.get_ref().offset()
    }
}
594
impl<T: Read> BMFFBox<'_, T> {
    /// Number of payload bytes that have not been read yet.
    fn bytes_left(&self) -> u64 {
        self.content.limit()
    }

    /// The header this box was created from.
    const fn get_header(&self) -> &BoxHeader {
        &self.head
    }

    /// Iterates over the boxes nested inside this box's payload.
    fn box_iter(&mut self) -> BoxIter<'_, Self> {
        BoxIter::new(self)
    }
}
608
609impl<T> Drop for BMFFBox<'_, T> {
610    fn drop(&mut self) {
611        if self.content.limit() > 0 {
612            let name: FourCC = From::from(self.head.name);
613            debug!("Dropping {} bytes in '{}'", self.content.limit(), name);
614        }
615    }
616}
617
618/// Read and parse a box header.
619///
620/// Call this first to determine the type of a particular mp4 box
621/// and its length. Used internally for dispatching to specific
622/// parsers for the internal content, or to get the length to
623/// skip unknown or uninteresting boxes.
624///
625/// See ISO 14496-12:2015 § 4.2
626fn read_box_header<T: ReadBytesExt>(src: &mut T) -> Result<BoxHeader> {
627    let size32 = be_u32(src)?;
628    let name = BoxType::from(be_u32(src)?);
629    let size = match size32 {
630        // valid only for top-level box and indicates it's the last box in the file.  usually mdat.
631        0 => return Err(Error::Unsupported("unknown sized box")),
632        1 => {
633            let size64 = be_u64(src)?;
634            if size64 < BoxHeader::MIN_LARGE_SIZE {
635                return Err(Error::InvalidData("malformed wide size"));
636            }
637            size64
638        },
639        _ => {
640            if u64::from(size32) < BoxHeader::MIN_SIZE {
641                return Err(Error::InvalidData("malformed size"));
642            }
643            u64::from(size32)
644        },
645    };
646    let mut offset = match size32 {
647        1 => BoxHeader::MIN_LARGE_SIZE,
648        _ => BoxHeader::MIN_SIZE,
649    };
650    let uuid = if name == BoxType::UuidBox {
651        if size >= offset + 16 {
652            let mut buffer = [0u8; 16];
653            let count = src.read(&mut buffer)?;
654            offset += count.to_u64();
655            if count == 16 {
656                Some(buffer)
657            } else {
658                debug!("malformed uuid (short read), skipping");
659                None
660            }
661        } else {
662            debug!("malformed uuid, skipping");
663            None
664        }
665    } else {
666        None
667    };
668    assert!(offset <= size);
669    Ok(BoxHeader { name, size, offset, uuid })
670}
671
672/// Parse the extra header fields for a full box.
673fn read_fullbox_extra<T: ReadBytesExt>(src: &mut T) -> Result<(u8, u32)> {
674    let version = src.read_u8()?;
675    let flags_a = src.read_u8()?;
676    let flags_b = src.read_u8()?;
677    let flags_c = src.read_u8()?;
678    Ok((
679        version,
680        u32::from(flags_a) << 16 | u32::from(flags_b) << 8 | u32::from(flags_c),
681    ))
682}
683
684// Parse the extra fields for a full box whose flag fields must be zero.
685fn read_fullbox_version_no_flags<T: ReadBytesExt>(src: &mut T) -> Result<u8> {
686    let (version, flags) = read_fullbox_extra(src)?;
687
688    if flags != 0 {
689        return Err(Error::Unsupported("expected flags to be 0"));
690    }
691
692    Ok(version)
693}
694
695/// Skip over the entire contents of a box.
696fn skip_box_content<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<()> {
697    // Skip the contents of unknown chunks.
698    let to_skip = {
699        let header = src.get_header();
700        debug!("{header:?} (skipped)");
701        header
702            .size
703            .checked_sub(header.offset)
704            .ok_or(Error::InvalidData("header offset > size"))?
705    };
706    assert_eq!(to_skip, src.bytes_left());
707    skip(src, to_skip)
708}
709
710/// Skip over the remain data of a box.
711fn skip_box_remain<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<()> {
712    let remain = {
713        let header = src.get_header();
714        let len = src.bytes_left();
715        debug!("remain {len} (skipped) in {header:?}");
716        len
717    };
718    skip(src, remain)
719}
720
/// Read the contents of an AVIF file
///
/// Metadata is accumulated and returned in [`AvifData`] struct,
///
/// The top-level boxes are read in order: the first must be an 'ftyp' box with
/// major brand `avif`, then a single 'meta' box and any number of 'mdat' boxes
/// are collected, and all other boxes are skipped. Afterwards the extents of the
/// primary item, and of its alpha auxiliary item if one exists, are copied out
/// of the collected 'mdat' payloads.
///
/// # Errors
///
/// Returns `Unsupported` for animated AVIF (`avis`) and for items that do not
/// use the file construction method. Returns `InvalidData` for a missing or
/// misplaced 'ftyp', a missing or duplicate 'meta', and extents that are not
/// covered by any 'mdat'. Errors from the nested parsers are propagated.
pub fn read_avif<T: Read>(f: &mut T) -> Result<AvifData> {
    // Track the absolute position so that 'mdat' payloads can be located by file offset.
    let mut f = OffsetReader::new(f);

    let mut iter = BoxIter::new(&mut f);

    // 'ftyp' box must occur first; see ISO 14496-12:2015 § 4.3.1
    if let Some(mut b) = iter.next_box()? {
        if b.head.name == BoxType::FileTypeBox {
            let ftyp = read_ftyp(&mut b)?;
            if ftyp.major_brand != b"avif" {
                if ftyp.major_brand == b"avis" {
                    return Err(Error::Unsupported("Animated AVIF is not supported. Please use real AV1 videos instead."));
                }
                warn!("major_brand: {}", ftyp.major_brand);
                return Err(Error::InvalidData("ftyp must be 'avif'"));
            }
        } else {
            return Err(Error::InvalidData("'ftyp' box must occur first"));
        }
    }

    let mut meta = None;
    let mut mdats = TryVec::new();

    while let Some(mut b) = iter.next_box()? {
        match b.head.name {
            BoxType::MetadataBox => {
                if meta.is_some() {
                    return Err(Error::InvalidData("There should be zero or one meta boxes per ISO 14496-12:2015 § 8.11.1.1"));
                }
                meta = Some(read_avif_meta(&mut b)?);
            },
            BoxType::MediaDataBox => {
                // Empty 'mdat' boxes are ignored.
                if b.bytes_left() > 0 {
                    // The header has already been consumed, so this is the position of the payload.
                    let offset = b.offset();
                    let data = b.read_into_try_vec()?;
                    mdats.push(MediaDataBox { offset, data })?;
                }
            },
            _ => skip_box_content(&mut b)?,
        }

        // NOTE(review): `check_parser_state` is defined outside this section; presumably it
        // verifies the box was consumed as expected — confirm.
        check_parser_state(&b.content)?;
    }

    let meta = meta.ok_or(Error::InvalidData("missing meta"))?;

    let alpha_item_id = meta
        .item_references
        .iter()
        // Auxiliary image for the primary image
        .filter(|iref| {
            iref.to_item_id == meta.primary_item_id
                && iref.from_item_id != meta.primary_item_id
                && iref.item_type == b"auxl"
        })
        .map(|iref| iref.from_item_id)
        // which has the alpha property
        .find(|&item_id| {
            meta.properties.iter().any(|prop| {
                prop.item_id == item_id
                    && match &prop.property {
                        ItemProperty::AuxiliaryType(urn) => {
                            urn.type_subtype().0 == b"urn:mpeg:mpegB:cicp:systems:auxiliary:alpha"
                        }
                        _ => false,
                    }
            })
        });

    let mut context = AvifData {
        // Premultiplied only if an alpha item exists and the primary item has a 'prem'
        // reference pointing at it.
        premultiplied_alpha: alpha_item_id.map_or(false, |alpha_item_id| {
            meta.item_references.iter().any(|iref| {
                iref.from_item_id == meta.primary_item_id
                    && iref.to_item_id == alpha_item_id
                    && iref.item_type == b"prem"
            })
        }),
        ..Default::default()
    };

    // load data of relevant items
    for loc in meta.iloc_items.iter() {
        // Pick the destination buffer; items other than primary and alpha are ignored.
        let item_data = if loc.item_id == meta.primary_item_id {
            &mut context.primary_item
        } else if Some(loc.item_id) == alpha_item_id {
            context.alpha_item.get_or_insert_with(TryVec::new)
        } else {
            continue;
        };

        if loc.construction_method != ConstructionMethod::File {
            return Err(Error::Unsupported("unsupported construction_method"));
        }
        for extent in loc.extents.iter() {
            let mut found = false;
            // try to find an overlapping mdat
            for mdat in mdats.iter_mut() {
                if mdat.matches_extent(&extent.extent_range) {
                    // The extent covers the whole box: hand its buffer over instead of copying.
                    // NOTE(review): presumably `append` empties `mdat.data`, like `Vec::append` — confirm.
                    item_data.append(&mut mdat.data)?;
                    found = true;
                    break;
                } else if mdat.contains_extent(&extent.extent_range) {
                    // The extent starts inside the box: copy just that range.
                    mdat.read_extent(&extent.extent_range, item_data)?;
                    found = true;
                    break;
                }
            }
            if !found {
                return Err(Error::InvalidData("iloc contains an extent that is not in mdat"));
            }
        }
    }

    Ok(context)
}
840
841/// Parse a metadata box in the context of an AVIF
842/// Currently requires the primary item to be an av01 item type and generates
843/// an error otherwise.
844/// See ISO 14496-12:2015 § 8.11.1
845fn read_avif_meta<T: Read + Offset>(src: &mut BMFFBox<'_, T>) -> Result<AvifInternalMeta> {
846    let version = read_fullbox_version_no_flags(src)?;
847
848    if version != 0 {
849        return Err(Error::Unsupported("unsupported meta version"));
850    }
851
852    let mut primary_item_id = None;
853    let mut item_infos = None;
854    let mut iloc_items = None;
855    let mut item_references = TryVec::new();
856    let mut properties = TryVec::new();
857
858    let mut iter = src.box_iter();
859    while let Some(mut b) = iter.next_box()? {
860        match b.head.name {
861            BoxType::ItemInfoBox => {
862                if item_infos.is_some() {
863                    return Err(Error::InvalidData("There should be zero or one iinf boxes per ISO 14496-12:2015 § 8.11.6.1"));
864                }
865                item_infos = Some(read_iinf(&mut b)?);
866            },
867            BoxType::ItemLocationBox => {
868                if iloc_items.is_some() {
869                    return Err(Error::InvalidData("There should be zero or one iloc boxes per ISO 14496-12:2015 § 8.11.3.1"));
870                }
871                iloc_items = Some(read_iloc(&mut b)?);
872            },
873            BoxType::PrimaryItemBox => {
874                if primary_item_id.is_some() {
875                    return Err(Error::InvalidData("There should be zero or one iloc boxes per ISO 14496-12:2015 § 8.11.4.1"));
876                }
877                primary_item_id = Some(read_pitm(&mut b)?);
878            },
879            BoxType::ImageReferenceBox => {
880                item_references.append(&mut read_iref(&mut b)?)?;
881            },
882            BoxType::ImagePropertiesBox => {
883                properties = read_iprp(&mut b)?;
884            },
885            _ => skip_box_content(&mut b)?,
886        }
887
888        check_parser_state(&b.content)?;
889    }
890
891    let primary_item_id = primary_item_id.ok_or(Error::InvalidData("Required pitm box not present in meta box"))?;
892
893    let item_infos = item_infos.ok_or(Error::InvalidData("iinf missing"))?;
894
895    if let Some(item_info) = item_infos.iter().find(|x| x.item_id == primary_item_id) {
896        if item_info.item_type != b"av01" {
897            if item_info.item_type == b"grid" {
898                return Err(Error::Unsupported("Grid-based AVIF collage is not supported"));
899            }
900            warn!("primary_item_id type: {}", item_info.item_type);
901            return Err(Error::InvalidData("primary_item_id type is not av01"));
902        }
903    } else {
904        return Err(Error::InvalidData("primary_item_id not present in iinf box"));
905    }
906
907    Ok(AvifInternalMeta {
908        properties,
909        item_references,
910        primary_item_id,
911        iloc_items: iloc_items.ok_or(Error::InvalidData("iloc missing"))?,
912    })
913}
914
915/// Parse a Primary Item Box
916/// See ISO 14496-12:2015 § 8.11.4
917fn read_pitm<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<u32> {
918    let version = read_fullbox_version_no_flags(src)?;
919
920    let item_id = match version {
921        0 => be_u16(src)?.into(),
922        1 => be_u32(src)?,
923        _ => return Err(Error::Unsupported("unsupported pitm version")),
924    };
925
926    Ok(item_id)
927}
928
929/// Parse an Item Information Box
930/// See ISO 14496-12:2015 § 8.11.6
931fn read_iinf<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<ItemInfoEntry>> {
932    let version = read_fullbox_version_no_flags(src)?;
933
934    match version {
935        0 | 1 => (),
936        _ => return Err(Error::Unsupported("unsupported iinf version")),
937    }
938
939    let entry_count = if version == 0 {
940        be_u16(src)?.to_usize()
941    } else {
942        be_u32(src)?.to_usize()
943    };
944    let mut item_infos = TryVec::with_capacity(entry_count)?;
945
946    let mut iter = src.box_iter();
947    while let Some(mut b) = iter.next_box()? {
948        if b.head.name != BoxType::ItemInfoEntry {
949            return Err(Error::InvalidData("iinf box should contain only infe boxes"));
950        }
951
952        item_infos.push(read_infe(&mut b)?)?;
953
954        check_parser_state(&b.content)?;
955    }
956
957    Ok(item_infos)
958}
959
960/// Parse an Item Info Entry
961/// See ISO 14496-12:2015 § 8.11.6.2
962fn read_infe<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<ItemInfoEntry> {
963    // According to the standard, it seems the flags field should be 0, but
964    // at least one sample AVIF image has a nonzero value.
965    let (version, _) = read_fullbox_extra(src)?;
966
967    // mif1 brand (see ISO 23008-12:2017 § 10.2.1) only requires v2 and 3
968    let item_id = match version {
969        2 => be_u16(src)?.into(),
970        3 => be_u32(src)?,
971        _ => return Err(Error::Unsupported("unsupported version in 'infe' box")),
972    };
973
974    let item_protection_index = be_u16(src)?;
975
976    if item_protection_index != 0 {
977        return Err(Error::Unsupported("protected items (infe.item_protection_index != 0) are not supported"));
978    }
979
980    let item_type = FourCC::from(be_u32(src)?);
981    debug!("infe item_id {item_id} item_type: {item_type}");
982
983    // There are some additional fields here, but they're not of interest to us
984    skip_box_remain(src)?;
985
986    Ok(ItemInfoEntry { item_id, item_type })
987}
988
989fn read_iref<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<SingleItemTypeReferenceBox>> {
990    let mut item_references = TryVec::new();
991    let version = read_fullbox_version_no_flags(src)?;
992    if version > 1 {
993        return Err(Error::Unsupported("iref version"));
994    }
995
996    let mut iter = src.box_iter();
997    while let Some(mut b) = iter.next_box()? {
998        let from_item_id = if version == 0 {
999            be_u16(&mut b)?.into()
1000        } else {
1001            be_u32(&mut b)?
1002        };
1003        let reference_count = be_u16(&mut b)?;
1004        for _ in 0..reference_count {
1005            let to_item_id = if version == 0 {
1006                be_u16(&mut b)?.into()
1007            } else {
1008                be_u32(&mut b)?
1009            };
1010            if from_item_id == to_item_id {
1011                return Err(Error::InvalidData("from_item_id and to_item_id must be different"));
1012            }
1013            item_references.push(SingleItemTypeReferenceBox {
1014                item_type: b.head.name.into(),
1015                from_item_id,
1016                to_item_id,
1017            })?;
1018        }
1019        check_parser_state(&b.content)?;
1020    }
1021    Ok(item_references)
1022}
1023
1024fn read_iprp<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<AssociatedProperty>> {
1025    let mut iter = src.box_iter();
1026    let mut properties = TryVec::new();
1027    let mut associations = TryVec::new();
1028
1029    while let Some(mut b) = iter.next_box()? {
1030        match b.head.name {
1031            BoxType::ItemPropertyContainerBox => {
1032                properties = read_ipco(&mut b)?;
1033            },
1034            BoxType::ItemPropertyAssociationBox => {
1035                associations = read_ipma(&mut b)?;
1036            },
1037            _ => return Err(Error::InvalidData("unexpected ipco child")),
1038        }
1039    }
1040
1041    let mut associated = TryVec::new();
1042    for a in associations {
1043        let index = match a.property_index {
1044            0 => continue,
1045            x => x as usize - 1,
1046        };
1047        if let Some(prop) = properties.get(index) {
1048            if *prop != ItemProperty::Unsupported {
1049                associated.push(AssociatedProperty {
1050                    item_id: a.item_id,
1051                    property: prop.try_clone()?,
1052                })?;
1053            }
1054        }
1055    }
1056    Ok(associated)
1057}
1058
1059#[derive(Debug, PartialEq)]
1060pub(crate) enum ItemProperty {
1061    Channels(ArrayVec<u8, 16>),
1062    AuxiliaryType(AuxiliaryTypeProperty),
1063    Unsupported,
1064}
1065
1066impl TryClone for ItemProperty {
1067    fn try_clone(&self) -> Result<Self, TryReserveError> {
1068        Ok(match self {
1069            Self::Channels(val) => Self::Channels(val.clone()),
1070            Self::AuxiliaryType(val) => Self::AuxiliaryType(val.try_clone()?),
1071            Self::Unsupported => Self::Unsupported,
1072        })
1073    }
1074}
1075
/// One item-to-property association entry, as produced by `read_ipma`.
struct Association {
    /// ID of the item the entry applies to.
    item_id: u32,
    /// Leading bit of the association entry; parsed but not currently used.
    #[allow(unused)]
    essential: bool,
    /// Remaining bits of the entry. `read_iprp` treats 0 as "no property" and
    /// any other value as a 1-based index into the parsed `ipco` list.
    property_index: u16,
}
1082
/// A supported property paired with the item it is associated with, as
/// resolved by `read_iprp`.
pub(crate) struct AssociatedProperty {
    /// ID of the item the property belongs to.
    pub item_id: u32,
    /// Copy of the property taken from the parsed `ipco` list.
    pub property: ItemProperty,
}
1087
1088fn read_ipma<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<Association>> {
1089    let (version, flags) = read_fullbox_extra(src)?;
1090
1091    let mut associations = TryVec::new();
1092
1093    let entry_count = be_u32(src)?;
1094    for _ in 0..entry_count {
1095        let item_id = if version == 0 {
1096            be_u16(src)?.into()
1097        } else {
1098            be_u32(src)?
1099        };
1100        let association_count = src.read_u8()?;
1101        for _ in 0..association_count {
1102            let num_association_bytes = if flags & 1 == 1 { 2 } else { 1 };
1103            let association = &mut [0; 2][..num_association_bytes];
1104            src.read_exact(association)?;
1105            let mut association = BitReader::new(association);
1106            let essential = association.read_bool()?;
1107            let property_index = association.read_u16(association.remaining().try_into()?)?;
1108            associations.push(Association {
1109                item_id,
1110                essential,
1111                property_index,
1112            })?;
1113        }
1114    }
1115    Ok(associations)
1116}
1117
1118fn read_ipco<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<ItemProperty>> {
1119    let mut properties = TryVec::new();
1120
1121    let mut iter = src.box_iter();
1122    while let Some(mut b) = iter.next_box()? {
1123        // Must push for every property to have correct index for them
1124        properties.push(match b.head.name {
1125            BoxType::PixelInformationBox => ItemProperty::Channels(read_pixi(&mut b)?),
1126            BoxType::AuxiliaryTypeProperty => ItemProperty::AuxiliaryType(read_auxc(&mut b)?),
1127            _ => {
1128                skip_box_remain(&mut b)?;
1129                ItemProperty::Unsupported
1130            },
1131        })?;
1132    }
1133    Ok(properties)
1134}
1135
1136fn read_pixi<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<ArrayVec<u8, 16>> {
1137    let version = read_fullbox_version_no_flags(src)?;
1138    if version != 0 {
1139        return Err(Error::Unsupported("pixi version"));
1140    }
1141
1142    let num_channels = usize::from(src.read_u8()?);
1143    let mut channels = ArrayVec::new();
1144    channels.extend((0..num_channels.min(channels.capacity())).map(|_| 0));
1145    debug_assert_eq!(num_channels, channels.len());
1146    src.read_exact(&mut channels).map_err(|_| Error::InvalidData("invalid num_channels"))?;
1147
1148    check_parser_state(&src.content)?;
1149    Ok(channels)
1150}
1151
1152#[derive(Debug, PartialEq)]
1153#[doc(hidden)]
1154// this wasn't supposed to be public
1155pub struct AuxiliaryTypeProperty {
1156    aux_data: TryString,
1157}
1158
1159impl AuxiliaryTypeProperty {
1160    #[must_use]
1161    pub fn type_subtype(&self) -> (&[u8], &[u8]) {
1162        let split = self.aux_data.iter().position(|&b| b == b'\0')
1163            .map(|pos| self.aux_data.split_at(pos));
1164        if let Some((aux_type, rest)) = split {
1165            (aux_type, &rest[1..])
1166        } else {
1167            (&self.aux_data, &[])
1168        }
1169    }
1170}
1171
1172impl TryClone for AuxiliaryTypeProperty {
1173    fn try_clone(&self) -> Result<Self, TryReserveError> {
1174        Ok(Self {
1175            aux_data: self.aux_data.try_clone()?,
1176        })
1177    }
1178}
1179
1180fn read_auxc<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<AuxiliaryTypeProperty> {
1181    let version = read_fullbox_version_no_flags(src)?;
1182    if version != 0 {
1183        return Err(Error::Unsupported("auxC version"));
1184    }
1185
1186    let aux_data = src.read_into_try_vec()?;
1187
1188    Ok(AuxiliaryTypeProperty { aux_data })
1189}
1190
1191/// Parse an item location box inside a meta box
1192/// See ISO 14496-12:2015 § 8.11.3
1193fn read_iloc<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<TryVec<ItemLocationBoxItem>> {
1194    let version: IlocVersion = read_fullbox_version_no_flags(src)?.try_into()?;
1195
1196    let iloc = src.read_into_try_vec()?;
1197    let mut iloc = BitReader::new(&iloc);
1198
1199    let offset_size: IlocFieldSize = iloc.read_u8(4)?.try_into()?;
1200    let length_size: IlocFieldSize = iloc.read_u8(4)?.try_into()?;
1201    let base_offset_size: IlocFieldSize = iloc.read_u8(4)?.try_into()?;
1202
1203    let index_size: Option<IlocFieldSize> = match version {
1204        IlocVersion::One | IlocVersion::Two => Some(iloc.read_u8(4)?.try_into()?),
1205        IlocVersion::Zero => {
1206            let _reserved = iloc.read_u8(4)?;
1207            None
1208        },
1209    };
1210
1211    let item_count = match version {
1212        IlocVersion::Zero | IlocVersion::One => iloc.read_u32(16)?,
1213        IlocVersion::Two => iloc.read_u32(32)?,
1214    };
1215
1216    let mut items = TryVec::with_capacity(item_count.to_usize())?;
1217
1218    for _ in 0..item_count {
1219        let item_id = match version {
1220            IlocVersion::Zero | IlocVersion::One => iloc.read_u32(16)?,
1221            IlocVersion::Two => iloc.read_u32(32)?,
1222        };
1223
1224        // The spec isn't entirely clear how an `iloc` should be interpreted for version 0,
1225        // which has no `construction_method` field. It does say:
1226        // "For maximum compatibility, version 0 of this box should be used in preference to
1227        //  version 1 with `construction_method==0`, or version 2 when possible."
1228        // We take this to imply version 0 can be interpreted as using file offsets.
1229        let construction_method = match version {
1230            IlocVersion::Zero => ConstructionMethod::File,
1231            IlocVersion::One | IlocVersion::Two => {
1232                let _reserved = iloc.read_u16(12)?;
1233                match iloc.read_u16(4)? {
1234                    0 => ConstructionMethod::File,
1235                    1 => ConstructionMethod::Idat,
1236                    2 => return Err(Error::Unsupported("construction_method 'item_offset' is not supported")),
1237                    _ => return Err(Error::InvalidData("construction_method is taken from the set 0, 1 or 2 per ISO 14496-12:2015 § 8.11.3.3")),
1238                }
1239            },
1240        };
1241
1242        let data_reference_index = iloc.read_u16(16)?;
1243
1244        if data_reference_index != 0 {
1245            return Err(Error::Unsupported("external file references (iloc.data_reference_index != 0) are not supported"));
1246        }
1247
1248        let base_offset = iloc.read_u64(base_offset_size.to_bits())?;
1249        let extent_count = iloc.read_u16(16)?;
1250
1251        if extent_count < 1 {
1252            return Err(Error::InvalidData("extent_count must have a value 1 or greater per ISO 14496-12:2015 § 8.11.3.3"));
1253        }
1254
1255        let mut extents = TryVec::with_capacity(extent_count.to_usize())?;
1256
1257        for _ in 0..extent_count {
1258            // Parsed but currently ignored, see `ItemLocationBoxExtent`
1259            let _extent_index = match &index_size {
1260                None | Some(IlocFieldSize::Zero) => None,
1261                Some(index_size) => {
1262                    debug_assert!(version == IlocVersion::One || version == IlocVersion::Two);
1263                    Some(iloc.read_u64(index_size.to_bits())?)
1264                },
1265            };
1266
1267            // Per ISO 14496-12:2015 § 8.11.3.1:
1268            // "If the offset is not identified (the field has a length of zero), then the
1269            //  beginning of the source (offset 0) is implied"
1270            // This behavior will follow from BitReader::read_u64(0) -> 0.
1271            let extent_offset = iloc.read_u64(offset_size.to_bits())?;
1272            let extent_length = iloc.read_u64(length_size.to_bits())?;
1273
1274            // "If the length is not specified, or specified as zero, then the entire length of
1275            //  the source is implied" (ibid)
1276            let start = base_offset
1277                .checked_add(extent_offset)
1278                .ok_or(Error::InvalidData("offset calculation overflow"))?;
1279            let extent_range = if extent_length == 0 {
1280                ExtentRange::ToEnd(RangeFrom { start })
1281            } else {
1282                let end = start
1283                    .checked_add(extent_length)
1284                    .ok_or(Error::InvalidData("end calculation overflow"))?;
1285                ExtentRange::WithLength(Range { start, end })
1286            };
1287
1288            extents.push(ItemLocationBoxExtent { extent_range })?;
1289        }
1290
1291        items.push(ItemLocationBoxItem { item_id, construction_method, extents })?;
1292    }
1293
1294    if iloc.remaining() == 0 {
1295        Ok(items)
1296    } else {
1297        Err(Error::InvalidData("invalid iloc size"))
1298    }
1299}
1300
1301/// Parse an ftyp box.
1302/// See ISO 14496-12:2015 § 4.3
1303fn read_ftyp<T: Read>(src: &mut BMFFBox<'_, T>) -> Result<FileTypeBox> {
1304    let major = be_u32(src)?;
1305    let minor = be_u32(src)?;
1306    let bytes_left = src.bytes_left();
1307    if bytes_left % 4 != 0 {
1308        return Err(Error::InvalidData("invalid ftyp size"));
1309    }
1310    // Is a brand_count of zero valid?
1311    let brand_count = bytes_left / 4;
1312    let mut brands = TryVec::with_capacity(brand_count.try_into()?)?;
1313    for _ in 0..brand_count {
1314        brands.push(be_u32(src)?.into())?;
1315    }
1316    Ok(FileTypeBox {
1317        major_brand: From::from(major),
1318        minor_version: minor,
1319        compatible_brands: brands,
1320    })
1321}
1322
1323#[cfg_attr(debug_assertions, track_caller)]
1324fn check_parser_state<T>(left: &Take<T>) -> Result<(), Error> {
1325    let limit = left.limit();
1326    if limit == 0 {
1327        Ok(())
1328    } else {
1329        debug_assert_eq!(0, limit, "bad parser state bytes left");
1330        Err(Error::InvalidData("unread box content or bad parser sync"))
1331    }
1332}
1333
1334/// Skip a number of bytes that we don't care to parse.
1335fn skip<T: Read>(src: &mut T, bytes: u64) -> Result<()> {
1336    std::io::copy(&mut src.take(bytes), &mut std::io::sink())?;
1337    Ok(())
1338}
1339
1340fn be_u16<T: ReadBytesExt>(src: &mut T) -> Result<u16> {
1341    src.read_u16::<byteorder::BigEndian>().map_err(From::from)
1342}
1343
1344fn be_u32<T: ReadBytesExt>(src: &mut T) -> Result<u32> {
1345    src.read_u32::<byteorder::BigEndian>().map_err(From::from)
1346}
1347
1348fn be_u64<T: ReadBytesExt>(src: &mut T) -> Result<u64> {
1349    src.read_u64::<byteorder::BigEndian>().map_err(From::from)
1350}