Skip to main content

mp4_edit/
parser.rs

1/*!
2 * This mod is concerned with parsing mp4 files.
3 */
4
5use anyhow::anyhow;
6use derive_more::Display;
7use futures_io::{AsyncRead, AsyncSeek};
8use std::collections::VecDeque;
9use std::fmt::{self, Debug};
10use std::io::SeekFrom;
11use std::ops::{Deref, DerefMut};
12use thiserror::Error;
13
14use crate::atom::util::parser::stream;
15use crate::chunk_offset_builder;
16pub use crate::reader::{Mp4Reader, NonSeekable, ReadCapability, Seekable};
17use crate::{
18    atom::{
19        atom_ref::{AtomRef, AtomRefMut},
20        container::{is_container_atom, META, META_VERSION_FLAGS_SIZE, MOOV},
21        ftyp::{FileTypeAtom, FtypAtomRef, FtypAtomRefMut, FTYP},
22        stco_co64::ChunkOffsets,
23        stsc::SampleToChunkEntry,
24        stts::TimeToSampleEntry,
25        util::DebugEllipsis,
26        AtomHeader, FourCC, MdiaAtomRefMut, MinfAtomRefMut, MoovAtomRef, MoovAtomRefMut, RawData,
27        TrakAtomRef,
28    },
29    chunk_offset_builder::{ChunkInfo, ChunkOffsetBuilder},
30    writer::SerializeAtom,
31    Atom, AtomData,
32};
33
/// Four-character code identifying the `mdat` atom.
///
/// The metadata parser in this module stops reading as soon as it encounters an atom of this type.
pub const MDAT: &[u8; 4] = b"mdat";
35
/// This trait is implemented on [`AtomData`] and the inner value of each of its variants.
///
/// Note that the [`AtomHeader`] has already been consumed; this trait is only concerned with
/// parsing the data that follows the header.
pub(crate) trait ParseAtomData: Sized {
    /// Parses the atom's data from `input`, given the `atom_type` read from its header.
    fn parse_atom_data(atom_type: FourCC, input: &[u8]) -> Result<Self, ParseError>;
}
42
/// Error produced while parsing an mp4 file.
///
/// Displays as the [`ParseErrorKind`], followed by ` at offset {offset} with length {length}`
/// when a location is attached.
#[derive(Debug, Error)]
#[error(
    "{kind}{}",
    self.location.map(|(offset, length)|
        format!(" at offset {offset} with length {length}")).unwrap_or_default()
)]
pub struct ParseError {
    /// The kind of error that occurred during parsing.
    pub(crate) kind: ParseErrorKind,
    /// The `(offset, length)` of the input data related to the error, if known.
    pub(crate) location: Option<(usize, usize)>,
    /// The source error that caused this error.
    #[source]
    pub(crate) source: Option<Box<dyn std::error::Error + Send + Sync>>,
}
58
/// Broad category of a [`ParseError`].
///
/// The `Display` text of each variant is the leading part of the `ParseError` message.
#[derive(Debug, Display)]
#[non_exhaustive]
pub enum ParseErrorKind {
    /// An I/O level failure (also reported when the reader has already been consumed).
    #[display("I/O error")]
    Io,
    /// The input ended; the metadata parser treats this as the end of the atom stream.
    #[display("EOF error")]
    Eof,
    /// The atom header is invalid; the metadata parser treats this as the end of the atom stream.
    #[display("Invalid atom header")]
    InvalidHeader,
    /// The size declared in an atom header is unsupported or too small.
    #[display("Invalid atom size")]
    InvalidSize,
    /// The atom type is not supported.
    #[display("Unsupported atom type")]
    UnsupportedAtom,
    /// An atom of a different type than expected was encountered.
    #[display("Unexpected atom type")]
    UnexpectedAtom,
    /// The data of an atom could not be parsed.
    #[display("Atom parsing failed")]
    AtomParsing,
    /// Data required to continue is missing (e.g. no `mdat` atom, or no samples in a chunk).
    #[display("Insufficient data")]
    InsufficientData,
    /// The `moov` atom is missing.
    #[display("moov atom is missing")]
    MissingMoov,
}
81
82impl ParseError {
83    pub(crate) fn new_unexpected_atom_oneof(atom_type: FourCC, expected: Vec<FourCC>) -> Self {
84        if expected.len() == 1 {
85            return Self::new_unexpected_atom(atom_type, expected[0]);
86        }
87
88        let expected = expected
89            .into_iter()
90            .map(|expected| expected.to_string())
91            .collect::<Vec<_>>()
92            .join(", ");
93        Self {
94            kind: ParseErrorKind::UnexpectedAtom,
95            location: Some((0, 4)),
96            source: Some(
97                anyhow!("expected one of {expected}, got {atom_type}").into_boxed_dyn_error(),
98            ),
99        }
100    }
101
102    fn new_unexpected_atom(atom_type: FourCC, expected: FourCC) -> Self {
103        let expected = FourCC::from(*expected);
104        Self {
105            kind: ParseErrorKind::UnexpectedAtom,
106            location: Some((0, 4)),
107            source: Some(anyhow!("expected {expected}, got {atom_type}").into_boxed_dyn_error()),
108        }
109    }
110
111    pub(crate) fn from_winnow(
112        error: winnow::error::ParseError<
113            winnow::LocatingSlice<&winnow::Bytes>,
114            winnow::error::ContextError,
115        >,
116    ) -> Self {
117        use winnow::error::StrContext;
118        let mut ctx_iter = error.inner().context().peekable();
119        let mut ctx_tree = Vec::with_capacity(ctx_iter.size_hint().0);
120        while let Some(ctx) = ctx_iter.next() {
121            eprintln!("ctx: {ctx:?}");
122            match ctx {
123                StrContext::Expected(exp) => {
124                    let mut label = None;
125                    if matches!(ctx_iter.peek(), Some(StrContext::Label(_))) {
126                        label = Some(ctx_iter.next().unwrap().to_string());
127                    }
128                    ctx_tree.push(format!(
129                        "{}({exp})",
130                        label.map(|label| label.to_string()).unwrap_or_default()
131                    ));
132                }
133                StrContext::Label(label) => {
134                    ctx_tree.push(label.to_string());
135                }
136                _ => {}
137            }
138        }
139        ctx_tree.reverse();
140
141        Self {
142            kind: crate::parser::ParseErrorKind::AtomParsing,
143            location: Some((error.offset(), 0)),
144            source: match ctx_tree {
145                ctx if ctx.is_empty() => None,
146                ctx => Some(anyhow::format_err!("{}", ctx.join(" -> ")).into_boxed_dyn_error()),
147            },
148        }
149    }
150}
151
152impl
153    From<
154        winnow::error::ParseError<
155            winnow::LocatingSlice<&winnow::Bytes>,
156            winnow::error::ContextError,
157        >,
158    > for ParseError
159{
160    fn from(
161        value: winnow::error::ParseError<
162            winnow::LocatingSlice<&winnow::Bytes>,
163            winnow::error::ContextError,
164        >,
165    ) -> Self {
166        ParseError::from_winnow(value)
167    }
168}
169
/// Parses the metadata atoms of an mp4 file from a reader.
///
/// `C` selects the read capability of the underlying reader and defaults to [`NonSeekable`].
pub struct Parser<R, C: ReadCapability = NonSeekable> {
    /// Reader the atoms are read from.
    reader: Mp4Reader<R, C>,
    /// Header of the `mdat` atom, set once metadata parsing has reached it.
    mdat: Option<AtomHeader>,
}
174
175impl<R: AsyncRead + AsyncSeek + Unpin + Send> Parser<R, Seekable> {
176    pub fn new_seekable(reader: R) -> Self {
177        Parser {
178            reader: Mp4Reader::<R, Seekable>::new(reader),
179            mdat: None,
180        }
181    }
182
183    /// parses metadata atoms, both before and after mdat if moov isn't found before
184    pub async fn parse_metadata(mut self) -> Result<MdatParser<R, Seekable>, ParseError> {
185        let mut atoms = self.parse_metadata_inner(None).await?;
186        let mdat = match self.mdat.take() {
187            Some(mdat) if !atoms.iter().any(|a| a.header.atom_type == MOOV) => {
188                // moov is likely after mdat, so skip to the end of the mdat atom and parse any atoms there
189                self.reader
190                    .seek(SeekFrom::Current(mdat.data_size as i64))
191                    .await?;
192                let end_atoms = self.parse_metadata_inner(None).await?;
193                atoms.extend(end_atoms);
194                // and then return to where we were
195                self.reader
196                    .seek(SeekFrom::Start((mdat.offset + mdat.header_size) as u64))
197                    .await?;
198                Some(mdat)
199            }
200            mdat => mdat,
201        };
202        Ok(MdatParser::new(self.reader, Metadata::new(atoms), mdat))
203    }
204}
205
206impl<R: AsyncRead + Unpin + Send> Parser<R, NonSeekable> {
207    pub fn new(reader: R) -> Self {
208        Parser {
209            reader: Mp4Reader::<R, NonSeekable>::new(reader),
210            mdat: None,
211        }
212    }
213
214    /// parses metadata atoms until mdat found
215    pub async fn parse_metadata(mut self) -> Result<MdatParser<R, NonSeekable>, ParseError> {
216        let atoms = self.parse_metadata_inner(None).await?;
217        Ok(MdatParser::new(
218            self.reader,
219            Metadata::new(atoms),
220            self.mdat,
221        ))
222    }
223}
224
225impl<R: AsyncRead + Unpin + Send, C: ReadCapability> Parser<R, C> {
226    async fn parse_metadata_inner(
227        &mut self,
228        length_limit: Option<usize>,
229    ) -> Result<Vec<Atom>, ParseError> {
230        let start_offset = self.reader.current_offset;
231
232        let mut atoms = Vec::new();
233
234        loop {
235            // ensure we're respecting container bounds
236            if length_limit.is_some_and(|limit| self.reader.current_offset - start_offset >= limit)
237            {
238                break;
239            }
240
241            let header = match self.parse_next_atom().await {
242                Ok(parsed_atom) => Ok(parsed_atom),
243                Err(err) => {
244                    if matches!(
245                        err.kind,
246                        ParseErrorKind::Eof | ParseErrorKind::InvalidHeader
247                    ) {
248                        // end of stream, this means there's no mdat atom
249                        // TODO: rewrite the tests to always include an mdat atom so we can get rid of this check
250                        break;
251                    }
252                    Err(err)
253                }
254            }?;
255
256            // only parse as far as the mdat atom
257            if header.atom_type == MDAT {
258                self.mdat = Some(header);
259                break;
260            }
261
262            if is_container_atom(header.atom_type) {
263                // META containers have additional header data
264                let (size, data) = if header.atom_type == META {
265                    // Handle META version and flags as RawData
266                    let version_flags = self.reader.read_data(META_VERSION_FLAGS_SIZE).await?;
267                    (
268                        header.data_size - META_VERSION_FLAGS_SIZE,
269                        Some(AtomData::RawData(RawData::new(
270                            FourCC(*META),
271                            version_flags,
272                        ))),
273                    )
274                } else {
275                    (header.data_size, None)
276                };
277
278                let container_atom = Atom {
279                    header,
280                    data,
281                    children: Box::pin(self.parse_metadata_inner(Some(size))).await?,
282                };
283
284                atoms.push(container_atom);
285            } else {
286                let atom_data = self.parse_atom_data(&header).await?;
287                let atom = Atom {
288                    header,
289                    data: Some(atom_data),
290                    children: Vec::new(),
291                };
292                atoms.push(atom);
293            }
294        }
295
296        Ok(atoms)
297    }
298
299    async fn parse_next_atom(&mut self) -> Result<AtomHeader, ParseError> {
300        let atom_offset = self.reader.current_offset as u64;
301
302        // Try to read the atom header (size and type)
303        let mut header = [0u8; 8];
304        self.reader.read_exact(&mut header).await?;
305
306        let size = u64::from(u32::from_be_bytes([
307            header[0], header[1], header[2], header[3],
308        ]));
309        let atom_type: [u8; 4] = header[4..8].try_into().unwrap();
310
311        // Handle extended size (64-bit) if needed
312        let (header_size, data_size) = if size == 1 {
313            // Extended size format
314            let mut extended_size = [0u8; 8];
315            self.reader.read_exact(&mut extended_size).await?;
316            let full_size = u64::from_be_bytes(extended_size);
317            if full_size < 16 {
318                return Err(ParseError {
319                    kind: ParseErrorKind::InvalidSize,
320                    location: Some((atom_offset as usize, 16)),
321                    source: None,
322                });
323            }
324            (16u64, full_size - 16)
325        } else if size == 0 {
326            // Size extends to end of file - not supported in this context
327            return Err(ParseError {
328                kind: ParseErrorKind::InvalidSize,
329                location: Some((atom_offset as usize, 8)),
330                source: None,
331            });
332        } else {
333            if size < 8 {
334                return Err(ParseError {
335                    kind: ParseErrorKind::InvalidSize,
336                    location: Some((atom_offset as usize, 8)),
337                    source: None,
338                });
339            }
340            (8u64, size - 8)
341        };
342
343        let atom_type = FourCC(atom_type);
344
345        Ok(AtomHeader {
346            atom_type,
347            offset: atom_offset as usize,
348            header_size: header_size as usize,
349            data_size: data_size as usize,
350        })
351    }
352
353    async fn parse_atom_data(&mut self, header: &AtomHeader) -> Result<AtomData, ParseError> {
354        let content_data = self.reader.read_data(header.data_size).await?;
355        let input = stream(&content_data);
356
357        AtomData::parse_atom_data(header.atom_type, &input).map_err(|err| {
358            let (header_offset, _) = header.location();
359            let content_offset = header_offset + header.header_size;
360            ParseError {
361                kind: ParseErrorKind::AtomParsing,
362                location: Some(err.location.map_or_else(
363                    || (content_offset, 0),
364                    |(offset, size)| (content_offset + offset, size),
365                )),
366                source: Some(anyhow::Error::from(err).context(header.atom_type).into()),
367            }
368        })
369    }
370}
371
/// Result of metadata parsing: the parsed [`Metadata`] plus the reader and `mdat` header needed to
/// go on and read chunk data.
///
/// Dereferences to [`Metadata`].
pub struct MdatParser<R, C: ReadCapability> {
    /// Parsed metadata atoms.
    meta: Metadata,
    /// Reader used to parse the metadata; `None` once it has been taken (or on a clone).
    reader: Option<Mp4Reader<R, C>>,
    /// Header of the `mdat` atom; `None` if none was found or once it has been taken.
    mdat: Option<AtomHeader>,
}
377
378impl<R, C: ReadCapability> Clone for MdatParser<R, C> {
379    fn clone(&self) -> Self {
380        Self {
381            meta: self.meta.clone(),
382            reader: None,
383            mdat: None,
384        }
385    }
386}
387
/// Gives shared access to the parsed [`Metadata`] directly through the parser.
impl<R, C: ReadCapability> Deref for MdatParser<R, C> {
    type Target = Metadata;

    fn deref(&self) -> &Self::Target {
        &self.meta
    }
}
395
/// Gives mutable access to the parsed [`Metadata`] directly through the parser.
impl<R, C: ReadCapability> DerefMut for MdatParser<R, C> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.meta
    }
}
401
402impl<R, C: ReadCapability> MdatParser<R, C> {
403    fn new(reader: Mp4Reader<R, C>, meta: Metadata, mdat: Option<AtomHeader>) -> Self {
404        Self {
405            reader: Some(reader),
406            meta,
407            mdat,
408        }
409    }
410
411    /// Discards the reader and returns just the metadata
412    pub fn into_metadata(self) -> Metadata {
413        self.meta
414    }
415
416    pub fn into_inner(self) -> (Metadata, Option<Mp4Reader<R, C>>) {
417        (self.meta, self.reader)
418    }
419
420    pub fn mdat_header(&self) -> Option<&AtomHeader> {
421        self.mdat.as_ref()
422    }
423
424    /// Parse chunks along with related metadata
425    pub fn chunks(&mut self) -> Result<ChunkParser<'_, R, C>, ParseError> {
426        let _ = self.mdat.take().ok_or_else(|| ParseError {
427            kind: ParseErrorKind::InsufficientData,
428            location: None,
429            source: Some(
430                anyhow!("mdat atom is missing or has already been consumed").into_boxed_dyn_error(),
431            ),
432        })?;
433
434        let reader = self.reader.take().ok_or_else(|| ParseError {
435            kind: ParseErrorKind::Io,
436            location: None,
437            source: Some(anyhow!("reader has already been consumed").into_boxed_dyn_error()),
438        })?;
439
440        let mut parser = ChunkParser {
441            reader,
442            tracks: Vec::new(),
443            chunk_offsets: Vec::new(),
444            sample_to_chunk: Vec::new(),
445            sample_sizes: Vec::new(),
446            time_to_sample: Vec::new(),
447            chunk_info: Vec::new(),
448        };
449
450        for trak in self.meta.moov().into_tracks_iter() {
451            if let Some((trak, stco, stsc, stsz, stts)) = (|| {
452                let stbl = trak.media().media_information().sample_table();
453                let chunk_offset = stbl.chunk_offset()?;
454                let sample_entries = stbl.sample_to_chunk()?;
455                let sample_sizes = stbl.sample_size()?;
456                let time_to_sample = stbl.time_to_sample()?;
457                Some((
458                    trak,
459                    chunk_offset.chunk_offsets.inner(),
460                    sample_entries,
461                    sample_sizes,
462                    time_to_sample,
463                ))
464            })() {
465                let mut builder = ChunkOffsetBuilder::with_capacity(1);
466                builder.add_track(stsc, stsz);
467                parser.tracks.push(trak);
468                parser.chunk_offsets.push(stco);
469                parser.sample_to_chunk.push(stsc.entries.inner());
470                parser.sample_sizes.push(stsz.entry_sizes.inner());
471                parser.time_to_sample.push(stts.entries.inner());
472                parser
473                    .chunk_info
474                    .push(builder.build_chunk_info().collect::<VecDeque<_>>());
475            }
476        }
477
478        Ok(parser)
479    }
480}
481
/// The metadata atoms of an mp4 file, i.e. the top-level atoms parsed outside of `mdat`.
#[derive(Clone)]
pub struct Metadata {
    /// Top-level atoms; nested atoms live in each atom's `children`.
    atoms: Vec<Atom>,
}
486
487impl Metadata {
488    pub(crate) fn new(atoms: Vec<Atom>) -> Self {
489        Self { atoms }
490    }
491
492    /// Transforms into (reader, `current_offset`, atoms)
493    pub fn into_atoms(self) -> Vec<Atom> {
494        self.atoms
495    }
496
497    /// Iterates over the metadata atoms
498    pub fn atoms_iter(&self) -> impl Iterator<Item = &Atom> {
499        self.atoms.iter()
500    }
501
502    /// Mutably iterates over the metadata atoms
503    pub fn atoms_iter_mut(&mut self) -> impl Iterator<Item = &mut Atom> {
504        self.atoms.iter_mut()
505    }
506
507    /// Retains only the metadata atoms that satisfy the predicate
508    /// (applies to top level and nested atoms)
509    pub fn atoms_flat_retain_mut<P>(&mut self, mut pred: P)
510    where
511        P: FnMut(&mut Atom) -> bool,
512    {
513        self.atoms.retain_mut(|a| pred(a));
514        for atom in &mut self.atoms {
515            atom.children_flat_retain_mut(|a| pred(a));
516        }
517    }
518
519    fn atom_position(&self, typ: FourCC) -> Option<usize> {
520        self.atoms.iter().position(|a| a.header.atom_type == typ)
521    }
522
523    fn find_atom(&self, typ: FourCC) -> AtomRef<'_> {
524        AtomRef(self.atoms.iter().find(|a| a.header.atom_type == typ))
525    }
526
527    pub fn ftyp(&mut self) -> FtypAtomRef<'_> {
528        FtypAtomRef(self.find_atom(FTYP))
529    }
530
531    pub fn ftyp_mut(&mut self) -> FtypAtomRefMut<'_> {
532        if let Some(index) = self.atom_position(FTYP) {
533            FtypAtomRefMut(AtomRefMut(&mut self.atoms[index]))
534        } else {
535            let index = 0;
536            self.atoms.insert(
537                index,
538                Atom::builder()
539                    .header(AtomHeader::new(*FTYP))
540                    .data(FileTypeAtom::default())
541                    .build(),
542            );
543            FtypAtomRefMut(AtomRefMut(&mut self.atoms[index]))
544        }
545    }
546
547    pub fn moov(&self) -> MoovAtomRef<'_> {
548        MoovAtomRef(self.find_atom(MOOV))
549    }
550
551    pub fn moov_mut(&mut self) -> MoovAtomRefMut<'_> {
552        if let Some(index) = self.atom_position(MOOV) {
553            MoovAtomRefMut(AtomRefMut(&mut self.atoms[index]))
554        } else {
555            let index = self.atom_position(FTYP).map(|i| i + 1).unwrap_or_default();
556            self.atoms.insert(
557                index,
558                Atom::builder().header(AtomHeader::new(*MOOV)).build(),
559            );
560            MoovAtomRefMut(AtomRefMut(&mut self.atoms[index]))
561        }
562    }
563
564    /// Returns the sum of all metadata atom sizes in bytes
565    pub fn metadata_size(&self) -> usize {
566        self.atoms_iter()
567            .cloned()
568            .flat_map(SerializeAtom::into_bytes)
569            .collect::<Vec<_>>()
570            .len()
571    }
572
573    /// Returns the sum of all track sizes in bytes
574    pub fn mdat_size(&self) -> usize {
575        self.moov()
576            .into_tracks_iter()
577            .map(|trak| trak.size())
578            .sum::<usize>()
579    }
580
581    /// Returns the sum of `metadata_size` and `mdat_size`
582    pub fn file_size(&self) -> usize {
583        self.metadata_size() + self.mdat_size()
584    }
585
586    /// Updates chunk offsets for each track
587    ///
588    /// Call this before writing metadata to disk to avoid corruption
589    pub fn update_chunk_offsets(
590        &mut self,
591    ) -> Result<chunk_offset_builder::BuildMetadata, UpdateChunkOffsetError> {
592        // mdat is located directly after metadata atoms, so metadata size + 8 bytes for the mdat header
593        let mdat_content_offset = self.metadata_size() + 8;
594
595        let (chunk_offsets, original_chunk_offsets) = self.moov().into_tracks_iter().try_fold(
596            (ChunkOffsetBuilder::new(), Vec::new()),
597            |(mut builder, mut chunk_offsets), trak| {
598                let stbl = trak.media().media_information().sample_table();
599                let stsz = stbl
600                    .sample_size()
601                    .ok_or(UpdateChunkOffsetError::SampleSizeAtomNotFound)?;
602                let stsc = stbl
603                    .sample_to_chunk()
604                    .ok_or(UpdateChunkOffsetError::SampleToChunkAtomNotFound)?;
605                let stco = stbl
606                    .chunk_offset()
607                    .ok_or(UpdateChunkOffsetError::ChunkOffsetAtomNotFound)?;
608                builder.add_track(stsc, stsz);
609                chunk_offsets.push(stco.chunk_offsets.inner());
610                Ok((builder, chunk_offsets))
611            },
612        )?;
613
614        let (mut chunk_offsets, build_meta) = chunk_offsets
615            .build_chunk_offsets_ordered(original_chunk_offsets, mdat_content_offset as u64);
616
617        for (track_idx, trak) in self.moov_mut().tracks().enumerate() {
618            let mut stbl = trak
619                .into_media()
620                .and_then(MdiaAtomRefMut::into_media_information)
621                .and_then(MinfAtomRefMut::into_sample_table)
622                .ok_or(UpdateChunkOffsetError::SampleTableNotFound)?;
623            let stco = stbl.chunk_offset();
624            let chunk_offsets = std::mem::take(&mut chunk_offsets[track_idx]);
625            stco.chunk_offsets = ChunkOffsets::from(chunk_offsets);
626        }
627
628        Ok(build_meta)
629    }
630}
631
/// Reasons [`Metadata::update_chunk_offsets`] can fail: an atom it needs is missing from a track.
#[derive(Debug, Error)]
pub enum UpdateChunkOffsetError {
    /// The track has no sample table (or no media / media information atom leading to it).
    #[error("sample table atom not found")]
    SampleTableNotFound,
    /// The track's sample table has no sample size atom.
    #[error("sample size atom not found")]
    SampleSizeAtomNotFound,
    /// The track's sample table has no sample-to-chunk atom.
    #[error("sample to chunk atom not found")]
    SampleToChunkAtomNotFound,
    /// The track's sample table has no chunk offset atom.
    #[error("chunk offset atom not found")]
    ChunkOffsetAtomNotFound,
}
643
/// Reads chunks from the reader in file-offset order, across all tracks.
///
/// All per-track vectors are filled together and share the same indexing (one entry per track).
pub struct ChunkParser<'a, R, C: ReadCapability> {
    /// Reader the chunk data is read from
    reader: Mp4Reader<R, C>,
    /// Reference to each track's metadata
    tracks: Vec<TrakAtomRef<'a>>,
    /// Chunk offsets for each track
    chunk_offsets: Vec<&'a [u64]>,
    /// [`SampleToChunkEntry`]s for each track
    sample_to_chunk: Vec<&'a [SampleToChunkEntry]>,
    /// Sample sizes for each track
    sample_sizes: Vec<&'a [u32]>,
    /// [`TimeToSampleEntry`]s for each track
    time_to_sample: Vec<&'a [TimeToSampleEntry]>,
    /// [`ChunkInfo`]s for each track; the front of each queue is that track's next unread chunk
    chunk_info: Vec<VecDeque<ChunkInfo>>,
}
659
660impl<'a, R: AsyncRead + Unpin + Send, C: ReadCapability> ChunkParser<'a, R, C> {
661    pub async fn read_next_chunk(&mut self) -> Result<Option<Chunk<'a>>, ParseError> {
662        let current_offset = self.reader.current_offset as u64;
663
664        let mut next_offset = None;
665        let mut next_track_idx = 0;
666        let mut next_chunk_idx = 0;
667
668        for track_idx in 0..self.tracks.len() {
669            let chunk_info = self.chunk_info[track_idx].front();
670            if let Some(chunk_info) = chunk_info {
671                let chunk_idx = chunk_info.chunk_number as usize - 1;
672                let offset = self.chunk_offsets[track_idx][chunk_idx];
673                if offset >= current_offset
674                    && next_offset.is_none_or(|next_offset| offset < next_offset)
675                {
676                    next_offset = Some(offset);
677                    next_track_idx = track_idx;
678                    next_chunk_idx = chunk_idx;
679                }
680            }
681        }
682
683        if let Some(offset) = next_offset {
684            // Skip to the next chunk
685            let bytes_to_skip = offset - current_offset;
686            if bytes_to_skip > 0 {
687                self.reader.read_data(bytes_to_skip as usize).await?;
688            }
689
690            let chunk_info = self.chunk_info[next_track_idx].pop_front().unwrap();
691
692            // Read the chunk
693            self.read_chunk(next_track_idx, next_chunk_idx, chunk_info)
694                .await
695                .map(Some)
696        } else {
697            // No more chunks
698            Ok(None)
699        }
700    }
701
702    async fn read_chunk(
703        &mut self,
704        track_idx: usize,
705        chunk_idx: usize,
706        chunk_info: ChunkInfo,
707    ) -> Result<Chunk<'a>, ParseError> {
708        let time_to_sample = self.time_to_sample[track_idx];
709
710        let sample_start_idx =
711            chunk_info
712                .sample_indices
713                .first()
714                .copied()
715                .ok_or_else(|| ParseError {
716                    kind: ParseErrorKind::InsufficientData,
717                    location: None,
718                    source: Some(
719                        anyhow!("no samples indicies in chunk at index {chunk_idx}")
720                            .into_boxed_dyn_error(),
721                    ),
722                })?;
723
724        // Calculate total chunk size
725        let chunk_size = chunk_info.chunk_size;
726        let chunk_sample_sizes = chunk_info.sample_sizes.clone();
727
728        // Read the chunk data
729        let data = self.reader.read_data(chunk_size as usize).await?;
730
731        // Get the sample durations slice for this chunk
732        let sample_durations: Vec<u32> = time_to_sample
733            .iter()
734            .flat_map(|entry| {
735                std::iter::repeat_n(entry.sample_duration, entry.sample_count as usize)
736            })
737            .skip(sample_start_idx)
738            .take(chunk_sample_sizes.len())
739            .collect();
740        assert_eq!(chunk_sample_sizes.len(), sample_durations.len());
741
742        // Create the chunk
743        Ok(Chunk {
744            trak_idx: track_idx,
745            trak: self.tracks[track_idx],
746            sample_sizes: chunk_sample_sizes,
747            sample_durations,
748            data,
749        })
750    }
751}
752
753impl fmt::Debug for Chunk<'_> {
754    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
755        f.debug_struct("Chunk")
756            .field("trak", &self.trak)
757            .field(
758                "sample_sizes",
759                &DebugEllipsis(Some(self.sample_sizes.len())),
760            )
761            .field(
762                "time_to_sample",
763                &DebugEllipsis(Some(self.sample_durations.len())),
764            )
765            .field("data", &DebugEllipsis(Some(self.data.len())))
766            .finish()
767    }
768}
769
/// One chunk of sample data read from the file, together with the metadata needed to split it
/// into samples.
pub struct Chunk<'a> {
    /// Index of the trak in the file
    pub trak_idx: usize,
    /// Reference to the track the sample is in
    pub trak: TrakAtomRef<'a>,
    /// Sizes of the samples within this chunk, in order
    pub sample_sizes: Vec<u32>,
    /// Timescale duration of each sample, indexed relative to `sample_sizes`
    pub sample_durations: Vec<u32>,
    /// Bytes in the chunk
    pub data: Vec<u8>,
}
782
783impl<'a> Chunk<'a> {
784    pub fn samples(&'a self) -> impl Iterator<Item = Sample<'a>> {
785        let timescale = self
786            .trak
787            .media()
788            .header()
789            .map(|h| h.timescale)
790            .expect("trak.mdia.mvhd is missing");
791        self.sample_sizes
792            .iter()
793            .zip(self.sample_durations.iter())
794            .scan(0usize, move |offset, (size, duration)| {
795                let sample_offset = *offset;
796                *offset += *size as usize;
797                let data = &self.data[sample_offset..sample_offset + (*size as usize)];
798                Some(Sample {
799                    size: *size,
800                    duration: *duration,
801                    timescale,
802                    data,
803                })
804            })
805    }
806}
807
/// A single sample borrowed from a [`Chunk`].
pub struct Sample<'a> {
    /// Size of the sample in bytes
    pub size: u32,
    /// Duration of the sample, in units of `timescale`
    pub duration: u32,
    /// Timescale taken from the track's media header
    pub timescale: u32,
    /// Bytes of the sample
    pub data: &'a [u8],
}
814
815impl fmt::Debug for Sample<'_> {
816    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
817        f.debug_struct("Sample")
818            .field("size", &self.size)
819            .field("duration", &self.duration)
820            .field("timescale", &self.timescale)
821            .finish_non_exhaustive()
822    }
823}