Skip to main content

mp4_edit/
parser.rs

1/*!
2 * This mod is concerned with parsing mp4 files.
3 */
4
5use anyhow::anyhow;
6use derive_more::Display;
7use futures_io::{AsyncRead, AsyncSeek};
8use std::collections::VecDeque;
9use std::fmt::{self, Debug};
10use std::io::SeekFrom;
11use std::ops::{Deref, DerefMut};
12use thiserror::Error;
13
14use crate::atom::util::parser::stream;
15use crate::chunk_offset_builder;
16pub use crate::reader::{Mp4Reader, NonSeekable, ReadCapability, Seekable};
17use crate::{
18    atom::{
19        atom_ref::{AtomRef, AtomRefMut},
20        container::{is_container_atom, META, META_VERSION_FLAGS_SIZE, MOOV},
21        ftyp::{FileTypeAtom, FtypAtomRef, FtypAtomRefMut, FTYP},
22        stco_co64::ChunkOffsets,
23        stsc::SampleToChunkEntry,
24        stts::TimeToSampleEntry,
25        util::DebugEllipsis,
26        AtomHeader, FourCC, MdiaAtomRefMut, MinfAtomRefMut, MoovAtomRef, MoovAtomRefMut, RawData,
27        TrakAtomRef,
28    },
29    chunk_offset_builder::{ChunkInfo, ChunkOffsetBuilder},
30    writer::SerializeAtom,
31    Atom, AtomData,
32};
33
/// Four-character code of the `mdat` atom.
///
/// The metadata parser in this module stops reading atoms as soon as it sees a header of this type.
pub const MDAT: &[u8; 4] = b"mdat";
35
/// This trait is implemented on [`AtomData`] and the inner value of each of its variants.
///
/// Note that the [`AtomHeader`] has already been consumed; this trait is concerned with parsing
/// the data that follows it.
pub(crate) trait ParseAtomData: Sized {
    /// Parses `input` (the atom's data, without its header) as an atom of type `atom_type`.
    fn parse_atom_data(atom_type: FourCC, input: &[u8]) -> Result<Self, ParseError>;
}
42
/// Error produced while parsing an mp4 file.
///
/// The `Display` output is the [`ParseErrorKind`] text, followed by
/// `" at offset {offset} with length {length}"` when a location is known.
#[derive(Debug, Error)]
#[error(
    "{kind}{}",
    self.location.map(|(offset, length)|
        format!(" at offset {offset} with length {length}")).unwrap_or_default()
)]
pub struct ParseError {
    /// The kind of error that occurred during parsing.
    pub(crate) kind: ParseErrorKind,
    /// location is the (offset, length) of the input data related to the error
    pub(crate) location: Option<(usize, usize)>,
    /// The source error that caused this error.
    #[source]
    pub(crate) source: Option<Box<dyn std::error::Error + Send + Sync>>,
}
58
/// Category of a [`ParseError`].
///
/// The `Display` text of each variant is the string given in its `#[display(..)]` attribute.
#[derive(Debug, Display)]
#[non_exhaustive]
pub enum ParseErrorKind {
    /// Used in this module when the reader has already been taken out of an `MdatParser`.
    #[display("I/O error")]
    Io,
    /// Treated by the metadata parser as the end of the stream.
    #[display("EOF error")]
    Eof,
    /// Treated by the metadata parser as the end of the stream.
    #[display("Invalid atom header")]
    InvalidHeader,
    /// The size stored in an atom header is not usable (e.g. smaller than the header itself).
    #[display("Invalid atom size")]
    InvalidSize,
    #[display("Unsupported atom type")]
    UnsupportedAtom,
    /// An atom of a different type than expected was encountered.
    #[display("Unexpected atom type")]
    UnexpectedAtom,
    /// The data of an atom could not be parsed.
    #[display("Atom parsing failed")]
    AtomParsing,
    /// Required data (e.g. the mdat atom or a chunk's sample indices) is not available.
    #[display("Insufficient data")]
    InsufficientData,
    #[display("moov atom is missing")]
    MissingMoov,
}
81
82impl ParseError {
83    pub(crate) fn new_unexpected_atom_oneof(atom_type: FourCC, expected: Vec<FourCC>) -> Self {
84        if expected.len() == 1 {
85            return Self::new_unexpected_atom(atom_type, expected[0]);
86        }
87
88        let expected = expected
89            .into_iter()
90            .map(|expected| expected.to_string())
91            .collect::<Vec<_>>()
92            .join(", ");
93        Self {
94            kind: ParseErrorKind::UnexpectedAtom,
95            location: Some((0, 4)),
96            source: Some(
97                anyhow!("expected one of {expected}, got {atom_type}").into_boxed_dyn_error(),
98            ),
99        }
100    }
101
102    fn new_unexpected_atom(atom_type: FourCC, expected: FourCC) -> Self {
103        let expected = FourCC::from(*expected);
104        Self {
105            kind: ParseErrorKind::UnexpectedAtom,
106            location: Some((0, 4)),
107            source: Some(anyhow!("expected {expected}, got {atom_type}").into_boxed_dyn_error()),
108        }
109    }
110
111    pub(crate) fn from_winnow(
112        error: winnow::error::ParseError<
113            winnow::LocatingSlice<&winnow::Bytes>,
114            winnow::error::ContextError,
115        >,
116    ) -> Self {
117        use winnow::error::StrContext;
118        let mut ctx_iter = error.inner().context().peekable();
119        let mut ctx_tree = Vec::with_capacity(ctx_iter.size_hint().0);
120        while let Some(ctx) = ctx_iter.next() {
121            eprintln!("ctx: {ctx:?}");
122            match ctx {
123                StrContext::Expected(exp) => {
124                    let mut label = None;
125                    if matches!(ctx_iter.peek(), Some(StrContext::Label(_))) {
126                        label = Some(ctx_iter.next().unwrap().to_string());
127                    }
128                    ctx_tree.push(format!(
129                        "{}({exp})",
130                        label.map(|label| label.to_string()).unwrap_or_default()
131                    ));
132                }
133                StrContext::Label(label) => {
134                    ctx_tree.push(label.to_string());
135                }
136                _ => {}
137            }
138        }
139        ctx_tree.reverse();
140
141        Self {
142            kind: crate::parser::ParseErrorKind::AtomParsing,
143            location: Some((error.offset(), 0)),
144            source: match ctx_tree {
145                ctx if ctx.is_empty() => None,
146                ctx => Some(anyhow::format_err!("{}", ctx.join(" -> ")).into_boxed_dyn_error()),
147            },
148        }
149    }
150}
151
152impl
153    From<
154        winnow::error::ParseError<
155            winnow::LocatingSlice<&winnow::Bytes>,
156            winnow::error::ContextError,
157        >,
158    > for ParseError
159{
160    fn from(
161        value: winnow::error::ParseError<
162            winnow::LocatingSlice<&winnow::Bytes>,
163            winnow::error::ContextError,
164        >,
165    ) -> Self {
166        ParseError::from_winnow(value)
167    }
168}
169
/// Parser for the metadata atoms of an mp4 file.
///
/// `C` selects the read capability of the underlying reader and defaults to [`NonSeekable`].
pub struct Parser<R, C: ReadCapability = NonSeekable> {
    /// Reader the atoms are read from.
    reader: Mp4Reader<R, C>,
    /// Header of the mdat atom, set once the metadata parser has encountered it.
    mdat: Option<AtomHeader>,
}
174
175impl<R: AsyncRead + AsyncSeek + Unpin + Send> Parser<R, Seekable> {
176    pub fn new_seekable(reader: R) -> Self {
177        Parser {
178            reader: Mp4Reader::<R, Seekable>::new(reader),
179            mdat: None,
180        }
181    }
182
183    /// parses metadata atoms, both before and after mdat if moov isn't found before
184    pub async fn parse_metadata(mut self) -> Result<MdatParser<R, Seekable>, ParseError> {
185        let mut atoms = self.parse_metadata_inner(None).await?;
186        let mdat = match self.mdat.take() {
187            Some(mdat) if !atoms.iter().any(|a| a.header.atom_type == MOOV) => {
188                // moov is likely after mdat, so skip to the end of the mdat atom and parse any atoms there
189                self.reader
190                    .seek(SeekFrom::Current(mdat.data_size as i64))
191                    .await?;
192                let end_atoms = self.parse_metadata_inner(None).await?;
193                atoms.extend(end_atoms);
194                // and then return to where we were
195                self.reader
196                    .seek(SeekFrom::Start((mdat.offset + mdat.header_size) as u64))
197                    .await?;
198                Some(mdat)
199            }
200            mdat => mdat,
201        };
202        Ok(MdatParser::new(self.reader, Metadata::new(atoms), mdat))
203    }
204}
205
206impl<R: AsyncRead + Unpin + Send> Parser<R, NonSeekable> {
207    pub fn new(reader: R) -> Self {
208        Parser {
209            reader: Mp4Reader::<R, NonSeekable>::new(reader),
210            mdat: None,
211        }
212    }
213
214    /// parses metadata atoms until mdat found
215    pub async fn parse_metadata(mut self) -> Result<MdatParser<R, NonSeekable>, ParseError> {
216        let atoms = self.parse_metadata_inner(None).await?;
217        Ok(MdatParser::new(
218            self.reader,
219            Metadata::new(atoms),
220            self.mdat,
221        ))
222    }
223}
224
225impl<R: AsyncRead + Unpin + Send, C: ReadCapability> Parser<R, C> {
226    async fn parse_metadata_inner(
227        &mut self,
228        length_limit: Option<usize>,
229    ) -> Result<Vec<Atom>, ParseError> {
230        let start_offset = self.reader.current_offset;
231
232        let mut atoms = Vec::new();
233
234        loop {
235            // ensure we're respecting container bounds
236            if length_limit.is_some_and(|limit| self.reader.current_offset - start_offset >= limit)
237            {
238                break;
239            }
240
241            let header = match self.parse_next_atom().await {
242                Ok(parsed_atom) => Ok(parsed_atom),
243                Err(err) => {
244                    if matches!(
245                        err.kind,
246                        ParseErrorKind::Eof | ParseErrorKind::InvalidHeader
247                    ) {
248                        // end of stream, this means there's no mdat atom
249                        // TODO: rewrite the tests to always include an mdat atom so we can get rid of this check
250                        break;
251                    }
252                    Err(err)
253                }
254            }?;
255
256            // only parse as far as the mdat atom
257            if header.atom_type == MDAT {
258                self.mdat = Some(header);
259                break;
260            }
261
262            if is_container_atom(header.atom_type) {
263                // META containers have additional header data
264                let (size, data) = if header.atom_type == META {
265                    // Handle META version and flags as RawData
266                    let version_flags = self.reader.read_data(META_VERSION_FLAGS_SIZE).await?;
267                    (
268                        header.data_size - META_VERSION_FLAGS_SIZE,
269                        Some(AtomData::RawData(RawData::new(
270                            FourCC(*META),
271                            version_flags,
272                        ))),
273                    )
274                } else {
275                    (header.data_size, None)
276                };
277
278                let container_atom = Atom {
279                    header,
280                    data,
281                    children: Box::pin(self.parse_metadata_inner(Some(size))).await?,
282                };
283
284                atoms.push(container_atom);
285            } else {
286                let atom_data = self.parse_atom_data(&header).await?;
287                let atom = Atom {
288                    header,
289                    data: Some(atom_data),
290                    children: Vec::new(),
291                };
292                atoms.push(atom);
293            }
294        }
295
296        Ok(atoms)
297    }
298
299    async fn parse_next_atom(&mut self) -> Result<AtomHeader, ParseError> {
300        let atom_offset = self.reader.current_offset as u64;
301
302        // Try to read the atom header (size and type)
303        let mut header = [0u8; 8];
304        self.reader.read_exact(&mut header).await?;
305
306        let size = u64::from(u32::from_be_bytes([
307            header[0], header[1], header[2], header[3],
308        ]));
309        let atom_type: [u8; 4] = header[4..8].try_into().unwrap();
310
311        // Handle extended size (64-bit) if needed
312        let (header_size, data_size) = if size == 1 {
313            // Extended size format
314            let mut extended_size = [0u8; 8];
315            self.reader.read_exact(&mut extended_size).await?;
316            let full_size = u64::from_be_bytes(extended_size);
317            if full_size < 16 {
318                return Err(ParseError {
319                    kind: ParseErrorKind::InvalidSize,
320                    location: Some((atom_offset as usize, 16)),
321                    source: None,
322                });
323            }
324            (16u64, full_size - 16)
325        } else if size == 0 {
326            // Size extends to end of file - not supported in this context
327            return Err(ParseError {
328                kind: ParseErrorKind::InvalidSize,
329                location: Some((atom_offset as usize, 8)),
330                source: None,
331            });
332        } else {
333            if size < 8 {
334                return Err(ParseError {
335                    kind: ParseErrorKind::InvalidSize,
336                    location: Some((atom_offset as usize, 8)),
337                    source: None,
338                });
339            }
340            (8u64, size - 8)
341        };
342
343        let atom_type = FourCC(atom_type);
344
345        Ok(AtomHeader {
346            atom_type,
347            offset: atom_offset as usize,
348            header_size: header_size as usize,
349            data_size: data_size as usize,
350        })
351    }
352
353    async fn parse_atom_data(&mut self, header: &AtomHeader) -> Result<AtomData, ParseError> {
354        let content_data = self.reader.read_data(header.data_size).await?;
355        let input = stream(&content_data);
356
357        AtomData::parse_atom_data(header.atom_type, &input).map_err(|err| {
358            let (header_offset, _) = header.location();
359            let content_offset = header_offset + header.header_size;
360            ParseError {
361                kind: ParseErrorKind::AtomParsing,
362                location: Some(err.location.map_or_else(
363                    || (content_offset, 0),
364                    |(offset, size)| (content_offset + offset, size),
365                )),
366                source: Some(anyhow::Error::from(err).context(header.atom_type).into()),
367            }
368        })
369    }
370}
371
/// Result of parsing the metadata atoms: the parsed [`Metadata`] together with the reader and
/// the mdat header (when one was found), which are needed to read the chunk data.
///
/// Dereferences to [`Metadata`].
pub struct MdatParser<R, C: ReadCapability> {
    /// Parsed metadata atoms.
    meta: Metadata,
    /// Reader to read chunk data from; `None` once it has been handed to a `ChunkParser`
    /// (and in clones).
    reader: Option<Mp4Reader<R, C>>,
    /// Header of the mdat atom; `None` when missing or already consumed (and in clones).
    mdat: Option<AtomHeader>,
}
377
378impl<R, C: ReadCapability> Clone for MdatParser<R, C> {
379    fn clone(&self) -> Self {
380        Self {
381            meta: self.meta.clone(),
382            reader: None,
383            mdat: None,
384        }
385    }
386}
387
388impl<R, C: ReadCapability> Deref for MdatParser<R, C> {
389    type Target = Metadata;
390
391    fn deref(&self) -> &Self::Target {
392        &self.meta
393    }
394}
395
396impl<R, C: ReadCapability> DerefMut for MdatParser<R, C> {
397    fn deref_mut(&mut self) -> &mut Self::Target {
398        &mut self.meta
399    }
400}
401
402impl<R, C: ReadCapability> MdatParser<R, C> {
403    fn new(reader: Mp4Reader<R, C>, meta: Metadata, mdat: Option<AtomHeader>) -> Self {
404        Self {
405            reader: Some(reader),
406            meta,
407            mdat,
408        }
409    }
410
411    /// Discards the reader and returns just the metadata
412    pub fn into_metadata(self) -> Metadata {
413        self.meta
414    }
415
416    pub fn mdat_header(&self) -> Option<&AtomHeader> {
417        self.mdat.as_ref()
418    }
419
420    /// Parse chunks along with related metadata
421    pub fn chunks(&mut self) -> Result<ChunkParser<'_, R, C>, ParseError> {
422        let _ = self.mdat.take().ok_or_else(|| ParseError {
423            kind: ParseErrorKind::InsufficientData,
424            location: None,
425            source: Some(
426                anyhow!("mdat atom is missing or has already been consumed").into_boxed_dyn_error(),
427            ),
428        })?;
429
430        let reader = self.reader.take().ok_or_else(|| ParseError {
431            kind: ParseErrorKind::Io,
432            location: None,
433            source: Some(anyhow!("reader has already been consumed").into_boxed_dyn_error()),
434        })?;
435
436        let mut parser = ChunkParser {
437            reader,
438            tracks: Vec::new(),
439            chunk_offsets: Vec::new(),
440            sample_to_chunk: Vec::new(),
441            sample_sizes: Vec::new(),
442            time_to_sample: Vec::new(),
443            chunk_info: Vec::new(),
444        };
445
446        for trak in self.meta.moov().into_tracks_iter() {
447            if let Some((trak, stco, stsc, stsz, stts)) = (|| {
448                let stbl = trak.media().media_information().sample_table();
449                let chunk_offset = stbl.chunk_offset()?;
450                let sample_entries = stbl.sample_to_chunk()?;
451                let sample_sizes = stbl.sample_size()?;
452                let time_to_sample = stbl.time_to_sample()?;
453                Some((
454                    trak,
455                    chunk_offset.chunk_offsets.inner(),
456                    sample_entries,
457                    sample_sizes,
458                    time_to_sample,
459                ))
460            })() {
461                let mut builder = ChunkOffsetBuilder::with_capacity(1);
462                builder.add_track(stsc, stsz);
463                parser.tracks.push(trak);
464                parser.chunk_offsets.push(stco);
465                parser.sample_to_chunk.push(stsc.entries.inner());
466                parser.sample_sizes.push(stsz.entry_sizes.inner());
467                parser.time_to_sample.push(stts.entries.inner());
468                parser
469                    .chunk_info
470                    .push(builder.build_chunk_info().collect::<VecDeque<_>>());
471            }
472        }
473
474        Ok(parser)
475    }
476}
477
/// The metadata atoms of an mp4 file, i.e. the top-level atoms collected by the metadata parser
/// (the mdat atom itself is not part of this list).
#[derive(Clone)]
pub struct Metadata {
    /// Top-level atoms; nested atoms are stored in each atom's `children`.
    atoms: Vec<Atom>,
}
482
483impl Metadata {
484    pub(crate) fn new(atoms: Vec<Atom>) -> Self {
485        Self { atoms }
486    }
487
488    /// Transforms into (reader, `current_offset`, atoms)
489    pub fn into_atoms(self) -> Vec<Atom> {
490        self.atoms
491    }
492
493    /// Iterates over the metadata atoms
494    pub fn atoms_iter(&self) -> impl Iterator<Item = &Atom> {
495        self.atoms.iter()
496    }
497
498    /// Mutably iterates over the metadata atoms
499    pub fn atoms_iter_mut(&mut self) -> impl Iterator<Item = &mut Atom> {
500        self.atoms.iter_mut()
501    }
502
503    /// Retains only the metadata atoms that satisfy the predicate
504    /// (applies to top level and nested atoms)
505    pub fn atoms_flat_retain_mut<P>(&mut self, mut pred: P)
506    where
507        P: FnMut(&mut Atom) -> bool,
508    {
509        self.atoms.retain_mut(|a| pred(a));
510        for atom in &mut self.atoms {
511            atom.children_flat_retain_mut(|a| pred(a));
512        }
513    }
514
515    fn atom_position(&self, typ: FourCC) -> Option<usize> {
516        self.atoms.iter().position(|a| a.header.atom_type == typ)
517    }
518
519    fn find_atom(&self, typ: FourCC) -> AtomRef<'_> {
520        AtomRef(self.atoms.iter().find(|a| a.header.atom_type == typ))
521    }
522
523    pub fn ftyp(&mut self) -> FtypAtomRef<'_> {
524        FtypAtomRef(self.find_atom(FTYP))
525    }
526
527    pub fn ftyp_mut(&mut self) -> FtypAtomRefMut<'_> {
528        if let Some(index) = self.atom_position(FTYP) {
529            FtypAtomRefMut(AtomRefMut(&mut self.atoms[index]))
530        } else {
531            let index = 0;
532            self.atoms.insert(
533                index,
534                Atom::builder()
535                    .header(AtomHeader::new(*FTYP))
536                    .data(FileTypeAtom::default())
537                    .build(),
538            );
539            FtypAtomRefMut(AtomRefMut(&mut self.atoms[index]))
540        }
541    }
542
543    pub fn moov(&self) -> MoovAtomRef<'_> {
544        MoovAtomRef(self.find_atom(MOOV))
545    }
546
547    pub fn moov_mut(&mut self) -> MoovAtomRefMut<'_> {
548        if let Some(index) = self.atom_position(MOOV) {
549            MoovAtomRefMut(AtomRefMut(&mut self.atoms[index]))
550        } else {
551            let index = self.atom_position(FTYP).map(|i| i + 1).unwrap_or_default();
552            self.atoms.insert(
553                index,
554                Atom::builder().header(AtomHeader::new(*MOOV)).build(),
555            );
556            MoovAtomRefMut(AtomRefMut(&mut self.atoms[index]))
557        }
558    }
559
560    /// Returns the sum of all metadata atom sizes in bytes
561    pub fn metadata_size(&self) -> usize {
562        self.atoms_iter()
563            .cloned()
564            .flat_map(SerializeAtom::into_bytes)
565            .collect::<Vec<_>>()
566            .len()
567    }
568
569    /// Returns the sum of all track sizes in bytes
570    pub fn mdat_size(&self) -> usize {
571        self.moov()
572            .into_tracks_iter()
573            .map(|trak| trak.size())
574            .sum::<usize>()
575    }
576
577    /// Returns the sum of `metadata_size` and `mdat_size`
578    pub fn file_size(&self) -> usize {
579        self.metadata_size() + self.mdat_size()
580    }
581
582    /// Updates chunk offsets for each track
583    ///
584    /// Call this before writing metadata to disk to avoid corruption
585    pub fn update_chunk_offsets(
586        &mut self,
587    ) -> Result<chunk_offset_builder::BuildMetadata, UpdateChunkOffsetError> {
588        // mdat is located directly after metadata atoms, so metadata size + 8 bytes for the mdat header
589        let mdat_content_offset = self.metadata_size() + 8;
590
591        let (chunk_offsets, original_chunk_offsets) = self.moov().into_tracks_iter().try_fold(
592            (ChunkOffsetBuilder::new(), Vec::new()),
593            |(mut builder, mut chunk_offsets), trak| {
594                let stbl = trak.media().media_information().sample_table();
595                let stsz = stbl
596                    .sample_size()
597                    .ok_or(UpdateChunkOffsetError::SampleSizeAtomNotFound)?;
598                let stsc = stbl
599                    .sample_to_chunk()
600                    .ok_or(UpdateChunkOffsetError::SampleToChunkAtomNotFound)?;
601                let stco = stbl
602                    .chunk_offset()
603                    .ok_or(UpdateChunkOffsetError::ChunkOffsetAtomNotFound)?;
604                builder.add_track(stsc, stsz);
605                chunk_offsets.push(stco.chunk_offsets.inner());
606                Ok((builder, chunk_offsets))
607            },
608        )?;
609
610        let (mut chunk_offsets, build_meta) = chunk_offsets
611            .build_chunk_offsets_ordered(original_chunk_offsets, mdat_content_offset as u64);
612
613        for (track_idx, trak) in self.moov_mut().tracks().enumerate() {
614            let mut stbl = trak
615                .into_media()
616                .and_then(MdiaAtomRefMut::into_media_information)
617                .and_then(MinfAtomRefMut::into_sample_table)
618                .ok_or(UpdateChunkOffsetError::SampleTableNotFound)?;
619            let stco = stbl.chunk_offset();
620            let chunk_offsets = std::mem::take(&mut chunk_offsets[track_idx]);
621            stco.chunk_offsets = ChunkOffsets::from(chunk_offsets);
622        }
623
624        Ok(build_meta)
625    }
626}
627
/// Reasons why `Metadata::update_chunk_offsets` can fail: a track lacks one of the atoms needed
/// to rebuild its chunk offsets.
#[derive(Debug, Error)]
pub enum UpdateChunkOffsetError {
    /// The track has no sample table (or no media / media information leading to it).
    #[error("sample table atom not found")]
    SampleTableNotFound,
    /// The track's sample table has no sample size atom.
    #[error("sample size atom not found")]
    SampleSizeAtomNotFound,
    /// The track's sample table has no sample-to-chunk atom.
    #[error("sample to chunk atom not found")]
    SampleToChunkAtomNotFound,
    /// The track's sample table has no chunk offset atom.
    #[error("chunk offset atom not found")]
    ChunkOffsetAtomNotFound,
}
639
/// Reads the chunks of all tracks from the reader, in order of their offset.
///
/// All per-track vectors are filled together (one entry per track), so the same index refers to
/// the same track in each of them.
pub struct ChunkParser<'a, R, C: ReadCapability> {
    /// Reader the chunk data is read from
    reader: Mp4Reader<R, C>,
    /// Reference to each track's metadata
    tracks: Vec<TrakAtomRef<'a>>,
    /// Chunk offsets for each track
    chunk_offsets: Vec<&'a [u64]>,
    /// [`SampleToChunkEntry`]s for each track
    sample_to_chunk: Vec<&'a [SampleToChunkEntry]>,
    /// Sample sizes for each track
    sample_sizes: Vec<&'a [u32]>,
    /// [`TimeToSampleEntry`]s for each track
    time_to_sample: Vec<&'a [TimeToSampleEntry]>,
    /// [`ChunkInfo`]s for each track; entries are popped from the front as chunks are read
    chunk_info: Vec<VecDeque<ChunkInfo>>,
}
655
656impl<'a, R: AsyncRead + Unpin + Send, C: ReadCapability> ChunkParser<'a, R, C> {
657    pub async fn read_next_chunk(&mut self) -> Result<Option<Chunk<'a>>, ParseError> {
658        let current_offset = self.reader.current_offset as u64;
659
660        let mut next_offset = None;
661        let mut next_track_idx = 0;
662        let mut next_chunk_idx = 0;
663
664        for track_idx in 0..self.tracks.len() {
665            let chunk_info = self.chunk_info[track_idx].front();
666            if let Some(chunk_info) = chunk_info {
667                let chunk_idx = chunk_info.chunk_number as usize - 1;
668                let offset = self.chunk_offsets[track_idx][chunk_idx];
669                if offset >= current_offset
670                    && next_offset.is_none_or(|next_offset| offset < next_offset)
671                {
672                    next_offset = Some(offset);
673                    next_track_idx = track_idx;
674                    next_chunk_idx = chunk_idx;
675                }
676            }
677        }
678
679        if let Some(offset) = next_offset {
680            // Skip to the next chunk
681            let bytes_to_skip = offset - current_offset;
682            if bytes_to_skip > 0 {
683                self.reader.read_data(bytes_to_skip as usize).await?;
684            }
685
686            let chunk_info = self.chunk_info[next_track_idx].pop_front().unwrap();
687
688            // Read the chunk
689            self.read_chunk(next_track_idx, next_chunk_idx, chunk_info)
690                .await
691                .map(Some)
692        } else {
693            // No more chunks
694            Ok(None)
695        }
696    }
697
698    async fn read_chunk(
699        &mut self,
700        track_idx: usize,
701        chunk_idx: usize,
702        chunk_info: ChunkInfo,
703    ) -> Result<Chunk<'a>, ParseError> {
704        let time_to_sample = self.time_to_sample[track_idx];
705
706        let sample_start_idx =
707            chunk_info
708                .sample_indices
709                .first()
710                .copied()
711                .ok_or_else(|| ParseError {
712                    kind: ParseErrorKind::InsufficientData,
713                    location: None,
714                    source: Some(
715                        anyhow!("no samples indicies in chunk at index {chunk_idx}")
716                            .into_boxed_dyn_error(),
717                    ),
718                })?;
719
720        // Calculate total chunk size
721        let chunk_size = chunk_info.chunk_size;
722        let chunk_sample_sizes = chunk_info.sample_sizes.clone();
723
724        // Read the chunk data
725        let data = self.reader.read_data(chunk_size as usize).await?;
726
727        // Get the sample durations slice for this chunk
728        let sample_durations: Vec<u32> = time_to_sample
729            .iter()
730            .flat_map(|entry| {
731                std::iter::repeat_n(entry.sample_duration, entry.sample_count as usize)
732            })
733            .skip(sample_start_idx)
734            .take(chunk_sample_sizes.len())
735            .collect();
736        assert_eq!(chunk_sample_sizes.len(), sample_durations.len());
737
738        // Create the chunk
739        Ok(Chunk {
740            trak_idx: track_idx,
741            trak: self.tracks[track_idx],
742            sample_sizes: chunk_sample_sizes,
743            sample_durations,
744            data,
745        })
746    }
747}
748
749impl fmt::Debug for Chunk<'_> {
750    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
751        f.debug_struct("Chunk")
752            .field("trak", &self.trak)
753            .field(
754                "sample_sizes",
755                &DebugEllipsis(Some(self.sample_sizes.len())),
756            )
757            .field(
758                "time_to_sample",
759                &DebugEllipsis(Some(self.sample_durations.len())),
760            )
761            .field("data", &DebugEllipsis(Some(self.data.len())))
762            .finish()
763    }
764}
765
/// One chunk of a track: its raw bytes together with the size and duration of every sample in
/// it.
pub struct Chunk<'a> {
    /// Index of the trak in the file
    pub trak_idx: usize,
    /// Reference to the track the sample is in
    pub trak: TrakAtomRef<'a>,
    /// Sizes of the samples within this chunk
    pub sample_sizes: Vec<u32>,
    /// Timescale duration of each sample, indexed relative to `sample_sizes`
    pub sample_durations: Vec<u32>,
    /// Bytes in the chunk
    pub data: Vec<u8>,
}
778
779impl<'a> Chunk<'a> {
780    pub fn samples(&'a self) -> impl Iterator<Item = Sample<'a>> {
781        let timescale = self
782            .trak
783            .media()
784            .header()
785            .map(|h| h.timescale)
786            .expect("trak.mdia.mvhd is missing");
787        self.sample_sizes
788            .iter()
789            .zip(self.sample_durations.iter())
790            .scan(0usize, move |offset, (size, duration)| {
791                let sample_offset = *offset;
792                *offset += *size as usize;
793                let data = &self.data[sample_offset..sample_offset + (*size as usize)];
794                Some(Sample {
795                    size: *size,
796                    duration: *duration,
797                    timescale,
798                    data,
799                })
800            })
801    }
802}
803
/// A single sample of a chunk, borrowing its bytes from the chunk's data.
pub struct Sample<'a> {
    /// Size of the sample in bytes
    pub size: u32,
    /// Duration of the sample, in units of `timescale`
    pub duration: u32,
    /// Timescale taken from the media header of the sample's track
    pub timescale: u32,
    /// Bytes of the sample
    pub data: &'a [u8],
}
810
811impl fmt::Debug for Sample<'_> {
812    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
813        f.debug_struct("Sample")
814            .field("size", &self.size)
815            .field("duration", &self.duration)
816            .field("timescale", &self.timescale)
817            .finish_non_exhaustive()
818    }
819}