// re_video/demux/mod.rs
1//! Video demultiplexing.
2//!
3//! Parses a video file into a raw [`VideoDataDescription`] struct, which contains basic metadata and a list of keyframes.
4//!
5//! The entry point is [`VideoDataDescription::load_from_bytes`]
6//! which produces an instance of [`VideoDataDescription`] from any supported video container.
7
8pub mod mp4;
9
10use std::collections::BTreeMap;
11
12use bit_vec::BitVec;
13use itertools::Itertools as _;
14use re_log::{debug_assert, debug_panic};
15use re_span::Span;
16use re_tuid::Tuid;
17use web_time::Instant;
18
19use super::{Time, Timescale};
20use crate::nalu::AnnexBStreamWriteError;
21use crate::{
22    Chunk, StableIndexDeque, TrackId, TrackKind, write_avc_chunk_to_annexb,
23    write_hevc_chunk_to_annexb,
24};
25
/// Chroma subsampling mode.
///
/// In contrast to [`crate::YuvPixelLayout`], this carries no information about the
/// planarity/layout of the chroma components — it merely describes whether
/// (and along which axes) any subsampling occurs.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChromaSubsamplingModes {
    /// No subsampling at all, since the format is monochrome.
    Monochrome,

    /// No subsampling.
    ///
    /// Note that this also applies to RGB formats, not just YUV
    /// (even though YUV is much more common for video data).
    Yuv444,

    /// Chroma is subsampled in X only.
    Yuv422,

    /// Chroma is subsampled in both X and Y.
    Yuv420,
}
48
49impl std::fmt::Display for ChromaSubsamplingModes {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        match self {
52            // Could also call this 4:0:0, but that's a fairly uncommon way to describe it.
53            Self::Monochrome => write!(f, "monochrome"),
54            Self::Yuv444 => write!(f, "4:4:4"),
55            Self::Yuv422 => write!(f, "4:2:2"),
56            Self::Yuv420 => write!(f, "4:2:0"),
57        }
58    }
59}
60
/// The basic codec family used to encode the video.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VideoCodec {
    /// Advanced Video Coding (AVC/H.264).
    ///
    /// See <https://en.wikipedia.org/wiki/Advanced_Video_Coding>
    H264,

    /// High Efficiency Video Coding (HEVC/H.265).
    ///
    /// See <https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding>
    H265,

    /// AOMedia Video 1 (AV1).
    ///
    /// See <https://en.wikipedia.org/wiki/AV1>
    AV1,

    /// VP8.
    ///
    /// See <https://en.wikipedia.org/wiki/VP8>
    VP8,

    /// VP9.
    ///
    /// See <https://en.wikipedia.org/wiki/VP9>
    VP9,
}
89
90impl VideoCodec {
91    /// Base part of the web codec string, without additional parameters.
92    ///
93    /// See <https://www.w3.org/TR/webcodecs-codec-registry/#video-codec-registry>
94    pub fn base_webcodec_string(&self) -> &'static str {
95        match self {
96            // https://www.w3.org/TR/webcodecs-av1-codec-registration/#fully-qualified-codec-strings
97            Self::AV1 => "av01",
98
99            // https://www.w3.org/TR/webcodecs-avc-codec-registration/#fully-qualified-codec-strings
100            // avc3 is valid as well.
101            Self::H264 => "avc1",
102
103            // https://www.w3.org/TR/webcodecs-hevc-codec-registration/#fully-qualified-codec-strings
104            // hvc1 is valid as well.
105            Self::H265 => "hev1",
106
107            // https://www.w3.org/TR/webcodecs-vp8-codec-registration/#fully-qualified-codec-strings
108            // Special! This *is* the fully qualified codec string.
109            Self::VP8 => "vp8",
110
111            // https://www.w3.org/TR/webcodecs-vp9-codec-registration/#fully-qualified-codec-strings
112            Self::VP9 => "vp09",
113        }
114    }
115}
116
/// Index used for referencing into [`VideoDataDescription::samples`].
///
/// Since samples live in a [`StableIndexDeque`], these indices stay valid
/// even when samples are dropped from the front of the deque.
pub type SampleIndex = usize;

/// An index into [`VideoDataDescription::keyframe_indices`], not stable between mutations.
pub type KeyframeIndex = usize;
122
/// Distinguishes static videos from potentially ongoing video streams.
#[derive(Clone)]
pub enum VideoDeliveryMethod {
    /// A static video with a fixed, known duration which won't be updated further.
    Static { duration: Time },

    /// A stream that *may* be periodically updated.
    ///
    /// Video streams may drop samples at the beginning and add new samples at the end.
    /// The last sample's duration is treated as unknown.
    /// However, it is typically assumed to be as long as the average sample duration.
    Stream {
        /// Last time we added/removed samples from the [`VideoDataDescription`].
        ///
        /// This is used solely as a heuristic input for how the player schedules work to decoders.
        /// For live streams, even those that stopped, this is expected to be the wallclock time of when a sample was
        /// added to this data structure. *Not* when the sample was first recorded.
        last_time_updated_samples: Instant,
    },
}
143
144impl VideoDeliveryMethod {
145    #[inline]
146    pub fn new_stream() -> Self {
147        Self::Stream {
148            last_time_updated_samples: Instant::now(),
149        }
150    }
151}
152
/// Description of video data.
///
/// Stores various metadata about a video.
/// Doesn't contain the actual data, but rather refers to samples with a byte offset.
#[derive(Clone)]
pub struct VideoDataDescription {
    /// The codec used to encode the video.
    pub codec: VideoCodec,

    /// Various information about how the video was encoded.
    ///
    /// Should any of this change during the lifetime of a decoder, it has to be reset.
    ///
    /// For video streams this is derived on the fly, therefore it may arrive only with the first key frame.
    /// For mp4 this is read from the AVCC box.
    pub encoding_details: Option<VideoEncodingDetails>,

    /// How many time units are there per second.
    ///
    /// `None` if the time units used don't have a defined relationship to seconds.
    /// This happens for streams logged on a non-temporal timeline.
    pub timescale: Option<Timescale>,

    /// Whether this is a finite video or a stream.
    ///
    /// See [`VideoDeliveryMethod`].
    pub delivery_method: VideoDeliveryMethod,

    /// A sorted list of all keyframe's sample indices in the video.
    ///
    /// Invariants (checked by [`Self::sanity_check`]): sorted, in bounds of `samples`,
    /// and every loaded sample with `is_sync == true` appears here.
    pub keyframe_indices: Vec<SampleIndex>,

    /// Samples contain the byte offsets into `data` for each frame.
    ///
    /// This list is sorted in ascending order of decode timestamps.
    ///
    /// Samples must be decoded in decode-timestamp order,
    /// and should be presented in composition-timestamp order.
    ///
    /// We assume one sample yields exactly one frame from the decoder.
    ///
    /// To facilitate streaming, samples may be removed from the beginning and added at the end,
    /// but individual samples are never supposed to change.
    pub samples: StableIndexDeque<SampleMetadataState>,

    /// Meta information about the samples, see [`SamplesStatistics`].
    pub samples_statistics: SamplesStatistics,

    /// All the tracks in the mp4; not just the video track.
    ///
    /// Can be nice to show in a UI.
    pub mp4_tracks: BTreeMap<TrackId, Option<TrackKind>>,
}
203
204impl re_byte_size::SizeBytes for VideoDataDescription {
205    fn heap_size_bytes(&self) -> u64 {
206        let Self {
207            codec: _,
208            encoding_details: _,
209            timescale: _,
210            delivery_method: _,
211            keyframe_indices,
212            samples,
213            samples_statistics,
214            mp4_tracks,
215        } = self;
216
217        keyframe_indices.heap_size_bytes()
218            + samples.heap_size_bytes()
219            + samples_statistics.heap_size_bytes()
220            + mp4_tracks.len() as u64 * std::mem::size_of::<(TrackId, Option<TrackKind>)>() as u64
221    }
222}
223
impl VideoDataDescription {
    /// Get the group of pictures which use a keyframe, including the keyframe sample itself.
    ///
    /// Returns `None` if `keyframe_idx` is out of bounds of [`Self::keyframe_indices`].
    /// The range extends to the next keyframe, or to the end of the sample list for the last GOP.
    pub fn gop_sample_range_for_keyframe(
        &self,
        keyframe_idx: usize,
    ) -> Option<std::ops::Range<SampleIndex>> {
        Some(
            *self.keyframe_indices.get(keyframe_idx)?
                ..self
                    .keyframe_indices
                    .get(keyframe_idx + 1)
                    .copied()
                    .unwrap_or_else(|| self.samples.next_index()),
        )
    }

    /// Checks various invariants that the video description should always uphold.
    ///
    /// Violation of any of these variants is **not** a user(-data) error, but instead an
    /// implementation bug of any code manipulating the video description.
    /// Vice versa, all code using `VideoDataDescription` can assume that these invariants hold.
    ///
    /// It's recommended to run these sanity check only in debug builds as they may be expensive for
    /// large videos.
    ///
    /// Check implementation for details.
    pub fn sanity_check(&self) -> Result<(), String> {
        self.sanity_check_keyframes()?;
        self.sanity_check_samples()?;

        // If an STSD box is present, then its content type must match with the internal codec.
        if let Some(stsd) = self.encoding_details.as_ref().and_then(|e| e.stsd.as_ref()) {
            let stsd_codec = match &stsd.contents {
                re_mp4::StsdBoxContent::Av01(_) => crate::VideoCodec::AV1,
                re_mp4::StsdBoxContent::Avc1(_) => crate::VideoCodec::H264,
                re_mp4::StsdBoxContent::Hvc1(_) | re_mp4::StsdBoxContent::Hev1(_) => {
                    crate::VideoCodec::H265
                }
                re_mp4::StsdBoxContent::Vp08(_) => crate::VideoCodec::VP8,
                re_mp4::StsdBoxContent::Vp09(_) => crate::VideoCodec::VP9,
                _ => {
                    return Err(format!(
                        "STSD box content type {:?} doesn't have a supported codec.",
                        stsd.contents
                    ));
                }
            };
            if stsd_codec != self.codec {
                return Err(format!(
                    "STSD box content type {:?} does not match with the internal codec {:?}.",
                    stsd.contents, self.codec
                ));
            }
        }

        Ok(())
    }

    /// Validates [`Self::keyframe_indices`]:
    /// sorted, in bounds of the sample deque, pointing only at loaded sync samples,
    /// and covering *every* loaded sync sample (no untracked keyframes).
    fn sanity_check_keyframes(&self) -> Result<(), String> {
        if !self.keyframe_indices.is_sorted() {
            return Err("Keyframes aren't sorted".to_owned());
        }

        for &keyframe in &self.keyframe_indices {
            if keyframe < self.samples.min_index() {
                return Err(format!(
                    "Keyframe {keyframe} refers to sample to the left of the list of samples.",
                ));
            }

            if keyframe >= self.samples.next_index() {
                return Err(format!(
                    "Keyframe {keyframe} refers to sample to the right of the list of samples.",
                ));
            }

            match &self.samples[keyframe] {
                SampleMetadataState::Present(sample_metadata) => {
                    // All samples at the beginning of a GOP are marked with `is_sync==true`
                    if !sample_metadata.is_sync {
                        return Err(format!("Keyframe {keyframe} is not marked with `is_sync`."));
                    }
                }
                SampleMetadataState::Unloaded { .. } => {
                    return Err(format!("Keyframe {keyframe} refers to an unloaded sample"));
                }
            }
        }

        // Make sure all keyframes are tracked.
        // Walk the (sorted) keyframe indices in lockstep with the loaded sync samples:
        // every sync sample must be the next entry of `keyframe_indices`.
        let mut keyframes = self.keyframe_indices.iter().copied();
        for (sample_idx, sample) in self
            .samples
            .iter_indexed()
            .filter_map(|(idx, s)| Some((idx, s.sample()?)))
        {
            if sample.is_sync && keyframes.next().is_none_or(|idx| idx != sample_idx) {
                return Err(format!("Not tracking the keyframe {sample_idx}."));
            }
        }
        Ok(())
    }

    /// Validates the sample list: decode timestamps of loaded samples are monotonically
    /// increasing, and [`Self::samples_statistics`] matches a fresh recomputation.
    fn sanity_check_samples(&self) -> Result<(), String> {
        // Decode timestamps are monotonically increasing.
        // (only checked between pairs of loaded samples; unloaded ones carry no timestamps)
        for ((a_idx, a), (b_idx, b)) in self.samples.iter_indexed().tuple_windows() {
            if let SampleMetadataState::Present(a) = a
                && let SampleMetadataState::Present(b) = b
                && a.decode_timestamp > b.decode_timestamp
            {
                return Err(format!(
                    "Decode timestamps for {a_idx}..{b_idx} are not monotonically increasing: {:?} {:?}",
                    a.decode_timestamp, b.decode_timestamp
                ));
            }
        }

        // Sample statistics are consistent with the samples.
        let expected_statistics = SamplesStatistics::new(&self.samples);
        if expected_statistics != self.samples_statistics {
            return Err(format!(
                "Sample statistics are not consistent with the samples.\nExpected: {:?}\nActual: {:?}",
                expected_statistics, self.samples_statistics
            ));
        }

        Ok(())
    }

    /// Returns the encoded bytes for a sample in the format expected by [`VideoCodec`].
    ///
    /// * H.264/H.265: MP4 stores samples using AVCC/HVCC length-prefixed NALs and relies on container
    ///   metadata for SPS/PPS/VPS. This method makes sure to unpack this.
    /// * AV1 samples are stored as-is.
    /// * VP8/VP9: Not yet supported
    pub fn sample_data_in_stream_format(
        &self,
        chunk: &crate::Chunk,
    ) -> Result<Vec<u8>, SampleConversionError> {
        match self.codec {
            VideoCodec::AV1 => Ok(chunk.data.clone()),
            VideoCodec::H264 => {
                // AVCC -> Annex B conversion needs the `avc1` stsd box (parameter sets).
                let stsd = self
                    .encoding_details
                    .as_ref()
                    .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))?
                    .stsd
                    .as_ref()
                    .ok_or(SampleConversionError::MissingStsd(self.codec))?;

                let re_mp4::StsdBoxContent::Avc1(avc1_box) = &stsd.contents else {
                    return Err(SampleConversionError::UnexpectedStsdContent {
                        codec: self.codec,
                        found: format!("{:?}", stsd.contents),
                    });
                };

                let mut output = Vec::new();
                write_avc_chunk_to_annexb(avc1_box, &mut output, chunk.is_sync, chunk)
                    .map_err(SampleConversionError::AnnexB)?;
                Ok(output)
            }
            VideoCodec::H265 => {
                // HVCC -> Annex B conversion needs the `hvc1`/`hev1` stsd box (parameter sets).
                let stsd = self
                    .encoding_details
                    .as_ref()
                    .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))?
                    .stsd
                    .as_ref()
                    .ok_or(SampleConversionError::MissingStsd(self.codec))?;

                let hvcc_box = match &stsd.contents {
                    re_mp4::StsdBoxContent::Hvc1(hvc1_box)
                    | re_mp4::StsdBoxContent::Hev1(hvc1_box) => hvc1_box,
                    other => {
                        return Err(SampleConversionError::UnexpectedStsdContent {
                            codec: self.codec,
                            found: format!("{other:?}"),
                        });
                    }
                };

                let mut output = Vec::new();
                write_hevc_chunk_to_annexb(hvcc_box, &mut output, chunk.is_sync, chunk)
                    .map_err(SampleConversionError::AnnexB)?;
                Ok(output)
            }
            VideoCodec::VP8 | VideoCodec::VP9 => {
                // TODO(#10186): Support VP8/VP9 for the `VideoStream` archetype
                Err(SampleConversionError::UnsupportedCodec(self.codec))
            }
        }
    }
}
418
/// Errors converting [`VideoDataDescription`] samples into the format expected by the decoder.
#[derive(thiserror::Error, Debug)]
pub enum SampleConversionError {
    /// H.264/H.265 conversion needs [`VideoEncodingDetails`], but none were available (yet).
    #[error("Missing encoding details for codec {0:?}")]
    MissingEncodingDetails(VideoCodec),

    /// Encoding details were present but lacked the mp4 `stsd` box needed for AVCC/HVCC unpacking.
    #[error("Missing stsd box for codec {0:?}")]
    MissingStsd(VideoCodec),

    /// The `stsd` box content did not match the codec (e.g. an AVC box for an H.265 stream).
    #[error("Unexpected stsd contents for codec {codec:?}: {found}")]
    UnexpectedStsdContent { codec: VideoCodec, found: String },

    /// Writing the Annex-B byte stream failed.
    #[error("Failed converting sample to Annex-B: {0}")]
    AnnexB(#[from] AnnexBStreamWriteError),

    /// Sample conversion is not implemented for this codec (currently VP8/VP9).
    #[error("Unsupported codec {0:?}")]
    UnsupportedCodec(VideoCodec),
}
437
/// Various information about how the video was encoded.
///
/// For video streams this is derived on the fly.
/// For mp4 this is read from the AVCC box.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VideoEncodingDetails {
    /// Detailed codec string as specified by the `WebCodecs` codec registry.
    ///
    /// See <https://www.w3.org/TR/webcodecs-codec-registry/#video-codec-registry>
    pub codec_string: String,

    /// Encoded width & height (in that order).
    pub coded_dimensions: [u16; 2],

    /// Per color component bit depth.
    ///
    /// Usually 8, but 10 for HDR (for example).
    ///
    /// `None` if this couldn't be determined, either because of lack of implementation
    /// or missing information at this point.
    pub bit_depth: Option<u8>,

    /// Chroma subsampling mode.
    ///
    /// `None` if this couldn't be determined, either because of lack of implementation
    /// or missing information at this point.
    pub chroma_subsampling: Option<ChromaSubsamplingModes>,

    /// Optional mp4 stsd box from which this data was derived.
    ///
    /// Used by some decoders directly for configuration.
    /// For H.264 & H.265, its presence implies that the bitstream is in the AVCC format rather than Annex B.
    // TODO(andreas):
    // It would be nice to instead have an enum of all the actually needed descriptors.
    // We know for sure that H.264 & H.265 need an AVCC/HVCC box for data from mp4, since the stream
    // is otherwise not readable. But what about the other codecs? On Web we *do* pass additional information right now.
    pub stsd: Option<re_mp4::StsdBox>,
}
476
/// Meta information about the video samples.
///
/// Computed from the sample list by [`Self::new`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SamplesStatistics {
    /// Whether all decode timestamps are equal to presentation timestamps.
    ///
    /// If true, the video typically has no B-frames as those require frame reordering.
    pub dts_always_equal_pts: bool,

    /// If `dts_always_equal_pts` is false, then this gives for each sample whether its PTS is the highest seen so far.
    /// If `dts_always_equal_pts` is true, then this is left as `None`.
    /// This is used for optimizing PTS search.
    ///
    /// TODO(andreas): We don't have a mechanism for shrinking this bitvec when dropping samples, i.e. it will keep growing.
    /// ([`StableIndexDeque`] makes sure that indices in the bitvec will still match up with the samples even when samples are dropped from the front.)
    pub has_sample_highest_pts_so_far: Option<BitVec>,
}
493
494impl re_byte_size::SizeBytes for SamplesStatistics {
495    fn heap_size_bytes(&self) -> u64 {
496        let Self {
497            dts_always_equal_pts: _,
498            has_sample_highest_pts_so_far,
499        } = self;
500        has_sample_highest_pts_so_far
501            .as_ref()
502            .map_or(0, |bitvec| bitvec.capacity() as u64 / 8)
503    }
504}
505
506impl SamplesStatistics {
507    /// Special case for videos that have no h264/h265 B-frames.
508    ///
509    /// This is the most common case for video streams.
510    // TODO(andreas): so, av1 bframes are possible with this config, right?! confirm and then maybe come up with a better name.
511    pub const NO_BFRAMES: Self = Self {
512        dts_always_equal_pts: true,
513        has_sample_highest_pts_so_far: None,
514    };
515
516    pub fn new(samples: &StableIndexDeque<SampleMetadataState>) -> Self {
517        re_tracing::profile_function!();
518
519        let dts_always_equal_pts = samples
520            .iter()
521            .filter_map(|s| s.sample())
522            .all(|s| s.decode_timestamp == s.presentation_timestamp);
523
524        let mut biggest_pts_so_far = Time::MIN;
525        let has_sample_highest_pts_so_far = (!dts_always_equal_pts).then(|| {
526            samples
527                .iter()
528                .map(move |sample| {
529                    sample.sample().is_some_and(|sample| {
530                        if sample.presentation_timestamp > biggest_pts_so_far {
531                            biggest_pts_so_far = sample.presentation_timestamp;
532                            true
533                        } else {
534                            false
535                        }
536                    })
537                })
538                .collect()
539        });
540
541        Self {
542            dts_always_equal_pts,
543            has_sample_highest_pts_so_far,
544        }
545    }
546}
547
548impl VideoDataDescription {
549    /// Loads a video from the given data.
550    ///
551    /// Does not copy any sample data, but instead stores offsets into the buffer.
552    pub fn load_from_bytes(
553        data: &[u8],
554        media_type: &str,
555        debug_name: &str,
556        source_id: Tuid,
557    ) -> Result<Self, VideoLoadError> {
558        if data.is_empty() {
559            return Err(VideoLoadError::ZeroBytes);
560        }
561
562        re_tracing::profile_function!();
563        match media_type {
564            "video/mp4" => Self::load_mp4(data, debug_name, source_id),
565
566            media_type => {
567                if media_type.starts_with("video/") {
568                    Err(VideoLoadError::UnsupportedMimeType {
569                        provided_or_detected_media_type: media_type.to_owned(),
570                    })
571                } else {
572                    Err(VideoLoadError::MimeTypeIsNotAVideo {
573                        provided_or_detected_media_type: media_type.to_owned(),
574                    })
575                }
576            }
577        }
578    }
579
580    /// The codec used to encode the video.
581    #[inline]
582    pub fn human_readable_codec_string(&self) -> String {
583        let base_codec_string = match &self.codec {
584            VideoCodec::AV1 => "AV1",
585            VideoCodec::H264 => "H.264 AVC1",
586            VideoCodec::H265 => "H.265 HEV1",
587            VideoCodec::VP8 => "VP8",
588            VideoCodec::VP9 => "VP9",
589        }
590        .to_owned();
591
592        if let Some(encoding_details) = self.encoding_details.as_ref() {
593            format!("{base_codec_string} ({})", encoding_details.codec_string)
594        } else {
595            base_codec_string
596        }
597    }
598
    /// The number of samples in the video.
    ///
    /// Video containers and codecs like talking about samples or chunks rather than frames, but for how we define a chunk today,
    /// a frame is always a single chunk of data, i.e. always a single sample, see [`crate::decode::Chunk`].
    /// So for all practical purposes the sample count _is_ the number of frames, at least how we use it today.
    #[inline]
    pub fn num_samples(&self) -> usize {
        self.samples.num_elements()
    }
608
609    /// Duration of all present samples.
610    ///
611    /// Returns `None` iff the video has no timescale.
612    /// Other special cases like zero samples or single sample with unknown duration will return a zero duration.
613    ///
614    /// Since this is only about present samples and not historical, future or missing data,
615    /// the duration may shrink as samples are dropped and grow as new samples are added.
616    // TODO(andreas): This makes it somewhat unsuitable for various usecases in the viewer. We should probably accumulate the max duration somewhere.
617    pub fn duration(&self) -> Option<std::time::Duration> {
618        let timescale = self.timescale?;
619
620        Some(match &self.delivery_method {
621            VideoDeliveryMethod::Static { duration } => duration.duration(timescale),
622
623            VideoDeliveryMethod::Stream { .. } => match self.samples.num_elements() {
624                0 => std::time::Duration::ZERO,
625                1 => {
626                    let first = self.samples.iter().find_map(|s| s.sample())?;
627                    first
628                        .duration
629                        .map(|d| d.duration(timescale))
630                        .unwrap_or(std::time::Duration::ZERO)
631                }
632                _ => {
633                    // TODO(#10090): This is only correct because there's no b-frames on streams right now.
634                    // If there are b-frames determining the last timestamp is a bit more complicated.
635                    let first = self.samples.iter().find_map(|s| s.sample())?;
636                    let last = self.samples.iter().rev().find_map(|s| s.sample())?;
637
638                    let last_sample_duration = last.duration.map_or_else(
639                        || {
640                            // Use average duration of all samples so far.
641                            (last.presentation_timestamp - first.presentation_timestamp)
642                                .duration(timescale)
643                                / (last.frame_nr - first.frame_nr)
644                        },
645                        |d| d.duration(timescale),
646                    );
647
648                    (last.presentation_timestamp - first.presentation_timestamp).duration(timescale)
649                        + last_sample_duration
650                }
651            },
652        })
653    }
654
655    /// `num_frames / duration`.
656    ///
657    /// Note that the video could have a variable framerate!
658    #[inline]
659    pub fn average_fps(&self) -> Option<f32> {
660        self.duration().map(|duration| {
661            let num_frames = self.num_samples();
662
663            // NOTE: the video duration includes the duration of the final frame too,
664            // so we don't have a fence-post problem here!
665            num_frames as f32 / duration.as_secs_f32()
666        })
667    }
668
669    /// Determines the video timestamps of all present frames inside a video, returning raw time values.
670    /// Reserved sample has no timestamp information and are thus ignored.
671    ///
672    /// Returns None if the video has no timescale.
673    /// Returned timestamps are in nanoseconds since start and are guaranteed to be monotonically increasing.
674    pub fn frame_timestamps_nanos(&self) -> Option<impl Iterator<Item = i64> + '_> {
675        let timescale = self.timescale?;
676
677        Some(
678            self.samples
679                .iter()
680                .filter_map(|sample| Some(sample.sample()?.presentation_timestamp))
681                .sorted()
682                .map(move |pts| pts.into_nanos(timescale)),
683        )
684    }
685
    /// For a given decode (!) timestamp, returns the index of the latest sample whose
    /// decode timestamp is lesser than or equal to the given timestamp.
    ///
    /// Returns `None` if no loaded sample at or before `decode_time` exists.
    fn latest_sample_index_at_decode_timestamp(
        keyframes: &[KeyframeIndex],
        samples: &StableIndexDeque<SampleMetadataState>,
        decode_time: Time,
    ) -> Option<SampleIndex> {
        // First find what keyframe this decode timestamp is in, as an optimization since
        // we can't efficiently binary search the sample list with possible gaps.
        //
        // Keyframes will always be [`SampleMetadataState::Present`] and
        // have a decode timestamp we can compare against.
        let keyframe_idx = keyframes
            .partition_point(|p| {
                samples
                    .get(*p)
                    .map(|s| s.sample())
                    .inspect(|_s| {
                        debug_assert!(_s.is_some(), "Keyframes mentioned in the keyframe lookup list should always be loaded");
                    })
                    .flatten()
                    .is_some_and(|s| s.decode_timestamp <= decode_time)
            })
            // `partition_point == 0` means even the first keyframe is past `decode_time`.
            .checked_sub(1)?;

        let start = *keyframes.get(keyframe_idx)?;
        let end = keyframes
            .get(keyframe_idx + 1)
            .copied()
            .unwrap_or_else(|| samples.next_index());

        // Within that keyframe's range, find the most suitable frame for the given decode time.
        let range = start..end;

        // Linear scan within the GOP: decode timestamps are monotonically increasing
        // (see `sanity_check_samples`), but gaps of unloaded samples prevent binary search.
        let mut found_sample_idx = None;
        for (idx, sample) in samples.iter_index_range_clamped(&range) {
            let Some(s) = sample.sample() else {
                continue;
            };

            if s.decode_timestamp <= decode_time {
                found_sample_idx = Some(idx);
            } else {
                break;
            }
        }

        found_sample_idx
    }
735
    /// See [`Self::latest_sample_index_at_presentation_timestamp`], split out for testing purposes.
    ///
    /// The returned sample index is guaranteed to be [`SampleMetadataState::Present`].
    fn latest_sample_index_at_presentation_timestamp_internal(
        keyframes: &[KeyframeIndex],
        samples: &StableIndexDeque<SampleMetadataState>,
        sample_statistics: &SamplesStatistics,
        presentation_timestamp: Time,
    ) -> Option<SampleIndex> {
        // Find the latest sample where `decode_timestamp <= presentation_timestamp`.
        // Because `decode <= presentation`, we never have to look further backwards in the
        // video than this.
        let decode_sample_idx = Self::latest_sample_index_at_decode_timestamp(
            keyframes,
            samples,
            presentation_timestamp,
        );

        let decode_sample_idx = decode_sample_idx?;

        // It's very common that dts==pts in which case we're done!
        let Some(has_sample_highest_pts_so_far) =
            sample_statistics.has_sample_highest_pts_so_far.as_ref()
        else {
            debug_assert!(sample_statistics.dts_always_equal_pts);
            return Some(decode_sample_idx);
        };
        debug_assert!(has_sample_highest_pts_so_far.len() == samples.next_index());

        // Search backwards, starting at `decode_sample_idx`, looking for
        // the first sample where `sample.presentation_timestamp <= presentation_timestamp`.
        // I.e. the sample with the biggest PTS that is smaller or equal to the requested PTS.
        //
        // The tricky part is that we can't just take the first sample with a presentation timestamp that matches
        // since smaller presentation timestamps may still show up further back!
        let mut best_index = SampleIndex::MAX;
        let mut best_pts = Time::MIN;
        for sample_idx in (samples.min_index()..=decode_sample_idx).rev() {
            let Some(sample) = samples[sample_idx].sample() else {
                // Unloaded samples carry no timestamps; skip them.
                continue;
            };

            if sample.presentation_timestamp == presentation_timestamp {
                // Clean hit. Take this one, no questions asked :)
                // (assuming that each PTS is unique!)
                return Some(sample_idx);
            }

            if sample.presentation_timestamp < presentation_timestamp
                && sample.presentation_timestamp > best_pts
            {
                best_pts = sample.presentation_timestamp;
                best_index = sample_idx;
            }

            if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] {
                // We won't see any bigger PTS values anymore, meaning we're as close as we can get to the requested PTS!
                return Some(best_index);
            }
        }

        None
    }
799
800    /// For a given presentation timestamp, return the index of the first sample
801    /// whose presentation timestamp is lesser than or equal to the given timestamp.
802    ///
803    /// Remember that samples after (i.e. with higher index) may have a *lower* presentation time
804    /// if the stream has sample reordering!
805    pub fn latest_sample_index_at_presentation_timestamp(
806        &self,
807        presentation_timestamp: Time,
808    ) -> Option<SampleIndex> {
809        Self::latest_sample_index_at_presentation_timestamp_internal(
810            &self.keyframe_indices,
811            &self.samples,
812            &self.samples_statistics,
813            presentation_timestamp,
814        )
815    }
816
817    /// Returns the sample presenteed directly prior to the given sample.
818    ///
819    /// Remember that samples are ordered in decode timestamp order,
820    /// and that sample presented immediately prior to the given sample may have a higher decode timestamp.
821    /// Therefore, this may be a jump on sample index.
822    pub fn previous_presented_sample(&self, sample: &SampleMetadata) -> Option<&SampleMetadata> {
823        let idx = Self::latest_sample_index_at_presentation_timestamp_internal(
824            &self.keyframe_indices,
825            &self.samples,
826            &self.samples_statistics,
827            sample.presentation_timestamp - Time::new(1),
828        )?;
829        match self.samples.get(idx) {
830            Some(SampleMetadataState::Present(sample)) => Some(sample),
831            None | Some(_) => unreachable!(),
832        }
833    }
834
835    /// Returns the index of the keyframe for a specific sample.
836    pub fn sample_keyframe_idx(&self, sample_idx: SampleIndex) -> Option<KeyframeIndex> {
837        self.keyframe_indices
838            .partition_point(|idx| *idx <= sample_idx)
839            .checked_sub(1)
840    }
841
842    fn find_keyframe_index(
843        &self,
844        cmp_time: impl Fn(&SampleMetadata) -> bool,
845    ) -> Option<KeyframeIndex> {
846        self.keyframe_indices
847            .partition_point(|sample_idx| {
848                if let Some(sample) = self.samples[*sample_idx].sample() {
849                    cmp_time(sample)
850                } else {
851                    debug_panic!("keyframe indices should always be valid");
852
853                    false
854                }
855            })
856            .checked_sub(1)
857    }
858
859    /// For a given decode (!) timestamp, return the index of the keyframe index containing the given timestamp.
860    pub fn decode_time_keyframe_index(&self, decode_time: Time) -> Option<KeyframeIndex> {
861        self.find_keyframe_index(|t| t.decode_timestamp <= decode_time)
862    }
863
864    /// For a given presentation timestamp, return the index of the keyframe index containing the given timestamp.
865    pub fn presentation_time_keyframe_index(&self, pts: Time) -> Option<KeyframeIndex> {
866        self.find_keyframe_index(|t| t.presentation_timestamp <= pts)
867    }
868}
869
/// The state of the current sample.
///
/// Samples start out as [`Self::Unloaded`] until their backing data source arrives;
/// once a source is loaded, all of its samples become [`Self::Present`].
/// (NOTE(review): the previous doc mentioned a `Skip` state which is not a variant
/// of this enum — confirm the intended lifecycle.)
#[derive(Debug, Clone)]
pub enum SampleMetadataState {
    /// Sample is present and contains video data.
    Present(SampleMetadata),

    /// The source for this sample hasn't arrived yet.
    ///
    /// `min_dts` is the minimum decode time of this unloaded sample.
    Unloaded { source_id: Tuid, min_dts: Time },
}
883
884impl SampleMetadataState {
885    pub fn sample(&self) -> Option<&SampleMetadata> {
886        match self {
887            Self::Present(sample_metadata) => Some(sample_metadata),
888            Self::Unloaded { .. } => None,
889        }
890    }
891
892    pub fn sample_mut(&mut self) -> Option<&mut SampleMetadata> {
893        match self {
894            Self::Present(sample_metadata) => Some(sample_metadata),
895            Self::Unloaded { .. } => None,
896        }
897    }
898
899    pub fn source_id(&self) -> Tuid {
900        match self {
901            Self::Present(sample) => sample.source_id,
902            Self::Unloaded { source_id, .. } => *source_id,
903        }
904    }
905
906    pub fn source_id_mut(&mut self) -> &mut Tuid {
907        match self {
908            Self::Present(sample) => &mut sample.source_id,
909            Self::Unloaded { source_id, .. } => source_id,
910        }
911    }
912
913    /// Get the samples decode timestamp.
914    ///
915    /// In the case of this being an unloaded sample, the timestamp
916    /// is a conservative guess. If the sample has never been loaded
917    /// all samples from a data source are located at the start of
918    /// said data source. If it has been loaded we retain the dts from
919    /// the actual loaded sample and assume next time this sample is
920    /// loaded it will have the same dts.
921    pub fn decode_timestamp(&self) -> Time {
922        match self {
923            Self::Present(sample) => sample.decode_timestamp,
924            Self::Unloaded { min_dts, .. } => *min_dts,
925        }
926    }
927
928    pub fn unload(&mut self, new_source_id: Option<Tuid>) {
929        match self {
930            Self::Present(sample) => {
931                let dts = sample.decode_timestamp;
932                let source_id = new_source_id.unwrap_or(sample.source_id);
933
934                *self = Self::Unloaded {
935                    source_id,
936                    min_dts: dts,
937                }
938            }
939            Self::Unloaded {
940                source_id,
941                min_dts: _,
942            } => {
943                if let Some(new_source_id) = new_source_id {
944                    *source_id = new_source_id;
945                }
946            }
947        }
948    }
949
950    pub fn is_loaded(&self) -> bool {
951        match self {
952            Self::Present(_) => true,
953            Self::Unloaded { .. } => false,
954        }
955    }
956
957    pub fn is_unloaded(&self) -> bool {
958        !self.is_loaded()
959    }
960}
961
962impl re_byte_size::SizeBytes for SampleMetadataState {
963    fn heap_size_bytes(&self) -> u64 {
964        match self {
965            Self::Present(sample_metadata) => sample_metadata.heap_size_bytes(),
966            Self::Unloaded {
967                source_id: _,
968                min_dts: _,
969            } => 0,
970        }
971    }
972}
973
/// A single sample in a video.
///
/// This is equivalent to MP4's definition of a single sample.
/// Note that in MP4, each sample forms a single access unit,
/// see 3.1.1 [ISO_IEC_14496-14](https://ossrs.io/lts/zh-cn/assets/files/ISO_IEC_14496-14-MP4-2003-9a3eb04879ded495406399602ff2e587.pdf):
/// > 3.1.1 Elementary Stream Data
/// > To maintain the goals of streaming protocol independence, the media data is stored in its most ‘natural’ format,
/// > and not fragmented. This enables easy local manipulation of the media data. Therefore media-data is stored
/// > as access units, a range of contiguous bytes for each access unit (a single access unit is the definition of a
/// > ‘sample’ for an MPEG-4 media stream).
///
/// Access units in H.264/H.265 always yield a single frame upon decoding,
/// see <https://en.wikipedia.org/wiki/Network_Abstraction_Layer#Access_Units/>:
/// > A set of NAL units in a specified form is referred to as an access unit.
/// > The decoding of each access unit results in one decoded picture.
#[derive(Debug, Clone)]
pub struct SampleMetadata {
    /// Is this the start of a new (closed) group of pictures?
    ///
    /// What this means in detail is dependent on the codec but they are generally
    /// at least I(DR)-frames and often have additional metadata such that
    /// a decoder can restart at this frame.
    pub is_sync: bool,

    /// Which frame does this sample belong to?
    ///
    /// This is on the assumption that each sample produces a single frame,
    /// which is true for MP4.
    ///
    /// This is the index of samples ordered by [`Self::presentation_timestamp`].
    ///
    /// Do **not** ever use this for indexing into the array of samples.
    pub frame_nr: u32,

    /// Time at which this sample appears in the decoded bitstream, in time units.
    ///
    /// Samples should be decoded in this order.
    ///
    /// `decode_timestamp <= presentation_timestamp`
    pub decode_timestamp: Time,

    /// Time at which this sample appears in the frame stream, in time units.
    ///
    /// The frame should be shown at this time.
    ///
    /// `decode_timestamp <= presentation_timestamp`
    pub presentation_timestamp: Time,

    /// Duration of the sample.
    ///
    /// Typically the time difference in presentation timestamp to the next sample.
    /// May be unknown if this is the last sample in an ongoing video stream.
    pub duration: Option<Time>,

    /// The chunk this sample comes from.
    pub source_id: Tuid,

    /// Offset and length within a data buffer indicated by [`SampleMetadata::source_id`].
    pub byte_span: Span<u32>,
}
1034
impl re_byte_size::SizeBytes for SampleMetadata {
    fn heap_size_bytes(&self) -> u64 {
        // All fields are stored inline (no heap-owning fields), so the heap footprint is zero.
        0
    }

    fn is_pod() -> bool {
        true
    }
}
1044
1045impl SampleMetadata {
1046    /// Read the sample from the video data.
1047    ///
1048    /// For video assets, `data` _must_ be a reference to the original asset
1049    /// from which the [`VideoDataDescription`] was loaded.
1050    /// For video streams, `data` refers to the currently available data
1051    /// which is described by the [`VideoDataDescription`].
1052    ///
1053    /// Returns `None` if the sample is out of bounds, which can only happen
1054    /// if `data` is not the original video data.
1055    pub fn get<'a>(
1056        &self,
1057        get_buffer: &dyn Fn(Tuid) -> &'a [u8],
1058        sample_idx: SampleIndex,
1059    ) -> Option<Chunk> {
1060        let buffer = get_buffer(self.source_id);
1061        let data = buffer.get(self.byte_span.range_usize())?.to_vec();
1062
1063        Some(Chunk {
1064            data,
1065            sample_idx,
1066            frame_nr: self.frame_nr,
1067            decode_timestamp: self.decode_timestamp,
1068            presentation_timestamp: self.presentation_timestamp,
1069            duration: self.duration,
1070            is_sync: self.is_sync,
1071        })
1072    }
1073}
1074
/// Errors that can occur when loading a video.
#[derive(thiserror::Error, Debug)]
pub enum VideoLoadError {
    #[error("The video file is empty (zero bytes)")]
    ZeroBytes,

    /// Wraps any error from the underlying MP4 parser.
    #[error("MP4 error: {0}")]
    ParseMp4(#[from] re_mp4::Error),

    #[error("Video file has no video tracks")]
    NoVideoTrack,

    #[error("Video file track config is invalid")]
    InvalidConfigFormat,

    #[error("Video file has invalid sample entries")]
    InvalidSamples,

    #[error(
        "Video file has no timescale, which is required to determine frame timestamps in time units"
    )]
    NoTimescale,

    /// The blob's media type was recognized, but it is not a video type at all.
    #[error("The media type of the blob is not a video: {provided_or_detected_media_type}")]
    MimeTypeIsNotAVideo {
        provided_or_detected_media_type: String,
    },

    /// The blob is a video, but in a container format we don't handle.
    #[error("MIME type '{provided_or_detected_media_type}' is not supported for videos")]
    UnsupportedMimeType {
        provided_or_detected_media_type: String,
    },

    /// Not used in `re_video` itself, but useful for media type detection ahead of calling [`VideoDataDescription::load_from_bytes`].
    #[error("Could not detect MIME type from the video contents")]
    UnrecognizedMimeType,

    // `FourCC`'s debug impl doesn't quote the result
    #[error("Video track uses unsupported codec \"{0}\"")] // NOLINT
    UnsupportedCodec(re_mp4::FourCC),

    #[error("Unable to determine codec string from the video contents")]
    UnableToDetermineCodecString,

    #[error("Failed to parse H.264 SPS from mp4: {0:?}")]
    SpsParsingError(h264_reader::nal::sps::SpsError),
}
1122
impl re_byte_size::SizeBytes for VideoLoadError {
    fn heap_size_bytes(&self) -> u64 {
        // Some variants own heap data (strings, wrapped errors), but errors are rare
        // and short-lived, so reporting zero is an acceptable approximation.
        0 // close enough
    }
}
1128
1129impl std::fmt::Debug for VideoDataDescription {
1130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1131        f.debug_struct("Video")
1132            .field("codec", &self.codec)
1133            .field("encoding_details", &self.encoding_details)
1134            .field("timescale", &self.timescale)
1135            .field("keyframe_indices", &self.keyframe_indices)
1136            .field("samples", &self.samples.iter_indexed().collect::<Vec<_>>())
1137            .finish()
1138    }
1139}
1140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::nalu::ANNEXB_NAL_START_CODE;

    #[test]
    fn test_latest_sample_index_at_presentation_timestamp() {
        // This is a snippet of real world data!
        let pts = [
            0, 1024, 512, 256, 768, 2048, 1536, 1280, 1792, 3072, 2560, 2304, 2816, 4096, 3584,
            3328, 3840, 4864, 4352, 4608, 5888, 5376, 5120, 5632, 6912, 6400, 6144, 6656, 7936,
            7424, 7168, 7680, 8960, 8448, 8192, 8704, 9984, 9472, 9216, 9728, 11008, 10496, 10240,
            10752, 12032, 11520, 11264, 11776, 13056, 12544,
        ];
        let dts = [
            -512, -256, 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072,
            3328, 3584, 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656,
            6912, 7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240,
            10496, 10752, 11008, 11264, 11520, 11776, 12032,
        ];

        // Checking our basic assumptions about this data:
        assert_eq!(pts.len(), dts.len());
        assert!(pts.iter().zip(dts.iter()).all(|(pts, dts)| dts <= pts));

        // Create fake samples from this.
        let samples = pts
            .into_iter()
            .zip(dts)
            .map(|(pts, dts)| {
                SampleMetadataState::Present(SampleMetadata {
                    is_sync: true,
                    frame_nr: 0, // unused
                    decode_timestamp: Time(dts),
                    presentation_timestamp: Time(pts),
                    duration: Some(Time(1)),
                    source_id: Tuid::new(),
                    byte_span: Default::default(),
                })
            })
            .collect::<StableIndexDeque<_>>();
        // Treat every sample as its own keyframe so the keyframe search never constrains the query.
        let keyframe_indices: Vec<SampleIndex> =
            (samples.min_index()..samples.next_index()).collect();

        let sample_statistics = SamplesStatistics::new(&samples);
        assert!(!sample_statistics.dts_always_equal_pts);

        // Test queries on the samples.
        let query_pts = |pts| {
            VideoDataDescription::latest_sample_index_at_presentation_timestamp_internal(
                &keyframe_indices,
                &samples,
                &sample_statistics,
                pts,
            )
        };

        // Check that query for all exact positions works as expected using brute force search as the reference.
        for (idx, sample) in samples.iter_indexed() {
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp)
            );
        }

        // Check that for slightly offset positions the query is still correct.
        // This works because for this dataset we know the minimum presentation timestamp distance is always 256.
        for (idx, sample) in samples.iter_indexed() {
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp + Time(1))
            );
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp + Time(255))
            );
        }

        // A few hardcoded cases - both for illustrative purposes and to make sure the generic tests above are correct.

        // Querying before the first sample.
        assert_eq!(None, query_pts(Time(-1)));
        assert_eq!(None, query_pts(Time(-123)));

        // Querying for the first sample
        assert_eq!(Some(0), query_pts(Time(0)));
        assert_eq!(Some(0), query_pts(Time(1)));
        assert_eq!(Some(0), query_pts(Time(88)));
        assert_eq!(Some(0), query_pts(Time(255)));

        // The next sample is a jump in index!
        assert_eq!(Some(3), query_pts(Time(256)));
        assert_eq!(Some(3), query_pts(Time(257)));
        assert_eq!(Some(3), query_pts(Time(400)));
        assert_eq!(Some(3), query_pts(Time(511)));

        // And the one after that should jump back again.
        assert_eq!(Some(2), query_pts(Time(512)));
        assert_eq!(Some(2), query_pts(Time(513)));
        assert_eq!(Some(2), query_pts(Time(600)));
        assert_eq!(Some(2), query_pts(Time(767)));

        // And another one!
        assert_eq!(Some(4), query_pts(Time(768)));
        assert_eq!(Some(4), query_pts(Time(1023)));

        // Test way outside of the range.
        // (this is not the last element in the list since that one doesn't have the highest PTS)
        assert_eq!(Some(48), query_pts(Time(123123123123123123)));
    }

    /// Helper function to check if data contains Annex B start codes
    fn has_annexb_start_codes(data: &[u8]) -> bool {
        data.windows(4).any(|w| w == ANNEXB_NAL_START_CODE)
    }

    /// Resolves the path of the checked-in test asset for the given codec.
    ///
    /// When `need_dts_equal_pts` is set, picks a b-frame-free variant for codecs
    /// where b-frames would otherwise make DTS differ from PTS.
    fn video_test_file_mp4(codec: VideoCodec, need_dts_equal_pts: bool) -> std::path::PathBuf {
        // Walk three levels up from this crate's manifest to reach the workspace root.
        let workspace_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .and_then(|p| p.parent())
            .and_then(|p| p.parent())
            .unwrap()
            .to_path_buf();

        let codec_str = match codec {
            VideoCodec::H264 => "h264",
            VideoCodec::H265 => "h265",
            VideoCodec::VP9 => "vp9",
            VideoCodec::VP8 => {
                panic!("We don't have test data for vp8, because Mp4 doesn't support vp8.")
            }
            VideoCodec::AV1 => "av1",
        };

        if need_dts_equal_pts && (codec == VideoCodec::H264 || codec == VideoCodec::H265) {
            // Only H264 and H265 have DTS != PTS when b-frames are present.
            workspace_dir.join(format!(
                "tests/assets/video/Big_Buck_Bunny_1080_1s_{codec_str}_nobframes.mp4",
            ))
        } else {
            workspace_dir.join(format!(
                "tests/assets/video/Big_Buck_Bunny_1080_1s_{codec_str}.mp4",
            ))
        }
    }

    /// Helper function to test video sampling for a specific codec
    fn test_video_codec_sampling(codec: VideoCodec, need_dts_equal_pts: bool) {
        let video_path = video_test_file_mp4(codec, need_dts_equal_pts);
        let data = std::fs::read(&video_path).unwrap();
        let video_data = VideoDataDescription::load_from_bytes(
            &data,
            "video/mp4",
            &format!("test_{codec:?}_video_sampling"),
            Tuid::new(),
        )
        .unwrap();

        let mut idr_count = 0;
        let mut non_idr_count = 0;

        for (sample_idx, sample) in video_data.samples.iter_indexed() {
            let chunk = sample
                .sample()
                .unwrap()
                .get(&|_| &data, sample_idx)
                .unwrap();
            let converted = video_data.sample_data_in_stream_format(&chunk).unwrap();

            if chunk.is_sync {
                idr_count += 1;

                // IDR frame should have SPS/PPS (only for H.264)
                // (NAL unit type 7 == SPS; lower 5 bits of the byte after the start code.)
                if codec == VideoCodec::H264 {
                    let has_sps = converted
                        .windows(5)
                        .any(|w| w[0..4] == *ANNEXB_NAL_START_CODE && (w[4] & 0x1F) == 7);
                    assert!(has_sps, "IDR frame at index {sample_idx} should have SPS");
                }
            } else {
                non_idr_count += 1;
            }

            // All frames should have Annex B start codes (only for H.264/H.265)
            if codec == VideoCodec::H264 || codec == VideoCodec::H265 {
                assert!(
                    has_annexb_start_codes(&converted),
                    "Frame at index {sample_idx} should have Annex B start codes",
                );
            }
        }

        assert!(idr_count > 0, "Should have at least one IDR frame");
        assert!(non_idr_count > 0, "Should have at least one non-IDR frame");
    }

    #[test]
    fn test_full_video_sampling_all_codecs() {
        // TODO(#10186): Add VP9 once we have it.
        for codec in [VideoCodec::H264, VideoCodec::H265, VideoCodec::AV1] {
            test_video_codec_sampling(codec, false);
        }
    }
}