1pub mod mp4;
9
10use std::collections::BTreeMap;
11
12use bit_vec::BitVec;
13use itertools::Itertools as _;
14use re_log::{debug_assert, debug_panic};
15use re_span::Span;
16use re_tuid::Tuid;
17use web_time::Instant;
18
19use super::{Time, Timescale};
20use crate::nalu::AnnexBStreamWriteError;
21use crate::{
22 Chunk, StableIndexDeque, TrackId, TrackKind, write_avc_chunk_to_annexb,
23 write_hevc_chunk_to_annexb,
24};
25
/// Chroma subsampling modes a video stream may use.
///
/// Describes how chroma (color) information is sampled relative to luma.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChromaSubsamplingModes {
    /// Luma only — no chroma channels at all.
    Monochrome,

    /// Full chroma resolution (no subsampling).
    Yuv444,

    /// Chroma halved horizontally.
    Yuv422,

    /// Chroma halved both horizontally and vertically.
    Yuv420,
}
48
49impl std::fmt::Display for ChromaSubsamplingModes {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 match self {
52 Self::Monochrome => write!(f, "monochrome"),
54 Self::Yuv444 => write!(f, "4:4:4"),
55 Self::Yuv422 => write!(f, "4:2:2"),
56 Self::Yuv420 => write!(f, "4:2:0"),
57 }
58 }
59}
60
/// The video codec a stream is encoded with.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VideoCodec {
    /// H.264 / AVC.
    H264,

    /// H.265 / HEVC.
    H265,

    /// AOMedia Video 1.
    AV1,

    /// VP8 (predecessor of VP9; has no MP4 mapping).
    VP8,

    /// VP9.
    VP9,
}
89
90impl VideoCodec {
91 pub fn base_webcodec_string(&self) -> &'static str {
95 match self {
96 Self::AV1 => "av01",
98
99 Self::H264 => "avc1",
102
103 Self::H265 => "hev1",
106
107 Self::VP8 => "vp8",
110
111 Self::VP9 => "vp09",
113 }
114 }
115}
116
/// Stable index of a sample within [`VideoDataDescription::samples`].
pub type SampleIndex = usize;

/// Index into [`VideoDataDescription::keyframe_indices`].
pub type KeyframeIndex = usize;
122
/// How the video data is delivered over time.
#[derive(Clone)]
pub enum VideoDeliveryMethod {
    /// A complete, fully-known video (e.g. loaded from an MP4 file) with a fixed total duration.
    Static { duration: Time },

    /// A live/ongoing stream whose samples may keep arriving.
    Stream {
        /// Instant at which the sample list was last updated.
        // NOTE(review): semantics inferred from the field name — confirm at the update site.
        last_time_updated_samples: Instant,
    },
}
143
144impl VideoDeliveryMethod {
145 #[inline]
146 pub fn new_stream() -> Self {
147 Self::Stream {
148 last_time_updated_samples: Instant::now(),
149 }
150 }
151}
152
/// Description of video data: everything needed to decode & seek a video
/// stream, independent of where the encoded sample bytes themselves live.
#[derive(Clone)]
pub struct VideoDataDescription {
    /// The codec the samples are encoded with.
    pub codec: VideoCodec,

    /// Details about the encoding (codec string, coded dimensions, …).
    ///
    /// `None` if not (yet) known, e.g. for a stream whose headers haven't arrived.
    pub encoding_details: Option<VideoEncodingDetails>,

    /// Number of time units per second, used to interpret [`Time`] values.
    ///
    /// `None` if unknown; [`Self::duration`] then returns `None` as well.
    pub timescale: Option<Timescale>,

    /// Whether this is a static video or an ongoing stream.
    pub delivery_method: VideoDeliveryMethod,

    /// Indices into [`Self::samples`] of all keyframes (`is_sync` samples),
    /// sorted ascending (verified by [`Self::sanity_check`]).
    pub keyframe_indices: Vec<SampleIndex>,

    /// Metadata for every sample (frame), in decode order.
    pub samples: StableIndexDeque<SampleMetadataState>,

    /// Statistics derived from [`Self::samples`].
    ///
    /// Must be kept consistent with the samples; [`Self::sanity_check`]
    /// recomputes and compares them.
    pub samples_statistics: SamplesStatistics,

    /// All tracks discovered in the source MP4 (if any), keyed by track id.
    /// A `None` kind means the track's kind could not be determined.
    pub mp4_tracks: BTreeMap<TrackId, Option<TrackKind>>,
}
203
impl re_byte_size::SizeBytes for VideoDataDescription {
    fn heap_size_bytes(&self) -> u64 {
        // Exhaustive destructuring: adding a field to the struct forces this
        // accounting to be revisited (compile error otherwise).
        let Self {
            codec: _,
            encoding_details: _,
            timescale: _,
            delivery_method: _,
            keyframe_indices,
            samples,
            samples_statistics,
            mp4_tracks,
        } = self;

        keyframe_indices.heap_size_bytes()
            + samples.heap_size_bytes()
            + samples_statistics.heap_size_bytes()
            // Approximation: counts entry payloads only, not BTreeMap node overhead.
            + mp4_tracks.len() as u64 * std::mem::size_of::<(TrackId, Option<TrackKind>)>() as u64
    }
}
223
impl VideoDataDescription {
    /// The half-open range of sample indices for the GOP that starts at the
    /// given keyframe (`keyframe_idx` indexes [`Self::keyframe_indices`]).
    ///
    /// The range ends at the next keyframe's sample, or at the end of the
    /// sample list if this is the last keyframe.
    /// Returns `None` for an out-of-bounds keyframe index.
    pub fn gop_sample_range_for_keyframe(
        &self,
        keyframe_idx: usize,
    ) -> Option<std::ops::Range<SampleIndex>> {
        Some(
            *self.keyframe_indices.get(keyframe_idx)?
                ..self
                    .keyframe_indices
                    .get(keyframe_idx + 1)
                    .copied()
                    .unwrap_or_else(|| self.samples.next_index()),
        )
    }

    /// Checks internal invariants of this description:
    /// keyframe bookkeeping, sample ordering, sample statistics consistency,
    /// and (if present) that the stsd box agrees with [`Self::codec`].
    ///
    /// Returns a human-readable error message on the first violation found.
    pub fn sanity_check(&self) -> Result<(), String> {
        self.sanity_check_keyframes()?;
        self.sanity_check_samples()?;

        if let Some(stsd) = self.encoding_details.as_ref().and_then(|e| e.stsd.as_ref()) {
            // Map the stsd box content back to a codec and make sure it matches.
            let stsd_codec = match &stsd.contents {
                re_mp4::StsdBoxContent::Av01(_) => crate::VideoCodec::AV1,
                re_mp4::StsdBoxContent::Avc1(_) => crate::VideoCodec::H264,
                re_mp4::StsdBoxContent::Hvc1(_) | re_mp4::StsdBoxContent::Hev1(_) => {
                    crate::VideoCodec::H265
                }
                re_mp4::StsdBoxContent::Vp08(_) => crate::VideoCodec::VP8,
                re_mp4::StsdBoxContent::Vp09(_) => crate::VideoCodec::VP9,
                _ => {
                    return Err(format!(
                        "STSD box content type {:?} doesn't have a supported codec.",
                        stsd.contents
                    ));
                }
            };
            if stsd_codec != self.codec {
                return Err(format!(
                    "STSD box content type {:?} does not match with the internal codec {:?}.",
                    stsd.contents, self.codec
                ));
            }
        }

        Ok(())
    }

    /// Validates that [`Self::keyframe_indices`] is sorted, points only at
    /// loaded `is_sync` samples within bounds, and that every `is_sync`
    /// sample is tracked by the keyframe list.
    fn sanity_check_keyframes(&self) -> Result<(), String> {
        if !self.keyframe_indices.is_sorted() {
            return Err("Keyframes aren't sorted".to_owned());
        }

        for &keyframe in &self.keyframe_indices {
            if keyframe < self.samples.min_index() {
                return Err(format!(
                    "Keyframe {keyframe} refers to sample to the left of the list of samples.",
                ));
            }

            if keyframe >= self.samples.next_index() {
                return Err(format!(
                    "Keyframe {keyframe} refers to sample to the right of the list of samples.",
                ));
            }

            match &self.samples[keyframe] {
                SampleMetadataState::Present(sample_metadata) => {
                    if !sample_metadata.is_sync {
                        return Err(format!("Keyframe {keyframe} is not marked with `is_sync`."));
                    }
                }
                SampleMetadataState::Unloaded { .. } => {
                    return Err(format!("Keyframe {keyframe} refers to an unloaded sample"));
                }
            }
        }

        // Walk loaded samples in order and check that every `is_sync` sample
        // matches up with the next entry of the keyframe list.
        let mut keyframes = self.keyframe_indices.iter().copied();
        for (sample_idx, sample) in self
            .samples
            .iter_indexed()
            .filter_map(|(idx, s)| Some((idx, s.sample()?)))
        {
            if sample.is_sync && keyframes.next().is_none_or(|idx| idx != sample_idx) {
                return Err(format!("Not tracking the keyframe {sample_idx}."));
            }
        }
        Ok(())
    }

    /// Validates that decode timestamps of adjacent loaded samples are
    /// monotonically increasing and that the cached statistics match a
    /// fresh recomputation.
    fn sanity_check_samples(&self) -> Result<(), String> {
        for ((a_idx, a), (b_idx, b)) in self.samples.iter_indexed().tuple_windows() {
            if let SampleMetadataState::Present(a) = a
                && let SampleMetadataState::Present(b) = b
                && a.decode_timestamp > b.decode_timestamp
            {
                return Err(format!(
                    "Decode timestamps for {a_idx}..{b_idx} are not monotonically increasing: {:?} {:?}",
                    a.decode_timestamp, b.decode_timestamp
                ));
            }
        }

        let expected_statistics = SamplesStatistics::new(&self.samples);
        if expected_statistics != self.samples_statistics {
            return Err(format!(
                "Sample statistics are not consistent with the samples.\nExpected: {:?}\nActual: {:?}",
                expected_statistics, self.samples_statistics
            ));
        }

        Ok(())
    }

    /// Converts a sample's data to the codec's "streaming" byte format:
    /// * AV1: data is already self-contained — returned as-is.
    /// * H.264/H.265: converted to Annex B (start codes; parameter sets
    ///   prepended on keyframes), using the stsd box from [`Self::encoding_details`].
    /// * VP8/VP9: unsupported.
    pub fn sample_data_in_stream_format(
        &self,
        chunk: &crate::Chunk,
    ) -> Result<Vec<u8>, SampleConversionError> {
        match self.codec {
            VideoCodec::AV1 => Ok(chunk.data.clone()),
            VideoCodec::H264 => {
                let stsd = self
                    .encoding_details
                    .as_ref()
                    .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))?
                    .stsd
                    .as_ref()
                    .ok_or(SampleConversionError::MissingStsd(self.codec))?;

                let re_mp4::StsdBoxContent::Avc1(avc1_box) = &stsd.contents else {
                    return Err(SampleConversionError::UnexpectedStsdContent {
                        codec: self.codec,
                        found: format!("{:?}", stsd.contents),
                    });
                };

                let mut output = Vec::new();
                write_avc_chunk_to_annexb(avc1_box, &mut output, chunk.is_sync, chunk)
                    .map_err(SampleConversionError::AnnexB)?;
                Ok(output)
            }
            VideoCodec::H265 => {
                let stsd = self
                    .encoding_details
                    .as_ref()
                    .ok_or(SampleConversionError::MissingEncodingDetails(self.codec))?
                    .stsd
                    .as_ref()
                    .ok_or(SampleConversionError::MissingStsd(self.codec))?;

                // Both hvc1 and hev1 sample entries carry the same box type.
                let hvcc_box = match &stsd.contents {
                    re_mp4::StsdBoxContent::Hvc1(hvc1_box)
                    | re_mp4::StsdBoxContent::Hev1(hvc1_box) => hvc1_box,
                    other => {
                        return Err(SampleConversionError::UnexpectedStsdContent {
                            codec: self.codec,
                            found: format!("{other:?}"),
                        });
                    }
                };

                let mut output = Vec::new();
                write_hevc_chunk_to_annexb(hvcc_box, &mut output, chunk.is_sync, chunk)
                    .map_err(SampleConversionError::AnnexB)?;
                Ok(output)
            }
            VideoCodec::VP8 | VideoCodec::VP9 => {
                Err(SampleConversionError::UnsupportedCodec(self.codec))
            }
        }
    }
}
418
/// Errors that can occur when converting a sample to its streaming byte
/// format (see [`VideoDataDescription::sample_data_in_stream_format`]).
#[derive(thiserror::Error, Debug)]
pub enum SampleConversionError {
    #[error("Missing encoding details for codec {0:?}")]
    MissingEncodingDetails(VideoCodec),

    #[error("Missing stsd box for codec {0:?}")]
    MissingStsd(VideoCodec),

    /// The stsd box's contents don't match the codec (e.g. an AV1 box for H.264 data).
    #[error("Unexpected stsd contents for codec {codec:?}: {found}")]
    UnexpectedStsdContent { codec: VideoCodec, found: String },

    #[error("Failed converting sample to Annex-B: {0}")]
    AnnexB(#[from] AnnexBStreamWriteError),

    /// VP8/VP9 have no supported streaming conversion.
    #[error("Unsupported codec {0:?}")]
    UnsupportedCodec(VideoCodec),
}
437
/// Details about how a video stream is encoded.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VideoEncodingDetails {
    /// Full codec string (including profile/level parameters),
    /// e.g. as used by WebCodecs.
    pub codec_string: String,

    /// Encoded width & height of the video in pixels.
    // NOTE(review): presumably coded (pre-crop) dimensions — confirm against producers.
    pub coded_dimensions: [u16; 2],

    /// Bit depth of the samples, if known.
    pub bit_depth: Option<u8>,

    /// Chroma subsampling mode, if known.
    pub chroma_subsampling: Option<ChromaSubsamplingModes>,

    /// The mp4 stsd (sample description) box, if this video came from an MP4.
    ///
    /// Required for Annex-B conversion of H.264/H.265 samples
    /// (see [`VideoDataDescription::sample_data_in_stream_format`]).
    pub stsd: Option<re_mp4::StsdBox>,
}
476
/// Statistics derived from a video's sample metadata.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SamplesStatistics {
    /// Whether every loaded sample's decode timestamp equals its
    /// presentation timestamp, i.e. the video has no B-frames.
    pub dts_always_equal_pts: bool,

    /// Per sample: whether its PTS is the highest seen so far in decode order.
    ///
    /// `None` when [`Self::dts_always_equal_pts`] is true (it would be all-true).
    /// Used to terminate backwards PTS searches early.
    pub has_sample_highest_pts_so_far: Option<BitVec>,
}
493
494impl re_byte_size::SizeBytes for SamplesStatistics {
495 fn heap_size_bytes(&self) -> u64 {
496 let Self {
497 dts_always_equal_pts: _,
498 has_sample_highest_pts_so_far,
499 } = self;
500 has_sample_highest_pts_so_far
501 .as_ref()
502 .map_or(0, |bitvec| bitvec.capacity() as u64 / 8)
503 }
504}
505
impl SamplesStatistics {
    /// Statistics for a video without B-frames (DTS always equals PTS).
    pub const NO_BFRAMES: Self = Self {
        dts_always_equal_pts: true,
        has_sample_highest_pts_so_far: None,
    };

    /// Computes the statistics for the given samples.
    ///
    /// Unloaded samples are skipped for the DTS==PTS check, but still receive
    /// an entry (`false`) in the highest-PTS bitvec so positions stay aligned.
    // NOTE(review): the bitvec is indexed by `SampleIndex` elsewhere
    // (`has_sample_highest_pts_so_far[sample_idx]`), which assumes
    // `samples.min_index() == 0` at computation time — confirm.
    pub fn new(samples: &StableIndexDeque<SampleMetadataState>) -> Self {
        re_tracing::profile_function!();

        let dts_always_equal_pts = samples
            .iter()
            .filter_map(|s| s.sample())
            .all(|s| s.decode_timestamp == s.presentation_timestamp);

        // Running maximum PTS while walking the samples in decode order.
        let mut biggest_pts_so_far = Time::MIN;
        let has_sample_highest_pts_so_far = (!dts_always_equal_pts).then(|| {
            samples
                .iter()
                .map(move |sample| {
                    sample.sample().is_some_and(|sample| {
                        if sample.presentation_timestamp > biggest_pts_so_far {
                            biggest_pts_so_far = sample.presentation_timestamp;
                            true
                        } else {
                            false
                        }
                    })
                })
                .collect()
        });

        Self {
            dts_always_equal_pts,
            has_sample_highest_pts_so_far,
        }
    }
}
547
impl VideoDataDescription {
    /// Loads a video from bytes of the given media type.
    ///
    /// Currently only `"video/mp4"` is supported.
    ///
    /// # Errors
    /// * [`VideoLoadError::ZeroBytes`] for empty input.
    /// * [`VideoLoadError::UnsupportedMimeType`] for other `video/…` MIME types.
    /// * [`VideoLoadError::MimeTypeIsNotAVideo`] if the MIME type isn't a video at all.
    pub fn load_from_bytes(
        data: &[u8],
        media_type: &str,
        debug_name: &str,
        source_id: Tuid,
    ) -> Result<Self, VideoLoadError> {
        if data.is_empty() {
            return Err(VideoLoadError::ZeroBytes);
        }

        re_tracing::profile_function!();
        match media_type {
            "video/mp4" => Self::load_mp4(data, debug_name, source_id),

            media_type => {
                if media_type.starts_with("video/") {
                    Err(VideoLoadError::UnsupportedMimeType {
                        provided_or_detected_media_type: media_type.to_owned(),
                    })
                } else {
                    Err(VideoLoadError::MimeTypeIsNotAVideo {
                        provided_or_detected_media_type: media_type.to_owned(),
                    })
                }
            }
        }
    }

    /// Human-readable codec description, e.g. `"H.264 AVC1 (avc1.64001F)"`.
    #[inline]
    pub fn human_readable_codec_string(&self) -> String {
        let base_codec_string = match &self.codec {
            VideoCodec::AV1 => "AV1",
            VideoCodec::H264 => "H.264 AVC1",
            VideoCodec::H265 => "H.265 HEV1",
            VideoCodec::VP8 => "VP8",
            VideoCodec::VP9 => "VP9",
        }
        .to_owned();

        if let Some(encoding_details) = self.encoding_details.as_ref() {
            format!("{base_codec_string} ({})", encoding_details.codec_string)
        } else {
            base_codec_string
        }
    }

    /// Number of samples (frames) currently in this video.
    #[inline]
    pub fn num_samples(&self) -> usize {
        self.samples.num_elements()
    }

    /// Total duration of the video, if determinable.
    ///
    /// Static videos report their stored duration; streams estimate from the
    /// first and last loaded samples. Returns `None` if there is no timescale
    /// or no loaded samples to measure from.
    pub fn duration(&self) -> Option<std::time::Duration> {
        let timescale = self.timescale?;

        Some(match &self.delivery_method {
            VideoDeliveryMethod::Static { duration } => duration.duration(timescale),

            VideoDeliveryMethod::Stream { .. } => match self.samples.num_elements() {
                0 => std::time::Duration::ZERO,
                1 => {
                    let first = self.samples.iter().find_map(|s| s.sample())?;
                    first
                        .duration
                        .map(|d| d.duration(timescale))
                        .unwrap_or(std::time::Duration::ZERO)
                }
                _ => {
                    let first = self.samples.iter().find_map(|s| s.sample())?;
                    let last = self.samples.iter().rev().find_map(|s| s.sample())?;

                    // If the last sample has no explicit duration, estimate it
                    // as the average frame duration over the loaded span.
                    // NOTE(review): if `last.frame_nr == first.frame_nr` this
                    // divides by zero — presumably frame numbers strictly
                    // increase across loaded samples; confirm.
                    let last_sample_duration = last.duration.map_or_else(
                        || {
                            (last.presentation_timestamp - first.presentation_timestamp)
                                .duration(timescale)
                                / (last.frame_nr - first.frame_nr)
                        },
                        |d| d.duration(timescale),
                    );

                    (last.presentation_timestamp - first.presentation_timestamp).duration(timescale)
                        + last_sample_duration
                }
            },
        })
    }

    /// Average frames per second over the whole video, if the duration is known.
    #[inline]
    pub fn average_fps(&self) -> Option<f32> {
        self.duration().map(|duration| {
            let num_frames = self.num_samples();

            num_frames as f32 / duration.as_secs_f32()
        })
    }

    /// Presentation timestamps of all loaded samples, in nanoseconds,
    /// sorted ascending (i.e. in presentation order, not decode order).
    ///
    /// `None` if there is no timescale to convert with.
    pub fn frame_timestamps_nanos(&self) -> Option<impl Iterator<Item = i64> + '_> {
        let timescale = self.timescale?;

        Some(
            self.samples
                .iter()
                .filter_map(|sample| Some(sample.sample()?.presentation_timestamp))
                .sorted()
                .map(move |pts| pts.into_nanos(timescale)),
        )
    }

    /// Index of the latest sample whose decode timestamp is at or before
    /// `decode_time`, or `None` if `decode_time` is before all samples.
    ///
    /// Binary-searches the keyframe list to find the enclosing GOP, then
    /// scans linearly within it.
    fn latest_sample_index_at_decode_timestamp(
        keyframes: &[KeyframeIndex],
        samples: &StableIndexDeque<SampleMetadataState>,
        decode_time: Time,
    ) -> Option<SampleIndex> {
        // `partition_point` finds the first keyframe *after* `decode_time`;
        // the one before it starts the GOP containing `decode_time`.
        let keyframe_idx = keyframes
            .partition_point(|p| {
                samples
                    .get(*p)
                    .map(|s| s.sample())
                    .inspect(|_s| {
                        debug_assert!(_s.is_some(), "Keyframes mentioned in the keyframe lookup list should always be loaded");
                    })
                    .flatten()
                    .is_some_and(|s| s.decode_timestamp <= decode_time)
            })
            .checked_sub(1)?;

        let start = *keyframes.get(keyframe_idx)?;
        let end = keyframes
            .get(keyframe_idx + 1)
            .copied()
            .unwrap_or_else(|| samples.next_index());

        let range = start..end;

        // Linear scan within the GOP; unloaded samples are skipped.
        let mut found_sample_idx = None;
        for (idx, sample) in samples.iter_index_range_clamped(&range) {
            let Some(s) = sample.sample() else {
                continue;
            };

            if s.decode_timestamp <= decode_time {
                found_sample_idx = Some(idx);
            } else {
                break;
            }
        }

        found_sample_idx
    }

    /// Index of the latest sample (by presentation time) at or before
    /// `presentation_timestamp`, handling B-frame reordering.
    fn latest_sample_index_at_presentation_timestamp_internal(
        keyframes: &[KeyframeIndex],
        samples: &StableIndexDeque<SampleMetadataState>,
        sample_statistics: &SamplesStatistics,
        presentation_timestamp: Time,
    ) -> Option<SampleIndex> {
        // DTS <= PTS always holds, so the DTS search gives an upper bound on
        // the answer's decode position.
        let decode_sample_idx = Self::latest_sample_index_at_decode_timestamp(
            keyframes,
            samples,
            presentation_timestamp,
        );

        let decode_sample_idx = decode_sample_idx?;

        // Without B-frames, decode order == presentation order, so we're done.
        let Some(has_sample_highest_pts_so_far) =
            sample_statistics.has_sample_highest_pts_so_far.as_ref()
        else {
            debug_assert!(sample_statistics.dts_always_equal_pts);
            return Some(decode_sample_idx);
        };
        debug_assert!(has_sample_highest_pts_so_far.len() == samples.next_index());

        // Walk backwards in decode order, tracking the best (largest) PTS
        // that is still <= the query. We can stop as soon as we pass a sample
        // whose PTS was the highest seen so far — nothing earlier can beat it.
        let mut best_index = SampleIndex::MAX;
        let mut best_pts = Time::MIN;
        for sample_idx in (samples.min_index()..=decode_sample_idx).rev() {
            let Some(sample) = samples[sample_idx].sample() else {
                continue;
            };

            if sample.presentation_timestamp == presentation_timestamp {
                // Exact match — can't do better.
                return Some(sample_idx);
            }

            if sample.presentation_timestamp < presentation_timestamp
                && sample.presentation_timestamp > best_pts
            {
                best_pts = sample.presentation_timestamp;
                best_index = sample_idx;
            }

            if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] {
                return Some(best_index);
            }
        }

        None
    }

    /// Index of the latest sample (by presentation time) at or before
    /// `presentation_timestamp`, or `None` if it precedes all samples.
    pub fn latest_sample_index_at_presentation_timestamp(
        &self,
        presentation_timestamp: Time,
    ) -> Option<SampleIndex> {
        Self::latest_sample_index_at_presentation_timestamp_internal(
            &self.keyframe_indices,
            &self.samples,
            &self.samples_statistics,
            presentation_timestamp,
        )
    }

    /// The sample that is presented immediately before the given one,
    /// or `None` if it is the first presented sample.
    pub fn previous_presented_sample(&self, sample: &SampleMetadata) -> Option<&SampleMetadata> {
        let idx = Self::latest_sample_index_at_presentation_timestamp_internal(
            &self.keyframe_indices,
            &self.samples,
            &self.samples_statistics,
            sample.presentation_timestamp - Time::new(1),
        )?;
        match self.samples.get(idx) {
            Some(SampleMetadataState::Present(sample)) => Some(sample),
            // The PTS search above only ever yields loaded, in-bounds samples.
            None | Some(_) => unreachable!(),
        }
    }

    /// The keyframe index of the GOP that contains the given sample,
    /// or `None` if the sample precedes the first keyframe.
    pub fn sample_keyframe_idx(&self, sample_idx: SampleIndex) -> Option<KeyframeIndex> {
        self.keyframe_indices
            .partition_point(|idx| *idx <= sample_idx)
            .checked_sub(1)
    }

    /// Binary-searches the keyframe list for the last keyframe whose sample
    /// satisfies `cmp_time` (which must be monotone over the list).
    fn find_keyframe_index(
        &self,
        cmp_time: impl Fn(&SampleMetadata) -> bool,
    ) -> Option<KeyframeIndex> {
        self.keyframe_indices
            .partition_point(|sample_idx| {
                if let Some(sample) = self.samples[*sample_idx].sample() {
                    cmp_time(sample)
                } else {
                    debug_panic!("keyframe indices should always be valid");

                    false
                }
            })
            .checked_sub(1)
    }

    /// The last keyframe whose decode timestamp is at or before `decode_time`.
    pub fn decode_time_keyframe_index(&self, decode_time: Time) -> Option<KeyframeIndex> {
        self.find_keyframe_index(|t| t.decode_timestamp <= decode_time)
    }

    /// The last keyframe whose presentation timestamp is at or before `pts`.
    pub fn presentation_time_keyframe_index(&self, pts: Time) -> Option<KeyframeIndex> {
        self.find_keyframe_index(|t| t.presentation_timestamp <= pts)
    }
}
869
/// Metadata of a sample that may or may not currently be loaded.
#[derive(Debug, Clone)]
pub enum SampleMetadataState {
    /// The sample's metadata is fully available.
    Present(SampleMetadata),

    /// The sample was unloaded; only its source buffer id and the decode
    /// timestamp it had at unload time are retained.
    Unloaded { source_id: Tuid, min_dts: Time },
}
883
884impl SampleMetadataState {
885 pub fn sample(&self) -> Option<&SampleMetadata> {
886 match self {
887 Self::Present(sample_metadata) => Some(sample_metadata),
888 Self::Unloaded { .. } => None,
889 }
890 }
891
892 pub fn sample_mut(&mut self) -> Option<&mut SampleMetadata> {
893 match self {
894 Self::Present(sample_metadata) => Some(sample_metadata),
895 Self::Unloaded { .. } => None,
896 }
897 }
898
899 pub fn source_id(&self) -> Tuid {
900 match self {
901 Self::Present(sample) => sample.source_id,
902 Self::Unloaded { source_id, .. } => *source_id,
903 }
904 }
905
906 pub fn source_id_mut(&mut self) -> &mut Tuid {
907 match self {
908 Self::Present(sample) => &mut sample.source_id,
909 Self::Unloaded { source_id, .. } => source_id,
910 }
911 }
912
913 pub fn decode_timestamp(&self) -> Time {
922 match self {
923 Self::Present(sample) => sample.decode_timestamp,
924 Self::Unloaded { min_dts, .. } => *min_dts,
925 }
926 }
927
928 pub fn unload(&mut self, new_source_id: Option<Tuid>) {
929 match self {
930 Self::Present(sample) => {
931 let dts = sample.decode_timestamp;
932 let source_id = new_source_id.unwrap_or(sample.source_id);
933
934 *self = Self::Unloaded {
935 source_id,
936 min_dts: dts,
937 }
938 }
939 Self::Unloaded {
940 source_id,
941 min_dts: _,
942 } => {
943 if let Some(new_source_id) = new_source_id {
944 *source_id = new_source_id;
945 }
946 }
947 }
948 }
949
950 pub fn is_loaded(&self) -> bool {
951 match self {
952 Self::Present(_) => true,
953 Self::Unloaded { .. } => false,
954 }
955 }
956
957 pub fn is_unloaded(&self) -> bool {
958 !self.is_loaded()
959 }
960}
961
962impl re_byte_size::SizeBytes for SampleMetadataState {
963 fn heap_size_bytes(&self) -> u64 {
964 match self {
965 Self::Present(sample_metadata) => sample_metadata.heap_size_bytes(),
966 Self::Unloaded {
967 source_id: _,
968 min_dts: _,
969 } => 0,
970 }
971 }
972}
973
/// Metadata about a single video sample (frame), excluding the encoded bytes
/// themselves (those live in a separate buffer, see [`Self::source_id`] /
/// [`Self::byte_span`]).
#[derive(Debug, Clone)]
pub struct SampleMetadata {
    /// Whether this sample is a sync sample (keyframe) that can be decoded
    /// without prior samples.
    pub is_sync: bool,

    /// Frame number of this sample.
    // NOTE(review): presumably in presentation order — confirm against producers.
    pub frame_nr: u32,

    /// Time at which this sample is fed to the decoder (DTS).
    pub decode_timestamp: Time,

    /// Time at which this sample is shown (PTS). Differs from
    /// [`Self::decode_timestamp`] only in the presence of B-frames.
    pub presentation_timestamp: Time,

    /// How long the sample is shown, if known.
    pub duration: Option<Time>,

    /// Id of the buffer that holds this sample's encoded bytes.
    pub source_id: Tuid,

    /// Byte range of this sample's data within the source buffer.
    pub byte_span: Span<u32>,
}
1034
impl re_byte_size::SizeBytes for SampleMetadata {
    /// All fields are stored inline; nothing lives on the heap.
    fn heap_size_bytes(&self) -> u64 {
        0
    }

    fn is_pod() -> bool {
        true
    }
}
1044
1045impl SampleMetadata {
1046 pub fn get<'a>(
1056 &self,
1057 get_buffer: &dyn Fn(Tuid) -> &'a [u8],
1058 sample_idx: SampleIndex,
1059 ) -> Option<Chunk> {
1060 let buffer = get_buffer(self.source_id);
1061 let data = buffer.get(self.byte_span.range_usize())?.to_vec();
1062
1063 Some(Chunk {
1064 data,
1065 sample_idx,
1066 frame_nr: self.frame_nr,
1067 decode_timestamp: self.decode_timestamp,
1068 presentation_timestamp: self.presentation_timestamp,
1069 duration: self.duration,
1070 is_sync: self.is_sync,
1071 })
1072 }
1073}
1074
/// Errors that can occur when loading a video
/// (see [`VideoDataDescription::load_from_bytes`]).
#[derive(thiserror::Error, Debug)]
pub enum VideoLoadError {
    #[error("The video file is empty (zero bytes)")]
    ZeroBytes,

    #[error("MP4 error: {0}")]
    ParseMp4(#[from] re_mp4::Error),

    #[error("Video file has no video tracks")]
    NoVideoTrack,

    #[error("Video file track config is invalid")]
    InvalidConfigFormat,

    #[error("Video file has invalid sample entries")]
    InvalidSamples,

    #[error(
        "Video file has no timescale, which is required to determine frame timestamps in time units"
    )]
    NoTimescale,

    #[error("The media type of the blob is not a video: {provided_or_detected_media_type}")]
    MimeTypeIsNotAVideo {
        provided_or_detected_media_type: String,
    },

    #[error("MIME type '{provided_or_detected_media_type}' is not supported for videos")]
    UnsupportedMimeType {
        provided_or_detected_media_type: String,
    },

    #[error("Could not detect MIME type from the video contents")]
    UnrecognizedMimeType,

    #[error("Video track uses unsupported codec \"{0}\"")] UnsupportedCodec(re_mp4::FourCC),

    #[error("Unable to determine codec string from the video contents")]
    UnableToDetermineCodecString,

    #[error("Failed to parse H.264 SPS from mp4: {0:?}")]
    SpsParsingError(h264_reader::nal::sps::SpsError),
}
1122
1123impl re_byte_size::SizeBytes for VideoLoadError {
1124 fn heap_size_bytes(&self) -> u64 {
1125 0 }
1127}
1128
1129impl std::fmt::Debug for VideoDataDescription {
1130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1131 f.debug_struct("Video")
1132 .field("codec", &self.codec)
1133 .field("encoding_details", &self.encoding_details)
1134 .field("timescale", &self.timescale)
1135 .field("keyframe_indices", &self.keyframe_indices)
1136 .field("samples", &self.samples.iter_indexed().collect::<Vec<_>>())
1137 .finish()
1138 }
1139}
1140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::nalu::ANNEXB_NAL_START_CODE;

    #[test]
    fn test_latest_sample_index_at_presentation_timestamp() {
        // PTS listed in decode order; out-of-order relative to DTS,
        // i.e. this synthetic stream has B-frame-style reordering.
        let pts = [
            0, 1024, 512, 256, 768, 2048, 1536, 1280, 1792, 3072, 2560, 2304, 2816, 4096, 3584,
            3328, 3840, 4864, 4352, 4608, 5888, 5376, 5120, 5632, 6912, 6400, 6144, 6656, 7936,
            7424, 7168, 7680, 8960, 8448, 8192, 8704, 9984, 9472, 9216, 9728, 11008, 10496, 10240,
            10752, 12032, 11520, 11264, 11776, 13056, 12544,
        ];
        let dts = [
            -512, -256, 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072,
            3328, 3584, 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656,
            6912, 7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240,
            10496, 10752, 11008, 11264, 11520, 11776, 12032,
        ];

        // Sanity: same length, and DTS never exceeds PTS.
        assert_eq!(pts.len(), dts.len());
        assert!(pts.iter().zip(dts.iter()).all(|(pts, dts)| dts <= pts));

        // Every sample is marked `is_sync`, so every sample is its own keyframe.
        let samples = pts
            .into_iter()
            .zip(dts)
            .map(|(pts, dts)| {
                SampleMetadataState::Present(SampleMetadata {
                    is_sync: true,
                    frame_nr: 0, decode_timestamp: Time(dts),
                    presentation_timestamp: Time(pts),
                    duration: Some(Time(1)),
                    source_id: Tuid::new(),
                    byte_span: Default::default(),
                })
            })
            .collect::<StableIndexDeque<_>>();
        let keyframe_indices: Vec<SampleIndex> =
            (samples.min_index()..samples.next_index()).collect();

        let sample_statistics = SamplesStatistics::new(&samples);
        assert!(!sample_statistics.dts_always_equal_pts);

        let query_pts = |pts| {
            VideoDataDescription::latest_sample_index_at_presentation_timestamp_internal(
                &keyframe_indices,
                &samples,
                &sample_statistics,
                pts,
            )
        };

        // Querying a sample's exact PTS resolves to that sample.
        for (idx, sample) in samples.iter_indexed() {
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp)
            );
        }

        // Queries strictly between two PTS values resolve to the earlier one
        // (255 is just below the 256-tick frame spacing).
        for (idx, sample) in samples.iter_indexed() {
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp + Time(1))
            );
            assert_eq!(
                Some(idx),
                query_pts(sample.sample().unwrap().presentation_timestamp + Time(255))
            );
        }

        // Before the first PTS there is nothing to present.
        assert_eq!(None, query_pts(Time(-1)));
        assert_eq!(None, query_pts(Time(-123)));

        // Spot checks around the reordered frames at the start of the stream:
        assert_eq!(Some(0), query_pts(Time(0)));
        assert_eq!(Some(0), query_pts(Time(1)));
        assert_eq!(Some(0), query_pts(Time(88)));
        assert_eq!(Some(0), query_pts(Time(255)));

        assert_eq!(Some(3), query_pts(Time(256)));
        assert_eq!(Some(3), query_pts(Time(257)));
        assert_eq!(Some(3), query_pts(Time(400)));
        assert_eq!(Some(3), query_pts(Time(511)));

        assert_eq!(Some(2), query_pts(Time(512)));
        assert_eq!(Some(2), query_pts(Time(513)));
        assert_eq!(Some(2), query_pts(Time(600)));
        assert_eq!(Some(2), query_pts(Time(767)));

        assert_eq!(Some(4), query_pts(Time(768)));
        assert_eq!(Some(4), query_pts(Time(1023)));

        // Far beyond the end: resolves to the sample with the highest PTS.
        assert_eq!(Some(48), query_pts(Time(123123123123123123)));
    }

    /// True if `data` contains at least one Annex-B NAL start code.
    fn has_annexb_start_codes(data: &[u8]) -> bool {
        data.windows(4).any(|w| w == ANNEXB_NAL_START_CODE)
    }

    /// Path to a checked-in test video for the given codec.
    fn video_test_file_mp4(codec: VideoCodec, need_dts_equal_pts: bool) -> std::path::PathBuf {
        // Walk up from this crate's manifest dir to the workspace root.
        let workspace_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .and_then(|p| p.parent())
            .and_then(|p| p.parent())
            .unwrap()
            .to_path_buf();

        let codec_str = match codec {
            VideoCodec::H264 => "h264",
            VideoCodec::H265 => "h265",
            VideoCodec::VP9 => "vp9",
            VideoCodec::VP8 => {
                panic!("We don't have test data for vp8, because Mp4 doesn't support vp8.")
            }
            VideoCodec::AV1 => "av1",
        };

        // A "no B-frames" variant only exists for the codecs where it matters.
        if need_dts_equal_pts && (codec == VideoCodec::H264 || codec == VideoCodec::H265) {
            workspace_dir.join(format!(
                "tests/assets/video/Big_Buck_Bunny_1080_1s_{codec_str}_nobframes.mp4",
            ))
        } else {
            workspace_dir.join(format!(
                "tests/assets/video/Big_Buck_Bunny_1080_1s_{codec_str}.mp4",
            ))
        }
    }

    /// Loads a test video and checks that every sample converts to the
    /// codec's streaming byte format (Annex B for H.264/H.265).
    fn test_video_codec_sampling(codec: VideoCodec, need_dts_equal_pts: bool) {
        let video_path = video_test_file_mp4(codec, need_dts_equal_pts);
        let data = std::fs::read(&video_path).unwrap();
        let video_data = VideoDataDescription::load_from_bytes(
            &data,
            "video/mp4",
            &format!("test_{codec:?}_video_sampling"),
            Tuid::new(),
        )
        .unwrap();

        let mut idr_count = 0;
        let mut non_idr_count = 0;

        for (sample_idx, sample) in video_data.samples.iter_indexed() {
            let chunk = sample
                .sample()
                .unwrap()
                .get(&|_| &data, sample_idx)
                .unwrap();
            let converted = video_data.sample_data_in_stream_format(&chunk).unwrap();

            if chunk.is_sync {
                idr_count += 1;

                if codec == VideoCodec::H264 {
                    // NAL unit type 7 == SPS; keyframes must carry parameter sets inline.
                    let has_sps = converted
                        .windows(5)
                        .any(|w| w[0..4] == *ANNEXB_NAL_START_CODE && (w[4] & 0x1F) == 7);
                    assert!(has_sps, "IDR frame at index {sample_idx} should have SPS");
                }
            } else {
                non_idr_count += 1;
            }

            if codec == VideoCodec::H264 || codec == VideoCodec::H265 {
                assert!(
                    has_annexb_start_codes(&converted),
                    "Frame at index {sample_idx} should have Annex B start codes",
                );
            }
        }

        assert!(idr_count > 0, "Should have at least one IDR frame");
        assert!(non_idr_count > 0, "Should have at least one non-IDR frame");
    }

    #[test]
    fn test_full_video_sampling_all_codecs() {
        // VP8/VP9 are excluded: no MP4 test assets / unsupported conversion.
        for codec in [VideoCodec::H264, VideoCodec::H265, VideoCodec::AV1] {
            test_video_codec_sampling(codec, false);
        }
    }
}