use std::io::Cursor;
use cros_codecs::codec::h265::parser::{
Nalu as H265Nalu, NaluType as H265NaluType, Parser as H265Parser,
};
use h264_reader::nal::{self, Nal as _};
use itertools::Itertools as _;
use re_span::Span;
use saturating_cast::SaturatingCast as _;
use super::{SampleMetadata, VideoDataDescription, VideoLoadError};
use crate::demux::{
ChromaSubsamplingModes, SampleMetadataState, SamplesStatistics, VideoDeliveryMethod,
VideoEncodingDetails,
};
use crate::h264::encoding_details_from_h264_sps;
use crate::h265::encoding_details_from_h265_sps;
use crate::nalu::ANNEXB_NAL_START_CODE;
use crate::{StableIndexDeque, Time, Timescale};
impl VideoDataDescription {
pub fn load_mp4(
bytes: &[u8],
debug_name: &str,
source_id: re_tuid::Tuid,
) -> Result<Self, VideoLoadError> {
re_tracing::profile_function!();
let mp4 = {
re_tracing::profile_scope!("Mp4::read_bytes");
re_mp4::Mp4::read_bytes(bytes)?
};
let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect();
let track = mp4
.tracks()
.values()
.find(|t| t.kind == Some(re_mp4::TrackKind::Video))
.ok_or(VideoLoadError::NoVideoTrack)?;
let stsd = track.trak(&mp4).mdia.minf.stbl.stsd.clone();
let timescale = Timescale::new(track.timescale);
let mut samples =
StableIndexDeque::<SampleMetadataState>::with_capacity(track.samples.len());
let mut keyframe_indices = Vec::new();
{
re_tracing::profile_scope!("copy samples & build gops");
for sample in &track.samples {
if sample.is_sync {
keyframe_indices.push(samples.next_index());
}
let decode_timestamp = Time::new(sample.decode_timestamp);
let presentation_timestamp = Time::new(sample.composition_timestamp);
let duration = Time::new(sample.duration.saturating_cast());
let byte_span = Span {
start: sample.offset as u32,
len: sample.size as u32,
};
samples.push_back(SampleMetadataState::Present(SampleMetadata {
is_sync: sample.is_sync,
frame_nr: 0, decode_timestamp,
presentation_timestamp,
duration: Some(duration),
source_id,
byte_span,
}));
}
}
if false {
re_log::info!(
"pts: {:?}",
samples
.iter()
.take(50)
.filter_map(|s| Some(s.sample()?.presentation_timestamp.0))
.collect::<Vec<_>>()
);
re_log::info!(
"dts: {:?}",
samples
.iter()
.take(50)
.filter_map(|s| Some(s.sample()?.decode_timestamp.0))
.collect::<Vec<_>>()
);
}
{
re_tracing::profile_scope!("Sanity-check samples");
let mut samples_are_in_decode_order = true;
for (a, b) in samples
.iter()
.tuple_windows::<(&SampleMetadataState, &SampleMetadataState)>()
.filter_map(|(a, b)| Some((a.sample()?, b.sample()?)))
{
samples_are_in_decode_order &= a.decode_timestamp <= b.decode_timestamp;
}
if !samples_are_in_decode_order {
re_log::warn!(
"Video samples are NOT in decode order. This implies either invalid video data or a bug in parsing the mp4."
);
}
}
{
re_tracing::profile_scope!("Calculate frame numbers");
let mut samples_sorted_by_pts = samples
.iter_mut()
.filter_map(|f| f.sample_mut())
.collect::<Vec<_>>();
samples_sorted_by_pts.sort_by_key(|s| s.presentation_timestamp);
for (frame_nr, sample) in samples_sorted_by_pts.into_iter().enumerate() {
sample.frame_nr = frame_nr as u32;
}
}
let samples_statistics = SamplesStatistics::new(&samples);
let codec = match &stsd.contents {
re_mp4::StsdBoxContent::Av01(_) => crate::VideoCodec::AV1,
re_mp4::StsdBoxContent::Avc1(_) => crate::VideoCodec::H264,
re_mp4::StsdBoxContent::Hvc1(_) | re_mp4::StsdBoxContent::Hev1(_) => {
crate::VideoCodec::H265
}
re_mp4::StsdBoxContent::Vp08(_) => crate::VideoCodec::VP8,
re_mp4::StsdBoxContent::Vp09(_) => crate::VideoCodec::VP9,
_ => {
return Err(VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(
&mp4, track,
)));
}
};
let video_data_description = Self {
codec,
encoding_details: Some(codec_details_from_stds(track, stsd)?),
timescale: Some(timescale),
delivery_method: VideoDeliveryMethod::Static {
duration: Time::new(track.duration.saturating_cast()),
},
samples_statistics,
keyframe_indices,
samples,
mp4_tracks,
};
if cfg!(debug_assertions)
&& let Err(err) = video_data_description.sanity_check()
{
panic!("VideoDataDescription sanity check for {debug_name} failed: {err}");
}
Ok(video_data_description)
}
}
fn unknown_codec_fourcc(mp4: &re_mp4::Mp4, track: &re_mp4::Track) -> re_mp4::FourCC {
let stsd = &track.trak(mp4).mdia.minf.stbl.stsd;
match &stsd.contents {
re_mp4::StsdBoxContent::Unknown(four_cc) => *four_cc,
_ => Default::default(),
}
}
fn codec_details_from_stds(
track: &re_mp4::Track,
stsd: re_mp4::StsdBox,
) -> Result<VideoEncodingDetails, VideoLoadError> {
match &stsd.contents {
re_mp4::StsdBoxContent::Avc1(avcc_box) => {
if let Some(sps_nal) = avcc_box.avcc.sequence_parameter_sets.first() {
let complete = true;
let sps_nal = nal::RefNal::new(sps_nal.bytes.as_slice(), &[], complete);
return nal::sps::SeqParameterSet::from_bits(sps_nal.rbsp_bits())
.and_then(|sps| encoding_details_from_h264_sps(&sps))
.map_err(VideoLoadError::SpsParsingError)
.map(|details| VideoEncodingDetails {
stsd: Some(stsd),
..details
});
}
}
re_mp4::StsdBoxContent::Hev1(hvc1_box) | re_mp4::StsdBoxContent::Hvc1(hvc1_box) => {
let hvcc = &*hvc1_box.hvcc;
for array in &hvcc.arrays {
if let Ok(nalu_type) = H265NaluType::try_from(array.nal_unit_type as u32)
&& matches!(nalu_type, H265NaluType::SpsNut)
{
for nal in &array.nalus {
let mut annexb =
Vec::with_capacity(ANNEXB_NAL_START_CODE.len() + nal.size as usize);
annexb.extend_from_slice(ANNEXB_NAL_START_CODE);
annexb.extend_from_slice(&nal.data);
let mut parser = H265Parser::default();
let mut rdr = Cursor::new(annexb.as_slice());
if let Ok(nalu) = H265Nalu::next(&mut rdr) {
let sps_ref = parser
.parse_sps(&nalu)
.map_err(|_err| VideoLoadError::NoVideoTrack)?;
let details = encoding_details_from_h265_sps(sps_ref);
return Ok(VideoEncodingDetails {
stsd: Some(stsd.clone()),
..details
});
}
}
}
}
}
_ => {}
}
Ok(VideoEncodingDetails {
codec_string: stsd
.contents
.codec_string()
.ok_or(VideoLoadError::UnableToDetermineCodecString)?,
coded_dimensions: [track.width, track.height],
bit_depth: stsd.contents.bit_depth(),
chroma_subsampling: subsampling_mode(&stsd),
stsd: Some(stsd),
})
}
fn subsampling_mode(stsd: &re_mp4::StsdBox) -> Option<ChromaSubsamplingModes> {
match &stsd.contents {
re_mp4::StsdBoxContent::Av01(av01_box) => {
if av01_box.av1c.monochrome {
Some(ChromaSubsamplingModes::Monochrome)
} else {
match (
av01_box.av1c.chroma_subsampling_x != 0,
av01_box.av1c.chroma_subsampling_y != 0,
) {
(true, true) => Some(ChromaSubsamplingModes::Yuv420), (true, false) => Some(ChromaSubsamplingModes::Yuv422),
(false, true) => None, (false, false) => Some(ChromaSubsamplingModes::Yuv444),
}
}
}
re_mp4::StsdBoxContent::Avc1(_) => {
None
}
re_mp4::StsdBoxContent::Hvc1(_) | re_mp4::StsdBoxContent::Hev1(_) => {
None
}
re_mp4::StsdBoxContent::Vp08(vp08_box) => {
match vp08_box.vpcc.chroma_subsampling {
0 | 1 => Some(ChromaSubsamplingModes::Yuv420),
2 => Some(ChromaSubsamplingModes::Yuv422),
3 => Some(ChromaSubsamplingModes::Yuv444),
_ => None, }
}
re_mp4::StsdBoxContent::Vp09(vp09_box) => {
match vp09_box.vpcc.chroma_subsampling {
0 | 1 => Some(ChromaSubsamplingModes::Yuv420),
2 => Some(ChromaSubsamplingModes::Yuv422),
3 => Some(ChromaSubsamplingModes::Yuv444),
_ => None, }
}
re_mp4::StsdBoxContent::Mp4a(_)
| re_mp4::StsdBoxContent::Tx3g(_)
| re_mp4::StsdBoxContent::Unknown(_) => None,
}
}