use anyhow::{Context, Result};
use codec::frame::{ColorMetadata, ColorSpace, PixelFormat, StreamInfo};
use mp4::Mp4Reader;
use std::io::Cursor;
use crate::annexb::{NaluCodec, ParamSetTracker, length_prefixed_to_annexb_tracked};
use crate::mp4_sanitize::sanitize_isobmff_box_sizes;
use crate::streaming::{DemuxHeader, Sample, StreamingDemuxer};
use super::super::AudioTrack;
use super::sample_entry::{
extract_avc_config, extract_hevc_config, has_av01_sample_entry, hevc_sample_entry_fourcc,
prores_sample_entry_fourcc,
};
/// One sample located through a fragment's `moof`/`traf`/`trun` boxes.
///
/// Plain data: it derives the usual value-type traits so entries can be
/// logged, copied and compared without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct FragSample {
    /// Byte offset of the sample payload, used to index the demuxer's buffer.
    pub(crate) offset: u64,
    /// Payload length in bytes.
    pub(crate) size: u32,
    /// Running decode time plus the sample's composition offset, in ticks.
    pub(crate) pts_ticks: i64,
    /// Sample duration in ticks.
    pub(crate) duration_ticks: u32,
}
/// Pull-based MP4 video demuxer produced by `demux_mp4_streaming_init`.
///
/// Samples come either from the `mp4` crate reader (classic sample table) or,
/// when the file contains `moof` boxes, from a table built by walking
/// `moof`/`traf`/`trun` and slicing `data` directly.
pub struct Mp4StreamingDemuxer {
/// Owned file bytes as returned by `sanitize_isobmff_box_sizes`; fragmented
/// samples are sliced straight out of this buffer.
data: Vec<u8>,
/// `mp4` crate reader over its own copy of the bytes; used for the
/// non-fragmented path.
reader: Mp4Reader<Cursor<Vec<u8>>>,
/// Codec name plus stream info computed at init time.
header: DemuxHeader,
/// Audio track returned by the audio extractor at init time, if any.
audio: Option<AudioTrack>,
/// Track id of the selected video track.
track_id: u32,
/// Number of samples to emit: the fragment table length when fragmented,
/// otherwise the track's sample count.
sample_count: u32,
/// 1-based index of the next sample to emit.
next_idx: u32,
/// Parameter sets handed to the Annex B converter on every sample (empty
/// when none were found or the codec does not need conversion).
sps_pps: Vec<Vec<u8>>,
/// NAL length-prefix size passed to the Annex B converter (4 when no codec
/// config was found).
length_size: u8,
/// Present only for h264/h265, where samples are converted to Annex B.
tracker: Option<ParamSetTracker>,
/// `Some` when at least one `moof` box was found; takes priority over
/// `reader` for sample delivery.
fragmented_samples: Option<Vec<FragSample>>,
}
/// Builds a [`Mp4StreamingDemuxer`] from an in-memory MP4 file.
///
/// Selects the first video track, works out the codec (with sample-entry
/// fallbacks when the `mp4` crate reports `"unknown"`), gathers stream info,
/// parameter sets and audio, and detects fragmented files by scanning for
/// `moof` boxes.
///
/// # Errors
///
/// Fails when the MP4 header cannot be parsed (by any of the up to three reader
/// instances created here) or when the file has no video track.
pub(crate) fn demux_mp4_streaming_init(data: &[u8]) -> Result<Mp4StreamingDemuxer> {
// Everything below works on the buffer returned by the sanitizer, never on
// the caller's slice.
let owned = sanitize_isobmff_box_sizes(data);
let size = owned.len() as u64;
// Borrowing "probe" reader: used only for metadata, dropped before the
// long-lived reader is created.
let probe = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
.context("reading MP4 header")?;
// First track whose type resolves to Video wins.
let video_track = probe
.tracks()
.values()
.find(|t| t.track_type().ok() == Some(mp4::TrackType::Video))
.context("no video track in MP4")?;
let track_id = video_track.track_id();
let codec_from_mp4 = super::format_codec(video_track);
// Sample-entry fallbacks are consulted only when the primary lookup reports
// "unknown"; the order is AV1, then HEVC, then ProRes.
let codec = if codec_from_mp4 == "unknown" && has_av01_sample_entry(&owned) {
"av1".to_string()
} else if codec_from_mp4 == "unknown" && hevc_sample_entry_fourcc(&owned).is_some() {
"h265".to_string()
} else if codec_from_mp4 == "unknown" && prores_sample_entry_fourcc(&owned).is_some() {
"prores".to_string()
} else {
codec_from_mp4
};
let width = video_track.width() as u32;
let height = video_track.height() as u32;
let sample_count = video_track.sample_count();
let duration = video_track.duration().as_secs_f64();
let video_track_timescale = video_track.timescale();
let frame_rate = super::mp4_frame_rate(video_track, duration);
let bitrate = video_track.bitrate() as u64;
// Only mastering-display and content-light-level are taken from the
// container; every other colour field keeps its default.
let mp4_color = super::super::hdr::extract_mp4_visual_color_metadata(&owned);
let initial_color_metadata = ColorMetadata {
mastering_display: mp4_color.mastering_display,
content_light_level: mp4_color.content_light_level,
..Default::default()
};
// pixel_format starts as Yuv420p and may be refined by the detection step
// below; color_space is fixed to Bt709 here.
let mut info = StreamInfo {
codec: codec.clone(),
width,
height,
frame_rate,
duration,
pixel_format: PixelFormat::Yuv420p,
color_space: ColorSpace::Bt709,
total_frames: sample_count as u64,
bitrate,
color_metadata: initial_color_metadata,
};
// h264/h265 samples are length-prefixed in MP4 and get converted to Annex B
// on the way out, which needs the parameter sets and the prefix size.
let needs_annexb = matches!(codec.as_str(), "h264" | "h265");
let (sps_pps, length_size) = if needs_annexb {
if codec == "h264" {
match extract_avc_config(&owned) {
Some(cfg) => (cfg.parameter_sets, cfg.length_size),
// No AVC config found: fall back to the probe reader's parameter
// sets and assume a 4-byte length prefix.
None => (super::extract_sps_pps(&probe, track_id), 4u8),
}
} else {
match extract_hevc_config(&owned) {
Some(cfg) => (cfg.parameter_sets, cfg.length_size),
None => (Vec::new(), 4u8),
}
}
} else {
(Vec::new(), 4u8)
};
// Pixel-format detection. Preferred input is the parameter sets, each
// prefixed with a 00 00 00 01 start code; otherwise the raw bytes of sample 1.
// A failed or empty sample read is ignored and the Yuv420p default stays.
if sample_count > 0 {
let detect_input: Vec<u8> = if !sps_pps.is_empty() {
let mut buf = Vec::new();
for ps in &sps_pps {
buf.extend_from_slice(&[0, 0, 0, 1]);
buf.extend_from_slice(ps);
}
buf
} else {
let mut probe_for_pf = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
.context("re-reading MP4 for pixel-format probe")?;
match probe_for_pf.read_sample(track_id, 1) {
Ok(Some(s)) => s.bytes.to_vec(),
_ => Vec::new(),
}
};
if !detect_input.is_empty() {
info.pixel_format = codec::pixel_format::detect(&codec, &[detect_input]);
}
}
// Ends the borrow of `owned` held by the probe reader (and `video_track`).
drop(probe);
let audio = super::super::audio::extract_mp4_audio(&owned);
// NOTE(review): the long-lived reader owns a full clone of the buffer, so the
// file is held in memory twice (here and in `data`).
let reader_cursor = Cursor::new(owned.clone());
let reader =
Mp4Reader::read_header(reader_cursor, size).context("opening MP4 streaming reader")?;
let tracker = if needs_annexb {
Some(ParamSetTracker::new(if codec == "h264" {
NaluCodec::Avc
} else {
NaluCodec::Hevc
}))
} else {
None
};
// No-op: `needs_annexb` is already used above.
let _ = needs_annexb;
// The trex defaults are passed as 0/0, so a fragment that carries neither
// per-sample nor tfhd default sizes yields zero-size samples, which the trun
// walker does not emit.
let fragmented_samples = build_fragmented_sample_table(&owned, track_id, 0, 0).map(|table| {
tracing::info!(
track_id,
sample_count = table.len(),
"fragmented MP4 detected; built sample table from moof/traf/trun"
);
table
});
// When any moof was found the fragment table is authoritative for the count.
// NOTE(review): `as u32` truncates tables longer than u32::MAX entries.
let final_sample_count = match &fragmented_samples {
Some(table) => table.len() as u32,
None => sample_count,
};
// Frame rate / duration are recomputed from the fragment durations only when
// the moov-derived values look empty (no samples or non-positive duration).
// NOTE(review): outside that case `info.total_frames` keeps the moov sample
// count even though `sample_count` below uses the table length — confirm the
// mismatch is intended.
if let Some(table) = fragmented_samples.as_ref() {
if !table.is_empty() && (sample_count == 0 || duration <= 0.0) && video_track_timescale > 0
{
let total_ticks: u64 = table.iter().map(|s| s.duration_ticks as u64).sum();
if total_ticks > 0 {
let total_seconds = total_ticks as f64 / video_track_timescale as f64;
if total_seconds > 0.0 {
let avg_fps = table.len() as f64 / total_seconds;
// Stored rate is clamped to 1..=240; the log line below reports the
// unclamped average.
info.frame_rate = avg_fps.clamp(1.0, 240.0);
info.duration = total_seconds;
info.total_frames = table.len() as u64;
tracing::info!(
track_id,
avg_fps,
total_seconds,
sample_count = table.len(),
timescale = video_track_timescale,
"fragmented MP4: recomputed frame_rate + duration from \
moof/traf/trun timestamps (static moov sample table \
was empty)"
);
}
}
}
}
// Sample indices handed to the reader are 1-based, hence `next_idx: 1`.
Ok(Mp4StreamingDemuxer {
data: owned,
reader,
header: DemuxHeader { codec, info },
audio,
track_id,
sample_count: final_sample_count,
next_idx: 1,
sps_pps,
length_size,
tracker,
fragmented_samples,
})
}
impl StreamingDemuxer for Mp4StreamingDemuxer {
    /// Codec name and stream info computed when the demuxer was created.
    fn header(&self) -> &DemuxHeader {
        &self.header
    }

    /// Returns the next video sample, or `Ok(None)` once the stream is done.
    ///
    /// Fragmented files are served from the `moof`-derived table by slicing
    /// the in-memory buffer; everything else goes through the `mp4` reader.
    /// Running past EOF or hitting a reader error ends the stream with a
    /// warning instead of an error. When a parameter-set tracker is present
    /// the payload is converted from length-prefixed NAL units to Annex B.
    fn next_video_sample(&mut self) -> Result<Option<Sample>> {
        if let Some(table) = self.fragmented_samples.as_ref() {
            // `next_idx` is 1-based; the table is 0-based.
            let slot = (self.next_idx - 1) as usize;
            let Some(entry) = table.get(slot) else {
                return Ok(None);
            };
            self.next_idx += 1;

            let start = entry.offset as usize;
            let stop = start.saturating_add(entry.size as usize);
            // `start <= stop` always holds, so the only way `get` fails is a
            // range that runs past the end of the buffer.
            let Some(payload) = self.data.get(start..stop) else {
                tracing::warn!(
                    idx = slot + 1,
                    offset = entry.offset,
                    size = entry.size,
                    data_len = self.data.len(),
                    "fragmented sample reaches past EOF; stopping at the previous frame"
                );
                return Ok(None);
            };

            let raw = payload.to_vec();
            let data = match self.tracker.as_mut() {
                Some(tracker) => length_prefixed_to_annexb_tracked(
                    &raw,
                    self.length_size,
                    tracker,
                    &self.sps_pps,
                ),
                None => raw,
            };
            return Ok(Some(Sample {
                data,
                pts_ticks: entry.pts_ticks,
                duration_ticks: entry.duration_ticks,
            }));
        }

        while self.next_idx <= self.sample_count {
            let idx = self.next_idx;
            self.next_idx += 1;

            let sample = match self.reader.read_sample(self.track_id, idx) {
                Ok(Some(sample)) => sample,
                // The reader had nothing for this index: try the next one.
                Ok(None) => continue,
                Err(e) => {
                    tracing::warn!(
                        track_id = self.track_id,
                        idx,
                        emitted = idx.saturating_sub(1),
                        sample_count = self.sample_count,
                        error = %e,
                        "video stream: read_sample error mid-track; \
                         stopping at sample {} of {} (truncated source — \
                         iPhone fragmented MP4 with a missing trun entry \
                         is the typical cause)",
                        idx.saturating_sub(1),
                        self.sample_count,
                    );
                    return Ok(None);
                }
            };

            let pts_ticks = sample.start_time as i64;
            let duration_ticks = sample.duration;
            let raw = sample.bytes.to_vec();
            let data = match self.tracker.as_mut() {
                Some(tracker) => length_prefixed_to_annexb_tracked(
                    &raw,
                    self.length_size,
                    tracker,
                    &self.sps_pps,
                ),
                None => raw,
            };
            return Ok(Some(Sample {
                data,
                pts_ticks,
                duration_ticks,
            }));
        }
        Ok(None)
    }

    /// Audio track captured at init time, if the file had one.
    fn audio(&self) -> Option<&AudioTrack> {
        self.audio.as_ref()
    }
}
impl Mp4StreamingDemuxer {
    /// Borrows the file bytes this demuxer holds (the buffer fragmented
    /// samples are sliced from).
    #[allow(dead_code)]
    pub(crate) fn raw_bytes(&self) -> &[u8] {
        self.data.as_slice()
    }
}
/// Scans the top-level boxes of `data` and builds a sample table for
/// `track_id` from every `moof` box found.
///
/// Returns `None` when the buffer contains no `moof` box at all, and
/// `Some(table)` (possibly empty) otherwise. The two `*_from_trex` values are
/// the fallbacks used when neither the `trun` entry nor the `tfhd` supplies a
/// duration or size.
///
/// Box sizes come from untrusted input: the 32-bit size, the 64-bit
/// `largesize` (size field `1`) and the "extends to EOF" form (size field `0`)
/// are all validated against the bytes that remain before any offset
/// arithmetic, so a hostile `largesize` can neither overflow `usize` nor be
/// truncated on 32-bit targets. Scanning stops at the first malformed box;
/// samples collected up to that point are kept.
pub(crate) fn build_fragmented_sample_table(
    data: &[u8],
    track_id: u32,
    default_sample_duration_from_trex: u32,
    default_sample_size_from_trex: u32,
) -> Option<Vec<FragSample>> {
    let mut samples: Vec<FragSample> = Vec::new();
    // Running decode time, carried across fragments that lack a `tfdt`.
    let mut accumulated_pts: i64 = 0;
    let mut found_any_moof = false;
    let mut pos: usize = 0;

    while let Some(rest) = data.get(pos..).filter(|rest| rest.len() >= 8) {
        let size_field = u32::from_be_bytes([rest[0], rest[1], rest[2], rest[3]]);
        let box_type = &rest[4..8];

        let (declared_size, header_size): (u64, usize) = match size_field {
            // 64-bit `largesize` follows the type.
            1 => {
                let Some(large) = rest.get(8..16) else { break };
                let large = large
                    .iter()
                    .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
                (large, 16)
            }
            // Box runs to the end of the file.
            0 => (rest.len() as u64, 8),
            n => (u64::from(n), 8),
        };

        // Compare in u64 so nothing is narrowed before it is known to fit.
        if declared_size < header_size as u64 || declared_size > rest.len() as u64 {
            break;
        }
        // Lossless: bounded by `rest.len()` above.
        let box_size = declared_size as usize;

        if box_type == b"moof" {
            found_any_moof = true;
            walk_moof(
                data,
                pos + header_size,
                pos + box_size,
                pos as u64,
                track_id,
                default_sample_duration_from_trex,
                default_sample_size_from_trex,
                &mut accumulated_pts,
                &mut samples,
            );
        }

        // Cannot overflow: `pos + box_size <= data.len()`.
        pos += box_size;
    }

    found_any_moof.then_some(samples)
}
#[allow(clippy::too_many_arguments)] // mirrors the parser state threaded through every walker
/// Walks the direct children of one `moof` box and hands every `traf` to
/// `walk_traf`.
///
/// `children_start..moof_end` is the payload of the `moof`; `moof_offset` is
/// the absolute offset of the `moof` box itself. The remaining arguments are
/// forwarded unchanged.
///
/// A child whose size is below the 8-byte box header (this includes the
/// `0`/`1` special values, which are not supported at this level) or that
/// would run past `moof_end` ends the walk. Accepting such sizes would make
/// the cursor land in the middle of a header and interpret arbitrary bytes as
/// the next box.
fn walk_moof(
    data: &[u8],
    children_start: usize,
    moof_end: usize,
    moof_offset: u64,
    track_id: u32,
    default_sample_duration_from_trex: u32,
    default_sample_size_from_trex: u32,
    accumulated_pts: &mut i64,
    samples: &mut Vec<FragSample>,
) {
    let mut pos = children_start;
    loop {
        // Header must fit both inside the moof and inside the buffer.
        let Some(header) = pos
            .checked_add(8)
            .filter(|&header_end| header_end <= moof_end)
            .and_then(|header_end| data.get(pos..header_end))
        else {
            break;
        };
        let size = u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as usize;
        if size < 8 {
            break;
        }
        let Some(box_end) = pos.checked_add(size).filter(|&end| end <= moof_end) else {
            break;
        };

        if &header[4..8] == b"traf" {
            walk_traf(
                data,
                pos + 8,
                box_end,
                moof_offset,
                track_id,
                default_sample_duration_from_trex,
                default_sample_size_from_trex,
                accumulated_pts,
                samples,
            );
        }
        pos = box_end;
    }
}
#[allow(clippy::too_many_arguments)] // mirrors the parser state threaded through every walker
/// Parses one `traf` box and appends the samples of its `trun` children when
/// the fragment belongs to `track_id`.
///
/// Two passes are made over the children so that box order does not matter:
///
/// 1. `tfhd` supplies the track id and may override the base data offset and
///    the default sample duration/size; `tfdt` supplies the fragment's base
///    decode time.
/// 2. Every `trun` is handed to `walk_trun` with the values gathered in
///    pass 1.
///
/// When a `tfdt` is present `*accumulated_pts` is reset to its base decode
/// time; otherwise the running value from the previous fragment is kept.
/// Without an explicit base data offset, `trun` offsets are relative to
/// `moof_offset`.
///
/// Every field read is bounds-checked against the end of the box it belongs
/// to, so a truncated `tfhd`/`tfdt` is ignored instead of reading a neighbour's
/// bytes or indexing past the buffer.
fn walk_traf(
    data: &[u8],
    children_start: usize,
    traf_end: usize,
    moof_offset: u64,
    track_id: u32,
    default_sample_duration_from_trex: u32,
    default_sample_size_from_trex: u32,
    accumulated_pts: &mut i64,
    samples: &mut Vec<FragSample>,
) {
    let mut this_track: Option<u32> = None;
    let mut default_sample_duration = default_sample_duration_from_trex;
    let mut default_sample_size = default_sample_size_from_trex;
    let mut base_data_offset = moof_offset;
    let mut tfdt_base_pts: Option<i64> = None;

    // Pass 1: fragment-level headers.
    let mut pos = children_start;
    while let Some((fourcc, box_end)) = traf_child_box(data, pos, traf_end) {
        match &fourcc {
            b"tfhd" => {
                // FullBox header (version + 24-bit flags), then track_ID.
                if let (Some(version_flags), Some(tk)) = (
                    traf_be_u32(data, pos + 8, box_end),
                    traf_be_u32(data, pos + 12, box_end),
                ) {
                    let flags = version_flags & 0x00ff_ffff;
                    this_track = Some(tk);
                    // Optional fields appear in flag-bit order; a truncated
                    // one ends header scanning, keeping what was read so far.
                    let mut p = pos + 16;
                    if flags & 0x01 != 0 {
                        // base-data-offset-present
                        let Some(value) = traf_be_u64(data, p, box_end) else { break };
                        base_data_offset = value;
                        p += 8;
                    }
                    if flags & 0x02 != 0 {
                        // sample-description-index-present (unused)
                        p += 4;
                    }
                    if flags & 0x08 != 0 {
                        // default-sample-duration-present
                        let Some(value) = traf_be_u32(data, p, box_end) else { break };
                        default_sample_duration = value;
                        p += 4;
                    }
                    if flags & 0x10 != 0 {
                        // default-sample-size-present
                        let Some(value) = traf_be_u32(data, p, box_end) else { break };
                        default_sample_size = value;
                    }
                    // default-sample-flags (0x20) follows but is not needed.
                }
            }
            b"tfdt" => {
                if let Some(version_flags) = traf_be_u32(data, pos + 8, box_end) {
                    // Version 1 stores a 64-bit decode time, version 0 a 32-bit one.
                    let decode_time = if version_flags >> 24 == 1 {
                        traf_be_u64(data, pos + 12, box_end)
                    } else {
                        traf_be_u32(data, pos + 12, box_end).map(u64::from)
                    };
                    if let Some(ticks) = decode_time {
                        // Saturate rather than wrap to a negative timestamp.
                        tfdt_base_pts = Some(ticks.min(i64::MAX as u64) as i64);
                    }
                }
            }
            _ => {}
        }
        pos = box_end;
    }

    if this_track != Some(track_id) {
        return;
    }
    if let Some(base_pts) = tfdt_base_pts {
        *accumulated_pts = base_pts;
    }

    // Pass 2: sample runs.
    let mut pos = children_start;
    while let Some((fourcc, box_end)) = traf_child_box(data, pos, traf_end) {
        if &fourcc == b"trun" {
            walk_trun(
                data,
                pos + 8,
                box_end,
                base_data_offset,
                default_sample_duration,
                default_sample_size,
                accumulated_pts,
                samples,
            );
        }
        pos = box_end;
    }
}

/// Reads the header of the child box at `pos` and returns its fourcc and end
/// offset, or `None` when the header or the declared size does not fit before
/// `limit` (sizes below the 8-byte header are rejected too).
fn traf_child_box(data: &[u8], pos: usize, limit: usize) -> Option<([u8; 4], usize)> {
    let header_end = pos.checked_add(8).filter(|&end| end <= limit)?;
    let header = data.get(pos..header_end)?;
    let size = u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as usize;
    if size < 8 {
        return None;
    }
    let box_end = pos.checked_add(size).filter(|&end| end <= limit)?;
    Some(([header[4], header[5], header[6], header[7]], box_end))
}

/// Big-endian `u32` at `at`, provided all four bytes lie before `limit` and
/// inside `data`.
fn traf_be_u32(data: &[u8], at: usize, limit: usize) -> Option<u32> {
    let end = at.checked_add(4).filter(|&end| end <= limit)?;
    let bytes = data.get(at..end)?;
    Some(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
}

/// Big-endian `u64` at `at`, provided all eight bytes lie before `limit` and
/// inside `data`.
fn traf_be_u64(data: &[u8], at: usize, limit: usize) -> Option<u64> {
    let end = at.checked_add(8).filter(|&end| end <= limit)?;
    let bytes = data.get(at..end)?;
    Some(bytes.iter().fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte)))
}
#[allow(clippy::too_many_arguments)] // mirrors the parser state threaded through every walker
/// Parses one `trun` payload (`children_start..trun_end`, starting at the
/// FullBox version/flags word) and appends its samples to `samples`.
///
/// * `base_offset` plus the run's optional signed `data_offset` gives the
///   offset of the first sample; later samples follow back to back.
/// * `default_sample_duration` / `default_sample_size` are used for fields the
///   run does not carry per sample.
/// * `*accumulated_pts` is the decode time of the first sample on entry and is
///   advanced by every sample's duration. The stored `pts_ticks` is that
///   decode time plus the composition offset.
///
/// Zero-size samples are not emitted but still advance the timeline.
///
/// Hardening against malformed input:
///
/// * Every field, including the skipped `first_sample_flags` and per-sample
///   `sample_flags`, is bounds-checked. A truncated entry ends the run without
///   emitting a sample for it.
/// * When the run has no per-sample fields the entry count is not limited by
///   the box length, so an attacker could ask for up to `u32::MAX` samples
///   from a few bytes. In that case only samples that start before the end of
///   `data` are emitted, and the timeline is advanced arithmetically for the
///   rest.
fn walk_trun(
    data: &[u8],
    children_start: usize,
    trun_end: usize,
    base_offset: u64,
    default_sample_duration: u32,
    default_sample_size: u32,
    accumulated_pts: &mut i64,
    samples: &mut Vec<FragSample>,
) {
    let mut p = children_start;
    let Some(version_flags) = trun_take_u32(data, &mut p, trun_end) else {
        return;
    };
    let Some(sample_count) = trun_take_u32(data, &mut p, trun_end) else {
        return;
    };
    let flags = version_flags & 0x00ff_ffff;

    // data-offset-present: signed offset relative to `base_offset`.
    let data_offset: i32 = if flags & 0x000_001 != 0 {
        match trun_take_u32(data, &mut p, trun_end) {
            Some(raw) => raw as i32, // bit-for-bit reinterpretation
            None => return,
        }
    } else {
        0
    };
    // first-sample-flags-present: value unused, but it must actually be there.
    if flags & 0x000_004 != 0 && trun_take_u32(data, &mut p, trun_end).is_none() {
        return;
    }

    let sample_duration_present = flags & 0x000_100 != 0;
    let sample_size_present = flags & 0x000_200 != 0;
    let sample_flags_present = flags & 0x000_400 != 0;
    let sample_cto_present = flags & 0x000_800 != 0;

    // Sign-extending before the wrapping add applies a negative offset correctly.
    let mut current_offset = base_offset.wrapping_add(i64::from(data_offset) as u64);

    let has_per_sample_fields = sample_duration_present
        || sample_size_present
        || sample_flags_present
        || sample_cto_present;
    if !has_per_sample_fields {
        // Every sample is identical, so nothing in the box bounds the count.
        let eof = data.len() as u64;
        let mut pts = *accumulated_pts;
        let mut emitted: u32 = 0;
        if default_sample_size > 0 {
            while emitted < sample_count && current_offset < eof {
                samples.push(FragSample {
                    offset: current_offset,
                    size: default_sample_size,
                    pts_ticks: pts,
                    duration_ticks: default_sample_duration,
                });
                current_offset = current_offset.saturating_add(u64::from(default_sample_size));
                pts = pts.saturating_add(i64::from(default_sample_duration));
                emitted += 1;
            }
        }
        // Same result as adding the duration once per remaining sample with
        // saturation, without iterating up to four billion times.
        let advanced = i128::from(pts)
            + i128::from(default_sample_duration) * i128::from(sample_count - emitted);
        *accumulated_pts = advanced.min(i128::from(i64::MAX)) as i64;
        return;
    }

    for _ in 0..sample_count {
        let dur = if sample_duration_present {
            match trun_take_u32(data, &mut p, trun_end) {
                Some(value) => value,
                None => return,
            }
        } else {
            default_sample_duration
        };
        let sz = if sample_size_present {
            match trun_take_u32(data, &mut p, trun_end) {
                Some(value) => value,
                None => return,
            }
        } else {
            default_sample_size
        };
        if sample_flags_present && trun_take_u32(data, &mut p, trun_end).is_none() {
            return;
        }
        // The composition offset is read as a signed 32-bit value for both
        // trun versions.
        let cto: i32 = if sample_cto_present {
            match trun_take_u32(data, &mut p, trun_end) {
                Some(raw) => raw as i32,
                None => return,
            }
        } else {
            0
        };

        if sz > 0 {
            samples.push(FragSample {
                offset: current_offset,
                size: sz,
                pts_ticks: accumulated_pts.saturating_add(i64::from(cto)),
                duration_ticks: dur,
            });
        }
        current_offset = current_offset.saturating_add(u64::from(sz));
        *accumulated_pts = accumulated_pts.saturating_add(i64::from(dur));
    }
}

/// Reads the big-endian `u32` at `*cursor` and advances the cursor past it.
/// Returns `None`, leaving the cursor untouched, unless all four bytes lie
/// before `limit` and inside `data`.
fn trun_take_u32(data: &[u8], cursor: &mut usize, limit: usize) -> Option<u32> {
    let end = cursor.checked_add(4).filter(|&end| end <= limit)?;
    let bytes = data.get(*cursor..end)?;
    *cursor = end;
    Some(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
}