1use anyhow::{Context, Result};
12use codec::frame::{ColorMetadata, ColorSpace, PixelFormat, StreamInfo};
13use mp4::Mp4Reader;
14use std::io::Cursor;
15
16use crate::annexb::{NaluCodec, ParamSetTracker, length_prefixed_to_annexb_tracked};
17use crate::mp4_sanitize::sanitize_isobmff_box_sizes;
18use crate::streaming::{DemuxHeader, Sample, StreamingDemuxer};
19
20use super::super::AudioTrack;
21use super::sample_entry::{
22 extract_avc_config, extract_hevc_config, has_av01_sample_entry, hevc_sample_entry_fourcc,
23 prores_sample_entry_fourcc,
24};
25
/// One video sample located via the fragmented-MP4 (`moof`/`traf`/`trun`)
/// structure rather than the static `moov` sample table.
///
/// The type is plain integer data, so it derives the usual value-type traits;
/// this makes entries printable in logs and comparable in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct FragSample {
    /// Absolute byte offset of the sample payload within the file buffer.
    pub(crate) offset: u64,
    /// Payload length in bytes.
    pub(crate) size: u32,
    /// Presentation timestamp in track-timescale ticks (running decode time
    /// plus the sample's composition offset).
    pub(crate) pts_ticks: i64,
    /// Sample duration in track-timescale ticks.
    pub(crate) duration_ticks: u32,
}
44
45pub struct Mp4StreamingDemuxer {
74 data: Vec<u8>,
77 reader: Mp4Reader<Cursor<Vec<u8>>>,
78 header: DemuxHeader,
79 audio: Option<AudioTrack>,
80 track_id: u32,
81 sample_count: u32,
82 next_idx: u32,
83 sps_pps: Vec<Vec<u8>>,
85 length_size: u8,
86 tracker: Option<ParamSetTracker>,
87 fragmented_samples: Option<Vec<FragSample>>,
93}
94
95pub(crate) fn demux_mp4_streaming_init(data: &[u8]) -> Result<Mp4StreamingDemuxer> {
96 let owned = sanitize_isobmff_box_sizes(data);
99 let size = owned.len() as u64;
100 let probe = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
105 .context("reading MP4 header")?;
106
107 let video_track = probe
108 .tracks()
109 .values()
110 .find(|t| t.track_type().ok() == Some(mp4::TrackType::Video))
111 .context("no video track in MP4")?;
112
113 let track_id = video_track.track_id();
114 let codec_from_mp4 = super::format_codec(video_track);
115 let codec = if codec_from_mp4 == "unknown" && has_av01_sample_entry(&owned) {
116 "av1".to_string()
117 } else if codec_from_mp4 == "unknown" && hevc_sample_entry_fourcc(&owned).is_some() {
118 "h265".to_string()
119 } else if codec_from_mp4 == "unknown" && prores_sample_entry_fourcc(&owned).is_some() {
120 "prores".to_string()
121 } else {
122 codec_from_mp4
123 };
124 let width = video_track.width() as u32;
125 let height = video_track.height() as u32;
126 let sample_count = video_track.sample_count();
127 let duration = video_track.duration().as_secs_f64();
128 let video_track_timescale = video_track.timescale();
129 let frame_rate = super::mp4_frame_rate(video_track, duration);
130 let bitrate = video_track.bitrate() as u64;
131
132 let mp4_color = super::super::hdr::extract_mp4_visual_color_metadata(&owned);
133 let initial_color_metadata = ColorMetadata {
134 mastering_display: mp4_color.mastering_display,
135 content_light_level: mp4_color.content_light_level,
136 ..Default::default()
137 };
138
139 let mut info = StreamInfo {
140 codec: codec.clone(),
141 width,
142 height,
143 frame_rate,
144 duration,
145 pixel_format: PixelFormat::Yuv420p,
146 color_space: ColorSpace::Bt709,
147 total_frames: sample_count as u64,
148 bitrate,
149 color_metadata: initial_color_metadata,
150 };
151
152 let needs_annexb = matches!(codec.as_str(), "h264" | "h265");
153 let (sps_pps, length_size) = if needs_annexb {
154 if codec == "h264" {
155 match extract_avc_config(&owned) {
156 Some(cfg) => (cfg.parameter_sets, cfg.length_size),
157 None => (super::extract_sps_pps(&probe, track_id), 4u8),
158 }
159 } else {
160 match extract_hevc_config(&owned) {
161 Some(cfg) => (cfg.parameter_sets, cfg.length_size),
162 None => (Vec::new(), 4u8),
163 }
164 }
165 } else {
166 (Vec::new(), 4u8)
167 };
168
169 if sample_count > 0 {
176 let detect_input: Vec<u8> = if !sps_pps.is_empty() {
177 let mut buf = Vec::new();
178 for ps in &sps_pps {
179 buf.extend_from_slice(&[0, 0, 0, 1]);
180 buf.extend_from_slice(ps);
181 }
182 buf
183 } else {
184 let mut probe_for_pf = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
185 .context("re-reading MP4 for pixel-format probe")?;
186 match probe_for_pf.read_sample(track_id, 1) {
187 Ok(Some(s)) => s.bytes.to_vec(),
188 _ => Vec::new(),
189 }
190 };
191 if !detect_input.is_empty() {
192 info.pixel_format = codec::pixel_format::detect(&codec, &[detect_input]);
193 }
194 }
195
196 drop(probe);
197
198 let audio = super::super::audio::extract_mp4_audio(&owned);
199
200 let reader_cursor = Cursor::new(owned.clone());
202 let reader =
203 Mp4Reader::read_header(reader_cursor, size).context("opening MP4 streaming reader")?;
204
205 let tracker = if needs_annexb {
206 Some(ParamSetTracker::new(if codec == "h264" {
207 NaluCodec::Avc
208 } else {
209 NaluCodec::Hevc
210 }))
211 } else {
212 None
213 };
214
215 let _ = needs_annexb; let fragmented_samples = build_fragmented_sample_table(&owned, track_id, 0, 0).map(|table| {
225 tracing::info!(
226 track_id,
227 sample_count = table.len(),
228 "fragmented MP4 detected; built sample table from moof/traf/trun"
229 );
230 table
231 });
232 let final_sample_count = match &fragmented_samples {
233 Some(table) => table.len() as u32,
234 None => sample_count,
235 };
236
237 if let Some(table) = fragmented_samples.as_ref() {
249 if !table.is_empty() && (sample_count == 0 || duration <= 0.0) && video_track_timescale > 0
250 {
251 let total_ticks: u64 = table.iter().map(|s| s.duration_ticks as u64).sum();
252 if total_ticks > 0 {
253 let total_seconds = total_ticks as f64 / video_track_timescale as f64;
254 if total_seconds > 0.0 {
255 let avg_fps = table.len() as f64 / total_seconds;
256 info.frame_rate = avg_fps.clamp(1.0, 240.0);
257 info.duration = total_seconds;
258 info.total_frames = table.len() as u64;
259 tracing::info!(
260 track_id,
261 avg_fps,
262 total_seconds,
263 sample_count = table.len(),
264 timescale = video_track_timescale,
265 "fragmented MP4: recomputed frame_rate + duration from \
266 moof/traf/trun timestamps (static moov sample table \
267 was empty)"
268 );
269 }
270 }
271 }
272 }
273 Ok(Mp4StreamingDemuxer {
274 data: owned,
275 reader,
276 header: DemuxHeader { codec, info },
277 audio,
278 track_id,
279 sample_count: final_sample_count,
280 next_idx: 1,
281 sps_pps,
282 length_size,
283 tracker,
284 fragmented_samples,
285 })
286}
287
288impl StreamingDemuxer for Mp4StreamingDemuxer {
289 fn header(&self) -> &DemuxHeader {
290 &self.header
291 }
292
293 fn next_video_sample(&mut self) -> Result<Option<Sample>> {
294 if let Some(table) = self.fragmented_samples.as_ref() {
297 let idx_zero_based = (self.next_idx - 1) as usize;
298 if idx_zero_based >= table.len() {
299 return Ok(None);
300 }
301 self.next_idx += 1;
302 let entry = &table[idx_zero_based];
303 let off = entry.offset as usize;
304 let end = off.saturating_add(entry.size as usize);
305 if end > self.data.len() {
306 tracing::warn!(
307 idx = idx_zero_based + 1,
308 offset = entry.offset,
309 size = entry.size,
310 data_len = self.data.len(),
311 "fragmented sample reaches past EOF; stopping at the previous frame"
312 );
313 return Ok(None);
314 }
315 let raw = self.data[off..end].to_vec();
316 let data = if let Some(tracker) = self.tracker.as_mut() {
317 length_prefixed_to_annexb_tracked(&raw, self.length_size, tracker, &self.sps_pps)
318 } else {
319 raw
320 };
321 return Ok(Some(Sample {
322 data,
323 pts_ticks: entry.pts_ticks,
324 duration_ticks: entry.duration_ticks,
325 }));
326 }
327 loop {
328 if self.next_idx > self.sample_count {
329 return Ok(None);
330 }
331 let idx = self.next_idx;
332 self.next_idx += 1;
333 let s = match self.reader.read_sample(self.track_id, idx) {
345 Ok(s) => s,
346 Err(e) => {
347 tracing::warn!(
348 track_id = self.track_id,
349 idx,
350 emitted = idx.saturating_sub(1),
351 sample_count = self.sample_count,
352 error = %e,
353 "video stream: read_sample error mid-track; \
354 stopping at sample {} of {} (truncated source — \
355 iPhone fragmented MP4 with a missing trun entry \
356 is the typical cause)",
357 idx.saturating_sub(1),
358 self.sample_count,
359 );
360 return Ok(None);
361 }
362 };
363 let Some(sample) = s else { continue };
364 let pts_ticks = sample.start_time as i64;
365 let duration_ticks = sample.duration;
366 let raw = sample.bytes.to_vec();
367 let data = if let Some(tracker) = self.tracker.as_mut() {
368 length_prefixed_to_annexb_tracked(&raw, self.length_size, tracker, &self.sps_pps)
369 } else {
370 raw
371 };
372 return Ok(Some(Sample {
373 data,
374 pts_ticks,
375 duration_ticks,
376 }));
377 }
378 }
379
380 fn audio(&self) -> Option<&AudioTrack> {
381 self.audio.as_ref()
382 }
383}
384
385impl Mp4StreamingDemuxer {
386 #[allow(dead_code)]
389 pub(crate) fn raw_bytes(&self) -> &[u8] {
390 &self.data
391 }
392}
393
394pub(crate) fn build_fragmented_sample_table(
414 data: &[u8],
415 track_id: u32,
416 default_sample_duration_from_trex: u32,
417 default_sample_size_from_trex: u32,
418) -> Option<Vec<FragSample>> {
419 let mut samples: Vec<FragSample> = Vec::new();
420 let mut pos: usize = 0;
421 let mut accumulated_pts: i64 = 0;
422 let mut found_any_moof = false;
423
424 while pos + 8 <= data.len() {
425 let box_size_field = u32::from_be_bytes(data[pos..pos + 4].try_into().ok()?);
426 let box_type = &data[pos + 4..pos + 8];
427 let (box_size, header_size): (usize, usize) = if box_size_field == 1 {
428 if pos + 16 > data.len() {
430 break;
431 }
432 let big = u64::from_be_bytes(data[pos + 8..pos + 16].try_into().ok()?);
433 (big as usize, 16)
434 } else if box_size_field == 0 {
435 (data.len() - pos, 8)
437 } else {
438 (box_size_field as usize, 8)
439 };
440 if box_size < header_size || pos + box_size > data.len() {
441 break;
442 }
443
444 if box_type == b"moof" {
445 found_any_moof = true;
446 let moof_start = pos;
447 let moof_end = pos + box_size;
448 walk_moof(
449 data,
450 moof_start + header_size,
451 moof_end,
452 moof_start as u64,
453 track_id,
454 default_sample_duration_from_trex,
455 default_sample_size_from_trex,
456 &mut accumulated_pts,
457 &mut samples,
458 );
459 }
460 pos = pos
461 .checked_add(box_size)
462 .filter(|&n| n <= data.len())
463 .unwrap_or(data.len());
464 }
465
466 if found_any_moof { Some(samples) } else { None }
467}
468
469#[allow(clippy::too_many_arguments)]
470fn walk_moof(
471 data: &[u8],
472 children_start: usize,
473 moof_end: usize,
474 moof_offset: u64,
475 track_id: u32,
476 default_sample_duration_from_trex: u32,
477 default_sample_size_from_trex: u32,
478 accumulated_pts: &mut i64,
479 samples: &mut Vec<FragSample>,
480) {
481 let mut pos = children_start;
482 while pos + 8 <= moof_end {
483 let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
484 Ok(b) => b,
485 Err(_) => break,
486 });
487 let typ = &data[pos + 4..pos + 8];
488 if size == 0 || size as usize + pos > moof_end {
489 break;
490 }
491 if typ == b"traf" {
492 walk_traf(
493 data,
494 pos + 8,
495 pos + size as usize,
496 moof_offset,
497 track_id,
498 default_sample_duration_from_trex,
499 default_sample_size_from_trex,
500 accumulated_pts,
501 samples,
502 );
503 }
504 pos += size as usize;
505 }
506}
507
508#[allow(clippy::too_many_arguments)]
509fn walk_traf(
510 data: &[u8],
511 children_start: usize,
512 traf_end: usize,
513 moof_offset: u64,
514 track_id: u32,
515 default_sample_duration_from_trex: u32,
516 default_sample_size_from_trex: u32,
517 accumulated_pts: &mut i64,
518 samples: &mut Vec<FragSample>,
519) {
520 let mut this_track: Option<u32> = None;
523 let mut tfhd_default_sample_duration: u32 = default_sample_duration_from_trex;
524 let mut tfhd_default_sample_size: u32 = default_sample_size_from_trex;
525 let mut base_data_offset: u64 = moof_offset; let mut base_data_offset_explicit = false;
527 let mut tfdt_base_pts: Option<i64> = None;
528
529 let mut pos = children_start;
530 while pos + 8 <= traf_end {
531 let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
532 Ok(b) => b,
533 Err(_) => break,
534 });
535 let typ = &data[pos + 4..pos + 8];
536 if size == 0 || size as usize + pos > traf_end {
537 break;
538 }
539 if typ == b"tfhd" {
540 if pos + 16 > traf_end {
542 pos += size as usize;
543 continue;
544 }
545 let flags = u32::from_be_bytes(match data[pos + 8..pos + 12].try_into() {
546 Ok(b) => b,
547 Err(_) => break,
548 }) & 0x00ff_ffff;
549 let tk = u32::from_be_bytes(match data[pos + 12..pos + 16].try_into() {
550 Ok(b) => b,
551 Err(_) => break,
552 });
553 this_track = Some(tk);
554 let mut p = pos + 16;
555 if flags & 0x01 != 0 {
557 if p + 8 > traf_end {
558 break;
559 }
560 base_data_offset = u64::from_be_bytes(match data[p..p + 8].try_into() {
561 Ok(b) => b,
562 Err(_) => break,
563 });
564 base_data_offset_explicit = true;
565 p += 8;
566 }
567 if flags & 0x02 != 0 {
569 p += 4;
570 }
571 if flags & 0x08 != 0 {
573 if p + 4 > traf_end {
574 break;
575 }
576 tfhd_default_sample_duration =
577 u32::from_be_bytes(match data[p..p + 4].try_into() {
578 Ok(b) => b,
579 Err(_) => break,
580 });
581 p += 4;
582 }
583 if flags & 0x10 != 0 {
585 if p + 4 > traf_end {
586 break;
587 }
588 tfhd_default_sample_size = u32::from_be_bytes(match data[p..p + 4].try_into() {
589 Ok(b) => b,
590 Err(_) => break,
591 });
592 p += 4;
593 }
594 if flags & 0x20 != 0 {
596 p += 4;
597 }
598 let _ = p;
601 } else if typ == b"tfdt" {
602 if pos + 12 > traf_end {
604 pos += size as usize;
605 continue;
606 }
607 let version = data[pos + 8];
608 if version == 1 {
609 if pos + 20 > traf_end {
610 pos += size as usize;
611 continue;
612 }
613 let bmdt =
614 u64::from_be_bytes(data[pos + 12..pos + 20].try_into().unwrap_or([0; 8]));
615 tfdt_base_pts = Some(bmdt as i64);
616 } else {
617 let bmdt =
618 u32::from_be_bytes(data[pos + 12..pos + 16].try_into().unwrap_or([0; 4]));
619 tfdt_base_pts = Some(bmdt as i64);
620 }
621 }
622 pos += size as usize;
623 }
624
625 let Some(tk) = this_track else {
626 return;
627 };
628 if tk != track_id {
629 return;
630 }
631
632 if let Some(bp) = tfdt_base_pts {
633 *accumulated_pts = bp;
634 }
635
636 let mut pos = children_start;
638 while pos + 8 <= traf_end {
639 let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
640 Ok(b) => b,
641 Err(_) => break,
642 });
643 let typ = &data[pos + 4..pos + 8];
644 if size == 0 || size as usize + pos > traf_end {
645 break;
646 }
647 if typ == b"trun" {
648 walk_trun(
649 data,
650 pos + 8,
651 pos + size as usize,
652 if base_data_offset_explicit {
653 base_data_offset
654 } else {
655 moof_offset
656 },
657 tfhd_default_sample_duration,
658 tfhd_default_sample_size,
659 accumulated_pts,
660 samples,
661 );
662 }
663 pos += size as usize;
664 }
665 let _ = base_data_offset_explicit;
666}
667
668#[allow(clippy::too_many_arguments)]
669fn walk_trun(
670 data: &[u8],
671 children_start: usize,
672 trun_end: usize,
673 base_offset: u64,
674 default_sample_duration: u32,
675 default_sample_size: u32,
676 accumulated_pts: &mut i64,
677 samples: &mut Vec<FragSample>,
678) {
679 if children_start + 8 > trun_end {
680 return;
681 }
682 let version = data[children_start];
683 let flags = u32::from_be_bytes(match data[children_start..children_start + 4].try_into() {
684 Ok(b) => b,
685 Err(_) => return,
686 }) & 0x00ff_ffff;
687 let sample_count = u32::from_be_bytes(
688 match data[children_start + 4..children_start + 8].try_into() {
689 Ok(b) => b,
690 Err(_) => return,
691 },
692 );
693 let mut p = children_start + 8;
694 let mut data_offset_in_trun: i32 = 0;
695 if flags & 0x000_001 != 0 {
696 if p + 4 > trun_end {
697 return;
698 }
699 data_offset_in_trun = i32::from_be_bytes(match data[p..p + 4].try_into() {
700 Ok(b) => b,
701 Err(_) => return,
702 });
703 p += 4;
704 }
705 if flags & 0x000_004 != 0 {
706 p += 4;
708 }
709
710 let sample_duration_present = flags & 0x000_100 != 0;
711 let sample_size_present = flags & 0x000_200 != 0;
712 let sample_flags_present = flags & 0x000_400 != 0;
713 let sample_cto_present = flags & 0x000_800 != 0;
714
715 let mut current_offset = base_offset.wrapping_add(data_offset_in_trun as u64);
716 for _ in 0..sample_count {
717 let dur = if sample_duration_present {
718 if p + 4 > trun_end {
719 return;
720 }
721 let d = u32::from_be_bytes(match data[p..p + 4].try_into() {
722 Ok(b) => b,
723 Err(_) => return,
724 });
725 p += 4;
726 d
727 } else {
728 default_sample_duration
729 };
730 let sz = if sample_size_present {
731 if p + 4 > trun_end {
732 return;
733 }
734 let s = u32::from_be_bytes(match data[p..p + 4].try_into() {
735 Ok(b) => b,
736 Err(_) => return,
737 });
738 p += 4;
739 s
740 } else {
741 default_sample_size
742 };
743 if sample_flags_present {
744 p += 4;
745 }
746 let cto: i32 = if sample_cto_present {
747 if p + 4 > trun_end {
748 return;
749 }
750 let c = if version == 0 {
751 u32::from_be_bytes(match data[p..p + 4].try_into() {
752 Ok(b) => b,
753 Err(_) => return,
754 }) as i32
755 } else {
756 i32::from_be_bytes(match data[p..p + 4].try_into() {
757 Ok(b) => b,
758 Err(_) => return,
759 })
760 };
761 p += 4;
762 c
763 } else {
764 0
765 };
766
767 if sz > 0 {
768 samples.push(FragSample {
769 offset: current_offset,
770 size: sz,
771 pts_ticks: accumulated_pts.saturating_add(cto as i64),
772 duration_ticks: dur,
773 });
774 }
775 current_offset = current_offset.saturating_add(sz as u64);
776 *accumulated_pts = accumulated_pts.saturating_add(dur as i64);
777 }
778}