1use std::{
2 fs::File,
3 io::BufReader,
4 path::{Path, PathBuf},
5 str::FromStr,
6};
7
8use anyhow::{anyhow, bail, Context, Result};
9use mp4::{FourCC, Mp4Reader, TrackType};
10
11use crate::h264::H264Config;
12
/// One encoded sample handed out by [`Mp4H264Source::next_sample`], with all
/// timing expressed in microseconds.
#[derive(Debug, Clone)]
pub struct EncodedSample {
    /// Sample payload, copied unchanged from the MP4 track. The name suggests
    /// length-prefixed (AVCC) NAL units; nothing in this file verifies that.
    pub data_avcc: Vec<u8>,
    /// Decode timestamp in microseconds, converted from the sample's start time.
    pub dts_us: i64,
    /// Presentation timestamp in microseconds: the sample's start time plus its
    /// rendering offset, converted from track ticks.
    pub pts_us: i64,
    /// Sample duration in microseconds.
    pub dur_us: i64,
}
22
/// Fields of a sample that was read ahead of time and is held until
/// `Mp4H264Source::next_sample` hands it out.
#[derive(Debug)]
struct Prefetched {
    /// Sample start time, in track timescale ticks.
    start_time: u64,
    /// Sample duration, in track timescale ticks.
    duration: u32,
    /// Signed offset added to `start_time` to obtain the presentation time, in ticks.
    rendering_offset: i32,
    /// Owned copy of the sample payload.
    bytes: Vec<u8>,
}
30
/// Sequential reader over the H.264 video track of an MP4 file.
pub struct Mp4H264Source {
    /// Path the source was opened from; exposed through `path()`.
    path: PathBuf,
    /// MP4 reader whose header has already been parsed.
    reader: Mp4Reader<BufReader<File>>,
    /// Id of the selected H.264 track.
    track_id: u32,
    /// Timescale of the selected track, used to convert ticks to microseconds.
    timescale: u32,
    /// Number of samples in the selected track.
    sample_count: u32,
    /// 1-based id of the sample the next call to `next_sample` will return.
    next_sample_id: u32,
    /// Sample #1 as read during `open`, if the track had one; taken by the
    /// first call to `next_sample`.
    prefetched: Option<Prefetched>,

    /// Decoder configuration parsed from an avcC record assembled out of the
    /// track's SPS and PPS.
    pub config: H264Config,
}
42
43impl Mp4H264Source {
44 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
45 let path = path.as_ref().to_path_buf();
46
47 let f = File::open(&path).with_context(|| format!("open mp4: {}", path.display()))?;
48 let size = f
49 .metadata()
50 .with_context(|| format!("stat mp4: {}", path.display()))?
51 .len();
52
53 let reader = BufReader::new(f);
54 let mut mp4 = Mp4Reader::read_header(reader, size).context("mp4::read_header")?;
55
56 let (track_id, timescale, sample_count, width, height, sps, pps) =
57 select_h264_video_track(&mp4).context("select H.264 track")?;
58
59 let prefetched = mp4
63 .read_sample(track_id, 1)
64 .context("read first sample")?
65 .map(|s| Prefetched {
66 start_time: s.start_time,
67 duration: s.duration,
68 rendering_offset: s.rendering_offset,
69 bytes: s.bytes.to_vec(),
70 });
71
72 let nal_len_size = prefetched
73 .as_ref()
74 .map(|p| detect_nal_length_size(&p.bytes))
75 .unwrap_or(4);
76
77 let avcc = build_avcc_record(&sps, &pps, nal_len_size)?;
78 let config = H264Config::parse_from_avcc(width, height, &avcc)
79 .context("parse avcC from SPS/PPS")?;
80
81 Ok(Self {
82 path,
83 reader: mp4,
84 track_id,
85 timescale,
86 sample_count,
87 next_sample_id: 1,
88 prefetched,
89 config,
90 })
91 }
92
93 pub fn next_sample(&mut self) -> Result<Option<EncodedSample>> {
94 if self.next_sample_id == 0 {
95 bail!("internal error: sample ids are 1-based");
96 }
97
98 if self.next_sample_id > self.sample_count {
99 return Ok(None);
100 }
101
102 let (start_time, duration, rendering_offset, bytes) = if self.next_sample_id == 1 {
103 if let Some(p) = self.prefetched.take() {
104 (p.start_time, p.duration, p.rendering_offset, p.bytes)
105 } else {
106 let s = self
107 .reader
108 .read_sample(self.track_id, 1)
109 .context("read sample #1")?
110 .ok_or_else(|| anyhow!("sample #1 missing"))?;
111 (s.start_time, s.duration, s.rendering_offset, s.bytes.to_vec())
112 }
113 } else {
114 let s = self
115 .reader
116 .read_sample(self.track_id, self.next_sample_id)
117 .with_context(|| format!("read sample #{}", self.next_sample_id))?
118 .ok_or_else(|| anyhow!("sample #{} missing", self.next_sample_id))?;
119 (s.start_time, s.duration, s.rendering_offset, s.bytes.to_vec())
120 };
121
122 self.next_sample_id += 1;
123
124 let dts_ticks = start_time as i128;
128 let pts_ticks = dts_ticks + (rendering_offset as i128);
129
130 let dts_us = ticks_to_us(dts_ticks, self.timescale);
131 let pts_us = ticks_to_us(pts_ticks, self.timescale);
132 let dur_us = ticks_to_us(duration as i128, self.timescale);
133
134 Ok(Some(EncodedSample {
135 data_avcc: bytes,
136 dts_us,
137 pts_us,
138 dur_us,
139 }))
140 }
141
142 pub fn path(&self) -> &Path {
143 &self.path
144 }
145}
146
/// Converts `ticks` counted at `timescale` ticks per second into microseconds.
///
/// The division truncates toward zero. A `timescale` of zero yields `0`, and
/// results outside the `i64` range saturate at `i64::MIN` / `i64::MAX`.
fn ticks_to_us(ticks: i128, timescale: u32) -> i64 {
    if timescale == 0 {
        return 0;
    }

    // The saturating multiply keeps extreme tick counts from overflowing i128.
    let micros = ticks.saturating_mul(1_000_000) / i128::from(timescale);

    // After clamping the value is known to fit, so the cast cannot truncate.
    micros.clamp(i128::from(i64::MIN), i128::from(i64::MAX)) as i64
}
161
162fn select_h264_video_track(
163 mp4: &Mp4Reader<BufReader<File>>,
164) -> Result<(u32, u32, u32, u32, u32, Vec<u8>, Vec<u8>)> {
165 let avc1 = FourCC::from_str("avc1").unwrap();
166 let avc3 = FourCC::from_str("avc3").unwrap();
167
168 for (track_id, track) in mp4.tracks().iter() {
169 let tt = track.track_type().context("track_type")?;
170 if tt != TrackType::Video {
171 continue;
172 }
173
174 let bt = track.box_type().context("box_type")?;
175 if bt != avc1 && bt != avc3 {
176 continue;
177 }
178
179 let timescale = track.timescale();
180 let sample_count = track.sample_count();
181
182 let width = track.width() as u32;
183 let height = track.height() as u32;
184
185 let sps = track
186 .sequence_parameter_set()
187 .context("sequence_parameter_set")?
188 .to_vec();
189 let pps = track
190 .picture_parameter_set()
191 .context("picture_parameter_set")?
192 .to_vec();
193
194 return Ok((*track_id, timescale, sample_count, width, height, sps, pps));
195 }
196
197 bail!("no H.264 (avc1/avc3) video track found")
198}
199
/// Guesses how many bytes each NAL length prefix occupies in `avcc_sample`.
///
/// The sample is checked against the prefix sizes 4, 2 and 1, in that order.
/// A size is accepted when the whole buffer splits into length-prefixed units
/// with nothing left over, every unit is non-empty, and every unit's first
/// byte has its top bit (the H.264 `forbidden_zero_bit`) clear. Size 3 is not
/// tried because the avcC `lengthSizeMinusOne` field only defines 1-, 2- and
/// 4-byte prefixes.
///
/// Returns 4 when the sample is shorter than 8 bytes (too little data to
/// judge) or when no candidate size fits. This is a heuristic: it cannot
/// prove the prefix size, it only rules out sizes that contradict the data.
fn detect_nal_length_size(avcc_sample: &[u8]) -> usize {
    /// Reports whether `sample` is exactly a sequence of NAL units, each
    /// preceded by a big-endian length of `prefix_len` bytes.
    fn splits_cleanly(sample: &[u8], prefix_len: usize) -> bool {
        if sample.is_empty() {
            return false;
        }

        let mut rest = sample;
        while !rest.is_empty() {
            if rest.len() < prefix_len {
                return false;
            }
            let (prefix, tail) = rest.split_at(prefix_len);
            let nal_len = prefix
                .iter()
                .fold(0usize, |acc, &byte| (acc << 8) | usize::from(byte));

            // `tail[0]` is only evaluated once `tail` is known to hold at
            // least `nal_len >= 1` bytes.
            if nal_len == 0 || nal_len > tail.len() || tail[0] & 0x80 != 0 {
                return false;
            }
            rest = &tail[nal_len..];
        }
        true
    }

    const DEFAULT_LEN_SIZE: usize = 4;

    if avcc_sample.len() < 8 {
        return DEFAULT_LEN_SIZE;
    }

    [4usize, 2, 1]
        .iter()
        .copied()
        .find(|&candidate| splits_cleanly(avcc_sample, candidate))
        .unwrap_or(DEFAULT_LEN_SIZE)
}
208
209fn build_avcc_record(sps: &[u8], pps: &[u8], nal_len_size: usize) -> Result<Vec<u8>> {
210 if !(1..=4).contains(&nal_len_size) {
211 bail!("invalid nal length size: {nal_len_size}");
212 }
213
214 let mut out = Vec::new();
217 out.push(1); out.push(*sps.get(1).unwrap_or(&0)); out.push(*sps.get(2).unwrap_or(&0)); out.push(*sps.get(3).unwrap_or(&0)); out.push(0xFC | ((nal_len_size as u8 - 1) & 0x03));
224
225 out.push(0xE0 | 1);
227 out.extend_from_slice(&(sps.len() as u16).to_be_bytes());
228 out.extend_from_slice(sps);
229
230 out.push(1);
232 out.extend_from_slice(&(pps.len() as u16).to_be_bytes());
233 out.extend_from_slice(pps);
234
235 Ok(out)
236}