1use crate::bytes::{read_u32_be, read_u64_be};
7use crate::convert::usize_from;
8use crate::error::{FormatError, Result};
9use crate::input::{
10 ArtInput, BinaryTagInput, EmbeddedBinaryTag, EmbeddedPicture, PictureType, TagInput,
11};
12use crate::layout::{RegionLayout, Segment};
13use crate::size;
14use std::io::{self, Read, Seek, SeekFrom};
15
/// Largest `ftyp` or `moov` box, in bytes, that the streaming scanner will
/// load into memory (256 MiB). Larger boxes are reported as
/// `Mp4ScanError::MetadataTooLarge`.
const MAX_MP4_METADATA_BYTES: u64 = 256 << 20;
17
/// Location of one box inside the byte buffer it was parsed from.
///
/// All offsets are relative to the start of that buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct BoxRef {
    /// Four-byte box type code.
    kind: [u8; 4],
    /// Offset of the first header byte.
    start: usize,
    /// Size of the header in bytes.
    header_len: usize,
    /// Size of the whole box in bytes, header included.
    total_len: usize,
}

impl BoxRef {
    /// Offset of the first payload byte, i.e. just past the header.
    fn payload_start(&self) -> usize {
        let Self {
            start, header_len, ..
        } = *self;
        start + header_len
    }

    /// Offset one past the last byte of the box.
    fn end(&self) -> usize {
        let Self {
            start, total_len, ..
        } = *self;
        start + total_len
    }

    /// Borrows this box's payload bytes out of `buf`.
    ///
    /// `buf` must be the buffer the box was parsed from. Debug builds assert
    /// that the box fits inside it; in any build the slice operation panics
    /// when it does not.
    fn payload<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
        let (from, to) = (self.payload_start(), self.end());
        debug_assert!(
            to <= buf.len(),
            "BoxRef::payload called with a buffer it was not parsed from"
        );
        &buf[from..to]
    }
}
44
/// Decoded header of a single box.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BoxHeader {
    /// Four-byte box type code.
    pub kind: [u8; 4],
    /// Header size in bytes: 8, or 16 when the 64-bit size field is present.
    pub header_len: u64,
    /// Size of the whole box in bytes, header included.
    pub total_len: u64,
}
56
57pub fn box_header(hdr: &[u8], remaining: u64) -> Result<BoxHeader> {
61 let size32 = u64::from(read_u32_be(hdr, 0)?);
62 let kind: [u8; 4] = hdr
63 .get(4..8)
64 .ok_or(FormatError::Malformed)?
65 .try_into()
66 .unwrap();
67 let (header_len, total_len) = match size32 {
68 1 => (16u64, read_u64_be(hdr, 8)?),
69 0 => (8u64, remaining),
70 n => (8u64, n),
71 };
72 if total_len < header_len || total_len > remaining {
73 return Err(FormatError::Malformed);
74 }
75 Ok(BoxHeader {
76 kind,
77 header_len,
78 total_len,
79 })
80}
81
/// Errors produced by the streaming scan, `read_structure_from`.
#[derive(Debug, thiserror::Error)]
pub enum Mp4ScanError {
    /// Seeking or reading the underlying source failed.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The box structure was rejected by the parser or the validator.
    #[error(transparent)]
    Format(#[from] FormatError),
    /// A box that would be loaded into memory is larger than the metadata cap.
    #[error("MP4 {box_kind} box is {size} bytes, exceeds the {cap}-byte metadata cap")]
    MetadataTooLarge {
        /// Name of the offending box.
        box_kind: &'static str,
        /// Total size of that box in bytes.
        size: u64,
        /// The cap that was exceeded, in bytes.
        cap: u64,
    },
}
98
99fn read_box(buf: &[u8], pos: usize) -> Result<BoxRef> {
100 let size32 = u64::from(read_u32_be(buf, pos)?);
101 let kind: [u8; 4] = buf
102 .get(pos + 4..pos + 8)
103 .ok_or(FormatError::Malformed)?
104 .try_into()
105 .unwrap();
106 let (header_len, total) = match size32 {
107 1 => (16usize, read_u64_be(buf, pos + 8)?),
108 0 => (8usize, (buf.len() - pos) as u64),
109 n => (8usize, n),
110 };
111 let total = usize_from(total);
112 let Some(end) = pos.checked_add(total) else {
113 return Err(FormatError::Malformed);
114 };
115 if total < header_len || end > buf.len() {
116 return Err(FormatError::Malformed);
117 }
118 Ok(BoxRef {
119 kind,
120 start: pos,
121 header_len,
122 total_len: total,
123 })
124}
125
126fn child_boxes(buf: &[u8]) -> Result<Vec<BoxRef>> {
127 let mut out = Vec::new();
128 let mut pos = 0;
129 while pos + 8 <= buf.len() {
130 let b = read_box(buf, pos)?;
131 pos = b.end();
132 out.push(b);
133 }
134 Ok(out)
135}
136
137fn child_boxes_lenient(buf: &[u8]) -> Vec<BoxRef> {
143 let mut out = Vec::new();
144 let mut pos = 0;
145 while pos + 8 <= buf.len() {
146 let Ok(b) = read_box(buf, pos) else { break };
147 pos = b.end();
148 out.push(b);
149 }
150 out
151}
152
153fn find_box(buf: &[u8], kind: &[u8; 4]) -> Result<Option<BoxRef>> {
154 Ok(child_boxes(buf)?.into_iter().find(|b| &b.kind == kind))
155}
156
157fn find_box_lenient(buf: &[u8], kind: &[u8; 4]) -> Option<BoxRef> {
162 child_boxes_lenient(buf)
163 .into_iter()
164 .find(|b| &b.kind == kind)
165}
166
167fn find_path(buf: &[u8], path: &[&[u8; 4]]) -> Result<Option<(usize, usize)>> {
170 let mut base = 0usize;
171 let mut last = None;
172 for kind in path {
173 let region = &buf[base..];
174 let Some(b) = find_box(region, kind)? else {
175 return Ok(None);
176 };
177 let ps = base + b.payload_start();
178 last = Some((ps, b.total_len - b.header_len));
179 base = ps;
180 }
181 Ok(last)
182}
183
/// Byte range of the audio data, i.e. the payload of the `mdat` box.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Mp4Bounds {
    /// Offset of the first `mdat` payload byte from the start of the input.
    pub audio_offset: u64,
    /// Length of the `mdat` payload in bytes.
    pub audio_length: u64,
}
190
191fn validate_moov(moov_payload: &[u8]) -> Result<()> {
195 if find_box(moov_payload, b"mvex")?.is_some() {
196 return Err(FormatError::NotMp4);
197 }
198 let traks: Vec<_> = child_boxes(moov_payload)?
199 .into_iter()
200 .filter(|b| &b.kind == b"trak")
201 .collect();
202 if traks.len() != 1 {
203 return Err(FormatError::NotMp4);
204 }
205 let trak = traks[0].payload(moov_payload);
206 let (hp, hl) = find_path(trak, &[b"mdia", b"hdlr"])?.ok_or(FormatError::NotMp4)?;
207 if trak[hp..hp + hl].get(8..12) != Some(b"soun") {
208 return Err(FormatError::NotMp4);
209 }
210 Ok(())
211}
212
213fn locate(buf: &[u8]) -> Result<(BoxRef, BoxRef, BoxRef)> {
216 let top = child_boxes(buf).map_err(|_| FormatError::NotMp4)?;
217 if top.iter().any(|b| &b.kind == b"moof") {
218 return Err(FormatError::NotMp4);
219 }
220 let one = |kind: &[u8; 4]| -> Result<BoxRef> {
221 let mut it = top.iter().filter(|b| &b.kind == kind);
222 let first = it.next().copied().ok_or(FormatError::NotMp4)?;
223 if it.next().is_some() {
224 return Err(FormatError::NotMp4);
225 }
226 Ok(first)
227 };
228 let ftyp = one(b"ftyp")?;
229 let moov = one(b"moov")?;
230 let mdat = one(b"mdat")?;
231
232 validate_moov(moov.payload(buf))?;
233 Ok((ftyp, moov, mdat))
234}
235
236pub fn locate_audio(buf: &[u8]) -> Result<Mp4Bounds> {
238 let (_ftyp, _moov, mdat) = locate(buf)?;
239 Ok(Mp4Bounds {
240 audio_offset: mdat.payload_start() as u64,
241 audio_length: (mdat.total_len - mdat.header_len) as u64,
242 })
243}
244
/// The structural pieces of a file extracted by a scan.
#[derive(Clone, Debug, PartialEq)]
pub struct Mp4Scan {
    /// The complete `ftyp` box, header included.
    pub ftyp: Vec<u8>,
    /// The complete `moov` box, header included.
    pub moov: Vec<u8>,
    /// Only the header bytes of the `mdat` box.
    pub mdat_header: Vec<u8>,
    /// Offset of the first `mdat` payload byte from the start of the input.
    pub mdat_payload_offset: u64,
    /// Length of the `mdat` payload in bytes.
    pub mdat_payload_len: u64,
}
254
255pub fn read_structure(buf: &[u8]) -> Result<Mp4Scan> {
256 let (ftyp, moov, mdat) = locate(buf)?;
257 Ok(Mp4Scan {
258 ftyp: buf[ftyp.start..ftyp.end()].to_vec(),
259 moov: buf[moov.start..moov.end()].to_vec(),
260 mdat_header: buf[mdat.start..mdat.payload_start()].to_vec(),
261 mdat_payload_offset: mdat.payload_start() as u64,
262 mdat_payload_len: (mdat.total_len - mdat.header_len) as u64,
263 })
264}
265
266pub fn read_structure_from<R: Read + Seek>(
274 r: &mut R,
275 file_len: u64,
276) -> std::result::Result<Mp4Scan, Mp4ScanError> {
277 fn region<R: Read + Seek>(r: &mut R, off: u64, len: usize) -> io::Result<Vec<u8>> {
278 r.seek(SeekFrom::Start(off))?;
279 let mut buf = vec![0u8; len];
280 r.read_exact(&mut buf)?;
281 Ok(buf)
282 }
283
284 let mut ftyp: Option<(u64, BoxHeader)> = None;
286 let mut moov: Option<(u64, BoxHeader)> = None;
287 let mut mdat: Option<(u64, BoxHeader)> = None;
288 let mut dup = false;
289
290 let mut pos = 0u64;
291 while pos + 8 <= file_len {
292 let first8 = region(r, pos, 8)?;
295 let size32 = u32::from_be_bytes(first8[0..4].try_into().unwrap());
296 let hdr = if size32 == 1 {
299 let mut h = first8;
300 h.extend_from_slice(®ion(r, pos + 8, 8)?);
301 h
302 } else {
303 first8
304 };
305 let bh = box_header(&hdr, file_len - pos)?;
306 let total = bh.total_len;
307 match &bh.kind {
308 b"moof" => return Err(FormatError::NotMp4.into()),
309 b"ftyp" => dup |= ftyp.replace((pos, bh)).is_some(),
310 b"moov" => dup |= moov.replace((pos, bh)).is_some(),
311 b"mdat" => dup |= mdat.replace((pos, bh)).is_some(),
312 _ => {}
313 }
314 pos += total;
315 }
316 if dup {
317 return Err(FormatError::NotMp4.into());
318 }
319
320 let (ftyp_s, ftyp_h) = ftyp.ok_or(FormatError::NotMp4)?;
321 let (moov_s, moov_h) = moov.ok_or(FormatError::NotMp4)?;
322 let (mdat_s, mdat_h) = mdat.ok_or(FormatError::NotMp4)?;
323
324 for (box_kind, total_len) in [("ftyp", ftyp_h.total_len), ("moov", moov_h.total_len)] {
325 if total_len > MAX_MP4_METADATA_BYTES {
326 return Err(Mp4ScanError::MetadataTooLarge {
327 box_kind,
328 size: total_len,
329 cap: MAX_MP4_METADATA_BYTES,
330 });
331 }
332 }
333
334 let ftyp_len = usize::try_from(ftyp_h.total_len).map_err(|_| FormatError::Malformed)?;
337 let moov_len = usize::try_from(moov_h.total_len).map_err(|_| FormatError::Malformed)?;
338 let ftyp_bytes = region(r, ftyp_s, ftyp_len)?;
339 let moov_bytes = region(r, moov_s, moov_len)?;
340 let mdat_header = region(r, mdat_s, usize_from(mdat_h.header_len))?;
341
342 validate_moov(&moov_bytes[usize_from(moov_h.header_len)..])?;
343
344 Ok(Mp4Scan {
345 ftyp: ftyp_bytes,
346 moov: moov_bytes,
347 mdat_header,
348 mdat_payload_offset: mdat_s + mdat_h.header_len,
349 mdat_payload_len: mdat_h.total_len - mdat_h.header_len,
350 })
351}
352
353fn ilst_region(buf: &[u8]) -> Option<(usize, usize)> {
366 let moov = find_box_lenient(buf, b"moov")?;
367 let mp = moov.payload(buf);
368 let base = moov.payload_start();
369 let udta = find_box_lenient(mp, b"udta")?;
370 let up = udta.payload_start();
371 let udta_payload = udta.payload(mp);
372 let meta = find_box_lenient(udta_payload, b"meta")?;
373 let meta_payload = meta.payload(udta_payload);
374 let prefix = if meta_payload.get(..4) == Some(&[0, 0, 0, 0][..]) {
375 4
376 } else {
377 0
378 };
379 let meta_children = meta_payload.get(prefix..)?;
380 let il = find_box_lenient(meta_children, b"ilst")?;
381 let start = base + up + meta.payload_start() + prefix + il.payload_start();
382 Some((start, il.total_len - il.header_len))
383}
384
385fn read_freeform(inner: &[u8]) -> Vec<(String, String)> {
391 let Some(name_box) = find_box_lenient(inner, b"name") else {
392 return Vec::new();
393 };
394 let np = name_box.payload(inner);
395 if np.len() < 4 {
396 return Vec::new();
397 }
398 let Ok(name) = std::str::from_utf8(&np[4..]) else {
400 return Vec::new();
401 };
402 let mean = find_box_lenient(inner, b"mean").map_or("com.apple.iTunes", |m| {
403 let p = m.payload(inner);
404 if p.len() >= 4 {
405 std::str::from_utf8(&p[4..]).unwrap_or("com.apple.iTunes")
406 } else {
407 "com.apple.iTunes"
408 }
409 });
410 let key = crate::tagmap::mp4_freeform_to_key(mean, name)
411 .map_or_else(|| name.to_string(), str::to_string);
412 let mut out = Vec::new();
413 for data in child_boxes_lenient(inner) {
414 if &data.kind != b"data" {
415 continue;
416 }
417 let dp = data.payload(inner);
418 if dp.len() < 8 {
419 continue;
420 }
421 let type_code = u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]);
424 if type_code != 1 {
425 continue;
426 }
427 if let Ok(value) = std::str::from_utf8(&dp[8..]) {
428 out.push((key.clone(), value.to_string()));
429 }
430 }
431 out
432}
433
/// Formats a number/total value (as used by `trkn` and `disk`) as text.
///
/// The number is the big-endian `u16` at bytes 2..4 and the total is the
/// big-endian `u16` at bytes 4..6. Returns `"number/total"` when the total is
/// present and non-zero, otherwise just `"number"`.
///
/// # Panics
///
/// Panics when `value` is shorter than 4 bytes.
fn number_total(value: &[u8]) -> String {
    debug_assert!(
        value.len() >= 4,
        "number_total requires the 4-byte number prefix"
    );
    let number = u16::from_be_bytes([value[2], value[3]]);
    match value.get(4..6) {
        Some(&[hi, lo]) if (hi, lo) != (0, 0) => {
            let total = u16::from_be_bytes([hi, lo]);
            format!("{number}/{total}")
        }
        // Total absent, truncated, or zero: report the number alone.
        _ => number.to_string(),
    }
}
455
456pub fn read_tags(buf: &[u8]) -> Vec<(String, String)> {
463 let Some((start, len)) = ilst_region(buf) else {
464 return Vec::new();
465 };
466 let ilst = &buf[start..start + len];
467 let mut out = Vec::new();
468 for atom in child_boxes_lenient(ilst) {
469 let inner = atom.payload(ilst);
470 if &atom.kind == b"----" {
471 out.extend(read_freeform(inner));
472 continue;
473 }
474 let text_key = crate::tagmap::mp4_atom_to_key(&atom.kind);
475 for data in child_boxes_lenient(inner) {
476 if &data.kind != b"data" {
477 continue;
478 }
479 let dp = data.payload(inner);
480 if dp.len() < 8 {
481 continue;
482 }
483 let value = &dp[8..]; if let Some(key) = text_key {
485 if let Ok(s) = std::str::from_utf8(value) {
486 out.push((key.to_string(), s.to_string()));
487 }
488 } else if &atom.kind == b"trkn" && value.len() >= 4 {
489 out.push(("tracknumber".into(), number_total(value)));
490 } else if &atom.kind == b"disk" && value.len() >= 4 {
491 out.push(("discnumber".into(), number_total(value)));
492 } else if let Some(key) = crate::tagmap::mp4_integer_atom_to_key(&atom.kind) {
493 let mut n: u64 = 0;
495 for &b in value.iter().take(8) {
496 n = (n << 8) | u64::from(b);
497 }
498 out.push((key.to_string(), n.to_string()));
499 }
500 }
501 }
502 out
503}
504
/// Record of an embedded item that was skipped for exceeding a size limit.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OversizeDrop {
    /// What was dropped: the MIME type for a picture, or the
    /// `----:mean:name` key for a binary tag.
    pub descriptor: String,
    /// Size of the dropped value in bytes, not counting the 8-byte `data`
    /// prefix.
    pub bytes: usize,
}
517
518pub fn read_pictures_reporting(
523 buf: &[u8],
524 max_art_bytes: usize,
525) -> (Vec<EmbeddedPicture>, Vec<OversizeDrop>) {
526 let Some((start, len)) = ilst_region(buf) else {
527 return (Vec::new(), Vec::new());
528 };
529 let ilst = &buf[start..start + len];
530 let mut out = Vec::new();
531 let mut dropped = Vec::new();
532 for atom in child_boxes_lenient(ilst) {
533 if &atom.kind != b"covr" {
534 continue;
535 }
536 let inner = atom.payload(ilst);
537 for data in child_boxes_lenient(inner) {
538 if &data.kind != b"data" {
539 continue;
540 }
541 let dp = data.payload(inner);
542 if dp.len() < 8 {
543 continue;
544 }
545 let mime = match u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]) {
546 13 => "image/jpeg",
547 14 => "image/png",
548 _ => continue,
549 };
550 if dp.len() - 8 > max_art_bytes {
551 dropped.push(OversizeDrop {
552 descriptor: mime.to_string(),
553 bytes: dp.len() - 8,
554 });
555 continue;
556 }
557 out.push(EmbeddedPicture {
558 mime: mime.to_string(),
559 picture_type: PictureType::new(3).expect("3 is in range"),
560 description: String::new(),
561 width: 0,
562 height: 0,
563 data: dp[8..].to_vec(),
564 });
565 }
566 }
567 (out, dropped)
568}
569
570pub fn read_pictures(buf: &[u8], max_art_bytes: usize) -> Vec<EmbeddedPicture> {
580 read_pictures_reporting(buf, max_art_bytes).0
581}
582
583pub fn read_binary_tags_reporting(
588 buf: &[u8],
589 max_binary_tag_bytes: usize,
590) -> (Vec<EmbeddedBinaryTag>, Vec<OversizeDrop>) {
591 let Some((start, len)) = ilst_region(buf) else {
592 return (Vec::new(), Vec::new());
593 };
594 let ilst = &buf[start..start + len];
595 let mut out = Vec::new();
596 let mut dropped = Vec::new();
597 for atom in child_boxes_lenient(ilst) {
598 if &atom.kind != b"----" {
599 continue;
600 }
601 let inner = atom.payload(ilst);
602 let Some(name) = find_box_lenient(inner, b"name").and_then(|n| {
604 let p = n.payload(inner);
605 (p.len() >= 4)
606 .then(|| std::str::from_utf8(&p[4..]).ok())
607 .flatten()
608 }) else {
609 continue;
610 };
611 let mean = find_box_lenient(inner, b"mean").map_or("com.apple.iTunes", |m| {
612 let p = m.payload(inner);
613 if p.len() >= 4 {
614 std::str::from_utf8(&p[4..]).unwrap_or("com.apple.iTunes")
615 } else {
616 "com.apple.iTunes"
617 }
618 });
619 let key = format!("----:{mean}:{name}");
620 for data in child_boxes_lenient(inner) {
624 if &data.kind != b"data" {
625 continue;
626 }
627 let dp = data.payload(inner);
628 if dp.len() < 8 {
629 continue;
630 }
631 let type_code = u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]);
634 if type_code == 1 {
635 continue;
636 }
637 if dp.len() - 8 > max_binary_tag_bytes {
638 dropped.push(OversizeDrop {
639 descriptor: key.clone(),
640 bytes: dp.len() - 8,
641 });
642 continue;
643 }
644 out.push(EmbeddedBinaryTag {
645 key: key.clone(),
646 payload: dp[8..].to_vec(),
647 });
648 }
649 }
650 (out, dropped)
651}
652
653pub fn read_binary_tags(buf: &[u8], max_binary_tag_bytes: usize) -> Vec<EmbeddedBinaryTag> {
666 read_binary_tags_reporting(buf, max_binary_tag_bytes).0
667}
668
// The `synth` submodule is private; `synthesize_layout` is re-exported from
// it as part of this module's public API.
mod synth;
pub use synth::synthesize_layout;

// Unit tests live in a separate file and are compiled only for test builds.
#[cfg(test)]
mod tests;