Skip to main content

musefs_format/
mp4.rs

1//! Hand-rolled MP4/M4A box layer: parse the structure, read iTunes metadata, and
2//! regenerate `moov` (with patched chunk offsets) to synthesize a re-tagged file
3//! whose `mdat` audio payload is served verbatim. Strict: anything outside the
4//! supported shape (single audio track, one `mdat`, non-fragmented) is rejected.
5
6use crate::bytes::{read_u32_be, read_u64_be};
7use crate::convert::usize_from;
8use crate::error::{FormatError, Result};
9use crate::input::{
10    ArtInput, BinaryTagInput, EmbeddedBinaryTag, EmbeddedPicture, PictureType, TagInput,
11};
12use crate::layout::{RegionLayout, Segment};
13use crate::size;
14use std::io::{self, Read, Seek, SeekFrom};
15
/// Upper bound, in bytes, on a metadata box that `read_structure_from` will load
/// into memory: an `ftyp` or `moov` box whose total length exceeds this 256 MiB
/// cap is rejected with `Mp4ScanError::MetadataTooLarge`.
const MAX_MP4_METADATA_BYTES: u64 = 256 * 1024 * 1024;
17
/// Position of one box inside a byte buffer. Every offset is relative to the
/// buffer the header was parsed from, never to the file as a whole.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct BoxRef {
    /// Four-character box type.
    kind: [u8; 4],
    /// Offset of the box's first header byte.
    start: usize,
    /// Header size: 8, or 16 when the box uses a 64-bit largesize.
    header_len: usize,
    /// Header plus payload.
    total_len: usize,
}

impl BoxRef {
    /// Offset of the first payload byte (just past the header).
    fn payload_start(&self) -> usize {
        self.header_len + self.start
    }

    /// Offset one past the box's last byte.
    fn end(&self) -> usize {
        self.total_len + self.start
    }

    /// Borrow this box's payload out of `buf`.
    ///
    /// `buf` has to be the very buffer `read_box` parsed this header from, since
    /// the stored offsets only make sense against it. Debug builds assert that
    /// the box fits, which flags a wrong-buffer call in tests.
    fn payload<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
        let (from, to) = (self.payload_start(), self.end());
        debug_assert!(
            to <= buf.len(),
            "BoxRef::payload called with a buffer it was not parsed from"
        );
        &buf[from..to]
    }
}
44
/// The header of a box, decoded without needing its payload in memory. Exposed
/// publicly so the core reader can work out box bounds while it seeks.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BoxHeader {
    /// Four-byte box type, for instance `*b"moov"`.
    pub kind: [u8; 4],
    /// Header size in bytes: 8 normally, 16 with a 64-bit largesize.
    pub header_len: u64,
    /// Length of the whole box, header and payload together.
    pub total_len: u64,
}
56
57/// Parse a box header from `hdr` (>= 8 bytes; >= 16 if it uses a 64-bit
58/// largesize). `remaining` is the byte count from this box's start to EOF, used
59/// to resolve a `size == 0` ("extends to end") box.
60pub fn box_header(hdr: &[u8], remaining: u64) -> Result<BoxHeader> {
61    let size32 = u64::from(read_u32_be(hdr, 0)?);
62    let kind: [u8; 4] = hdr
63        .get(4..8)
64        .ok_or(FormatError::Malformed)?
65        .try_into()
66        .unwrap();
67    let (header_len, total_len) = match size32 {
68        1 => (16u64, read_u64_be(hdr, 8)?),
69        0 => (8u64, remaining),
70        n => (8u64, n),
71    };
72    if total_len < header_len || total_len > remaining {
73        return Err(FormatError::Malformed);
74    }
75    Ok(BoxHeader {
76        kind,
77        header_len,
78        total_len,
79    })
80}
81
/// Error from the seeking MP4 reader: an IO failure reading the file, or a
/// structural/format problem. Kept distinct so the core layer can map IO to
/// `CoreError::Io` (preserving errno) and format to `CoreError::Format`.
#[derive(Debug, thiserror::Error)]
pub enum Mp4ScanError {
    /// Seeking or reading the backing file failed.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The bytes that were read do not form a supported MP4 structure.
    #[error(transparent)]
    Format(#[from] FormatError),
    /// A box that has to be loaded whole declares a length above the metadata cap.
    #[error("MP4 {box_kind} box is {size} bytes, exceeds the {cap}-byte metadata cap")]
    MetadataTooLarge {
        /// Type of the offending box, as text.
        box_kind: &'static str,
        /// Total length the box declares, in bytes.
        size: u64,
        /// The cap that `size` exceeded, in bytes.
        cap: u64,
    },
}
98
99fn read_box(buf: &[u8], pos: usize) -> Result<BoxRef> {
100    let size32 = u64::from(read_u32_be(buf, pos)?);
101    let kind: [u8; 4] = buf
102        .get(pos + 4..pos + 8)
103        .ok_or(FormatError::Malformed)?
104        .try_into()
105        .unwrap();
106    let (header_len, total) = match size32 {
107        1 => (16usize, read_u64_be(buf, pos + 8)?),
108        0 => (8usize, (buf.len() - pos) as u64),
109        n => (8usize, n),
110    };
111    let total = usize_from(total);
112    let Some(end) = pos.checked_add(total) else {
113        return Err(FormatError::Malformed);
114    };
115    if total < header_len || end > buf.len() {
116        return Err(FormatError::Malformed);
117    }
118    Ok(BoxRef {
119        kind,
120        start: pos,
121        header_len,
122        total_len: total,
123    })
124}
125
126fn child_boxes(buf: &[u8]) -> Result<Vec<BoxRef>> {
127    let mut out = Vec::new();
128    let mut pos = 0;
129    while pos + 8 <= buf.len() {
130        let b = read_box(buf, pos)?;
131        pos = b.end();
132        out.push(b);
133    }
134    Ok(out)
135}
136
137/// Like [`child_boxes`] but lenient: stops at the first unreadable box and
138/// returns the well-formed ones parsed so far, rather than discarding the whole
139/// list. Box sizes chain, so a malformed box leaves no reliable way to find the
140/// next — the prefix is the most that can be recovered. Used by the metadata
141/// extractors, whose contract is to seed what they can and skip the rest (#524).
142fn child_boxes_lenient(buf: &[u8]) -> Vec<BoxRef> {
143    let mut out = Vec::new();
144    let mut pos = 0;
145    while pos + 8 <= buf.len() {
146        let Ok(b) = read_box(buf, pos) else { break };
147        pos = b.end();
148        out.push(b);
149    }
150    out
151}
152
153fn find_box(buf: &[u8], kind: &[u8; 4]) -> Result<Option<BoxRef>> {
154    Ok(child_boxes(buf)?.into_iter().find(|b| &b.kind == kind))
155}
156
157/// Like [`find_box`] but lenient: scans only the well-formed prefix
158/// ([`child_boxes_lenient`]), so a malformed trailing child of a `----` atom
159/// can't drop a valid `name`/`mean` that precedes it — keeping the metadata
160/// extractors' "seed what you can" contract end-to-end (#524).
161fn find_box_lenient(buf: &[u8], kind: &[u8; 4]) -> Option<BoxRef> {
162    child_boxes_lenient(buf)
163        .into_iter()
164        .find(|b| &b.kind == kind)
165}
166
167/// Descend a path of box types; return `(payload_start, payload_len)` relative to
168/// `buf` for the box at the end of the path, or None if any step is missing.
169fn find_path(buf: &[u8], path: &[&[u8; 4]]) -> Result<Option<(usize, usize)>> {
170    let mut base = 0usize;
171    let mut last = None;
172    for kind in path {
173        let region = &buf[base..];
174        let Some(b) = find_box(region, kind)? else {
175            return Ok(None);
176        };
177        let ps = base + b.payload_start();
178        last = Some((ps, b.total_len - b.header_len));
179        base = ps;
180    }
181    Ok(last)
182}
183
/// Where the audio lives in the backing file: the `mdat` payload, which is
/// served verbatim.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Mp4Bounds {
    /// Offset of the first `mdat` payload byte.
    pub audio_offset: u64,
    /// Number of `mdat` payload bytes.
    pub audio_length: u64,
}
190
191/// Validate the internal `moov` shape: no fragmentation (`mvex`), exactly one
192/// track, and that track is audio (`soun`). `moov_payload` is the bytes inside
193/// the `moov` box (after its header).
194fn validate_moov(moov_payload: &[u8]) -> Result<()> {
195    if find_box(moov_payload, b"mvex")?.is_some() {
196        return Err(FormatError::NotMp4);
197    }
198    let traks: Vec<_> = child_boxes(moov_payload)?
199        .into_iter()
200        .filter(|b| &b.kind == b"trak")
201        .collect();
202    if traks.len() != 1 {
203        return Err(FormatError::NotMp4);
204    }
205    let trak = traks[0].payload(moov_payload);
206    let (hp, hl) = find_path(trak, &[b"mdia", b"hdlr"])?.ok_or(FormatError::NotMp4)?;
207    if trak[hp..hp + hl].get(8..12) != Some(b"soun") {
208        return Err(FormatError::NotMp4);
209    }
210    Ok(())
211}
212
213/// Validate the supported shape; return the ftyp/moov/mdat boxes (absolute offsets
214/// in `buf`). Rejects fragmented, video, multi-track, and multi-`mdat` files.
215fn locate(buf: &[u8]) -> Result<(BoxRef, BoxRef, BoxRef)> {
216    let top = child_boxes(buf).map_err(|_| FormatError::NotMp4)?;
217    if top.iter().any(|b| &b.kind == b"moof") {
218        return Err(FormatError::NotMp4);
219    }
220    let one = |kind: &[u8; 4]| -> Result<BoxRef> {
221        let mut it = top.iter().filter(|b| &b.kind == kind);
222        let first = it.next().copied().ok_or(FormatError::NotMp4)?;
223        if it.next().is_some() {
224            return Err(FormatError::NotMp4);
225        }
226        Ok(first)
227    };
228    let ftyp = one(b"ftyp")?;
229    let moov = one(b"moov")?;
230    let mdat = one(b"mdat")?;
231
232    validate_moov(moov.payload(buf))?;
233    Ok((ftyp, moov, mdat))
234}
235
236/// Parse the file and return the `mdat` payload bounds, or an error to skip it.
237pub fn locate_audio(buf: &[u8]) -> Result<Mp4Bounds> {
238    let (_ftyp, _moov, mdat) = locate(buf)?;
239    Ok(Mp4Bounds {
240        audio_offset: mdat.payload_start() as u64,
241        audio_length: (mdat.total_len - mdat.header_len) as u64,
242    })
243}
244
/// The structural pieces `synthesize_layout` works from, gathered from the
/// backing file in a single pass.
#[derive(Clone, Debug, PartialEq)]
pub struct Mp4Scan {
    /// The complete `ftyp` box, header included.
    pub ftyp: Vec<u8>,
    /// The complete `moov` box, header included.
    pub moov: Vec<u8>,
    /// Only the header bytes of the `mdat` box.
    pub mdat_header: Vec<u8>,
    /// File offset of the first `mdat` payload byte.
    pub mdat_payload_offset: u64,
    /// Length of the `mdat` payload in bytes.
    pub mdat_payload_len: u64,
}
254
255pub fn read_structure(buf: &[u8]) -> Result<Mp4Scan> {
256    let (ftyp, moov, mdat) = locate(buf)?;
257    Ok(Mp4Scan {
258        ftyp: buf[ftyp.start..ftyp.end()].to_vec(),
259        moov: buf[moov.start..moov.end()].to_vec(),
260        mdat_header: buf[mdat.start..mdat.payload_start()].to_vec(),
261        mdat_payload_offset: mdat.payload_start() as u64,
262        mdat_payload_len: (mdat.total_len - mdat.header_len) as u64,
263    })
264}
265
266/// Read the structural boxes (`ftyp`, `moov`, and the `mdat` header) by seeking,
267/// **never** reading the `mdat` payload — for audiobooks that payload is hundreds
268/// of MB and is served from the backing file at read time. Produces an `Mp4Scan`
269/// byte-identical to `read_structure` on the same file, so synthesis is unchanged.
270///
271/// The header walk reads only 8 bytes per top-level box (16 for a 64-bit
272/// largesize), so it skips over the `mdat` payload to reach a trailing `moov`.
273pub fn read_structure_from<R: Read + Seek>(
274    r: &mut R,
275    file_len: u64,
276) -> std::result::Result<Mp4Scan, Mp4ScanError> {
277    fn region<R: Read + Seek>(r: &mut R, off: u64, len: usize) -> io::Result<Vec<u8>> {
278        r.seek(SeekFrom::Start(off))?;
279        let mut buf = vec![0u8; len];
280        r.read_exact(&mut buf)?;
281        Ok(buf)
282    }
283
284    // (start_offset, header) for each box we care about.
285    let mut ftyp: Option<(u64, BoxHeader)> = None;
286    let mut moov: Option<(u64, BoxHeader)> = None;
287    let mut mdat: Option<(u64, BoxHeader)> = None;
288    let mut dup = false;
289
290    let mut pos = 0u64;
291    while pos + 8 <= file_len {
292        // Read exactly the header — 8 bytes, plus 8 more only for a largesize box.
293        // This guarantees we never touch a box's payload (notably mdat's).
294        let first8 = region(r, pos, 8)?;
295        let size32 = u32::from_be_bytes(first8[0..4].try_into().unwrap());
296        // A largesize box needs 8 more header bytes; if the file is truncated
297        // mid-header this read surfaces as Mp4ScanError::Io (UnexpectedEof).
298        let hdr = if size32 == 1 {
299            let mut h = first8;
300            h.extend_from_slice(&region(r, pos + 8, 8)?);
301            h
302        } else {
303            first8
304        };
305        let bh = box_header(&hdr, file_len - pos)?;
306        let total = bh.total_len;
307        match &bh.kind {
308            b"moof" => return Err(FormatError::NotMp4.into()),
309            b"ftyp" => dup |= ftyp.replace((pos, bh)).is_some(),
310            b"moov" => dup |= moov.replace((pos, bh)).is_some(),
311            b"mdat" => dup |= mdat.replace((pos, bh)).is_some(),
312            _ => {}
313        }
314        pos += total;
315    }
316    if dup {
317        return Err(FormatError::NotMp4.into());
318    }
319
320    let (ftyp_s, ftyp_h) = ftyp.ok_or(FormatError::NotMp4)?;
321    let (moov_s, moov_h) = moov.ok_or(FormatError::NotMp4)?;
322    let (mdat_s, mdat_h) = mdat.ok_or(FormatError::NotMp4)?;
323
324    for (box_kind, total_len) in [("ftyp", ftyp_h.total_len), ("moov", moov_h.total_len)] {
325        if total_len > MAX_MP4_METADATA_BYTES {
326            return Err(Mp4ScanError::MetadataTooLarge {
327                box_kind,
328                size: total_len,
329                cap: MAX_MP4_METADATA_BYTES,
330            });
331        }
332    }
333
334    // `try_from` rather than `as usize`: on a 32-bit target an oversized box would
335    // truncate silently; a box larger than `usize` is malformed for our purposes.
336    let ftyp_len = usize::try_from(ftyp_h.total_len).map_err(|_| FormatError::Malformed)?;
337    let moov_len = usize::try_from(moov_h.total_len).map_err(|_| FormatError::Malformed)?;
338    let ftyp_bytes = region(r, ftyp_s, ftyp_len)?;
339    let moov_bytes = region(r, moov_s, moov_len)?;
340    let mdat_header = region(r, mdat_s, usize_from(mdat_h.header_len))?;
341
342    validate_moov(&moov_bytes[usize_from(moov_h.header_len)..])?;
343
344    Ok(Mp4Scan {
345        ftyp: ftyp_bytes,
346        moov: moov_bytes,
347        mdat_header,
348        mdat_payload_offset: mdat_s + mdat_h.header_len,
349        mdat_payload_len: mdat_h.total_len - mdat_h.header_len,
350    })
351}
352
353/// Locate `moov/udta/meta/ilst` and return the ilst payload range absolute within
354/// `buf`. The walk is lenient ([`find_box_lenient`]) at every level: a single
355/// malformed sibling box anywhere on the path must not suppress an otherwise
356/// well-formed `ilst`, matching the metadata extractors' "seed what you can"
357/// contract (#542). Strictness is reserved for the audio/structure path.
358///
359/// `meta` is normally an ISO FullBox — 4 version/flags bytes precede its children —
360/// but QuickTime also uses a bare `meta` with no such prefix. Per ISO 14496-12 the
361/// version/flags word is always zero, so a zero first word marks the FullBox variant
362/// (skip 4) and any other value marks the bare variant (skip 0) (#543). A bare
363/// first child declaring `size == 0` ("extends to end") is the one ambiguous case
364/// and is misread as a FullBox, mirroring the wider tooling's heuristic.
365fn ilst_region(buf: &[u8]) -> Option<(usize, usize)> {
366    let moov = find_box_lenient(buf, b"moov")?;
367    let mp = moov.payload(buf);
368    let base = moov.payload_start();
369    let udta = find_box_lenient(mp, b"udta")?;
370    let up = udta.payload_start();
371    let udta_payload = udta.payload(mp);
372    let meta = find_box_lenient(udta_payload, b"meta")?;
373    let meta_payload = meta.payload(udta_payload);
374    let prefix = if meta_payload.get(..4) == Some(&[0, 0, 0, 0][..]) {
375        4
376    } else {
377        0
378    };
379    let meta_children = meta_payload.get(prefix..)?;
380    let il = find_box_lenient(meta_children, b"ilst")?;
381    let start = base + up + meta.payload_start() + prefix + il.payload_start();
382    Some((start, il.total_len - il.header_len))
383}
384
385/// Parse a `----` freeform atom payload into `(key, value)` pairs. Folds
386/// (mean, name) to a canonical key via the vocabulary, else keys on the verbatim
387/// `name`. One pair per UTF-8 (`type 1`) `data` sub-box — the iTunes multi-value
388/// convention; binary-typed `data` boxes are left to [`read_binary_tags`]. Empty
389/// if malformed.
390fn read_freeform(inner: &[u8]) -> Vec<(String, String)> {
391    let Some(name_box) = find_box_lenient(inner, b"name") else {
392        return Vec::new();
393    };
394    let np = name_box.payload(inner);
395    if np.len() < 4 {
396        return Vec::new();
397    }
398    // name/mean payloads start with a 4-byte FullBox [version 1][flags 3] prefix.
399    let Ok(name) = std::str::from_utf8(&np[4..]) else {
400        return Vec::new();
401    };
402    let mean = find_box_lenient(inner, b"mean").map_or("com.apple.iTunes", |m| {
403        let p = m.payload(inner);
404        if p.len() >= 4 {
405            std::str::from_utf8(&p[4..]).unwrap_or("com.apple.iTunes")
406        } else {
407            "com.apple.iTunes"
408        }
409    });
410    let key = crate::tagmap::mp4_freeform_to_key(mean, name)
411        .map_or_else(|| name.to_string(), str::to_string);
412    let mut out = Vec::new();
413    for data in child_boxes_lenient(inner) {
414        if &data.kind != b"data" {
415            continue;
416        }
417        let dp = data.payload(inner);
418        if dp.len() < 8 {
419            continue;
420        }
421        // The `data` box is `[type: u32][locale: u32][value]`; type 1 == UTF-8 text.
422        // Binary-typed freeform values are not text tags, so skip them.
423        let type_code = u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]);
424        if type_code != 1 {
425            continue;
426        }
427        if let Ok(value) = std::str::from_utf8(&dp[8..]) {
428            out.push((key.clone(), value.to_string()));
429        }
430    }
431    out
432}
433
/// Render a `trkn`/`disk` value body, laid out as
/// `[reserved 2][number 2][total 2]…`, as the canonical `"N"` or `"N/M"`
/// string. `"N/M"` mirrors how ID3 `TRCK`/`TPOS` carry the total inside the
/// shared `tracknumber`/`discnumber` value; the `/M` is left off when the total
/// is zero or missing. The caller must pass at least 4 bytes.
fn number_total(value: &[u8]) -> String {
    debug_assert!(
        value.len() >= 4,
        "number_total requires the 4-byte number prefix"
    );
    let number = u16::from_be_bytes([value[2], value[3]]);
    // A value too short to hold the total field counts as "no total".
    let total = value
        .get(4..6)
        .map_or(0, |pair| u16::from_be_bytes([pair[0], pair[1]]));
    if total == 0 {
        number.to_string()
    } else {
        format!("{number}/{total}")
    }
}
455
456/// Lenient: returns empty / skips any malformed atom and never errors — this only
457/// seeds metadata from existing files, so a missing or garbled tag must simply be
458/// absent. Text atoms map via the vocabulary; `trkn`/`disk` yield track/disc
459/// numbers as `"N"`/`"N/M"`; `----` freeform atoms key on their name (folded when
460/// known). Every `data` sub-box of an atom is read, so multi-value atoms recover
461/// all their values. Other atoms are skipped.
462pub fn read_tags(buf: &[u8]) -> Vec<(String, String)> {
463    let Some((start, len)) = ilst_region(buf) else {
464        return Vec::new();
465    };
466    let ilst = &buf[start..start + len];
467    let mut out = Vec::new();
468    for atom in child_boxes_lenient(ilst) {
469        let inner = atom.payload(ilst);
470        if &atom.kind == b"----" {
471            out.extend(read_freeform(inner));
472            continue;
473        }
474        let text_key = crate::tagmap::mp4_atom_to_key(&atom.kind);
475        for data in child_boxes_lenient(inner) {
476            if &data.kind != b"data" {
477                continue;
478            }
479            let dp = data.payload(inner);
480            if dp.len() < 8 {
481                continue;
482            }
483            let value = &dp[8..]; // skip [type 4][locale 4]
484            if let Some(key) = text_key {
485                if let Ok(s) = std::str::from_utf8(value) {
486                    out.push((key.to_string(), s.to_string()));
487                }
488            } else if &atom.kind == b"trkn" && value.len() >= 4 {
489                out.push(("tracknumber".into(), number_total(value)));
490            } else if &atom.kind == b"disk" && value.len() >= 4 {
491                out.push(("discnumber".into(), number_total(value)));
492            } else if let Some(key) = crate::tagmap::mp4_integer_atom_to_key(&atom.kind) {
493                // tmpo/cpil/pgap: a big-endian unsigned integer in the value bytes.
494                let mut n: u64 = 0;
495                for &b in value.iter().take(8) {
496                    n = (n << 8) | u64::from(b);
497                }
498                out.push((key.to_string(), n.to_string()));
499            }
500        }
501    }
502    out
503}
504
/// Record of an embedded `covr` image or binary `----` payload that a reader
/// left out because it was larger than the caller's size cap. It holds a
/// descriptor and the payload's size only, never the payload, so the caller
/// can log the lossy drop (this format layer has no logging facade) without
/// the oversized item being copied out of a possibly large `moov` (#343).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OversizeDrop {
    /// The cover art's MIME type, or the binary tag's `----:<mean>:<name>` key.
    pub descriptor: String,
    /// Byte size of the dropped payload body, not counting the 8-byte `data` header.
    pub bytes: usize,
}
517
518/// Like [`read_pictures`], but also returns the oversized `covr` images skipped
519/// over `max_art_bytes`, so the caller can log each lossy drop. The size check
520/// still happens before any copy — an oversized image is described, never
521/// materialized. See [`OversizeDrop`].
522pub fn read_pictures_reporting(
523    buf: &[u8],
524    max_art_bytes: usize,
525) -> (Vec<EmbeddedPicture>, Vec<OversizeDrop>) {
526    let Some((start, len)) = ilst_region(buf) else {
527        return (Vec::new(), Vec::new());
528    };
529    let ilst = &buf[start..start + len];
530    let mut out = Vec::new();
531    let mut dropped = Vec::new();
532    for atom in child_boxes_lenient(ilst) {
533        if &atom.kind != b"covr" {
534            continue;
535        }
536        let inner = atom.payload(ilst);
537        for data in child_boxes_lenient(inner) {
538            if &data.kind != b"data" {
539                continue;
540            }
541            let dp = data.payload(inner);
542            if dp.len() < 8 {
543                continue;
544            }
545            let mime = match u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]) {
546                13 => "image/jpeg",
547                14 => "image/png",
548                _ => continue,
549            };
550            if dp.len() - 8 > max_art_bytes {
551                dropped.push(OversizeDrop {
552                    descriptor: mime.to_string(),
553                    bytes: dp.len() - 8,
554                });
555                continue;
556            }
557            out.push(EmbeddedPicture {
558                mime: mime.to_string(),
559                picture_type: PictureType::new(3).expect("3 is in range"),
560                description: String::new(),
561                width: 0,
562                height: 0,
563                data: dp[8..].to_vec(),
564            });
565        }
566    }
567    (out, dropped)
568}
569
570/// Lenient: returns empty / skips any malformed atom and never errors — this only
571/// seeds cover art from existing files, so a missing or garbled picture must simply be absent.
572/// Every `data` child of every `covr` atom yields one picture (the iTunes
573/// multiple-artwork convention); non-`data` children are skipped.
574///
575/// `max_art_bytes` caps each image body: a `data` payload whose image bytes
576/// (after the 8-byte `[type][locale]` header) exceed it is skipped before any
577/// copy, so an oversized `covr` in a large `moov` is never materialized. Use
578/// [`read_pictures_reporting`] to also recover the oversized drops for logging.
579pub fn read_pictures(buf: &[u8], max_art_bytes: usize) -> Vec<EmbeddedPicture> {
580    read_pictures_reporting(buf, max_art_bytes).0
581}
582
583/// Like [`read_binary_tags`], but also returns the oversized `----` values
584/// skipped over `max_binary_tag_bytes`, so the caller can log each lossy drop.
585/// The size check still happens before any copy — an oversized value is
586/// described, never materialized. See [`OversizeDrop`].
587pub fn read_binary_tags_reporting(
588    buf: &[u8],
589    max_binary_tag_bytes: usize,
590) -> (Vec<EmbeddedBinaryTag>, Vec<OversizeDrop>) {
591    let Some((start, len)) = ilst_region(buf) else {
592        return (Vec::new(), Vec::new());
593    };
594    let ilst = &buf[start..start + len];
595    let mut out = Vec::new();
596    let mut dropped = Vec::new();
597    for atom in child_boxes_lenient(ilst) {
598        if &atom.kind != b"----" {
599            continue;
600        }
601        let inner = atom.payload(ilst);
602        // name/mean payloads carry a 4-byte FullBox prefix; default mean to iTunes.
603        let Some(name) = find_box_lenient(inner, b"name").and_then(|n| {
604            let p = n.payload(inner);
605            (p.len() >= 4)
606                .then(|| std::str::from_utf8(&p[4..]).ok())
607                .flatten()
608        }) else {
609            continue;
610        };
611        let mean = find_box_lenient(inner, b"mean").map_or("com.apple.iTunes", |m| {
612            let p = m.payload(inner);
613            if p.len() >= 4 {
614                std::str::from_utf8(&p[4..]).unwrap_or("com.apple.iTunes")
615            } else {
616                "com.apple.iTunes"
617            }
618        });
619        let key = format!("----:{mean}:{name}");
620        // Iterate every `data` sub-box, mirroring the text path: a `----` atom can
621        // carry a type-1 text value and a separate binary value, so inspecting only
622        // the first `data` would lose the binary one (#525).
623        for data in child_boxes_lenient(inner) {
624            if &data.kind != b"data" {
625                continue;
626            }
627            let dp = data.payload(inner);
628            if dp.len() < 8 {
629                continue;
630            }
631            // `data` body is `[type: u32][locale: u32][value]`; type 1 == UTF-8 text,
632            // which is the text path's job. Everything else is opaque binary.
633            let type_code = u32::from_be_bytes([dp[0], dp[1], dp[2], dp[3]]);
634            if type_code == 1 {
635                continue;
636            }
637            if dp.len() - 8 > max_binary_tag_bytes {
638                dropped.push(OversizeDrop {
639                    descriptor: key.clone(),
640                    bytes: dp.len() - 8,
641                });
642                continue;
643            }
644            out.push(EmbeddedBinaryTag {
645                key: key.clone(),
646                payload: dp[8..].to_vec(),
647            });
648        }
649    }
650    (out, dropped)
651}
652
653/// Extract opaque (non-text) MP4 `----` freeform atoms for binary-tag passthrough.
654/// One `EmbeddedBinaryTag` per binary-typed (type code != 1) `data` sub-box of
655/// each `----` atom: key `----:<mean>:<name>`, payload the `data` value bytes
656/// (after the 8-byte `[type][locale]` header). Text freeform atoms (type 1) are
657/// handled by `read_tags`, so the two paths never double-store. Lenient:
658/// malformed atoms are skipped. Every `data` sub-box is inspected, so a mixed
659/// atom carrying both a text and a binary value recovers the binary one.
660///
661/// `max_binary_tag_bytes` caps each value: a `data` payload whose value bytes
662/// (after the 8-byte `[type][locale]` header) exceed it is skipped before any
663/// copy, so an oversized `----` in a large `moov` is never materialized. Use
664/// [`read_binary_tags_reporting`] to also recover the oversized drops for logging.
665pub fn read_binary_tags(buf: &[u8], max_binary_tag_bytes: usize) -> Vec<EmbeddedBinaryTag> {
666    read_binary_tags_reporting(buf, max_binary_tag_bytes).0
667}
668
669mod synth;
670pub use synth::synthesize_layout;
671
672#[cfg(test)]
673mod tests;