Skip to main content

container/
demux.rs

1use anyhow::{Context, Result, bail};
2use codec::frame::{
3    ColorMetadata, ColorSpace, ContentLightLevel, MasteringDisplay, PixelFormat, StreamInfo,
4    TransferFn,
5};
6use matroska_demuxer::{
7    Colour as MkvColour, Frame as MkvFrame, MasteringMetadata as MkvMastering, MatrixCoefficients,
8    MatroskaFile, Primaries, Range as MkvRange, TrackType as MkvTrackType, TransferCharacteristics,
9};
10use mp4::Mp4Reader;
11use std::io::Cursor;
12
13use crate::mp4_sanitize::sanitize_isobmff_box_sizes;
14
15use crate::annexb::{
16    AvcConfig, HevcConfig, NaluCodec, ParamSetTracker, length_prefixed_to_annexb_tracked,
17    parse_avcc, parse_hvcc,
18};
19use crate::avi::demux_avi;
20use crate::streaming::{DemuxHeader, Sample, StreamingDemuxer};
21use crate::ts::demux_ts;
22use crate::{MkvColorInfo, MkvMasteringMetadata};
23
/// Everything a whole-buffer demux hands back to the caller: the video
/// stream's codec label and description, its samples, and an optional
/// audio track.
pub struct DemuxResult {
    /// Codec label of the video stream. `demux_mp4` stores the same label
    /// it writes into `info.codec`.
    pub codec: String,
    /// Stream description (dimensions, frame rate, duration, pixel format,
    /// color metadata, ...) built by the demuxer.
    pub info: StreamInfo,
    /// Video samples, one buffer per sample, in read order. On the MP4 path
    /// H.264 / H.265 samples are converted from length-prefixed form via
    /// `length_prefixed_to_annexb_tracked`; every other codec is passed
    /// through as stored.
    pub samples: Vec<Vec<u8>>,
    /// Optional audio track carried through for passthrough muxing. On the
    /// MP4 path (`extract_mp4_audio`) this is populated for AAC (`mp4a`
    /// sample entry), Opus, AC-3 and E-AC-3 tracks; for MKV, codec id
    /// `A_AAC`. Other audio codecs log a warning and are dropped, leaving
    /// this `None`.
    pub audio: Option<AudioTrack>,
}
33
/// Audio track extracted for passthrough or transcode. The `codec` label
/// tells which of the config fields is meaningful (Squad-18 + Squad-23 +
/// Squad-26):
/// - **AAC**: `codec = "aac"`, `asc` holds the verbatim
///   AudioSpecificConfig bytes sourced from the MP4 esds descriptor (not
///   the mp4 crate's rebuilt form) or MKV `CodecPrivate`, so HE-AAC /
///   xHE-AAC signaling survives the copy. `codec_private` is empty.
/// - **Opus**: `codec = "opus"`, `codec_private` holds the RFC 7845 §5.1
///   `OpusHead` body verbatim — for MKV/WebM that's exactly the
///   `CodecPrivate` element bytes (post-magic — RFC 7845 §5.2 specifies
///   no magic prefix for the MKV CodecPrivate); for MP4-Opus that's the
///   `dOps` body re-serialised in OpusHead's LE numeric convention. `asc`
///   is empty.
/// - **AC-3**: `codec = "ac3"`, `codec_private` holds the first 3 bytes
///   of the MP4 `dac3` box body. `asc` is empty.
/// - **E-AC-3**: `codec = "eac3"`, `codec_private` holds the MP4 `dec3`
///   box body. `asc` is empty.
///
/// `samples` are codec-native packets (AAC: ADTS-stripped raw access
/// units; Opus: TOC-prefixed Opus packets, one per frame; AC-3 / E-AC-3:
/// one MP4 sample per entry as stored in the container). `durations`
/// are per-sample in `timescale` units.
#[derive(Debug, Clone)]
pub struct AudioTrack {
    /// Codec label: `"aac"`, `"opus"`, `"ac3"` or `"eac3"` on the MP4 path.
    pub codec: String,
    /// Codec-native packets, one buffer per container sample.
    pub samples: Vec<Vec<u8>>,
    /// Output sample rate in Hz as derived from the codec configuration
    /// (ASC, `dac3` / `dec3` body, or OpusHead input sample rate).
    pub sample_rate: u32,
    /// Channel count as derived from the codec configuration.
    pub channels: u16,
    /// AAC-only: AudioSpecificConfig bytes. Empty for non-AAC codecs.
    pub asc: Vec<u8>,
    /// Codec configuration for the non-AAC codecs; empty for AAC.
    /// Opus: OpusHead body bytes (RFC 7845 §5.1) — the 8-byte 'OpusHead'
    /// magic prefix is NOT included, only the post-magic body.
    /// AC-3: the 3-byte `dac3` body. E-AC-3: the `dec3` body.
    pub codec_private: Vec<u8>,
    /// Tick rate that `durations` are expressed in (the track timescale).
    pub timescale: u32,
    /// Per-sample duration in `timescale` ticks, parallel to `samples`.
    pub durations: Vec<u32>,
}
65
66/// Dispatch to the right demuxer based on container magic bytes.
67pub fn demux(data: &[u8]) -> Result<DemuxResult> {
68    match detect_container(data) {
69        // MOV shares its demuxer with MP4 — same ISOBMFF box tree, same
70        // sample-entry structure. `detect_container` returns "mp4" for
71        // both `ftyp mp4*` and `ftyp qt  ` / bare-moov MOVs.
72        "mp4" => demux_mp4(data),
73        "mkv" => demux_mkv(data),
74        "avi" => demux_avi(data),
75        "ts" => demux_ts(data),
76        other => bail!("unsupported container: {other}"),
77    }
78}
79
/// Classify a buffer by its magic bytes.
///
/// Returns one of `"mp4"`, `"mkv"`, `"avi"`, `"ts"` or `"unknown"`.
/// Buffers shorter than 12 bytes are always `"unknown"`.
fn detect_container(data: &[u8]) -> &'static str {
    const ISOBMFF_LEADERS: [&[u8]; 3] = [b"ftyp", b"moov", b"mdat"];
    const EBML_MAGIC: [u8; 4] = [0x1A, 0x45, 0xDF, 0xA3];
    const TS_SYNC: u8 = 0x47;
    const TS_PACKET_LEN: usize = 188;

    if data.len() < 12 {
        return "unknown";
    }

    // ISOBMFF: the first box's fourcc sits at bytes 4..8. MP4 and MOV
    // normally lead with `ftyp`; older MOVs may start straight at
    // `moov` or `mdat`, so those are accepted as well.
    let first_box = &data[4..8];
    if ISOBMFF_LEADERS.iter().any(|fourcc| *fourcc == first_box) {
        return "mp4";
    }

    // Matroska / WebM open with the EBML signature.
    if data.starts_with(&EBML_MAGIC) {
        return "mkv";
    }

    // AVI is RIFF-wrapped: "RIFF" <size> "AVI ".
    if data.starts_with(b"RIFF") && &data[8..12] == b"AVI " {
        return "avi";
    }

    // MPEG-TS: a lone 0x47 is common in arbitrary payloads, so the sync
    // byte must show up at packet 0 AND packet 1; packet 2 is checked
    // too, but only when the buffer is long enough to contain it.
    let sync_at = |packet: usize| data.get(packet * TS_PACKET_LEN).map(|&byte| byte == TS_SYNC);
    let third_packet_ok = sync_at(2) != Some(false);
    if sync_at(0) == Some(true) && sync_at(1) == Some(true) && third_packet_ok {
        return "ts";
    }

    "unknown"
}
110
111pub fn demux_mp4(data: &[u8]) -> Result<DemuxResult> {
112    // Pre-pass to clamp any over-reported child box sizes (common
113    // on iPhone-recorded MP4s where the legacy QuickTime `wave`
114    // atom inside `mp4a` exposes child boxes whose advertised size
115    // exceeds the parent's remaining payload). The sanitizer is
116    // byte-identical on clean files, so this is safe to run
117    // unconditionally — only malformed files mutate. See
118    // `mp4_sanitize::sanitize_isobmff_box_sizes`.
119    let sanitized = sanitize_isobmff_box_sizes(data);
120    let data: &[u8] = &sanitized;
121    let size = data.len() as u64;
122    let cursor = Cursor::new(data);
123    let reader = Mp4Reader::read_header(cursor, size).context("reading MP4 header")?;
124
125    let video_track = reader
126        .tracks()
127        .values()
128        .find(|t| t.track_type().ok() == Some(mp4::TrackType::Video))
129        .context("no video track in MP4")?;
130
131    let track_id = video_track.track_id();
132    let codec_from_mp4 = format_codec(video_track);
133    // mp4 0.14 has no av01 sample-entry support: tracks using AV1 come back
134    // as "unknown" and the decoder factory would fail. Byte-scan stsd for
135    // the av01 fourcc to recover the codec label. Sample iteration still
136    // works because stco/stsc/stsz are read independently of the sample
137    // entry; AV1-in-MP4 samples are raw OBU streams with no AVCC wrapping.
138    let codec = if codec_from_mp4 == "unknown" && has_av01_sample_entry(data) {
139        "av1".to_string()
140    } else if codec_from_mp4 == "unknown" && hevc_sample_entry_fourcc(data).is_some() {
141        // hvc1 sample entry — mp4 0.14 only parses hev1. Same length-
142        // prefixed bitstream, different fourcc. We retrieve VPS/SPS/PPS
143        // from the hvcC box via byte-scan (below) and convert samples
144        // to Annex-B the same way as avc1.
145        "h265".to_string()
146    } else if codec_from_mp4 == "unknown" && prores_sample_entry_fourcc(data).is_some() {
147        // Apple ProRes lives in MOV (which is ISOBMFF, same box tree as
148        // MP4) under one of six fourccs — mp4 0.14 returns `unknown` for
149        // all of them. Samples are stored as self-contained ProRes frames
150        // with no AVCC-style length prefix, so stco/stsc/stsz iteration
151        // already reads them correctly — we just need the codec label so
152        // downstream decode (legacy-cpu-eng's lane) can dispatch.
153        "prores".to_string()
154    } else {
155        codec_from_mp4
156    };
157    let width = video_track.width() as u32;
158    let height = video_track.height() as u32;
159    let sample_count = video_track.sample_count();
160    let duration = video_track.duration().as_secs_f64();
161    let frame_rate = if duration > 0.0 {
162        sample_count as f64 / duration
163    } else {
164        30.0
165    };
166    let bitrate = video_track.bitrate() as u64;
167
168    // Squad-21: pull `mdcv` and `clli` boxes nested inside the visual
169    // sample entry (`stsd > {av01, hvc1, hev1, ...}`) and surface them
170    // to ColorMetadata so the muxer can round-trip them. These boxes
171    // are an HDR10 / HDR10+ requirement — without them, Apple's player
172    // (and many TVs) silently fall back to BT.709 limited even when
173    // colr nclx says BT.2020.
174    let mp4_color = extract_mp4_visual_color_metadata(data);
175    let initial_color_metadata = ColorMetadata {
176        mastering_display: mp4_color.mastering_display,
177        content_light_level: mp4_color.content_light_level,
178        ..Default::default()
179    };
180
181    let info = StreamInfo {
182        codec: codec.clone(),
183        width,
184        height,
185        frame_rate,
186        duration,
187        pixel_format: PixelFormat::Yuv420p,
188        color_space: ColorSpace::Bt709,
189        total_frames: sample_count as u64,
190        bitrate,
191        // SDR defaults for primaries/transfer/matrix at demux layer —
192        // those flow from the decoder's sequence_callback (NVDEC) or
193        // SPS VUI parser (HEVC CPU). Mastering display + content
194        // light level live in MP4 sample-entry boxes (extracted above)
195        // so they CAN come from the demuxer directly.
196        color_metadata: initial_color_metadata,
197    };
198
199    let cursor = Cursor::new(data);
200    let mut reader = Mp4Reader::read_header(cursor, size).context("re-reading MP4 for samples")?;
201
202    let mut samples = Vec::with_capacity(sample_count as usize);
203
204    let needs_annexb = matches!(codec.as_str(), "h264" | "h265");
205    // length_size defaults to 4 (the ISOBMFF near-universal pick); when
206    // we can reach the avcC/hvcC box we override with the recorded value.
207    // A length_size of 2 or even 1 is legal and has been observed in
208    // streaming-profile MP4s.
209    let (sps_pps, length_size) = if needs_annexb {
210        if codec == "h264" {
211            match extract_avc_config(data) {
212                Some(cfg) => (cfg.parameter_sets, cfg.length_size),
213                // mp4 0.14 successfully parsed the avcC high-level but we
214                // couldn't recover length_size from the box bytes — fall
215                // back to the crate's parsed SPS/PPS and assume 4-byte.
216                None => (extract_sps_pps(&reader, track_id), 4u8),
217            }
218        } else {
219            // h265: parse hvcC straight from the box bytes (mp4 0.14
220            // doesn't surface either length_size or the hvcC arrays).
221            match extract_hevc_config(data) {
222                Some(cfg) => (cfg.parameter_sets, cfg.length_size),
223                None => (Vec::new(), 4u8),
224            }
225        }
226    } else {
227        (Vec::new(), 4u8)
228    };
229
230    // Per-stream parameter-set emission tracker (#67/#68). Replaces the
231    // older `prepend on sample_idx==1` heuristic, which mishandled
232    // ExoPlayer open-GOP MP4s where sample 0 is `SPS + non-IDR slice`
233    // and the first IRAP arrives later carrying only a slice NAL.
234    // The tracker scans inline NAL types per sample and prepends only
235    // the parameter-set kinds that are still missing on the first IRAP.
236    let mut avc_tracker = if needs_annexb {
237        Some(ParamSetTracker::new(if codec == "h264" {
238            NaluCodec::Avc
239        } else {
240            NaluCodec::Hevc
241        }))
242    } else {
243        None
244    };
245
246    for sample_idx in 1..=sample_count {
247        let sample = reader
248            .read_sample(track_id, sample_idx)
249            .context("reading sample")?;
250
251        if let Some(sample) = sample {
252            let sample_data = sample.bytes.to_vec();
253
254            if let Some(tracker) = avc_tracker.as_mut() {
255                let annexb =
256                    length_prefixed_to_annexb_tracked(&sample_data, length_size, tracker, &sps_pps);
257                samples.push(annexb);
258            } else {
259                samples.push(sample_data);
260            }
261        }
262    }
263
264    // Replace the hard-coded yuv420p with a real sniff from the first
265    // sample's sequence header. detect() is safe on short/malformed
266    // data — falls back to Yuv420p.
267    let detected_pf = codec::pixel_format::detect(&codec, &samples);
268    let info = StreamInfo {
269        pixel_format: detected_pf,
270        ..info
271    };
272
273    let audio = extract_mp4_audio(data);
274
275    Ok(DemuxResult {
276        codec,
277        info,
278        samples,
279        audio,
280    })
281}
282
283/// Pull the audio track out of an MP4 / MOV for passthrough.
284///
285/// ─── Codec families recognised ──────────────────────────────────────
286/// (Squad-18 + Squad-23 + Squad-26)
287/// - AAC-LC + HE-AAC v1/v2 + xHE-AAC USAC (`mp4a` / `enca` sample entry
288///   + `esds`): emits `codec="aac"`, `asc` populated, `codec_private`
289///   empty.
290/// - Opus (`Opus` sample entry + `dOps`, RFC 7845 §4.4): emits
291///   `codec="opus"`, `codec_private` populated with the OpusHead-form
292///   body (LE numeric convention), `asc` empty.
293/// - AC-3 (`ac-3` sample entry + `dac3`, ETSI TS 102 366 §F.2): emits
294///   `codec="ac3"`, `codec_private` populated with the 3-byte dac3 body.
295/// - E-AC-3 (`ec-3` sample entry + `dec3`, ETSI TS 102 366 §F.5): emits
296///   `codec="eac3"`, `codec_private` populated with the dec3 body.
297///
298/// Other audio codecs (MP3, Vorbis, ...) log a warning and the track is
299/// dropped — pipeline falls back to video-only.
300///
301/// ─── iPhone / Apple QuickTime resilience ────────────────────────────
302///
303/// Apple's recorder tooling produces several MOV / MP4 shapes that
304/// trip strict ISOBMFF parsers and the `mp4` crate's classifier in
305/// particular. The full path here was rebuilt incrementally against
306/// real-world iPhone uploads (2026-05-03 → 2026-05-04 → 2026-05-07);
307/// the contract has THREE pieces that all must be in place for an
308/// iPhone source to round-trip with audio:
309///
310///   1. **`crates/container/src/mp4_sanitize.rs::sanitize_isobmff_box_sizes`**
311///      runs at every MP4 demux entry point. Clamps over-reported
312///      child box sizes (legacy QuickTime tooling sometimes emits
313///      `wave` children whose advertised size exceeds the parent),
314///      and CRITICALLY skips the 28-byte AudioSampleEntry fixed prefix
315///      ONLY when the parent fourcc is `stsd` — without that
316///      context-aware prefix handling, the inner `mp4a` inside `wave`
317///      gets mis-aligned and the recursion loses the `esds` sibling.
318///
319///   2. **`extract_aac_asc` (this file)** identifies audio traks by
320///      `smhd` presence (positive evidence of audio intent — strictly
321///      stronger than guessing by stsd[0]'s fourcc), walks ALL stsd
322///      entries (not just entry[0] — some Apple sources emit
323///      multi-entry stsd), accepts `mp4a` AND `enca`, descends into
324///      `wave` via `find_esds_recursive`, and falls back to a
325///      brute-force `esds` scan with a warn so unforeseen wrapper
326///      shapes still produce audio.
327///
328///   3. **`mp4_has_aac_sample_entry` (this file)** mirrors the same
329///      smhd-based detection so the pre-flight check that bypasses
330///      `mp4 0.14`'s broken `track.media_type()` matches the
331///      extraction path's notion of "this trak has AAC".
332///
333/// Diagnostic logging: every silent-drop path here emits a
334/// `tracing::warn!` with enough context (codec, hex prefix of ASC,
335/// trak structure hint) that the next iPhone-shaped failure mode is
336/// reproducible from CloudWatch alone. If you change this method, do
337/// NOT remove the warns — add new ones for any new fail paths you
338/// introduce.
339///
340/// Test coverage worth maintaining:
341/// - `mp4_sanitize::tests::inner_mp4a_inside_wave_is_not_treated_as_sample_entry`
342/// - any future test that constructs an iPhone-shaped synthetic MOV
343///   and asserts `extract_mp4_audio` returns `Some(AudioTrack)` with
344///   non-empty samples.
345fn extract_mp4_audio(data: &[u8]) -> Option<AudioTrack> {
346    let size = data.len() as u64;
347    let cursor = Cursor::new(data);
348    let reader = Mp4Reader::read_header(cursor, size).ok()?;
349    let track = reader
350        .tracks()
351        .values()
352        .find(|t| t.track_type().ok() == Some(mp4::TrackType::Audio))?;
353    let track_id = track.track_id();
354
355    // Detect Opus / AC-3 / E-AC-3 first by sample-entry 4-cc — mp4 0.14's
356    // `media_type()` doesn't surface those (it returns `unknown`), so we
357    // walk the stsd box manually. AAC stays on the existing mp4-crate
358    // path BUT with a manual `mp4a` 4cc fallback for iPhone-recorded
359    // MOVs whose audio sample entry wraps esds in a `wave` sub-box —
360    // `mp4 0.14`'s media_type() returns Err on those, which previously
361    // caused silent audio drop on every iPhone upload. Burned 2026-05-03.
362    let opus_dops = extract_mp4_opus_dops_body(data);
363    let ac3_cfg = extract_mp4_ac3_dac3_body(data);
364    let eac3_cfg = extract_mp4_eac3_dec3_body(data);
365    let media_type = track.media_type();
366    let crate_says_aac = media_type
367        .as_ref()
368        .map(|mt| matches!(mt, mp4::MediaType::AAC))
369        .unwrap_or(false);
370    let manual_says_aac = mp4_has_aac_sample_entry(data);
371    let is_aac = crate_says_aac || manual_says_aac;
372
373    if !is_aac && opus_dops.is_none() && ac3_cfg.is_none() && eac3_cfg.is_none() {
374        match media_type {
375            Ok(mt) => tracing::warn!(
376                codec = ?mt,
377                "audio passthrough skipped: only AAC / Opus / AC-3 / E-AC-3 are supported"
378            ),
379            Err(e) => tracing::warn!(
380                error = ?e,
381                "audio passthrough skipped: mp4 crate could not classify audio sample entry, \
382                 and manual stsd walk found no recognized 4cc"
383            ),
384        }
385        return None;
386    }
387
388    let timescale = track.timescale();
389    let sample_count = track.sample_count();
390
391    if is_aac {
392        // Verbatim ASC straight from esds — mp4-rust decodes it into
393        // {profile, freq_index, chan_conf} which discards HE-AAC / xHE-AAC
394        // extension bits. We walk the box tree ourselves.
395        //
396        // `extract_aac_asc` is the iPhone-survivable path: walks all
397        // traks, identifies audio via smhd, walks all stsd entries,
398        // accepts mp4a + enca, descends into wave, and falls back to a
399        // brute-force esds scan with a warn. If it returns None, every
400        // fail path inside has already logged; we don't need to log here.
401        let asc = match extract_aac_asc(data) {
402            Some(a) => a,
403            None => return None,
404        };
405        if asc.is_empty() {
406            tracing::warn!(
407                "AAC track found but AudioSpecificConfig is empty; dropping. \
408                 Source has an esds box but its DecoderSpecificInfo descriptor is \
409                 zero-length."
410            );
411            return None;
412        }
413        // Squad-25: surface the effective output channel count (post-PS
414        // upmix for HE-AAC v2 mono PS) and the SBR-doubled output rate
415        // for HE-AAC v1/v2. Falls back to the legacy core-only decoder
416        // when the structured parser declines (e.g. unrecognised ASC).
417        let parsed = crate::aac_asc::parse_aac_asc(&asc);
418        let sample_rate = match parsed
419            .as_ref()
420            .and_then(|p| p.sbr_sample_rate.or(Some(p.sample_rate)))
421            .or_else(|| decode_asc_sample_rate(&asc))
422        {
423            Some(sr) => sr,
424            None => {
425                tracing::warn!(
426                    asc_hex = %hex_prefix(&asc, 16),
427                    "AAC ASC sample rate could not be decoded; dropping audio. \
428                     Likely an extended sampling-frequency-index escape (0x0F) \
429                     pointing at unsupported bytes, or a malformed ASC."
430                );
431                return None;
432            }
433        };
434        let channels = parsed
435            .as_ref()
436            .map(crate::aac_asc::effective_output_channels)
437            .or_else(|| decode_asc_channels(&asc))
438            .unwrap_or(2);
439
440        let mut samples = Vec::with_capacity(sample_count as usize);
441        let mut durations = Vec::with_capacity(sample_count as usize);
442        // AAC-LC encodes 1024 PCM samples per access unit; AAC-HE
443        // (SBR) doubles the OUTPUT to 2048 but the core frame stays
444        // 1024 and the track's `mdhd.timescale` typically equals the
445        // SOURCE sample rate (not the SBR-doubled rate), so 1024 is
446        // the right tick count regardless of HE/non-HE.
447        //
448        // Fragmented MP4 sources (notably iPhone capture, some
449        // screen-recorder outputs) sometimes ship a `traf.trun`
450        // without per-sample durations AND a `tfhd`/`mvex.trex` whose
451        // `default_sample_duration` is 0. The mp4 crate then surfaces
452        // `sample.duration = 0` for every audio access unit, which
453        // sums to 0 total and trips the audio/video duration drift
454        // validator at job-end (failure mode observed on
455        // 2026-05-09 / job 37 — full-length audio dropped despite
456        // 12231 of 12318 access units extracting cleanly).
457        //
458        // Falling back to 1024 ticks per zero-duration sample
459        // re-derives the natural per-frame duration. Spec-conformant
460        // sources (where `sample.duration` carries the real value)
461        // are unaffected — fallback only fires on the 0 case.
462        const AAC_LC_CORE_FRAME_SIZE_TICKS: u32 = 1024;
463
464        // Fragmented MP4 path. The mp4 crate's `read_sample` returns
465        // garbage (typically the bytes of an adjacent moof box header)
466        // for fragmented audio tracks just like it does for video —
467        // see `build_fragmented_sample_table`'s docstring for the bug
468        // history. Walk moof->traf->trun ourselves and pull sample
469        // bytes straight out of `data` at the resolved offsets.
470        if let Some(frag) = build_fragmented_sample_table(data, track_id, 0, 0) {
471            tracing::info!(
472                track_id,
473                sample_count = frag.len(),
474                "fragmented MP4 audio: built sample table from moof/traf/trun"
475            );
476            for s in &frag {
477                let off = s.offset as usize;
478                let sz = s.size as usize;
479                let end = match off.checked_add(sz) {
480                    Some(e) if e <= data.len() => e,
481                    _ => {
482                        tracing::warn!(
483                            track_id,
484                            offset = s.offset,
485                            size = s.size,
486                            data_len = data.len(),
487                            "fragmented audio sample range out of bounds; truncating track"
488                        );
489                        break;
490                    }
491                };
492                // For AAC, ignore the source trun's per-sample
493                // duration entirely — AAC-LC AUs are exactly 1024
494                // PCM samples by spec. Source files (Apple / iOS /
495                // some web recorders) attach encoder-priming
496                // bookkeeping to the first sample's duration
497                // (e.g. 3298 ticks for a 1024-PCM-sample frame
498                // observed 2026-05-09); propagating that into our
499                // output mux makes Chrome MSE reject the audio
500                // SourceBuffer with `MediaSource readyState ended`.
501                // Fixed 1024 yields a clean contiguous timeline.
502                let dur = if is_aac {
503                    AAC_LC_CORE_FRAME_SIZE_TICKS
504                } else {
505                    s.duration_ticks
506                };
507                durations.push(dur);
508                samples.push(data[off..end].to_vec());
509            }
510        } else {
511            // Static moov sample table path — `read_sample` is correct
512            // here, the bug is fragmented-only.
513            let mut cursor = Cursor::new(data);
514            let mut reader = match Mp4Reader::read_header(&mut cursor, size) {
515                Ok(r) => r,
516                Err(e) => {
517                    tracing::warn!(error = %e, "audio passthrough: re-opening MP4 for sample read failed; dropping audio");
518                    return None;
519                }
520            };
521            for idx in 1..=sample_count {
522                match reader.read_sample(track_id, idx) {
523                    Ok(Some(sample)) => {
524                        let dur = if is_aac && sample.duration == 0 {
525                            AAC_LC_CORE_FRAME_SIZE_TICKS
526                        } else {
527                            sample.duration
528                        };
529                        durations.push(dur);
530                        samples.push(sample.bytes.to_vec());
531                    }
532                    Ok(None) => break,
533                    Err(e) => {
534                        tracing::warn!(
535                            track_id,
536                            idx,
537                            error = %e,
538                            "audio passthrough: read_sample error mid-track; \
539                             keeping samples read so far ({} of {}) and continuing",
540                            samples.len(),
541                            sample_count
542                        );
543                        break;
544                    }
545                }
546            }
547        }
548        if samples.is_empty() {
549            tracing::warn!(
550                track_id,
551                sample_count,
552                "AAC track parsed (ASC + sample table) but read_sample returned 0 \
553                 samples — possible mp4 crate stsd / stco parse failure on the source"
554            );
555            return None;
556        }
557        return Some(AudioTrack {
558            codec: "aac".into(),
559            samples,
560            sample_rate,
561            channels,
562            asc,
563            codec_private: Vec::new(),
564            timescale,
565            durations,
566        });
567    }
568
569    // AC-3 path. The `dac3` body lives in the sample entry; we use it as
570    // codec_private. Samples come back via the standard reader path (one
571    // AC-3 syncframe per MP4 sample). MP4 stsd preamble already advertises
572    // sample_rate (Q16) and channelcount but we re-derive both from the
573    // dac3 body for accuracy: the AudioSampleEntry preamble can mis-report
574    // (e.g. "48000" for an embedded 32 kHz stream — strict players use the
575    // dac3 body anyway).
576    if let Some(dac3_body) = ac3_cfg {
577        if dac3_body.len() < 3 {
578            tracing::warn!("MP4 AC-3 dac3 body shorter than 3 bytes — dropping audio");
579            return None;
580        }
581        let (sr, ch) = ac3_sample_rate_channels_from_dac3(&dac3_body)?;
582        let mut cursor = Cursor::new(data);
583        let mut reader = Mp4Reader::read_header(&mut cursor, size).ok()?;
584        let mut samples = Vec::with_capacity(sample_count as usize);
585        let mut durations = Vec::with_capacity(sample_count as usize);
586        for idx in 1..=sample_count {
587            match reader.read_sample(track_id, idx).ok()? {
588                Some(sample) => {
589                    durations.push(sample.duration);
590                    samples.push(sample.bytes.to_vec());
591                }
592                None => break,
593            }
594        }
595        if samples.is_empty() {
596            return None;
597        }
598        return Some(AudioTrack {
599            codec: "ac3".into(),
600            samples,
601            sample_rate: sr,
602            channels: ch,
603            asc: Vec::new(),
604            codec_private: dac3_body[..3].to_vec(),
605            timescale,
606            durations,
607        });
608    }
609
610    // E-AC-3 path. Same shape as AC-3 — body extracted from `dec3`.
611    if let Some(dec3_body) = eac3_cfg {
612        if dec3_body.len() < 5 {
613            tracing::warn!("MP4 E-AC-3 dec3 body shorter than 5 bytes — dropping audio");
614            return None;
615        }
616        let (sr, ch) = eac3_sample_rate_channels_from_dec3(&dec3_body)?;
617        let mut cursor = Cursor::new(data);
618        let mut reader = Mp4Reader::read_header(&mut cursor, size).ok()?;
619        let mut samples = Vec::with_capacity(sample_count as usize);
620        let mut durations = Vec::with_capacity(sample_count as usize);
621        for idx in 1..=sample_count {
622            match reader.read_sample(track_id, idx).ok()? {
623                Some(sample) => {
624                    durations.push(sample.duration);
625                    samples.push(sample.bytes.to_vec());
626                }
627                None => break,
628            }
629        }
630        if samples.is_empty() {
631            return None;
632        }
633        return Some(AudioTrack {
634            codec: "eac3".into(),
635            samples,
636            sample_rate: sr,
637            channels: ch,
638            asc: Vec::new(),
639            codec_private: dec3_body,
640            timescale,
641            durations,
642        });
643    }
644
645    // Opus path. The dOps body lives in the sample entry; samples (one
646    // Opus packet per MP4 sample) come back via the standard reader path
647    // since stco / stsc / stsz iteration is codec-agnostic.
648    let dops_body = opus_dops?; // body bytes only, no 'dOps' magic
649    let opus_head = dops_to_opus_head(&dops_body)?;
650    // For MP4-Opus the timescale is mandated 48000 by RFC 7845 §3 and
651    // virtually every encoder honours that, but tolerate divergence — the
652    // pipeline-level mux re-pins to 48000 when emitting.
653    let input_sample_rate =
654        u32::from_le_bytes([opus_head[4], opus_head[5], opus_head[6], opus_head[7]]);
655    let channels = opus_head[1] as u16;
656
657    let mut cursor = Cursor::new(data);
658    let mut reader = Mp4Reader::read_header(&mut cursor, size).ok()?;
659    let mut samples = Vec::with_capacity(sample_count as usize);
660    let mut durations = Vec::with_capacity(sample_count as usize);
661    for idx in 1..=sample_count {
662        match reader.read_sample(track_id, idx).ok()? {
663            Some(sample) => {
664                durations.push(sample.duration);
665                samples.push(sample.bytes.to_vec());
666            }
667            None => break,
668        }
669    }
670    if samples.is_empty() {
671        return None;
672    }
673    Some(AudioTrack {
674        codec: "opus".into(),
675        samples,
676        sample_rate: input_sample_rate,
677        channels,
678        asc: Vec::new(),
679        codec_private: opus_head,
680        timescale,
681        durations,
682    })
683}
684
685/// Walk every `trak` looking for one whose `stsd` contains an `ac-3`
686/// sample entry (ETSI TS 102 366 §F.2). Returns the body bytes of the
687/// contained `dac3` box (without the 8-byte box header) or None.
688fn extract_mp4_ac3_dac3_body(data: &[u8]) -> Option<Vec<u8>> {
689    extract_mp4_audio_config_body(data, b"ac-3", b"dac3")
690}
691
692/// Walk every `trak` looking for one whose `stsd` contains an `ec-3`
693/// sample entry (ETSI TS 102 366 §F.5). Returns the body bytes of the
694/// contained `dec3` box (without the 8-byte box header) or None.
695fn extract_mp4_eac3_dec3_body(data: &[u8]) -> Option<Vec<u8>> {
696    extract_mp4_audio_config_body(data, b"ec-3", b"dec3")
697}
698
699/// Generic walker — find an audio sample-entry of `entry_fourcc`, return
700/// the body of the named codec-config child (`dac3` / `dec3`) inside.
701/// Mirrors `extract_mp4_opus_dops_body`'s shape but parameterised on the
702/// entry / config 4-cc pair.
703fn extract_mp4_audio_config_body(
704    data: &[u8],
705    entry_fourcc: &[u8; 4],
706    cfg_fourcc: &[u8; 4],
707) -> Option<Vec<u8>> {
708    let moov = find_direct_child(data, b"moov")?;
709    let mut pos = 0;
710    while pos + 8 <= moov.len() {
711        let size =
712            u32::from_be_bytes([moov[pos], moov[pos + 1], moov[pos + 2], moov[pos + 3]]) as usize;
713        let btype = &moov[pos + 4..pos + 8];
714        if size < 8 || pos.checked_add(size).is_none_or(|end| end > moov.len()) {
715            break;
716        }
717        if btype == b"trak" {
718            let trak_body = &moov[pos + 8..pos + size];
719            if let Some(cfg) = extract_audio_cfg_from_trak(trak_body, entry_fourcc, cfg_fourcc) {
720                return Some(cfg);
721            }
722        }
723        pos += size;
724    }
725    None
726}
727
728fn extract_audio_cfg_from_trak(
729    trak: &[u8],
730    entry_fourcc: &[u8; 4],
731    cfg_fourcc: &[u8; 4],
732) -> Option<Vec<u8>> {
733    let stsd = find_box_body(trak, &[b"mdia", b"minf", b"stbl", b"stsd"])?;
734    if stsd.len() < 16 {
735        return None;
736    }
737    let mut pos = 8; // skip version/flags/entry_count
738    while pos + 8 <= stsd.len() {
739        let entry_size =
740            u32::from_be_bytes([stsd[pos], stsd[pos + 1], stsd[pos + 2], stsd[pos + 3]]) as usize;
741        let entry_type: [u8; 4] = stsd[pos + 4..pos + 8].try_into().ok()?;
742        if entry_size < 8 || pos.saturating_add(entry_size) > stsd.len() {
743            break;
744        }
745        if &entry_type == entry_fourcc {
746            let end = pos + entry_size;
747            // AudioSampleEntry layout per ISO/IEC 14496-12 §8.5.2.2: after
748            // the 8-byte box header there's a 28-byte fixed preamble
749            // followed by nested codec-specific boxes.
750            let child_start = pos + 8 + 28;
751            if child_start >= end {
752                return None;
753            }
754            return find_direct_child(&stsd[child_start..end], cfg_fourcc).map(|b| b.to_vec());
755        }
756        pos += entry_size;
757    }
758    None
759}
760
761/// Decode (sample_rate, channel_count) from a 3-byte `dac3` body per
762/// ETSI TS 102 366 §F.4. Bit layout (MSB-first across 24 bits):
763///   bits 23..22 fscod          (shift=22)
764///   bits 21..17 bsid           (shift=17)
765///   bits 16..14 bsmod          (shift=14)
766///   bits 13..11 acmod          (shift=11)
767///   bit  10     lfeon          (shift=10)
768///   bits  9.. 5 bit_rate_code  (shift= 5)
769///   bits  4.. 0 reserved (=0)
770fn ac3_sample_rate_channels_from_dac3(dac3: &[u8]) -> Option<(u32, u16)> {
771    if dac3.len() < 3 {
772        return None;
773    }
774    let raw = ((dac3[0] as u32) << 16) | ((dac3[1] as u32) << 8) | dac3[2] as u32;
775    let fscod = ((raw >> 22) & 0x03) as u8;
776    let acmod = ((raw >> 11) & 0x07) as u8;
777    let lfeon = ((raw >> 10) & 0x01) == 1;
778    let sr = match fscod {
779        0 => 48_000,
780        1 => 44_100,
781        2 => 32_000,
782        _ => return None,
783    };
784    Some((sr, crate::ac3_sync::channel_count(acmod, lfeon)))
785}
786
787/// Decode (sample_rate, channel_count) from a `dec3` body per ETSI TS 102
788/// 366 §F.6. Squad-26 only emits / extracts the single-substream form
789/// (5-byte body), which is what every vanilla 5.1 / 7.1 E-AC-3 file uses.
790fn eac3_sample_rate_channels_from_dec3(dec3: &[u8]) -> Option<(u32, u16)> {
791    if dec3.len() < 5 {
792        return None;
793    }
794    // Header: data_rate(13b) + num_ind_sub-1(3b) packed in bytes 0..2.
795    // Per-substream block starts at bit position 16.
796    // bits 16..18 = fscod
797    //  18..23 = bsid (=16)
798    //  23..24 = reserved
799    //  24..25 = asvc
800    //  25..28 = bsmod
801    //  28..31 = acmod
802    //  31..32 = lfeon
803    let raw_be = u64::from(dec3[0]) << 32
804        | u64::from(dec3[1]) << 24
805        | u64::from(dec3[2]) << 16
806        | u64::from(dec3[3]) << 8
807        | u64::from(dec3[4]);
808    // dec3 is 5 bytes total (40 bits) for the single-substream case.
809    // Adjust shifts: high bit is bit 39 in our 40-bit value.
810    //   bit 39..27 = data_rate (13 bits)  shift=27
811    //   bit 26..24 = num_ind_sub-1        shift=24
812    //   bit 23..22 = fscod                shift=22
813    //   bit 21..17 = bsid                 shift=17
814    //   bit 16     = reserved
815    //   bit 15     = asvc
816    //   bit 14..12 = bsmod
817    //   bit 11..9  = acmod                shift=9
818    //   bit 8      = lfeon                shift=8
819    //   bit 7..5   = reserved
820    //   bit 4..1   = num_dep_sub
821    //   bit 0      = reserved
822    let fscod = ((raw_be >> 22) & 0x03) as u8;
823    let acmod = ((raw_be >> 9) & 0x07) as u8;
824    let lfeon = ((raw_be >> 8) & 0x01) == 1;
825    let sr = crate::ac3_sync::eac3_sample_rate_hz(fscod, 0);
826    if sr == 0 {
827        return None;
828    }
829    Some((sr, crate::ac3_sync::channel_count(acmod, lfeon)))
830}
831
832/// Walk every `trak` looking for one whose `stsd` contains an `Opus`
833/// sample entry (RFC 7845 §4.4). Returns the body bytes of the contained
834/// `dOps` box (without the 8-byte box header) or None.
835///
836/// `find_box_body` only follows the FIRST trak it encounters (the video
837/// trak), so we have to iterate traks ourselves — same pattern as
838/// `extract_aac_asc`.
839///
840/// 4-cc match is `Opus` exactly (capital O) per spec. We do not match the
841/// lowercase `opus` variant — strict players reject that and we shouldn't
842/// silently accept input that some downstream stage will choke on.
843fn extract_mp4_opus_dops_body(data: &[u8]) -> Option<Vec<u8>> {
844    let moov = find_direct_child(data, b"moov")?;
845    let mut pos = 0;
846    while pos + 8 <= moov.len() {
847        let size =
848            u32::from_be_bytes([moov[pos], moov[pos + 1], moov[pos + 2], moov[pos + 3]]) as usize;
849        let btype = &moov[pos + 4..pos + 8];
850        if size < 8 || pos.checked_add(size).is_none_or(|end| end > moov.len()) {
851            break;
852        }
853        if btype == b"trak" {
854            let trak_body = &moov[pos + 8..pos + size];
855            if let Some(dops) = extract_dops_from_trak(trak_body) {
856                return Some(dops);
857            }
858        }
859        pos += size;
860    }
861    None
862}
863
864fn extract_dops_from_trak(trak: &[u8]) -> Option<Vec<u8>> {
865    let stsd = find_box_body(trak, &[b"mdia", b"minf", b"stbl", b"stsd"])?;
866    if stsd.len() < 16 {
867        return None;
868    }
869    let mut pos = 8; // skip version/flags/entry_count
870    while pos + 8 <= stsd.len() {
871        let entry_size =
872            u32::from_be_bytes([stsd[pos], stsd[pos + 1], stsd[pos + 2], stsd[pos + 3]]) as usize;
873        let entry_type: [u8; 4] = stsd[pos + 4..pos + 8].try_into().ok()?;
874        if entry_size < 8 || pos.saturating_add(entry_size) > stsd.len() {
875            break;
876        }
877        if &entry_type == b"Opus" {
878            let end = pos + entry_size;
879            // AudioSampleEntry layout per ISO/IEC 14496-12 §8.5.2.2: after
880            // the 8-byte box header there's a 28-byte fixed preamble
881            // (reserved/channelcount/samplesize/etc.) — same as `mp4a` —
882            // followed by nested codec-specific boxes. dOps lives there.
883            let child_start = pos + 8 + 28;
884            if child_start >= end {
885                return None;
886            }
887            return find_direct_child(&stsd[child_start..end], b"dOps").map(|b| b.to_vec());
888        }
889        pos += entry_size;
890    }
891    None
892}
893
/// Convert a `dOps` body (BE numeric fields per RFC 7845 §4.5) back into
/// the OpusHead-form body (LE numeric fields per RFC 7845 §5.1) that the
/// mux side carries in `AudioInfo.codec_private`. This keeps the in-pipeline
/// representation a single canonical form regardless of source container.
///
/// The dOps `Version` field (always 0 on the wire per §4.5) is rewritten
/// to OpusHead `Version` = 1 (RFC 7845 §5.1: "version number, MUST be 1").
///
/// Input layout:
///
/// | offset | field                                            |
/// |--------|--------------------------------------------------|
/// | 0      | Version                                          |
/// | 1      | OutputChannelCount                               |
/// | 2..4   | PreSkip (BE)                                     |
/// | 4..8   | InputSampleRate (BE)                             |
/// | 8..10  | OutputGain (BE, signed)                          |
/// | 10     | ChannelMappingFamily                             |
/// | 11     | StreamCount (family != 0 only)                   |
/// | 12     | CoupledCount (family != 0 only)                  |
/// | 13..   | ChannelMapping, one byte per output channel      |
///
/// Returns `None` when the body is shorter than the 11 fixed bytes, or —
/// for a non-zero mapping family — shorter than the full channel mapping
/// table (`2 + OutputChannelCount` bytes after the fixed part).
fn dops_to_opus_head(dops: &[u8]) -> Option<Vec<u8>> {
    const FIXED_LEN: usize = 11;
    if dops.len() < FIXED_LEN {
        return None;
    }
    let output_channels = dops[1];
    let pre_skip = u16::from_be_bytes([dops[2], dops[3]]);
    let input_sample_rate = u32::from_be_bytes([dops[4], dops[5], dops[6], dops[7]]);
    let output_gain = i16::from_be_bytes([dops[8], dops[9]]);
    let channel_mapping_family = dops[10];

    // Family != 0 → carry the channel mapping table verbatim too. The table
    // is StreamCount + CoupledCount + one mapping byte per OUTPUT CHANNEL,
    // so its length is driven by OutputChannelCount — not by CoupledCount
    // (dops[12]), which would truncate e.g. a 5.1 mapping from 8 to 4 bytes.
    let mapping_table: &[u8] = if channel_mapping_family != 0 {
        let table_len = 2 + usize::from(output_channels);
        dops.get(FIXED_LEN..FIXED_LEN + table_len)?
    } else {
        &[]
    };

    let mut head = Vec::with_capacity(FIXED_LEN + mapping_table.len());
    head.push(1u8); // OpusHead Version = 1
    head.push(output_channels);
    head.extend_from_slice(&pre_skip.to_le_bytes());
    head.extend_from_slice(&input_sample_rate.to_le_bytes());
    head.extend_from_slice(&output_gain.to_le_bytes());
    head.push(channel_mapping_family);
    head.extend_from_slice(mapping_table);
    Some(head)
}
938
939/// Walk moov/trak*/mdia/minf/stbl/stsd to recover the AAC AudioSpecificConfig.
940///
941/// Returns the DecoderSpecificInfo payload verbatim. The walk is robust to
942/// the kinds of variation iPhone-recorded MOVs throw at us:
943///
944///   - **Multi-trak files**: iterates every `trak`. Most files have video +
945///     audio + (optional) timed metadata. We use the presence of `smhd`
946///     (Sound Media Header, ISO 14496-12 §8.4.5.3) to *positively* identify
947///     audio traks rather than relying on stsd[0]'s fourcc — that's how we
948///     reach the audio data even if the trak is in an unusual order.
949///   - **Multi-entry stsd**: iterates every `SampleEntry` inside `stsd`,
950///     not just entry[0]. Apple tooling occasionally emits multiple sample
951///     entries (e.g. `mp4a` + an alternate config) and we must find the
952///     first one that yields a usable ASC.
953///   - **enca (Encrypted-But-Clear)**: same 28-byte AudioSampleEntry
954///     prefix as `mp4a`, with an inner `frma 'mp4a'` declaring the
955///     original format. We treat `enca` as `mp4a` for ASC extraction.
956///   - **wave wrapping**: Apple QuickTime nests
957///     `mp4a → wave → frma + mp4a + esds`. `find_esds_recursive` descends
958///     into `wave` so the esds is found regardless of nesting depth.
959///   - **Brute-force fallback**: after the structured walk, if the trak
960///     was identified as audio (smhd present) but no ASC came back, we
961///     scan the trak buffer linearly for any `esds` box and try to parse
962///     an ASC out of it. This is the safety net for unforeseen wrappers
963///     (and the "log signpost" — anything that lands here gets a warn so
964///     we can codify the new shape into structured handling later).
965///
966/// Returns `None` only when none of the audio traks yielded a non-empty
967/// ASC. Every fall-through here has a `tracing::warn!` so CloudWatch
968/// surfaces the exact reason rather than producing audio-less output
969/// silently.
970fn extract_aac_asc(data: &[u8]) -> Option<Vec<u8>> {
971    let moov = find_direct_child(data, b"moov")?;
972    let mut pos = 0;
973    let mut saw_audio_trak = false;
974    while pos + 8 <= moov.len() {
975        let size =
976            u32::from_be_bytes([moov[pos], moov[pos + 1], moov[pos + 2], moov[pos + 3]]) as usize;
977        let btype = &moov[pos + 4..pos + 8];
978        if size < 8 || pos.checked_add(size).is_none_or(|end| end > moov.len()) {
979            break;
980        }
981        if btype == b"trak" {
982            let trak_body = &moov[pos + 8..pos + size];
983            if trak_is_audio(trak_body) {
984                saw_audio_trak = true;
985                if let Some(asc) = extract_asc_from_trak(trak_body) {
986                    return Some(asc);
987                }
988                // Audio trak identified by smhd but the structured
989                // walk came up empty — try a brute-force esds scan
990                // before declaring failure.
991                if let Some(asc) = brute_force_find_asc_in_trak(trak_body) {
992                    tracing::warn!(
993                        asc_len = asc.len(),
994                        "audio passthrough recovered ASC via brute-force esds scan; \
995                         the trak's stsd shape is not in our structured handler. \
996                         Capture this file and add coverage so the structured walk \
997                         finds it next time."
998                    );
999                    return Some(asc);
1000                }
1001            }
1002        }
1003        pos += size;
1004    }
1005    if saw_audio_trak {
1006        tracing::warn!(
1007            "audio passthrough skipped: identified an audio trak via smhd, but no \
1008             stsd entry yielded an AudioSpecificConfig. Possible causes: enca with \
1009             unsupported scheme, sample entry fourcc we don't recognise, esds box \
1010             missing or corrupt, mp4 sanitizer mis-aligned a wave-wrapped esds."
1011        );
1012    } else {
1013        tracing::warn!(
1014            "audio passthrough skipped: no trak had a Sound Media Header (smhd). \
1015             Source may be video-only, or its track headers do not conform to ISOBMFF \
1016             §8.4.5.3 (smhd is required for audio traks)."
1017        );
1018    }
1019    None
1020}
1021
/// Format the first `n` bytes of `bytes` as a lowercase hex string for
/// diagnostic log lines. Used by `extract_mp4_audio` so the log records the
/// actual ASC prefix when something downstream fails to parse it — that
/// lets us reproduce iPhone-shaped issues from CloudWatch alone, without
/// needing the user's source file in hand.
///
/// When `bytes` holds fewer than `n` bytes, all of them are formatted. The
/// output buffer is sized from the bytes actually formatted, so an
/// arbitrarily large `n` neither overflows nor over-allocates.
fn hex_prefix(bytes: &[u8], n: usize) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    let shown = &bytes[..n.min(bytes.len())];
    let mut out = String::with_capacity(shown.len() * 2);
    for &byte in shown {
        // Two table lookups per byte; no per-byte `format!` allocation.
        out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
    }
    out
}
1034
/// Sample-entry fourccs treated as AAC carriers when hunting for an ASC.
///
/// - `mp4a` — the standard ISOBMFF AudioSampleEntry.
/// - `enca` — the EncryptedSampleEntry wrapper (ISO 23001-7 §6.2). It has
///   the same 28-byte AudioSampleEntry prefix, an inner `frma 'mp4a'`
///   naming the original format, and keeps the esds (holding the clear
///   ASC bytes) next to the `sinf` ProtectionSchemeInfoBox. For `cenc`
///   "clear" mode streams the ASC itself is unencrypted, so passthrough
///   works exactly as it does for `mp4a`.
const AAC_AUDIO_SAMPLE_ENTRIES: &[&[u8; 4]] = &[b"mp4a", b"enca"];
1045
1046/// Quick "is this trak an audio trak?" check. ISO 14496-12 §8.4.5.3
1047/// requires `smhd` (Sound Media Header) inside `mdia/minf` for every
1048/// audio trak. Looking for it is a strictly stronger signal than
1049/// inspecting the first `stsd` entry's fourcc — it's positive evidence
1050/// of trak intent rather than fourcc-position guessing.
1051fn trak_is_audio(trak: &[u8]) -> bool {
1052    find_box_body(trak, &[b"mdia", b"minf", b"smhd"]).is_some()
1053}
1054
1055fn extract_asc_from_trak(trak: &[u8]) -> Option<Vec<u8>> {
1056    let stsd = find_box_body(trak, &[b"mdia", b"minf", b"stbl", b"stsd"])?;
1057    if stsd.len() < 8 {
1058        tracing::warn!(
1059            stsd_len = stsd.len(),
1060            "audio passthrough: stsd shorter than its 8-byte FullBox preamble"
1061        );
1062        return None;
1063    }
1064    // Skip version/flags (4) + entry_count (4). Sample entries follow.
1065    let entries = &stsd[8..];
1066    let mut cursor = 0;
1067    while cursor + 8 <= entries.len() {
1068        let entry_size = u32::from_be_bytes([
1069            entries[cursor],
1070            entries[cursor + 1],
1071            entries[cursor + 2],
1072            entries[cursor + 3],
1073        ]) as usize;
1074        let entry_type: &[u8; 4] = entries[cursor + 4..cursor + 8].try_into().unwrap();
1075        if entry_size < 8 || cursor + entry_size > entries.len() {
1076            break;
1077        }
1078
1079        if AAC_AUDIO_SAMPLE_ENTRIES.contains(&entry_type) {
1080            // AudioSampleEntry layout per ISOBMFF §8.5.2: 8-byte box
1081            // header + 28-byte fixed preamble (reserved /
1082            // channelcount / samplesize / sample_rate Q16) + nested
1083            // boxes (esds, optional wave wrapper, optional chan).
1084            if entry_size >= 36 {
1085                let body = &entries[cursor + 8 + 28..cursor + entry_size];
1086                if let Some(asc) = find_esds_recursive(body) {
1087                    return Some(asc);
1088                }
1089            }
1090        }
1091        cursor += entry_size;
1092    }
1093    None
1094}
1095
1096/// Last-resort: linearly scan the trak buffer for any `esds` box and
1097/// try to parse an ASC out of it. Used only when the structured walk
1098/// (smhd → stsd → mp4a/enca → esds, optionally through `wave`) failed
1099/// despite the trak being an audio trak. Logs a warn at the call site
1100/// when this path returns a result so we can codify the source's
1101/// actual shape into the structured handler later.
1102fn brute_force_find_asc_in_trak(trak: &[u8]) -> Option<Vec<u8>> {
1103    let mut pos = 0;
1104    while pos + 8 <= trak.len() {
1105        if &trak[pos + 4..pos + 8] == b"esds" {
1106            let size = u32::from_be_bytes([trak[pos], trak[pos + 1], trak[pos + 2], trak[pos + 3]])
1107                as usize;
1108            if size >= 12 && pos + size <= trak.len() {
1109                // esds body begins after 8-byte box header + 4-byte FullBox preamble.
1110                let esds_body = &trak[pos + 12..pos + size];
1111                if let Some(asc) = extract_asc_from_esds(esds_body) {
1112                    if !asc.is_empty() {
1113                        return Some(asc);
1114                    }
1115                }
1116            }
1117        }
1118        pos += 1;
1119    }
1120    None
1121}
1122
1123/// Descend into the nested-box children of an mp4a sample entry to
1124/// find `esds`. Apple QuickTime / iPhone MOV files frequently wrap
1125/// the esds inside a `wave` container box (legacy from .mov format),
1126/// so a flat scan of immediate children misses it. Recursing into
1127/// `wave` (and only `wave` — other sub-boxes are not specified to
1128/// contain esds) lets us pick it up in either layout.
1129///
1130/// Returns the parsed AudioSpecificConfig bytes from the first esds
1131/// found.
1132fn find_esds_recursive(body: &[u8]) -> Option<Vec<u8>> {
1133    let mut pos = 0;
1134    while pos + 8 <= body.len() {
1135        let sub_size =
1136            u32::from_be_bytes([body[pos], body[pos + 1], body[pos + 2], body[pos + 3]]) as usize;
1137        let sub_type = &body[pos + 4..pos + 8];
1138        if sub_size < 8 || pos + sub_size > body.len() {
1139            break;
1140        }
1141        if sub_type == b"esds" {
1142            // esds body: 1 byte version + 3 flags + ES descriptor tree.
1143            let esds_body = &body[pos + 8 + 4..pos + sub_size];
1144            return extract_asc_from_esds(esds_body);
1145        }
1146        if sub_type == b"wave" {
1147            // QuickTime audio extension. Recurse — esds usually lives
1148            // inside.
1149            if let Some(asc) = find_esds_recursive(&body[pos + 8..pos + sub_size]) {
1150                return Some(asc);
1151            }
1152        }
1153        pos += sub_size;
1154    }
1155    None
1156}
1157
1158/// Walk `moov > trak[]` and return true if any audio trak (identified
1159/// by `smhd`, ISO 14496-12 §8.4.5.3) carries one of our recognised AAC
1160/// sample-entry fourccs (`mp4a` or `enca`). Walks every stsd entry, not
1161/// just entry[0], so multi-entry stsd shapes Apple tooling occasionally
1162/// produces still classify correctly.
1163///
1164/// Used as the manual AAC detector that bypasses `mp4 0.14`'s
1165/// `track.media_type()` — iPhone MOVs trip the crate's classifier when
1166/// audio carries QuickTime extensions (esds wrapped in `wave`), and the
1167/// silent-Err path used to drop audio on every upload.
1168fn mp4_has_aac_sample_entry(data: &[u8]) -> bool {
1169    let Some(moov) = find_direct_child(data, b"moov") else {
1170        return false;
1171    };
1172    let mut pos = 0;
1173    while pos + 8 <= moov.len() {
1174        let size =
1175            u32::from_be_bytes([moov[pos], moov[pos + 1], moov[pos + 2], moov[pos + 3]]) as usize;
1176        let btype = &moov[pos + 4..pos + 8];
1177        if size < 8 || pos + size > moov.len() {
1178            break;
1179        }
1180        if btype == b"trak" {
1181            let trak_body = &moov[pos + 8..pos + size];
1182            if !trak_is_audio(trak_body) {
1183                pos += size;
1184                continue;
1185            }
1186            if let Some(stsd) = find_box_body(trak_body, &[b"mdia", b"minf", b"stbl", b"stsd"])
1187                && stsd.len() >= 8
1188            {
1189                let entries = &stsd[8..];
1190                let mut cursor = 0;
1191                while cursor + 8 <= entries.len() {
1192                    let entry_size = u32::from_be_bytes([
1193                        entries[cursor],
1194                        entries[cursor + 1],
1195                        entries[cursor + 2],
1196                        entries[cursor + 3],
1197                    ]) as usize;
1198                    if entry_size < 8 || cursor + entry_size > entries.len() {
1199                        break;
1200                    }
1201                    let entry_type: &[u8; 4] = entries[cursor + 4..cursor + 8].try_into().unwrap();
1202                    if AAC_AUDIO_SAMPLE_ENTRIES.contains(&entry_type) {
1203                        return true;
1204                    }
1205                    cursor += entry_size;
1206                }
1207            }
1208        }
1209        pos += size;
1210    }
1211    false
1212}
1213
1214/// Parse MPEG-4 descriptor tree rooted at ES_Descriptor and pluck the
1215/// DecoderSpecificInfo payload. Tags: ES_Descr=0x03, DecoderConfigDescr=0x04,
1216/// DecoderSpecificInfo=0x05. Each descriptor has a tag byte then a variable
1217/// length (7 bits per byte, top bit = continuation).
1218fn extract_asc_from_esds(body: &[u8]) -> Option<Vec<u8>> {
1219    let (tag, payload, _rest) = read_descriptor(body)?;
1220    if tag != 0x03 {
1221        return None;
1222    }
1223    // ES_Descriptor layout: 2 bytes ES_ID + 1 flags byte + optional fields,
1224    // then nested descriptors. Flags bit layout (per spec):
1225    //   streamDependenceFlag (1) | URL_Flag (1) | OCRstreamFlag (1) | streamPriority (5)
1226    if payload.len() < 3 {
1227        return None;
1228    }
1229    let flags = payload[2];
1230    let mut off = 3;
1231    if flags & 0x80 != 0 {
1232        off += 2;
1233    } // dependsOn_ES_ID
1234    if flags & 0x40 != 0 {
1235        // URL_Flag: 1-byte length + URL string
1236        if off >= payload.len() {
1237            return None;
1238        }
1239        let url_len = payload[off] as usize;
1240        off += 1 + url_len;
1241    }
1242    if flags & 0x20 != 0 {
1243        off += 2;
1244    } // OCR_ES_ID
1245    if off > payload.len() {
1246        return None;
1247    }
1248
1249    // Iterate children looking for DecoderConfigDescriptor (tag 0x04).
1250    let mut cursor = &payload[off..];
1251    while !cursor.is_empty() {
1252        let (tag, child, rest) = read_descriptor(cursor)?;
1253        cursor = rest;
1254        if tag != 0x04 {
1255            continue;
1256        }
1257        // DecoderConfigDescriptor: 1 objectTypeIndication + 1 streamType
1258        // byte + 3 bufferSizeDB + 4 maxBitrate + 4 avgBitrate, then nested.
1259        if child.len() < 13 {
1260            return None;
1261        }
1262        let inner = &child[13..];
1263        let mut inner_cursor = inner;
1264        while !inner_cursor.is_empty() {
1265            let (t, dsi_payload, r) = read_descriptor(inner_cursor)?;
1266            inner_cursor = r;
1267            if t == 0x05 {
1268                return Some(dsi_payload.to_vec());
1269            }
1270        }
1271        return None;
1272    }
1273    None
1274}
1275
/// Split one descriptor — `[tag u8][len ULEB128-ish][payload]` — off the
/// front of `data`.
///
/// The length is encoded big-end first, 7 bits per byte, with the top bit
/// of each byte flagging a continuation; at most four length bytes are
/// consumed. On success the result is
/// `(tag, payload slice, bytes remaining after this descriptor)`.
///
/// Returns `None` when `data` is empty, ends inside the length field, or
/// is shorter than the declared payload length.
fn read_descriptor(data: &[u8]) -> Option<(u8, &[u8], &[u8])> {
    const MAX_LENGTH_BYTES: usize = 4;

    let (&tag, after_tag) = data.split_first()?;
    let mut length = 0usize;
    let mut consumed = 0usize;
    loop {
        let byte = *after_tag.get(consumed)?;
        consumed += 1;
        length = (length << 7) | usize::from(byte & 0x7F);
        if byte & 0x80 == 0 || consumed == MAX_LENGTH_BYTES {
            break;
        }
    }

    let after_length = &after_tag[consumed..];
    if length > after_length.len() {
        return None;
    }
    let (payload, rest) = after_length.split_at(length);
    Some((tag, payload, rest))
}
1303
1304/// Decode the sampling_frequency out of an ASC per ISO/IEC 14496-3 §1.6.2.1.
1305/// ASC bitstream: audioObjectType(5) samplingFrequencyIndex(4) ...
1306/// If index==0xF then 24-bit sample rate follows inline.
1307fn decode_asc_sample_rate(asc: &[u8]) -> Option<u32> {
1308    if asc.len() < 2 {
1309        return None;
1310    }
1311    let mut br = AscBitReader::new(asc);
1312    let aot = br.bits(5)?;
1313    let _extended_aot = if aot == 31 { br.bits(6)? + 32 } else { aot };
1314    let freq_idx = br.bits(4)? as usize;
1315    if freq_idx == 0xF {
1316        let sr = br.bits(24)?;
1317        Some(sr as u32)
1318    } else {
1319        const FREQS: [u32; 13] = [
1320            96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350,
1321        ];
1322        FREQS.get(freq_idx).copied()
1323    }
1324}
1325
1326fn decode_asc_channels(asc: &[u8]) -> Option<u16> {
1327    if asc.len() < 2 {
1328        return None;
1329    }
1330    let mut br = AscBitReader::new(asc);
1331    let aot = br.bits(5)?;
1332    let _ext = if aot == 31 { br.bits(6)? + 32 } else { aot };
1333    let freq_idx = br.bits(4)? as usize;
1334    if freq_idx == 0xF {
1335        let _ = br.bits(24)?;
1336    }
1337    let chan_cfg = br.bits(4)? as u16;
1338    // chan_cfg 0 means "inspect PCE"; we don't bother — default to 2.
1339    if chan_cfg == 0 {
1340        Some(2)
1341    } else {
1342        Some(chan_cfg)
1343    }
1344}
1345
/// MSB-first bit cursor over a borrowed byte slice, used to pick fields
/// out of an AudioSpecificConfig.
struct AscBitReader<'a> {
    /// Bytes being read.
    data: &'a [u8],
    /// Position of the next unread bit, counted from the start of `data`.
    pos: usize,
}

impl<'a> AscBitReader<'a> {
    /// Start reading at the first (most significant) bit of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    /// Read the next `n` bits, most significant first, into the low bits
    /// of the result.
    ///
    /// Returns `None` once the data runs out. Bits consumed before that
    /// point stay consumed — the cursor is not rewound.
    fn bits(&mut self, n: u32) -> Option<u64> {
        (0..n).try_fold(0u64, |acc, _| {
            let byte = *self.data.get(self.pos / 8)?;
            let bit = (byte >> (7 - self.pos % 8)) & 1;
            self.pos += 1;
            Some((acc << 1) | u64::from(bit))
        })
    }
}
1365
1366pub fn demux_mkv(data: &[u8]) -> Result<DemuxResult> {
1367    let cursor = Cursor::new(data);
1368    let mut mkv =
1369        MatroskaFile::open(cursor).map_err(|e| anyhow::anyhow!("reading MKV header: {e}"))?;
1370
1371    // AVC/HEVC in MKV: CodecPrivate holds the avcC / hvcC configuration record
1372    // verbatim. Length-prefixed Block samples need the same Annex-B conversion
1373    // we do for MP4, plus VPS/SPS/PPS prepended to the first sample of the
1374    // track. VP8/VP9/AV1 are self-contained and skip this dance.
1375    //
1376    // Snapshot every field we need off TrackEntry before `next_frame` starts
1377    // mutating `mkv` below — TrackEntry borrows from `mkv` and hold times
1378    // conflict with the &mut self on `next_frame`.
1379    let (
1380        track_number,
1381        track_uid,
1382        codec_id,
1383        width,
1384        height,
1385        annexb_prepend,
1386        length_size,
1387        color_space,
1388        mut color_metadata,
1389        mut color_info,
1390        track_default_duration_ns,
1391    ) = {
1392        let track_info = mkv
1393            .tracks()
1394            .iter()
1395            .find(|t| t.track_type() == MkvTrackType::Video)
1396            .context("no video track in MKV")?;
1397
1398        let track_number = track_info.track_number().get();
1399        let track_uid = track_info.track_uid().get();
1400        let codec_id = track_info.codec_id().to_string();
1401        // Per-track DefaultDuration (`0x23E383`, ns per frame) — Matroska's
1402        // canonical frame-rate hint. Used as the frame_rate fallback when the
1403        // segment's `Duration` element is absent (live-recorded MKVs and some
1404        // streaming WebMs ship without one). Squad-32: this fallback was
1405        // previously missing — frame_rate would silently default to 30.0
1406        // even when DefaultDuration cleanly described e.g. 23.976 / 60 fps.
1407        let default_duration_ns = track_info.default_duration().map(|d| d.get());
1408
1409        // Parse avcC/hvcC CodecPrivate once to recover both the parameter
1410        // sets and the recorded length_size_minus_one — 4-byte prefixes
1411        // are the common case, but the spec allows 1 or 2 bytes.
1412        let (annexb_prepend, length_size): (Vec<Vec<u8>>, u8) = if codec_id == "V_MPEG4/ISO/AVC" {
1413            let priv_bytes = track_info
1414                .codec_private()
1415                .context("V_MPEG4/ISO/AVC CodecPrivate missing")?;
1416            let cfg = parse_avcc(priv_bytes).context("V_MPEG4/ISO/AVC CodecPrivate malformed")?;
1417            (cfg.parameter_sets, cfg.length_size)
1418        } else if codec_id == "V_MPEGH/ISO/HEVC" {
1419            let priv_bytes = track_info
1420                .codec_private()
1421                .context("V_MPEGH/ISO/HEVC CodecPrivate missing")?;
1422            let cfg = parse_hvcc(priv_bytes).context("V_MPEGH/ISO/HEVC CodecPrivate malformed")?;
1423            (cfg.parameter_sets, cfg.length_size)
1424        } else {
1425            (Vec::new(), 4)
1426        };
1427
1428        if mkv_codec_needs_annexb(&codec_id) && annexb_prepend.is_empty() {
1429            bail!("AVC/HEVC MKV CodecPrivate missing or empty — no parameter sets to prepend");
1430        }
1431
1432        let video = track_info
1433            .video()
1434            .context("video track missing Video element")?;
1435        let w = video.pixel_width().get() as u32;
1436        let h = video.pixel_height().get() as u32;
1437
1438        // Parse the Colour element into a ColorMetadata + ColorSpace +
1439        // extended MkvColorInfo. Legacy MKVs without Colour produce the
1440        // SDR BT.709 default.
1441        let (color_space, color_metadata, color_info) = match video.colour() {
1442            Some(colour) => colour_to_pipeline(colour),
1443            None => (
1444                ColorSpace::Bt709,
1445                ColorMetadata::default(),
1446                MkvColorInfo::default(),
1447            ),
1448        };
1449
1450        (
1451            track_number,
1452            track_uid,
1453            codec_id,
1454            w,
1455            h,
1456            annexb_prepend,
1457            length_size,
1458            color_space,
1459            color_metadata,
1460            color_info,
1461            default_duration_ns,
1462        )
1463    };
1464
1465    // Squad-21: matroska-demuxer 0.7's `Colour::new` reads MaxCLL/MaxFALL from
1466    // the wrong ElementId offset (it actually reads MatrixCoefficients), and
1467    // `MasteringMetadata::new` reads each `_chromaticity_y` from the matching
1468    // `_chromaticity_x` ElementId — so all three primaries' y values come back
1469    // holding the corresponding x value. Re-scan the raw EBML bytes to recover
1470    // the canonical values; the same workaround already lives in
1471    // `probe_mkv_color_info`. We MUST also clear the unified
1472    // `ColorMetadata.content_light_level` and the mastering display y-fields
1473    // we synthesized from the poisoned typed accessors so a scan miss doesn't
1474    // leave the wrong value in place.
1475    color_info.max_cll = None;
1476    color_info.max_fall = None;
1477    color_metadata.content_light_level = None;
1478    if let Some(md) = color_metadata.mastering_display.as_mut() {
1479        // The y values are poisoned with the matching x values — clear them
1480        // in case the raw scan can't recover (defensive: leave 0 vs garbage).
1481        md.primaries_r_y = 0;
1482        md.primaries_g_y = 0;
1483        md.primaries_b_y = 0;
1484    }
1485    if let Some(local) = color_info.mastering.as_mut() {
1486        local.primary_r_chromaticity_y = None;
1487        local.primary_g_chromaticity_y = None;
1488        local.primary_b_chromaticity_y = None;
1489    }
1490    if let Some(fix) = scan_mkv_colour_raw(data) {
1491        color_info.max_cll = fix.max_cll;
1492        color_info.max_fall = fix.max_fall;
1493        if fix.max_cll.is_some() || fix.max_fall.is_some() {
1494            color_metadata.content_light_level = Some(ContentLightLevel {
1495                max_cll: fix.max_cll.unwrap_or(0).min(u16::MAX as u32) as u16,
1496                max_fall: fix.max_fall.unwrap_or(0).min(u16::MAX as u32) as u16,
1497            });
1498        }
1499        // Re-fold the recovered y-chromaticities (HEVC SEI D.2.28 wire
1500        // domain: 0.00002 increments → multiply by 50_000, saturate to u16).
1501        let chrom = |v: f64| (v * 50_000.0).round().clamp(0.0, u16::MAX as f64) as u16;
1502        if let Some(md) = color_metadata.mastering_display.as_mut() {
1503            if let Some(y) = fix.primary_r_chromaticity_y {
1504                md.primaries_r_y = chrom(y);
1505            }
1506            if let Some(y) = fix.primary_g_chromaticity_y {
1507                md.primaries_g_y = chrom(y);
1508            }
1509            if let Some(y) = fix.primary_b_chromaticity_y {
1510                md.primaries_b_y = chrom(y);
1511            }
1512        }
1513        if let Some(local) = color_info.mastering.as_mut() {
1514            if fix.primary_r_chromaticity_y.is_some() {
1515                local.primary_r_chromaticity_y = fix.primary_r_chromaticity_y;
1516            }
1517            if fix.primary_g_chromaticity_y.is_some() {
1518                local.primary_g_chromaticity_y = fix.primary_g_chromaticity_y;
1519            }
1520            if fix.primary_b_chromaticity_y.is_some() {
1521                local.primary_b_chromaticity_y = fix.primary_b_chromaticity_y;
1522            }
1523        }
1524    }
1525
1526    let needs_annexb = mkv_codec_needs_annexb(&codec_id);
1527    let codec = match codec_id.as_str() {
1528        "V_VP9" => "vp9".to_string(),
1529        "V_VP8" => "vp8".to_string(),
1530        "V_AV1" => "av1".to_string(),
1531        "V_MPEG4/ISO/AVC" => "h264".to_string(),
1532        "V_MPEGH/ISO/HEVC" => "h265".to_string(),
1533        other => other.to_lowercase(),
1534    };
1535
1536    let timestamp_scale = mkv.info().timestamp_scale().get();
1537    let duration_ticks = mkv.info().duration().unwrap_or(0.0);
1538    // timestamp_scale is in ns; duration is in ticks (float)
1539    let duration = duration_ticks * (timestamp_scale as f64) / 1_000_000_000.0;
1540
1541    // Tag-based bitrate: preferred over the computed fallback when a
1542    // muxer wrote a `BIT_RATE` Matroska Tag scoped to our track UID.
1543    // See `bitrate_from_tags` for scope-resolution details.
1544    let tag_bitrate = mkv
1545        .tags()
1546        .and_then(|tags| bitrate_from_tags(tags, track_uid));
1547    // Emit the extended metadata we can't (yet) carry on `StreamInfo`
1548    // on a structured log line — downstream work-items #HDR10 and mux
1549    // SEI passthrough will read them via `probe_mkv_color_info`.
1550    if color_info != MkvColorInfo::default() {
1551        tracing::info!(
1552            bits_per_channel = ?color_info.bits_per_channel,
1553            max_cll = ?color_info.max_cll,
1554            max_fall = ?color_info.max_fall,
1555            mastering = ?color_info.mastering,
1556            "MKV Colour: parsed HDR-adjacent metadata"
1557        );
1558    }
1559
1560    let mut samples: Vec<Vec<u8>> = Vec::new();
1561    let mut frame = MkvFrame::default();
1562    let mut total_video_bytes: u64 = 0;
1563    // Same per-stream tracker as the MP4 path. MKV's CodecPrivate carries
1564    // the avcC / hvcC bytes verbatim, so the same first-IRAP-prepend
1565    // heuristic applies (and is more robust than the old
1566    // `is_first_video_sample` flag, which assumed sample 0 was always IRAP).
1567    let mut mkv_tracker = if needs_annexb {
1568        Some(ParamSetTracker::new(if codec_id == "V_MPEG4/ISO/AVC" {
1569            NaluCodec::Avc
1570        } else {
1571            NaluCodec::Hevc
1572        }))
1573    } else {
1574        None
1575    };
1576    loop {
1577        match mkv.next_frame(&mut frame) {
1578            Ok(true) => {
1579                if frame.track == track_number {
1580                    let raw = std::mem::take(&mut frame.data);
1581                    total_video_bytes += raw.len() as u64;
1582                    if let Some(tracker) = mkv_tracker.as_mut() {
1583                        let annexb = length_prefixed_to_annexb_tracked(
1584                            &raw,
1585                            length_size,
1586                            tracker,
1587                            &annexb_prepend,
1588                        );
1589                        samples.push(annexb);
1590                    } else {
1591                        samples.push(raw);
1592                    }
1593                }
1594            }
1595            Ok(false) => break,
1596            Err(e) => bail!("MKV frame read error: {e}"),
1597        }
1598    }
1599
1600    let total_frames = samples.len() as u64;
1601    // Frame rate fallback chain (Squad-32):
1602    //   1. samples / segment_duration  (most accurate when both are known)
1603    //   2. 1 / DefaultDuration          (Matroska's canonical per-frame ns)
1604    //   3. 30.0                         (last-resort sentinel)
1605    let frame_rate = if duration > 0.0 {
1606        total_frames as f64 / duration
1607    } else if let Some(dd_ns) = track_default_duration_ns.filter(|n| *n > 0) {
1608        1_000_000_000.0 / dd_ns as f64
1609    } else {
1610        30.0
1611    };
1612
1613    let detected_pf = codec::pixel_format::detect(&codec, &samples);
1614
1615    // Bitrate priority: Tag `BIT_RATE` if present → summed sample bytes
1616    // over the segment duration. Never 0 unless the file has no samples
1617    // AND no tag (in which case bitrate is genuinely unknowable and we
1618    // keep the historical 0 sentinel).
1619    let bitrate = match tag_bitrate {
1620        Some(b) if b > 0 => b,
1621        _ => {
1622            if duration > 0.0 && total_video_bytes > 0 {
1623                ((total_video_bytes as f64 * 8.0) / duration) as u64
1624            } else {
1625                0
1626            }
1627        }
1628    };
1629
1630    let info = StreamInfo {
1631        codec: codec.clone(),
1632        width,
1633        height,
1634        frame_rate,
1635        duration,
1636        pixel_format: detected_pf,
1637        color_space,
1638        total_frames,
1639        bitrate,
1640        color_metadata,
1641    };
1642
1643    // Audio passthrough uses its own MatroskaFile handle (re-opened) since
1644    // next_frame above already consumed the stream.
1645    let audio = extract_mkv_audio(data);
1646
1647    Ok(DemuxResult {
1648        codec,
1649        info,
1650        samples,
1651        audio,
1652    })
1653}
1654
1655/// Pull the audio track out of an MKV / WebM for passthrough. Four codec
1656/// families are recognised today (Squad-18 + Squad-23 + Squad-26):
1657/// - `A_AAC`: AAC-LC. CodecPrivate carries the AudioSpecificConfig verbatim.
1658/// - `A_OPUS`: Opus. CodecPrivate carries the OpusHead body verbatim per
1659///   RFC 7845 §5.2 (the WebM spec mirrors this) — same bytes the dOps
1660///   writer needs (in OpusHead LE numeric form).
1661/// - `A_AC3`: AC-3. CodecPrivate is empty (frames are self-describing); we
1662///   derive the `dac3` body from the first frame's sync header per
1663///   ETSI TS 102 366 §F.4.
1664/// - `A_EAC3`: E-AC-3. Same — empty CodecPrivate; derive `dec3` body from
1665///   the first frame's sync header per ETSI TS 102 366 §F.6.
1666///
1667/// Other audio codec IDs (`A_VORBIS`, `A_MPEG/L3`) log a warning and the
1668/// track is dropped — pipeline falls back to video-only.
1669///
1670/// WebM is a Matroska subset so the same code path covers both.
1671fn extract_mkv_audio(data: &[u8]) -> Option<AudioTrack> {
1672    let cursor = Cursor::new(data);
1673    let mut mkv = MatroskaFile::open(cursor).ok()?;
1674
1675    enum MkvAudioKind {
1676        Aac,
1677        Opus,
1678        Ac3,
1679        Eac3,
1680    }
1681
1682    let (track_number, kind, codec_private_or_empty, sample_rate, channels, default_duration) = {
1683        let track = mkv
1684            .tracks()
1685            .iter()
1686            .find(|t| t.track_type() == MkvTrackType::Audio)?;
1687        let codec_id = track.codec_id();
1688        let kind = match codec_id {
1689            "A_AAC" => MkvAudioKind::Aac,
1690            "A_OPUS" => MkvAudioKind::Opus,
1691            "A_AC3" => MkvAudioKind::Ac3,
1692            "A_EAC3" => MkvAudioKind::Eac3,
1693            other => {
1694                tracing::warn!(
1695                    codec = other,
1696                    "audio passthrough skipped: only AAC / Opus / AC-3 / E-AC-3 are supported"
1697                );
1698                return None;
1699            }
1700        };
1701        // CodecPrivate is mandatory for AAC / Opus (carries ASC / OpusHead).
1702        // It's typically EMPTY for AC-3 / E-AC-3 in MKV — frames are
1703        // self-describing and the dac3 / dec3 body is derived from the
1704        // first frame's sync header. Tolerate either.
1705        let codec_private = match kind {
1706            MkvAudioKind::Aac => {
1707                let cp = track.codec_private()?.to_vec();
1708                if cp.is_empty() {
1709                    return None;
1710                }
1711                cp
1712            }
1713            MkvAudioKind::Opus => {
1714                // RFC 7845 §5.2: MKV CodecPrivate carries the full OpusHead
1715                // packet — magic signature "OpusHead" + body. Our internal
1716                // AudioTrack.codec_private contract (and the dOps writer in
1717                // mux.rs) expects the post-magic body only, so strip the
1718                // 8-byte magic if present. Without this, mux reads
1719                // codec_private[10] expecting ChannelMappingFamily but
1720                // actually gets pre-skip's LSB byte of OpusHead.
1721                let mut cp = track.codec_private()?.to_vec();
1722                if cp.is_empty() {
1723                    return None;
1724                }
1725                if cp.len() >= 8 && &cp[..8] == b"OpusHead" {
1726                    cp.drain(..8);
1727                }
1728                if cp.is_empty() {
1729                    return None;
1730                }
1731                cp
1732            }
1733            MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => track
1734                .codec_private()
1735                .map(|p| p.to_vec())
1736                .unwrap_or_default(),
1737        };
1738        let audio = track.audio()?;
1739        let sr = audio.sampling_frequency() as u32;
1740        let ch = audio.channels().get() as u16;
1741        let default_duration = track.default_duration().map(|d| d.get());
1742        (
1743            track.track_number().get(),
1744            kind,
1745            codec_private,
1746            sr,
1747            ch,
1748            default_duration,
1749        )
1750    };
1751
1752    // Per-codec timescale + per-frame default duration tick conversion.
1753    //   - AAC: mdhd timescale = sample_rate; natural frame = 1024 samples.
1754    //   - Opus: mdhd timescale pinned to 48000 per RFC 7845 §3 regardless
1755    //     of the source's nominal sample_rate; natural frame = 960 samples
1756    //     (20 ms standard libopus encoder frame).
1757    //   - AC-3 / E-AC-3: mdhd timescale = sample_rate; natural frame =
1758    //     1536 samples (6 blocks × 256 / ETSI TS 102 366).
1759    let timescale = match kind {
1760        MkvAudioKind::Aac => sample_rate,
1761        MkvAudioKind::Opus => 48_000,
1762        MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => sample_rate,
1763    };
1764    let default_frame_samples_at_ts = match kind {
1765        MkvAudioKind::Aac => 1024u64,
1766        MkvAudioKind::Opus => 960u64,
1767        MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => 1536u64,
1768    };
1769    // For the fallback duration math we need the rate matching the chosen
1770    // timescale (NOT the source's nominal sample_rate when kind=Opus).
1771    let timescale_for_fallback = if timescale == 0 { 48_000 } else { timescale };
1772
1773    let mut samples: Vec<Vec<u8>> = Vec::new();
1774    let mut durations: Vec<u32> = Vec::new();
1775    let mut frame = MkvFrame::default();
1776    loop {
1777        match mkv.next_frame(&mut frame) {
1778            Ok(true) => {
1779                if frame.track == track_number {
1780                    // Prefer the block's own duration, then default_duration,
1781                    // then the codec's natural frame size at the chosen
1782                    // mdhd timescale.
1783                    let dur_ns = frame.duration.or(default_duration).unwrap_or_else(|| {
1784                        1_000_000_000u64 * default_frame_samples_at_ts
1785                            / timescale_for_fallback as u64
1786                    });
1787                    // Convert ns → mdhd timescale ticks.
1788                    let dur_ticks = ((dur_ns as u128) * (timescale as u128) / 1_000_000_000) as u32;
1789                    durations.push(dur_ticks.max(1));
1790                    samples.push(std::mem::take(&mut frame.data));
1791                }
1792            }
1793            Ok(false) => break,
1794            Err(_) => break,
1795        }
1796    }
1797
1798    if samples.is_empty() {
1799        return None;
1800    }
1801
1802    Some(match kind {
1803        MkvAudioKind::Aac => {
1804            // Squad-25: MKV `Audio.Channels` is an integer hint and the ASC
1805            // (CodecPrivate) is canonical for HE-AAC v2 PS upmix + multichannel
1806            // configs. Prefer the parsed-ASC counts when available; fall back
1807            // to whatever the MKV header advertised.
1808            let parsed = crate::aac_asc::parse_aac_asc(&codec_private_or_empty);
1809            let aac_channels = parsed
1810                .as_ref()
1811                .map(crate::aac_asc::effective_output_channels)
1812                .unwrap_or(channels);
1813            let aac_sample_rate = parsed
1814                .as_ref()
1815                .and_then(|p| p.sbr_sample_rate.or(Some(p.sample_rate)))
1816                .unwrap_or(sample_rate);
1817            AudioTrack {
1818                codec: "aac".into(),
1819                samples,
1820                sample_rate: aac_sample_rate,
1821                channels: aac_channels,
1822                asc: codec_private_or_empty,
1823                codec_private: Vec::new(),
1824                timescale: aac_sample_rate, // mdhd timescale tracks the effective rate
1825                durations,
1826            }
1827        }
1828        MkvAudioKind::Opus => AudioTrack {
1829            codec: "opus".into(),
1830            samples,
1831            sample_rate,
1832            channels,
1833            asc: Vec::new(),
1834            codec_private: codec_private_or_empty,
1835            timescale,
1836            durations,
1837        },
1838        MkvAudioKind::Ac3 => {
1839            // CodecPrivate is empty for AC-3 in MKV. Synthesize the dac3
1840            // body by walking the first frame's sync header and re-packing
1841            // per ETSI TS 102 366 §F.4. Per-frame samples already collected.
1842            let dac3 = match samples
1843                .first()
1844                .and_then(|f| crate::ac3_sync::parse_sync_info(f).ok())
1845            {
1846                Some(crate::ac3_sync::SyncInfo::Ac3(s)) => {
1847                    crate::mux::dac3_body_from_sync(&s).to_vec()
1848                }
1849                _ => {
1850                    tracing::warn!(
1851                        "MKV A_AC3: failed to parse first frame sync header — dropping audio"
1852                    );
1853                    return None;
1854                }
1855            };
1856            // Re-derive sample_rate / channel layout from the parsed sync —
1857            // it's the authoritative source.
1858            let (sr, ch) =
1859                ac3_sample_rate_channels_from_dac3(&dac3).unwrap_or((sample_rate, channels));
1860            AudioTrack {
1861                codec: "ac3".into(),
1862                samples,
1863                sample_rate: sr,
1864                channels: ch,
1865                asc: Vec::new(),
1866                codec_private: dac3,
1867                timescale: sr,
1868                durations,
1869            }
1870        }
1871        MkvAudioKind::Eac3 => {
1872            // Same story for E-AC-3: derive dec3 from the first frame.
1873            let (dec3, sr, ch) = match samples
1874                .first()
1875                .and_then(|f| crate::ac3_sync::parse_sync_info(f).ok())
1876            {
1877                Some(crate::ac3_sync::SyncInfo::Eac3(s)) => {
1878                    // data_rate (kbps / 2) computed from the source frame:
1879                    //   frame_size_bytes = (frmsiz + 1) * 2
1880                    //   bitrate_kbps = (frame_size_bytes * 8 * sample_rate) / samples_per_frame / 1000
1881                    let sr = crate::ac3_sync::eac3_sample_rate_hz(s.fscod, s.fscod2);
1882                    let spf = crate::ac3_sync::eac3_samples_per_frame(s.numblkscod) as u64;
1883                    let frame_bytes = ((s.frmsiz as u64) + 1) * 2;
1884                    let bitrate_kbps = if spf > 0 && sr > 0 {
1885                        (frame_bytes * 8 * sr as u64) / spf / 1000
1886                    } else {
1887                        0
1888                    };
1889                    let data_rate = bitrate_kbps.div_ceil(2) as u16;
1890                    let dec3 = crate::mux::dec3_body_from_sync(&s, data_rate).to_vec();
1891                    let ch = crate::ac3_sync::channel_count(s.acmod, s.lfeon);
1892                    (dec3, sr, ch)
1893                }
1894                _ => {
1895                    tracing::warn!(
1896                        "MKV A_EAC3: failed to parse first frame sync header — dropping audio"
1897                    );
1898                    return None;
1899                }
1900            };
1901            AudioTrack {
1902                codec: "eac3".into(),
1903                samples,
1904                sample_rate: sr,
1905                channels: ch,
1906                asc: Vec::new(),
1907                codec_private: dec3,
1908                timescale: sr,
1909                durations,
1910            }
1911        }
1912    })
1913}
1914
/// Reports whether an MKV CodecID names a codec whose samples are stored
/// length-prefixed (AVCC / HVCC) and therefore have to be rewritten to
/// Annex-B, with the parameter sets taken from the track's CodecPrivate,
/// before they reach a decoder that expects start codes.
///
/// Only `V_MPEG4/ISO/AVC` and `V_MPEGH/ISO/HEVC` qualify; the comparison is
/// exact and case-sensitive.
fn mkv_codec_needs_annexb(codec_id: &str) -> bool {
    const LENGTH_PREFIXED_CODEC_IDS: [&str; 2] = ["V_MPEG4/ISO/AVC", "V_MPEGH/ISO/HEVC"];
    LENGTH_PREFIXED_CODEC_IDS.contains(&codec_id)
}
1922
1923/// Walk the ISOBMFF box tree looking for an `av01` sample entry inside
1924/// `moov/trak/mdia/minf/stbl/stsd`. Returns true if found at the expected
1925/// nesting level. Doing a full tree walk (vs naive byte-search for "av01")
1926/// avoids false positives from sample data in mdat that happens to contain
1927/// those bytes.
1928/// Find the HEVC sample-entry fourcc (`hvc1`, `hev1`, `hvc2`, `hev2`,
1929/// `dvh1`, `dvhe`) in the video track's stsd box. Returns the 4-byte
1930/// fourcc or None. Used as the mp4 0.14 crate detection fallback —
1931/// its `media_type()` only returns H265 for `hev1`, so `hvc1` (the
1932/// Jellyfin corpus's HEVC flavor) needs this path.
1933fn hevc_sample_entry_fourcc(data: &[u8]) -> Option<[u8; 4]> {
1934    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
1935    let stsd_body = find_box_body(data, path)?;
1936    if stsd_body.len() < 16 {
1937        return None;
1938    }
1939    let mut pos = 8; // skip version/flags/entry_count
1940    while pos + 8 <= stsd_body.len() {
1941        let entry_size = u32::from_be_bytes([
1942            stsd_body[pos],
1943            stsd_body[pos + 1],
1944            stsd_body[pos + 2],
1945            stsd_body[pos + 3],
1946        ]) as usize;
1947        let entry_type: [u8; 4] = stsd_body[pos + 4..pos + 8].try_into().ok()?;
1948        match &entry_type {
1949            b"hvc1" | b"hev1" | b"hvc2" | b"hev2" | b"dvh1" | b"dvhe" => {
1950                return Some(entry_type);
1951            }
1952            _ => {}
1953        }
1954        if entry_size == 0 {
1955            break;
1956        }
1957        pos = pos.saturating_add(entry_size);
1958    }
1959    None
1960}
1961
1962/// Look for an Apple ProRes sample entry in the video track's stsd box.
1963/// Six fourccs cover the product family:
1964///   apcn = ProRes 422 Standard    apch = ProRes 422 HQ
1965///   apcs = ProRes 422 LT          apco = ProRes 422 Proxy
1966///   ap4h = ProRes 4444            ap4x = ProRes 4444 XQ
1967/// All share the same container layout (self-contained frame samples, no
1968/// length-prefix wrapping), so from demux's perspective they are
1969/// interchangeable — we return the first one we see so callers can log
1970/// which specific profile the input used. Decode dispatch uses the
1971/// unified `"prores"` codec label produced by `demux_mp4`.
1972fn prores_sample_entry_fourcc(data: &[u8]) -> Option<[u8; 4]> {
1973    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
1974    let stsd_body = find_box_body(data, path)?;
1975    if stsd_body.len() < 16 {
1976        return None;
1977    }
1978    let mut pos = 8;
1979    while pos + 8 <= stsd_body.len() {
1980        let entry_size = u32::from_be_bytes([
1981            stsd_body[pos],
1982            stsd_body[pos + 1],
1983            stsd_body[pos + 2],
1984            stsd_body[pos + 3],
1985        ]) as usize;
1986        let entry_type: [u8; 4] = stsd_body[pos + 4..pos + 8].try_into().ok()?;
1987        match &entry_type {
1988            b"apcn" | b"apch" | b"apcs" | b"apco" | b"ap4h" | b"ap4x" => {
1989                return Some(entry_type);
1990            }
1991            _ => {}
1992        }
1993        if entry_size == 0 {
1994            break;
1995        }
1996        pos = pos.saturating_add(entry_size);
1997    }
1998    None
1999}
2000
2001/// Find the AVC sample entry in MP4 and return its parsed avcC config
2002/// (length_size + SPS/PPS NAL units). Returns None when no `avc1`/`avc3`
2003/// sample entry is present or the avcC box is malformed.
2004fn extract_avc_config(data: &[u8]) -> Option<AvcConfig> {
2005    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
2006    let stsd_body = find_box_body(data, path)?;
2007    if stsd_body.len() < 16 {
2008        return None;
2009    }
2010
2011    let mut pos = 8;
2012    while pos + 8 <= stsd_body.len() {
2013        let entry_size = u32::from_be_bytes([
2014            stsd_body[pos],
2015            stsd_body[pos + 1],
2016            stsd_body[pos + 2],
2017            stsd_body[pos + 3],
2018        ]) as usize;
2019        let entry_type = &stsd_body[pos + 4..pos + 8];
2020        let is_avc = matches!(entry_type, b"avc1" | b"avc3");
2021        if !is_avc {
2022            if entry_size == 0 {
2023                break;
2024            }
2025            pos = pos.saturating_add(entry_size);
2026            continue;
2027        }
2028        let end = pos.saturating_add(entry_size);
2029        if end > stsd_body.len() {
2030            return None;
2031        }
2032        let child_start = pos + 8 + 78; // VisualSampleEntry fixed header
2033        if child_start >= end {
2034            return None;
2035        }
2036        let avcc = find_direct_child(&stsd_body[child_start..end], b"avcC")?;
2037        return parse_avcc(avcc);
2038    }
2039    None
2040}
2041
2042/// HDR static metadata pulled from the visual sample entry's `mdcv` and
2043/// `clli` boxes — Squad-21 wires this to ColorMetadata so Squad-20's
2044/// muxer can round-trip HDR10 mastering display + content light level
2045/// from any source MP4 / MOV that signals them.
2046#[derive(Debug, Default, Clone, Copy)]
2047struct Mp4VisualColorMetadata {
2048    mastering_display: Option<MasteringDisplay>,
2049    content_light_level: Option<ContentLightLevel>,
2050}
2051
2052/// Walk `moov/trak/mdia/minf/stbl/stsd > {av01, hvc1, hev1, ...}` and
2053/// pick out the optional `mdcv` and `clli` child boxes.
2054///
2055/// Per ISO/IEC 23001-17 (Carriage of static and dynamic metadata in
2056/// ISOBMFF), `mdcv` and `clli` are direct children of the visual
2057/// sample entry — same nesting level as `colr`. Layouts:
2058///
2059///   `mdcv` body (24 bytes):
2060///     u16[2] display_primaries[3]   // wire order GBR
2061///     u16    white_point_x
2062///     u16    white_point_y
2063///     u32    max_display_mastering_luminance  (in 0.0001 cd/m²)
2064///     u32    min_display_mastering_luminance  (in 0.0001 cd/m²)
2065///
2066///   `clli` body (4 bytes):
2067///     u16    max_content_light_level
2068///     u16    max_pic_average_light_level
2069fn extract_mp4_visual_color_metadata(data: &[u8]) -> Mp4VisualColorMetadata {
2070    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
2071    let Some(stsd_body) = find_box_body(data, path) else {
2072        return Mp4VisualColorMetadata::default();
2073    };
2074    if stsd_body.len() < 16 {
2075        return Mp4VisualColorMetadata::default();
2076    }
2077
2078    let mut pos = 8; // skip version/flags/entry_count
2079    while pos + 8 <= stsd_body.len() {
2080        let entry_size = u32::from_be_bytes([
2081            stsd_body[pos],
2082            stsd_body[pos + 1],
2083            stsd_body[pos + 2],
2084            stsd_body[pos + 3],
2085        ]) as usize;
2086        if entry_size < 8 || pos.saturating_add(entry_size) > stsd_body.len() {
2087            break;
2088        }
2089        let entry_type: [u8; 4] = match stsd_body[pos + 4..pos + 8].try_into() {
2090            Ok(v) => v,
2091            Err(_) => break,
2092        };
2093        // Visual sample entries — mdcv/clli only live under these.
2094        let is_visual = matches!(
2095            &entry_type,
2096            b"av01"
2097                | b"avc1"
2098                | b"avc3"
2099                | b"hvc1"
2100                | b"hev1"
2101                | b"hvc2"
2102                | b"hev2"
2103                | b"dvh1"
2104                | b"dvhe"
2105                | b"vp08"
2106                | b"vp09"
2107                | b"apcn"
2108                | b"apch"
2109                | b"apcs"
2110                | b"apco"
2111                | b"ap4h"
2112                | b"ap4x"
2113        );
2114        if !is_visual {
2115            pos = pos.saturating_add(entry_size);
2116            continue;
2117        }
2118        let end = pos.saturating_add(entry_size);
2119        // VisualSampleEntry header: 8-byte box header + 78 bytes of fixed
2120        // VisualSampleEntry fields before the first child box. Same
2121        // offset for every visual sample entry kind.
2122        let child_start = pos + 8 + 78;
2123        if child_start >= end {
2124            return Mp4VisualColorMetadata::default();
2125        }
2126        let children = &stsd_body[child_start..end];
2127        let mut out = Mp4VisualColorMetadata::default();
2128        if let Some(mdcv) = find_direct_child(children, b"mdcv") {
2129            out.mastering_display = parse_mp4_mdcv(mdcv);
2130        }
2131        if let Some(clli) = find_direct_child(children, b"clli") {
2132            out.content_light_level = parse_mp4_clli(clli);
2133        }
2134        return out;
2135    }
2136    Mp4VisualColorMetadata::default()
2137}
2138
2139fn parse_mp4_mdcv(body: &[u8]) -> Option<MasteringDisplay> {
2140    if body.len() < 24 {
2141        return None;
2142    }
2143    let u16be = |o: usize| u16::from_be_bytes([body[o], body[o + 1]]);
2144    let u32be = |o: usize| u32::from_be_bytes([body[o], body[o + 1], body[o + 2], body[o + 3]]);
2145    Some(MasteringDisplay {
2146        // Wire order is GBR per ISO/IEC 23001-17 §7.3.
2147        primaries_g_x: u16be(0),
2148        primaries_g_y: u16be(2),
2149        primaries_b_x: u16be(4),
2150        primaries_b_y: u16be(6),
2151        primaries_r_x: u16be(8),
2152        primaries_r_y: u16be(10),
2153        white_point_x: u16be(12),
2154        white_point_y: u16be(14),
2155        max_luminance: u32be(16),
2156        min_luminance: u32be(20),
2157    })
2158}
2159
2160fn parse_mp4_clli(body: &[u8]) -> Option<ContentLightLevel> {
2161    if body.len() < 4 {
2162        return None;
2163    }
2164    Some(ContentLightLevel {
2165        max_cll: u16::from_be_bytes([body[0], body[1]]),
2166        max_fall: u16::from_be_bytes([body[2], body[3]]),
2167    })
2168}
2169
2170/// Find the HEVC sample entry in MP4 and return its parsed hvcC config
2171/// (length_size + VPS/SPS/PPS NAL units in recorded order).
2172fn extract_hevc_config(data: &[u8]) -> Option<HevcConfig> {
2173    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
2174    let stsd_body = find_box_body(data, path)?;
2175    if stsd_body.len() < 16 {
2176        return None;
2177    }
2178
2179    let mut pos = 8;
2180    while pos + 8 <= stsd_body.len() {
2181        let entry_size = u32::from_be_bytes([
2182            stsd_body[pos],
2183            stsd_body[pos + 1],
2184            stsd_body[pos + 2],
2185            stsd_body[pos + 3],
2186        ]) as usize;
2187        let entry_type = &stsd_body[pos + 4..pos + 8];
2188        let is_hevc = matches!(
2189            entry_type,
2190            b"hvc1" | b"hev1" | b"hvc2" | b"hev2" | b"dvh1" | b"dvhe"
2191        );
2192        if !is_hevc {
2193            if entry_size == 0 {
2194                break;
2195            }
2196            pos = pos.saturating_add(entry_size);
2197            continue;
2198        }
2199        let end = pos.saturating_add(entry_size);
2200        if end > stsd_body.len() {
2201            return None;
2202        }
2203        let child_start = pos + 8 + 78;
2204        if child_start >= end {
2205            return None;
2206        }
2207        let hvcc = find_direct_child(&stsd_body[child_start..end], b"hvcC")?;
2208        return parse_hvcc(hvcc);
2209    }
2210    None
2211}
2212
2213/// Extract VPS/SPS/PPS NAL units from the `hvcC` config box nested
2214/// under the HEVC sample entry. The hvcC layout (ISO/IEC 14496-15
2215/// §8.3.3) puts parameter-set arrays at offset 22, each array as:
2216/// `array_type u8 | num_nalus u16 BE | [{nalu_len u16 BE, nalu ...}]`.
2217#[allow(dead_code)]
2218fn extract_hevc_parameter_sets(data: &[u8]) -> Vec<Vec<u8>> {
2219    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
2220    let Some(stsd_body) = find_box_body(data, path) else {
2221        return Vec::new();
2222    };
2223    if stsd_body.len() < 16 {
2224        return Vec::new();
2225    }
2226
2227    // Walk the stsd entries, find the HEVC sample entry.
2228    let mut pos = 8;
2229    while pos + 8 <= stsd_body.len() {
2230        let entry_size = u32::from_be_bytes([
2231            stsd_body[pos],
2232            stsd_body[pos + 1],
2233            stsd_body[pos + 2],
2234            stsd_body[pos + 3],
2235        ]) as usize;
2236        let entry_type = &stsd_body[pos + 4..pos + 8];
2237        let is_hevc = matches!(
2238            entry_type,
2239            b"hvc1" | b"hev1" | b"hvc2" | b"hev2" | b"dvh1" | b"dvhe"
2240        );
2241        if !is_hevc {
2242            if entry_size == 0 {
2243                break;
2244            }
2245            pos = pos.saturating_add(entry_size);
2246            continue;
2247        }
2248
2249        let end = pos.saturating_add(entry_size);
2250        if end > stsd_body.len() {
2251            return Vec::new();
2252        }
2253        let entry_body_start = pos + 8;
2254        // VisualSampleEntry header: 78 bytes between fourcc and first
2255        // child box (6 reserved + 2 data_ref_idx + 16 pre_defined +
2256        // 2 width + 2 height + 4x2 resolutions + 4 reserved + 2 frame_count
2257        // + 32 compressor_name + 2 depth + 2 pre_defined).
2258        let child_start = entry_body_start + 78;
2259        if child_start >= end {
2260            return Vec::new();
2261        }
2262        let child_area = &stsd_body[child_start..end];
2263        let hvcc = match find_direct_child(child_area, b"hvcC") {
2264            Some(b) => b,
2265            None => return Vec::new(),
2266        };
2267        return parse_hvcc_param_sets(hvcc);
2268    }
2269    Vec::new()
2270}
2271
/// Pull the SPS and PPS NAL unit payloads out of an H.264
/// AVCDecoderConfigurationRecord (the body of an `avcC` box).
///
/// Record layout as consumed here:
///   byte 5        low 5 bits = number of SPS entries
///   per SPS       u16 BE length, then that many payload bytes
///   next byte     number of PPS entries
///   per PPS       u16 BE length, then that many payload bytes
/// Bytes 0..=4 (version, profile, compatibility, level, length-size) are
/// not interpreted.
///
/// SPS units come first in the result, then PPS units. Parsing is
/// best-effort: a record shorter than 7 bytes yields an empty vector, and
/// a truncated record yields the units that were complete before the
/// truncation point.
///
/// Kept as a back-compat alias over `crate::annexb::parse_avcc` — callers
/// should prefer that parser, which also returns `length_size`.
#[allow(dead_code)]
fn parse_avcc_param_sets(avcc: &[u8]) -> Vec<Vec<u8>> {
    /// Copy `count` length-prefixed NAL units starting at `*cursor` into
    /// `sink`, advancing the cursor. Returns `false` as soon as the record
    /// ends before a unit is complete.
    fn take_nalus(
        rec: &[u8],
        cursor: &mut usize,
        count: usize,
        sink: &mut Vec<Vec<u8>>,
    ) -> bool {
        for _ in 0..count {
            let Some(prefix) = rec.get(*cursor..*cursor + 2) else {
                return false;
            };
            let nalu_len = usize::from(u16::from_be_bytes([prefix[0], prefix[1]]));
            let start = *cursor + 2;
            let Some(nalu) = rec.get(start..start + nalu_len) else {
                return false;
            };
            sink.push(nalu.to_vec());
            *cursor = start + nalu_len;
        }
        true
    }

    let mut nalus = Vec::new();
    if avcc.len() < 7 {
        return nalus;
    }

    let sps_count = usize::from(avcc[5] & 0x1F);
    let mut cursor = 6;
    if !take_nalus(avcc, &mut cursor, sps_count, &mut nalus) {
        return nalus;
    }

    // The PPS count byte may be missing if the record stops after the SPS list.
    let Some(&pps_count) = avcc.get(cursor) else {
        return nalus;
    };
    cursor += 1;
    take_nalus(avcc, &mut cursor, usize::from(pps_count), &mut nalus);
    nalus
}
2326
/// Collect every NAL unit payload stored in an
/// HEVCDecoderConfigurationRecord (the body of an `hvcC` box), in
/// recorded order.
///
/// Only the tail of the record is interpreted:
///   byte 22       numOfArrays
///   per array     u8 header (completeness / NAL type, not inspected),
///                 u16 BE numNalus
///   per NAL unit  u16 BE length, then that many payload bytes
/// The 22 bytes before `numOfArrays` (profile, level, chroma format, bit
/// depths, frame rate, length size, ...) are skipped.
///
/// Parsing is best-effort: a record shorter than 23 bytes yields an empty
/// vector, and a truncated record yields the units that were complete
/// before the truncation point.
#[allow(dead_code)]
fn parse_hvcc_param_sets(hvcc: &[u8]) -> Vec<Vec<u8>> {
    const NUM_ARRAYS_OFFSET: usize = 22;

    let mut nalus = Vec::new();
    let Some(&array_count) = hvcc.get(NUM_ARRAYS_OFFSET) else {
        return nalus;
    };

    let mut cursor = NUM_ARRAYS_OFFSET + 1;
    'arrays: for _ in 0..array_count {
        let Some(array_header) = hvcc.get(cursor..cursor + 3) else {
            break;
        };
        // array_header[0] carries completeness + NAL type; unused here.
        let nalu_count = u16::from_be_bytes([array_header[1], array_header[2]]);
        cursor += 3;

        for _ in 0..nalu_count {
            let Some(prefix) = hvcc.get(cursor..cursor + 2) else {
                break 'arrays;
            };
            let nalu_len = usize::from(u16::from_be_bytes([prefix[0], prefix[1]]));
            let start = cursor + 2;
            let Some(nalu) = hvcc.get(start..start + nalu_len) else {
                break 'arrays;
            };
            nalus.push(nalu.to_vec());
            cursor = start + nalu_len;
        }
    }
    nalus
}
2375
2376fn has_av01_sample_entry(data: &[u8]) -> bool {
2377    let path: &[&[u8; 4]] = &[b"moov", b"trak", b"mdia", b"minf", b"stbl", b"stsd"];
2378    let stsd_body = match find_box_body(data, path) {
2379        Some(b) => b,
2380        None => return false,
2381    };
2382    // stsd: 1 byte version + 3 flags + 4 entry_count + [box header { size u32, type [u8;4] }...]
2383    if stsd_body.len() < 16 {
2384        return false;
2385    }
2386    let mut pos = 8; // skip version/flags/entry_count
2387    while pos + 8 <= stsd_body.len() {
2388        let entry_type = &stsd_body[pos + 4..pos + 8];
2389        if entry_type == b"av01" {
2390            return true;
2391        }
2392        let entry_size = u32::from_be_bytes([
2393            stsd_body[pos],
2394            stsd_body[pos + 1],
2395            stsd_body[pos + 2],
2396            stsd_body[pos + 3],
2397        ]) as usize;
2398        if entry_size == 0 {
2399            break;
2400        }
2401        pos = pos.saturating_add(entry_size);
2402    }
2403    false
2404}
2405
2406/// Follow a box type path from `data` (top level) down and return the body
2407/// bytes (payload, excluding the 8-byte box header) of the last box in the
2408/// path, or None if any hop is missing. Handles 32-bit box sizes only —
2409/// adequate for moov/trak/stsd which are ~KB in practice.
2410fn find_box_body<'a>(data: &'a [u8], path: &[&[u8; 4]]) -> Option<&'a [u8]> {
2411    let mut slice = data;
2412    for (i, target) in path.iter().enumerate() {
2413        let found = find_direct_child(slice, target)?;
2414        if i + 1 == path.len() {
2415            return Some(found);
2416        }
2417        slice = found;
2418    }
2419    None
2420}
2421
/// Scan the sibling boxes laid out back-to-back in `data` and return the
/// payload (bytes after the box header) of the first one whose fourcc
/// equals `target`.
///
/// Box sizes follow the ISO base media file format header rules:
/// * `size >= 8`: 32-bit size covering the 8-byte header plus payload.
/// * `size == 1`: a 64-bit `largesize` follows the fourcc (16-byte header).
///   Large `mdat` boxes use this, and they must be skippable to reach a
///   `moov` stored after them.
/// * `size == 0`: the box extends to the end of `data`.
///
/// Returns `None` when no box matches, or when a malformed box is met
/// before a match: declared size smaller than its header, size running
/// past `data`, or a truncated `largesize` field.
fn find_direct_child<'a>(data: &'a [u8], target: &[u8; 4]) -> Option<&'a [u8]> {
    const HEADER_LEN: usize = 8;
    const LARGE_HEADER_LEN: usize = 16;

    let mut pos = 0usize;
    while let Some(header) = data.get(pos..).and_then(|rest| rest.get(..HEADER_LEN)) {
        let size32 = u32::from_be_bytes([header[0], header[1], header[2], header[3]]);
        let (header_len, box_len) = match size32 {
            // Box runs to the end of the enclosing data.
            0 => (HEADER_LEN, data.len() - pos),
            // 64-bit size stored right after the fourcc.
            1 => {
                let ext = data.get(pos + HEADER_LEN..pos + LARGE_HEADER_LEN)?;
                let large = u64::from_be_bytes([
                    ext[0], ext[1], ext[2], ext[3], ext[4], ext[5], ext[6], ext[7],
                ]);
                (LARGE_HEADER_LEN, usize::try_from(large).ok()?)
            }
            n => (HEADER_LEN, usize::try_from(n).ok()?),
        };
        // A box can never be smaller than its own header; this also
        // guarantees forward progress.
        if box_len < header_len {
            return None;
        }
        let end = pos
            .checked_add(box_len)
            .filter(|&end| end <= data.len())?;
        if &header[4..8] == target {
            return Some(&data[pos + header_len..end]);
        }
        pos = end;
    }
    None
}
2438
2439fn format_codec(track: &mp4::Mp4Track) -> String {
2440    match track.media_type() {
2441        Ok(mp4::MediaType::H264) => "h264".into(),
2442        Ok(mp4::MediaType::H265) => "h265".into(),
2443        Ok(mp4::MediaType::VP9) => "vp9".into(),
2444        _ => "unknown".into(),
2445    }
2446}
2447
2448fn extract_sps_pps(reader: &Mp4Reader<Cursor<&[u8]>>, track_id: u32) -> Vec<Vec<u8>> {
2449    let mut nalus = Vec::new();
2450    if let Some(track) = reader.tracks().get(&track_id)
2451        && let Some(ref avc1) = track.trak.mdia.minf.stbl.stsd.avc1
2452    {
2453        for sps in &avc1.avcc.sequence_parameter_sets {
2454            nalus.push(sps.bytes.to_vec());
2455        }
2456        for pps in &avc1.avcc.picture_parameter_sets {
2457            nalus.push(pps.bytes.to_vec());
2458        }
2459    }
2460    nalus
2461}
2462
2463// `avcc_to_annexb` was removed when MP4 and MKV paths converged on the
2464// shared `crate::annexb::length_prefixed_to_annexb` helper, which also
2465// honors non-4-byte length prefixes recorded in `lengthSizeMinusOne`.
2466
2467/// Map a Matroska `Colour` element into our pipeline's color-space,
2468/// per-H.273 `ColorMetadata`, and extended `MkvColorInfo`. Unspecified
2469/// sub-elements default to the SDR BT.709 baseline so decoders that
2470/// never read a Colour element keep behaving exactly as before.
2471fn colour_to_pipeline(colour: &MkvColour) -> (ColorSpace, ColorMetadata, MkvColorInfo) {
2472    let matrix_u8 = colour
2473        .matrix_coefficients()
2474        .map(matrix_coefficients_to_h273);
2475    let primaries_u8 = colour.primaries().map(primaries_to_h273);
2476    let transfer_u8 = colour.transfer_characteristics().map(transfer_to_h273);
2477    let range = colour.range();
2478
2479    let color_space = match colour.matrix_coefficients() {
2480        Some(MatrixCoefficients::Bt709) => ColorSpace::Bt709,
2481        Some(MatrixCoefficients::Bt470bg) | Some(MatrixCoefficients::Smpte170) => ColorSpace::Bt601,
2482        Some(MatrixCoefficients::Bt2020Ncl)
2483        | Some(MatrixCoefficients::Bt2020Cl)
2484        | Some(MatrixCoefficients::Bt2100) => ColorSpace::Bt2020,
2485        _ => ColorSpace::Bt709,
2486    };
2487
2488    let mastering = colour.mastering_metadata().map(mkv_mastering_to_local);
2489    let mkv_max_cll = colour.max_cll().and_then(|v| u32::try_from(v).ok());
2490    let mkv_max_fall = colour.max_fall().and_then(|v| u32::try_from(v).ok());
2491
2492    // Squad-21: also synthesize the unified ColorMetadata HDR fields from
2493    // the MKV `MasteringMetadata` + `MaxCLL` / `MaxFALL` so the muxer
2494    // (Squad-20) can write `mdcv`/`clli` without re-reading the
2495    // MKV-specific MkvColorInfo struct. matroska-demuxer 0.7's MaxCLL/
2496    // MaxFALL bug (see `probe_mkv_color_info`) means the values here
2497    // come from the typed accessor — for the canonical scan we re-read
2498    // raw bytes in `probe_mkv_color_info`. The two paths agree on
2499    // well-formed MKVs and disagree only on malformed ones (where the
2500    // raw scan wins). Pipeline plumbs the raw-scan path for MKV.
2501    let unified_mastering = mastering.as_ref().and_then(mkv_mastering_to_unified);
2502    let unified_cll = match (mkv_max_cll, mkv_max_fall) {
2503        (None, None) => None,
2504        (cll, fall) => Some(ContentLightLevel {
2505            max_cll: cll.unwrap_or(0).min(u16::MAX as u32) as u16,
2506            max_fall: fall.unwrap_or(0).min(u16::MAX as u32) as u16,
2507        }),
2508    };
2509
2510    let color_metadata = ColorMetadata {
2511        transfer: transfer_u8.map(TransferFn::from_h273).unwrap_or_default(),
2512        matrix_coefficients: matrix_u8.unwrap_or(1),
2513        colour_primaries: primaries_u8.unwrap_or(1),
2514        // H.273 full_range_flag: Matroska Range=2 (Full) sets it; any
2515        // other value (Broadcast, Defined, Unknown) keeps the studio
2516        // 16..235 default.
2517        full_range: matches!(range, Some(MkvRange::Full)),
2518        // Squad-21 wires MKV float chromaticities + max_cll/fall into
2519        // the H.265-spec u16 encoding via `mkv_mastering_to_unified` and
2520        // the f64 → cd/m² conversion above (also recovers around two
2521        // matroska-demuxer 0.7 bugs that misread MaxCLL/MaxFALL and y
2522        // chromaticities at the wrong ElementIds).
2523        mastering_display: unified_mastering,
2524        content_light_level: unified_cll,
2525    };
2526
2527    let extra = MkvColorInfo {
2528        bits_per_channel: colour.bits_per_channel().and_then(|v| u8::try_from(v).ok()),
2529        chroma_subsampling_horz: colour
2530            .chroma_subsampling_horz()
2531            .and_then(|v| u8::try_from(v).ok()),
2532        chroma_subsampling_vert: colour
2533            .chroma_subsampling_vert()
2534            .and_then(|v| u8::try_from(v).ok()),
2535        chroma_siting_horz: colour.chroma_sitting_horz().map(chroma_siting_horz_to_u8),
2536        chroma_siting_vert: colour.chroma_sitting_vert().map(chroma_siting_vert_to_u8),
2537        max_cll: mkv_max_cll,
2538        max_fall: mkv_max_fall,
2539        mastering,
2540    };
2541
2542    (color_space, color_metadata, extra)
2543}
2544
2545/// Convert the Matroska f64 chromaticities (range 0..=1) and luminance
2546/// (cd/m²) into the integer encoding the unified `MasteringDisplay`
2547/// uses (HEVC SEI D.2.28 wire format). Returns `None` when no
2548/// sub-element of the MasteringMetadata was populated.
2549fn mkv_mastering_to_unified(m: &MkvMasteringMetadata) -> Option<MasteringDisplay> {
2550    if m.primary_r_chromaticity_x.is_none()
2551        && m.primary_g_chromaticity_x.is_none()
2552        && m.primary_b_chromaticity_x.is_none()
2553        && m.white_point_chromaticity_x.is_none()
2554        && m.luminance_max.is_none()
2555        && m.luminance_min.is_none()
2556    {
2557        return None;
2558    }
2559    let chrom = |v: Option<f64>| -> u16 {
2560        // 0.00002 increments per HEVC SEI D.2.28 — map [0.0, ~1.31)
2561        // into a u16 with saturation.
2562        let scaled = (v.unwrap_or(0.0) * 50_000.0).round();
2563        scaled.clamp(0.0, u16::MAX as f64) as u16
2564    };
2565    let max_lum = (m.luminance_max.unwrap_or(0.0) * 10_000.0).round();
2566    let min_lum = (m.luminance_min.unwrap_or(0.0) * 10_000.0).round();
2567    Some(MasteringDisplay {
2568        primaries_r_x: chrom(m.primary_r_chromaticity_x),
2569        primaries_r_y: chrom(m.primary_r_chromaticity_y),
2570        primaries_g_x: chrom(m.primary_g_chromaticity_x),
2571        primaries_g_y: chrom(m.primary_g_chromaticity_y),
2572        primaries_b_x: chrom(m.primary_b_chromaticity_x),
2573        primaries_b_y: chrom(m.primary_b_chromaticity_y),
2574        white_point_x: chrom(m.white_point_chromaticity_x),
2575        white_point_y: chrom(m.white_point_chromaticity_y),
2576        max_luminance: max_lum.clamp(0.0, u32::MAX as f64) as u32,
2577        min_luminance: min_lum.clamp(0.0, u32::MAX as f64) as u32,
2578    })
2579}
2580
2581fn mkv_mastering_to_local(m: &MkvMastering) -> MkvMasteringMetadata {
2582    MkvMasteringMetadata {
2583        primary_r_chromaticity_x: m.primary_r_chromaticity_x(),
2584        primary_r_chromaticity_y: m.primary_r_chromaticity_y(),
2585        primary_g_chromaticity_x: m.primary_g_chromaticity_x(),
2586        primary_g_chromaticity_y: m.primary_g_chromaticity_y(),
2587        primary_b_chromaticity_x: m.primary_b_chromaticity_x(),
2588        primary_b_chromaticity_y: m.primary_b_chromaticity_y(),
2589        white_point_chromaticity_x: m.white_point_chromaticity_x(),
2590        white_point_chromaticity_y: m.white_point_chromaticity_y(),
2591        luminance_max: m.luminance_max(),
2592        luminance_min: m.luminance_min(),
2593    }
2594}
2595
2596/// MatroskaElement MatrixCoefficients (0x55B1) uses the H.273 numbering
2597/// 1:1, but the `matroska-demuxer` enum hides the raw u8. Reverse the
2598/// mapping so downstream (mux `colr nclx`, nvenc encode params) can
2599/// write the original numeric value back out without re-deriving it.
2600fn matrix_coefficients_to_h273(m: MatrixCoefficients) -> u8 {
2601    match m {
2602        MatrixCoefficients::Identity => 0,
2603        MatrixCoefficients::Bt709 => 1,
2604        MatrixCoefficients::Fcc73682 => 4,
2605        MatrixCoefficients::Bt470bg => 5,
2606        MatrixCoefficients::Smpte170 => 6,
2607        MatrixCoefficients::Smpte240 => 7,
2608        MatrixCoefficients::YCoCg => 8,
2609        MatrixCoefficients::Bt2020Ncl => 9,
2610        MatrixCoefficients::Bt2020Cl => 10,
2611        MatrixCoefficients::SmpteSt2085 => 11,
2612        MatrixCoefficients::ChromaDerivedNcl => 12,
2613        MatrixCoefficients::ChromaDerivedCl => 13,
2614        MatrixCoefficients::Bt2100 => 14,
2615        MatrixCoefficients::Unknown => 2, // H.273 "unspecified"
2616    }
2617}
2618
2619fn transfer_to_h273(t: TransferCharacteristics) -> u8 {
2620    match t {
2621        TransferCharacteristics::Bt709 => 1,
2622        TransferCharacteristics::Bt407m => 4,
2623        TransferCharacteristics::Bt407bg => 5,
2624        TransferCharacteristics::Smpte170 => 6,
2625        TransferCharacteristics::Smpte240 => 7,
2626        TransferCharacteristics::Linear => 8,
2627        TransferCharacteristics::Log => 9,
2628        TransferCharacteristics::LogSqrt => 10,
2629        TransferCharacteristics::Iec61966_2_4 => 11,
2630        TransferCharacteristics::Bt1361 => 12,
2631        TransferCharacteristics::Iec61966_2_1 => 13,
2632        TransferCharacteristics::Bt220_10 => 14,
2633        TransferCharacteristics::Bt220_12 => 15,
2634        TransferCharacteristics::Bt2100 => 16,
2635        TransferCharacteristics::SmpteSt428_1 => 17,
2636        TransferCharacteristics::Hlg => 18,
2637        TransferCharacteristics::Unknown => 2,
2638    }
2639}
2640
2641fn primaries_to_h273(p: Primaries) -> u8 {
2642    match p {
2643        Primaries::Bt709 => 1,
2644        Primaries::Bt470m => 4,
2645        Primaries::Bt601 => 5,
2646        Primaries::Smpte170 => 6,
2647        Primaries::Smpte240 => 7,
2648        Primaries::Film => 8,
2649        Primaries::Bt2020 => 9,
2650        Primaries::SmpteSt428_1 => 10,
2651        Primaries::SmpteRp432_2 => 11,
2652        Primaries::SmpteEg432_2 => 12,
2653        Primaries::JedecP22 => 22,
2654        Primaries::Unknown => 2,
2655    }
2656}
2657
2658fn chroma_siting_horz_to_u8(s: matroska_demuxer::ChromaSitingHorz) -> u8 {
2659    match s {
2660        matroska_demuxer::ChromaSitingHorz::LeftCollated => 1,
2661        matroska_demuxer::ChromaSitingHorz::Half => 2,
2662        matroska_demuxer::ChromaSitingHorz::Unknown => 0,
2663    }
2664}
2665
2666fn chroma_siting_vert_to_u8(s: matroska_demuxer::ChromaSitingVert) -> u8 {
2667    match s {
2668        matroska_demuxer::ChromaSitingVert::LeftCollated => 1,
2669        matroska_demuxer::ChromaSitingVert::Half => 2,
2670        matroska_demuxer::ChromaSitingVert::Unknown => 0,
2671    }
2672}
2673
2674/// Resolve a track-scoped `BIT_RATE` Matroska Tag to a bits-per-second
2675/// value. Matroska's tag-scoping rules (spec §"Tagging") say: a Tag
2676/// applies to the track whose `TagTrackUID` matches, or to every track
2677/// in the segment if `TagTrackUID` is absent or 0. We prefer an exact
2678/// UID match, fall back to a segment-wide tag when no per-track value
2679/// exists.
2680///
2681/// `BIT_RATE` is the canonical Matroska target tag name (FFmpeg writes
2682/// it; the MKVToolNix matrix documents it). Some encoders emit
2683/// `BPS` / `BPS-eng` instead — we accept both for robustness. Values
2684/// are strings of base-10 digits in bits per second.
2685fn bitrate_from_tags(tags: &[matroska_demuxer::Tag], track_uid: u64) -> Option<u64> {
2686    let matches_track = |tag: &matroska_demuxer::Tag| -> bool {
2687        match tag.targets() {
2688            None => true, // Segment-wide — applies to all tracks.
2689            Some(t) => match t.tag_track_uid() {
2690                None | Some(0) => true,
2691                Some(uid) => uid == track_uid,
2692            },
2693        }
2694    };
2695    let mut segment_wide: Option<u64> = None;
2696    let mut track_scoped: Option<u64> = None;
2697    for tag in tags {
2698        if !matches_track(tag) {
2699            continue;
2700        }
2701        for st in tag.simple_tags() {
2702            let name = st.name();
2703            let is_bitrate = name.eq_ignore_ascii_case("BIT_RATE")
2704                || name.eq_ignore_ascii_case("BPS")
2705                || name.to_ascii_uppercase().starts_with("BPS-");
2706            if !is_bitrate {
2707                continue;
2708            }
2709            let Some(val) = st.string() else {
2710                continue;
2711            };
2712            let Ok(parsed) = val.trim().parse::<u64>() else {
2713                continue;
2714            };
2715            let is_track_scoped = tag
2716                .targets()
2717                .and_then(|t| t.tag_track_uid())
2718                .map(|uid| uid == track_uid)
2719                .unwrap_or(false);
2720            if is_track_scoped {
2721                track_scoped = Some(parsed);
2722            } else if segment_wide.is_none() {
2723                segment_wide = Some(parsed);
2724            }
2725        }
2726    }
2727    track_scoped.or(segment_wide)
2728}
2729
2730/// Re-open an MKV container solely to extract the extended Colour
2731/// sub-elements that don't fit on `StreamInfo.color_metadata`
2732/// (MaxCLL / MaxFALL / SMPTE-2086 mastering primaries / bits_per_channel /
2733/// chroma siting). Intended for downstream paths that need HDR10 side
2734/// data for muxing; returns `None` when the file has no video track,
2735/// no `Colour` element, or isn't a well-formed MKV.
2736pub fn probe_mkv_color_info(data: &[u8]) -> Option<MkvColorInfo> {
2737    let cursor = Cursor::new(data);
2738    let mkv = MatroskaFile::open(cursor).ok()?;
2739    let track = mkv
2740        .tracks()
2741        .iter()
2742        .find(|t| t.track_type() == MkvTrackType::Video)?;
2743    let colour = track.video()?.colour()?;
2744    let (_, _, mut info) = colour_to_pipeline(colour);
2745
2746    // matroska-demuxer 0.7 has two known bugs we work around with a raw
2747    // EBML scan (see `scan_mkv_colour_raw` doc):
2748    //   * `Colour::new` misreads MaxCLL/MaxFALL at the MatrixCoefficients
2749    //     ElementId offset (so both come back holding the matrix value).
2750    //   * `MasteringMetadata::new` misreads each `_chromaticity_y` at the
2751    //     matching `_chromaticity_x` ElementId (so all three primaries' y
2752    //     values come back holding the corresponding x value).
2753    // Clear the poisoned fields before the raw scan overrides them so a
2754    // scan miss doesn't leave the wrong value in place.
2755    info.max_cll = None;
2756    info.max_fall = None;
2757    if let Some(local) = info.mastering.as_mut() {
2758        local.primary_r_chromaticity_y = None;
2759        local.primary_g_chromaticity_y = None;
2760        local.primary_b_chromaticity_y = None;
2761    }
2762    if let Some(fix) = scan_mkv_colour_raw(data) {
2763        info.max_cll = fix.max_cll;
2764        info.max_fall = fix.max_fall;
2765        if let Some(local) = info.mastering.as_mut() {
2766            if fix.primary_r_chromaticity_y.is_some() {
2767                local.primary_r_chromaticity_y = fix.primary_r_chromaticity_y;
2768            }
2769            if fix.primary_g_chromaticity_y.is_some() {
2770                local.primary_g_chromaticity_y = fix.primary_g_chromaticity_y;
2771            }
2772            if fix.primary_b_chromaticity_y.is_some() {
2773                local.primary_b_chromaticity_y = fix.primary_b_chromaticity_y;
2774            }
2775        }
2776    }
2777    Some(info)
2778}
2779
/// Values recovered by `scan_mkv_colour_raw`'s raw-bytes EBML walk of a
/// Colour element: MaxCLL (0x55BC), MaxFALL (0x55BD), and the mastering
/// display `chromaticity_y` fields (0x55D2 / 0x55D4 / 0x55D6).
///
/// Exists purely as a workaround for matroska-demuxer 0.7 bugs noted by
/// the original authors:
///   * `Colour::new` reads MaxCLL / MaxFALL from MatrixCoefficients
///     (lib.rs:725..728 in matroska-demuxer-0.7.0/src/lib.rs).
///   * `MasteringMetadata::new` reads `primary_{r,g,b}_chromaticity_y`
///     from the matching X ElementId (lib.rs:846/848/850), so all three
///     y values come back holding the corresponding x value.
///
/// A field is `None` when the scan did not set it. `Default` gives the
/// all-`None` value the scan starts from.
#[derive(Default)]
struct RawColourFix {
    /// MaxCLL (element 0x55BC); `None` if absent or not representable as `u32`.
    max_cll: Option<u32>,
    /// MaxFALL (element 0x55BD); `None` if absent or not representable as `u32`.
    max_fall: Option<u32>,
    /// Red primary y chromaticity (element 0x55D2) — Squad-21.
    primary_r_chromaticity_y: Option<f64>,
    /// Green primary y chromaticity (element 0x55D4).
    primary_g_chromaticity_y: Option<f64>,
    /// Blue primary y chromaticity (element 0x55D6).
    primary_b_chromaticity_y: Option<f64>,
}
2800
2801fn scan_mkv_colour_raw(data: &[u8]) -> Option<RawColourFix> {
2802    // Top-level: EBML header (0x1A45DFA3) then Segment (0x18538067).
2803    // We walk linearly until we find the Segment element and grab its
2804    // payload bytes — all subsequent work is inside that slice.
2805    let mut cursor = 0;
2806    let seg_body: &[u8] = loop {
2807        let (el, after) = next_ebml_element(data, cursor)?;
2808        if el.id == 0x18538067 {
2809            break &data[el.body_start..el.body_start + el.body_len];
2810        }
2811        cursor = after;
2812    };
2813
2814    // Segment → Tracks (0x1654AE6B). Segment may carry many top-level
2815    // elements in any order — walk them until we find Tracks.
2816    let tracks = find_ebml_child(seg_body, 0x1654AE6B)?;
2817    // Tracks → TrackEntry* (0xAE). Look for the first TrackEntry whose
2818    // Video sub-element has a Colour; that's the path we care about.
2819    let mut cur = 0;
2820    while cur < tracks.len() {
2821        let (el, after) = next_ebml_element(tracks, cur)?;
2822        cur = after;
2823        if el.id != 0xAE {
2824            continue;
2825        }
2826        let entry = &tracks[el.body_start..el.body_start + el.body_len];
2827        let Some(video) = find_ebml_child(entry, 0xE0) else {
2828            continue;
2829        };
2830        let Some(colour) = find_ebml_child(video, 0x55B0) else {
2831            continue;
2832        };
2833
2834        let mut fix = RawColourFix::default();
2835        let mut c = 0;
2836        while c < colour.len() {
2837            let (ce, after_ce) = match next_ebml_element(colour, c) {
2838                Some(v) => v,
2839                None => break,
2840            };
2841            c = after_ce;
2842            let value_bytes = &colour[ce.body_start..ce.body_start + ce.body_len];
2843            match ce.id {
2844                0x55BC => {
2845                    fix.max_cll = read_unsigned(value_bytes).and_then(|v| u32::try_from(v).ok());
2846                }
2847                0x55BD => {
2848                    fix.max_fall = read_unsigned(value_bytes).and_then(|v| u32::try_from(v).ok());
2849                }
2850                // MasteringMetadata sub-element (0x55D0). Walk its children
2851                // and pull the three buggy y-chromaticities so callers can
2852                // override the typed-accessor reads.
2853                0x55D0 => {
2854                    let md = value_bytes;
2855                    let mut mc = 0;
2856                    while mc < md.len() {
2857                        let (mce, after_mce) = match next_ebml_element(md, mc) {
2858                            Some(v) => v,
2859                            None => break,
2860                        };
2861                        mc = after_mce;
2862                        let mv = &md[mce.body_start..mce.body_start + mce.body_len];
2863                        match mce.id {
2864                            0x55D2 => fix.primary_r_chromaticity_y = read_float(mv),
2865                            0x55D4 => fix.primary_g_chromaticity_y = read_float(mv),
2866                            0x55D6 => fix.primary_b_chromaticity_y = read_float(mv),
2867                            _ => {}
2868                        }
2869                    }
2870                }
2871                _ => {}
2872            }
2873        }
2874        if fix.max_cll.is_some()
2875            || fix.max_fall.is_some()
2876            || fix.primary_r_chromaticity_y.is_some()
2877            || fix.primary_g_chromaticity_y.is_some()
2878            || fix.primary_b_chromaticity_y.is_some()
2879        {
2880            return Some(fix);
2881        }
2882    }
2883    None
2884}
2885
2886/// Walk the direct children of `buf` (assumed to be an EBML master
2887/// element body, NOT starting with the master's own header) and
2888/// return the payload slice of the first element with id `want`.
2889fn find_ebml_child(buf: &[u8], want: u32) -> Option<&[u8]> {
2890    let mut cur = 0;
2891    while cur < buf.len() {
2892        let (el, after) = next_ebml_element(buf, cur)?;
2893        cur = after;
2894        if el.id == want {
2895            return Some(&buf[el.body_start..el.body_start + el.body_len]);
2896        }
2897    }
2898    None
2899}
2900
/// Location of one EBML element inside the buffer it was parsed from, as
/// produced by `next_ebml_element`. Offsets are relative to the start of
/// that buffer.
#[derive(Debug)]
struct RawEbmlElement {
    /// Element ID as read from the stream, marker bits included
    /// (e.g. `0x18538067` for Segment).
    id: u32,
    /// Offset of the first payload byte, just past the ID and size fields.
    body_start: usize,
    /// Payload length in bytes.
    body_len: usize,
}
2907
2908/// Read a single EBML element at `off` within `buf`. Returns the
2909/// element descriptor plus the byte offset immediately after the
2910/// element (header + body). Only handles up to 4-byte IDs (all
2911/// Matroska elements fit) and size VInts up to 8 bytes.
2912fn next_ebml_element(buf: &[u8], off: usize) -> Option<(RawEbmlElement, usize)> {
2913    if off >= buf.len() {
2914        return None;
2915    }
2916    let (id, id_len) = read_id_vint(&buf[off..])?;
2917    let body_off = off + id_len;
2918    if body_off >= buf.len() {
2919        return None;
2920    }
2921    let (size, size_len) = read_size_vint(&buf[body_off..])?;
2922    let body_start = body_off + size_len;
2923    if body_start + size as usize > buf.len() {
2924        return None;
2925    }
2926    let elem = RawEbmlElement {
2927        id,
2928        body_start,
2929        body_len: size as usize,
2930    };
2931    Some((elem, body_start + size as usize))
2932}
2933
/// Decode an EBML element id (1..=4 bytes) from the front of `buf`.
///
/// The position of the highest set bit in the first byte gives the
/// width. The id is returned with that marker bit left in place,
/// together with the number of bytes consumed. `None` is returned for
/// empty input, a first byte whose top four bits are all clear, or a
/// buffer shorter than the announced width.
fn read_id_vint(buf: &[u8]) -> Option<(u32, usize)> {
    let lead = *buf.first()?;
    // Leading zero bits + 1 is the encoded width; a zero first byte gives
    // 9 here and is rejected along with the 5..=8 byte widths.
    let width = lead.leading_zeros() as usize + 1;
    if width > 4 {
        return None;
    }
    let raw = buf.get(..width)?;
    let id = raw
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
    Some((id, width))
}
2961
/// Decode an EBML size VInt (1..=8 bytes) from the front of `buf`.
///
/// The length-marker bit is stripped; the result is the numeric value
/// and the number of bytes consumed. `None` is returned for empty input,
/// a zero first byte, or a buffer shorter than the announced width.
fn read_size_vint(buf: &[u8]) -> Option<(u64, usize)> {
    let (&lead, _) = buf.split_first()?;
    if lead == 0 {
        return None;
    }
    // `lead` is non-zero, so this is always in 1..=8.
    let width = lead.leading_zeros() as usize + 1;
    let tail = buf.get(1..width)?;
    // Keep only the value bits of the first byte. At width 8 (first byte
    // 0x01) there are none: shifting a `u8` by 8 is an overflow, which
    // `checked_shr` reports as `None`, giving an all-zero mask.
    let head_bits = lead & 0xFFu8.checked_shr(width as u32).unwrap_or(0);
    let value = tail
        .iter()
        .fold(u64::from(head_bits), |acc, &byte| (acc << 8) | u64::from(byte));
    Some((value, width))
}
2986
/// Interpret a Matroska unsigned-integer payload as a big-endian `u64`.
///
/// Payloads of 0..=8 bytes are accepted (an empty payload decodes to 0);
/// anything longer yields `None`.
fn read_unsigned(buf: &[u8]) -> Option<u64> {
    (buf.len() <= 8).then(|| {
        buf.iter()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
    })
}
2999
/// Decode a big-endian Matroska float payload.
///
/// A 4-byte payload is an `f32` (widened to `f64`), an 8-byte payload is
/// an `f64`. Any other length is treated as malformed and yields `None`.
fn read_float(buf: &[u8]) -> Option<f64> {
    // Slice-to-array conversion succeeds only on an exact length match,
    // so each attempt doubles as the length test.
    if let Ok(single) = <[u8; 4]>::try_from(buf) {
        return Some(f64::from(f32::from_be_bytes(single)));
    }
    <[u8; 8]>::try_from(buf).ok().map(f64::from_be_bytes)
}
3015
3016// ---------------------------------------------------------------------------
3017// Streaming demuxer impls (Squad streaming-migration-55 P1)
3018// ---------------------------------------------------------------------------
3019//
3020// Per-format `StreamingDemuxer` implementations. Each holds only the cursor
3021// state needed to produce ONE sample at a time — no per-sample
3022// accumulation. Audio remains buffered (Squad-18 contract preserved).
3023//
3024// The legacy `demux()` is implemented at the bottom as a thin adapter:
3025// `demux_streaming(input)` → drain `next_video_sample()` into a `Vec`.
3026
/// Per-sample location record built when the input is a fragmented
/// MP4. The `mp4` crate (v0.14) returns garbage (typically the bytes
/// of an adjacent `moof` box) from `read_sample` on fragmented inputs
/// — affects BOTH video and audio tracks. Side-stepping `read_sample`
/// for fragmented input by pre-computing sample
/// (file_offset, size, pts, duration) from the moof->traf->trun chain
/// produces correct bytes regardless of track kind. The track filter
/// is `track_id` (parameter on the walker chain) — generic across
/// video/audio/anything else with a track_id.
///
/// Bug history: the audio-extraction path WAS originally claimed to
/// "walk boxes itself" (per a prior comment here) but in fact it
/// called `reader.read_sample(audio_track_id, idx)` — the same buggy
/// path video uses. Burned 2026-05-09: malformed audio segments
/// (8-byte first AU containing the source's `moof` header bytes
/// `00 00 NN NN 6d 6f 6f 66`, every following AU mid-box-tree)
/// passed dedup hash unchanged because they're size-deterministic
/// per source, MSE rejected them with `Number of bands exceeds limit`
/// → SourceBuffer error → MediaSource readyState ended → all video
/// appendBuffer calls failed.
#[derive(Debug, Clone, Copy)]
struct FragSample {
    /// Byte offset of the sample's first byte within the input buffer.
    offset: u64,
    /// Sample length in bytes.
    size: u32,
    /// Presentation time in track-timescale ticks: the running decode
    /// time plus the trun composition offset.
    pts_ticks: i64,
    /// Sample duration in track-timescale ticks.
    duration_ticks: u32,
}

/// MP4 / MOV streaming demuxer. Owns the input bytes (so its
/// `Mp4Reader<Cursor<Vec<u8>>>` cursor is self-contained) and walks
/// `read_sample(track_id, idx)` one sample at a time. Per-sample
/// AVCC→Annex-B + parameter-set tracking (Squad-14) is preserved.
pub struct Mp4StreamingDemuxer {
    // Owned for the box-tree slice walkers (extract_*); the reader's
    // cursor consumes a clone.
    data: Vec<u8>,
    // Used by `next_video_sample` on the non-fragmented path only.
    reader: Mp4Reader<Cursor<Vec<u8>>>,
    // Codec name + `StreamInfo` handed out by `header()`.
    header: DemuxHeader,
    // Buffered audio track, if one was extracted at init time.
    audio: Option<AudioTrack>,
    // Track id of the video track being demuxed.
    track_id: u32,
    // Number of video samples: the fragment-table length for fragmented
    // input, otherwise the track's static sample count.
    sample_count: u32,
    // 1-based index of the next sample to emit (starts at 1).
    next_idx: u32,
    // For AVC/HEVC: codec-specific config. Empty for the rest.
    sps_pps: Vec<Vec<u8>>,
    // Width in bytes of the NAL length prefix; 4 when no config was found.
    length_size: u8,
    // `Some` only for h264/h265, where samples are converted to Annex-B.
    tracker: Option<ParamSetTracker>,
    /// `Some` when the input is fragmented MP4. Each entry is a
    /// (file_offset, size, pts, duration) tuple resolved from
    /// moof/traf/trun. `next_video_sample` reads bytes directly from
    /// `self.data` at these offsets instead of going through the mp4
    /// crate's `read_sample`.
    fragmented_samples: Option<Vec<FragSample>>,
}
3080
3081/// Walk top-level `moof` boxes in `data`, gather per-sample
3082/// (file_offset, size, pts, duration) tuples for the track id matching
3083/// `track_id` (works for video, audio, or any other track kind).
3084/// Returns `Some(table)` when the input is fragmented (at least one
3085/// top-level moof exists), `None` otherwise. An empty `Some(vec![])`
3086/// means "fragmented, but this track id had no samples in any moof"
3087/// — that's distinct from non-fragmented (None) and the caller
3088/// shouldn't fall back to `read_sample` in that case (it'd return
3089/// the same garbage bytes that prompted the fragmented path in the
3090/// first place).
3091///
3092/// Best-effort: silently skips moofs / trafs / truns that don't parse,
3093/// or that reference unknown tracks. Each successfully-walked trun
3094/// contributes its samples in order so the resulting Vec is decode-
3095/// order across the file.
3096fn build_fragmented_sample_table(
3097    data: &[u8],
3098    track_id: u32,
3099    default_sample_duration_from_trex: u32,
3100    default_sample_size_from_trex: u32,
3101) -> Option<Vec<FragSample>> {
3102    let mut samples: Vec<FragSample> = Vec::new();
3103    let mut pos: usize = 0;
3104    let mut accumulated_pts: i64 = 0;
3105    let mut found_any_moof = false;
3106
3107    while pos + 8 <= data.len() {
3108        let box_size_field = u32::from_be_bytes(data[pos..pos + 4].try_into().ok()?);
3109        let box_type = &data[pos + 4..pos + 8];
3110        let (box_size, header_size): (usize, usize) = if box_size_field == 1 {
3111            // 64-bit largesize form.
3112            if pos + 16 > data.len() {
3113                break;
3114            }
3115            let big = u64::from_be_bytes(data[pos + 8..pos + 16].try_into().ok()?);
3116            (big as usize, 16)
3117        } else if box_size_field == 0 {
3118            // box extends to EOF — stop walking after this one.
3119            (data.len() - pos, 8)
3120        } else {
3121            (box_size_field as usize, 8)
3122        };
3123        if box_size < header_size || pos + box_size > data.len() {
3124            break;
3125        }
3126
3127        if box_type == b"moof" {
3128            found_any_moof = true;
3129            let moof_start = pos;
3130            let moof_end = pos + box_size;
3131            walk_moof(
3132                data,
3133                moof_start + header_size,
3134                moof_end,
3135                moof_start as u64,
3136                track_id,
3137                default_sample_duration_from_trex,
3138                default_sample_size_from_trex,
3139                &mut accumulated_pts,
3140                &mut samples,
3141            );
3142        }
3143        pos = pos
3144            .checked_add(box_size)
3145            .filter(|&n| n <= data.len())
3146            .unwrap_or(data.len());
3147    }
3148
3149    if found_any_moof { Some(samples) } else { None }
3150}
3151
3152#[allow(clippy::too_many_arguments)]
3153fn walk_moof(
3154    data: &[u8],
3155    children_start: usize,
3156    moof_end: usize,
3157    moof_offset: u64,
3158    track_id: u32,
3159    default_sample_duration_from_trex: u32,
3160    default_sample_size_from_trex: u32,
3161    accumulated_pts: &mut i64,
3162    samples: &mut Vec<FragSample>,
3163) {
3164    let mut pos = children_start;
3165    while pos + 8 <= moof_end {
3166        let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
3167            Ok(b) => b,
3168            Err(_) => break,
3169        });
3170        let typ = &data[pos + 4..pos + 8];
3171        if size == 0 || size as usize + pos > moof_end {
3172            break;
3173        }
3174        if typ == b"traf" {
3175            walk_traf(
3176                data,
3177                pos + 8,
3178                pos + size as usize,
3179                moof_offset,
3180                track_id,
3181                default_sample_duration_from_trex,
3182                default_sample_size_from_trex,
3183                accumulated_pts,
3184                samples,
3185            );
3186        }
3187        pos += size as usize;
3188    }
3189}
3190
3191#[allow(clippy::too_many_arguments)]
3192fn walk_traf(
3193    data: &[u8],
3194    children_start: usize,
3195    traf_end: usize,
3196    moof_offset: u64,
3197    track_id: u32,
3198    default_sample_duration_from_trex: u32,
3199    default_sample_size_from_trex: u32,
3200    accumulated_pts: &mut i64,
3201    samples: &mut Vec<FragSample>,
3202) {
3203    // First pass: find tfhd (always first child of traf per spec) +
3204    // collect tfhd-derived defaults + base_data_offset semantics.
3205    let mut this_track: Option<u32> = None;
3206    let mut tfhd_default_sample_duration: u32 = default_sample_duration_from_trex;
3207    let mut tfhd_default_sample_size: u32 = default_sample_size_from_trex;
3208    let mut base_data_offset: u64 = moof_offset; // default-base-is-moof
3209    let mut base_data_offset_explicit = false;
3210    let mut tfdt_base_pts: Option<i64> = None;
3211
3212    let mut pos = children_start;
3213    while pos + 8 <= traf_end {
3214        let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
3215            Ok(b) => b,
3216            Err(_) => break,
3217        });
3218        let typ = &data[pos + 4..pos + 8];
3219        if size == 0 || size as usize + pos > traf_end {
3220            break;
3221        }
3222        if typ == b"tfhd" {
3223            // tfhd: u8 version + u24 flags + u32 track_id + optional fields per flag bits
3224            if pos + 16 > traf_end {
3225                pos += size as usize;
3226                continue;
3227            }
3228            let flags = u32::from_be_bytes(match data[pos + 8..pos + 12].try_into() {
3229                Ok(b) => b,
3230                Err(_) => break,
3231            }) & 0x00ff_ffff;
3232            let tk = u32::from_be_bytes(match data[pos + 12..pos + 16].try_into() {
3233                Ok(b) => b,
3234                Err(_) => break,
3235            });
3236            this_track = Some(tk);
3237            let mut p = pos + 16;
3238            // base_data_offset_present
3239            if flags & 0x01 != 0 {
3240                if p + 8 > traf_end {
3241                    break;
3242                }
3243                base_data_offset = u64::from_be_bytes(match data[p..p + 8].try_into() {
3244                    Ok(b) => b,
3245                    Err(_) => break,
3246                });
3247                base_data_offset_explicit = true;
3248                p += 8;
3249            }
3250            // sample_description_index_present
3251            if flags & 0x02 != 0 {
3252                p += 4;
3253            }
3254            // default_sample_duration_present
3255            if flags & 0x08 != 0 {
3256                if p + 4 > traf_end {
3257                    break;
3258                }
3259                tfhd_default_sample_duration =
3260                    u32::from_be_bytes(match data[p..p + 4].try_into() {
3261                        Ok(b) => b,
3262                        Err(_) => break,
3263                    });
3264                p += 4;
3265            }
3266            // default_sample_size_present
3267            if flags & 0x10 != 0 {
3268                if p + 4 > traf_end {
3269                    break;
3270                }
3271                tfhd_default_sample_size = u32::from_be_bytes(match data[p..p + 4].try_into() {
3272                    Ok(b) => b,
3273                    Err(_) => break,
3274                });
3275                p += 4;
3276            }
3277            // default_sample_flags_present (skip 4 bytes)
3278            if flags & 0x20 != 0 {
3279                p += 4;
3280            }
3281            // default-base-is-moof flag: when set AND base_data_offset
3282            // not present, base is the moof start (which is our default).
3283            let _ = p;
3284        } else if typ == b"tfdt" {
3285            // tfdt: version u8 + flags u24 + base_media_decode_time (u32 v0 / u64 v1)
3286            if pos + 12 > traf_end {
3287                pos += size as usize;
3288                continue;
3289            }
3290            let version = data[pos + 8];
3291            if version == 1 {
3292                if pos + 20 > traf_end {
3293                    pos += size as usize;
3294                    continue;
3295                }
3296                let bmdt =
3297                    u64::from_be_bytes(data[pos + 12..pos + 20].try_into().unwrap_or([0; 8]));
3298                tfdt_base_pts = Some(bmdt as i64);
3299            } else {
3300                let bmdt =
3301                    u32::from_be_bytes(data[pos + 12..pos + 16].try_into().unwrap_or([0; 4]));
3302                tfdt_base_pts = Some(bmdt as i64);
3303            }
3304        }
3305        pos += size as usize;
3306    }
3307
3308    let Some(tk) = this_track else {
3309        return;
3310    };
3311    if tk != track_id {
3312        return;
3313    }
3314
3315    if let Some(bp) = tfdt_base_pts {
3316        *accumulated_pts = bp;
3317    }
3318
3319    // Second pass: walk trun boxes in declaration order.
3320    let mut pos = children_start;
3321    while pos + 8 <= traf_end {
3322        let size = u32::from_be_bytes(match data[pos..pos + 4].try_into() {
3323            Ok(b) => b,
3324            Err(_) => break,
3325        });
3326        let typ = &data[pos + 4..pos + 8];
3327        if size == 0 || size as usize + pos > traf_end {
3328            break;
3329        }
3330        if typ == b"trun" {
3331            walk_trun(
3332                data,
3333                pos + 8,
3334                pos + size as usize,
3335                if base_data_offset_explicit {
3336                    base_data_offset
3337                } else {
3338                    moof_offset
3339                },
3340                tfhd_default_sample_duration,
3341                tfhd_default_sample_size,
3342                accumulated_pts,
3343                samples,
3344            );
3345        }
3346        pos += size as usize;
3347    }
3348    let _ = base_data_offset_explicit;
3349}
3350
3351#[allow(clippy::too_many_arguments)]
3352fn walk_trun(
3353    data: &[u8],
3354    children_start: usize,
3355    trun_end: usize,
3356    base_offset: u64,
3357    default_sample_duration: u32,
3358    default_sample_size: u32,
3359    accumulated_pts: &mut i64,
3360    samples: &mut Vec<FragSample>,
3361) {
3362    if children_start + 8 > trun_end {
3363        return;
3364    }
3365    let version = data[children_start];
3366    let flags = u32::from_be_bytes(match data[children_start..children_start + 4].try_into() {
3367        Ok(b) => b,
3368        Err(_) => return,
3369    }) & 0x00ff_ffff;
3370    let sample_count = u32::from_be_bytes(
3371        match data[children_start + 4..children_start + 8].try_into() {
3372            Ok(b) => b,
3373            Err(_) => return,
3374        },
3375    );
3376    let mut p = children_start + 8;
3377    let mut data_offset_in_trun: i32 = 0;
3378    if flags & 0x000_001 != 0 {
3379        if p + 4 > trun_end {
3380            return;
3381        }
3382        data_offset_in_trun = i32::from_be_bytes(match data[p..p + 4].try_into() {
3383            Ok(b) => b,
3384            Err(_) => return,
3385        });
3386        p += 4;
3387    }
3388    if flags & 0x000_004 != 0 {
3389        // first-sample-flags-present: skip 4 bytes
3390        p += 4;
3391    }
3392
3393    let sample_duration_present = flags & 0x000_100 != 0;
3394    let sample_size_present = flags & 0x000_200 != 0;
3395    let sample_flags_present = flags & 0x000_400 != 0;
3396    let sample_cto_present = flags & 0x000_800 != 0;
3397
3398    let mut current_offset = base_offset.wrapping_add(data_offset_in_trun as u64);
3399    for _ in 0..sample_count {
3400        let dur = if sample_duration_present {
3401            if p + 4 > trun_end {
3402                return;
3403            }
3404            let d = u32::from_be_bytes(match data[p..p + 4].try_into() {
3405                Ok(b) => b,
3406                Err(_) => return,
3407            });
3408            p += 4;
3409            d
3410        } else {
3411            default_sample_duration
3412        };
3413        let sz = if sample_size_present {
3414            if p + 4 > trun_end {
3415                return;
3416            }
3417            let s = u32::from_be_bytes(match data[p..p + 4].try_into() {
3418                Ok(b) => b,
3419                Err(_) => return,
3420            });
3421            p += 4;
3422            s
3423        } else {
3424            default_sample_size
3425        };
3426        if sample_flags_present {
3427            p += 4;
3428        }
3429        let cto: i32 = if sample_cto_present {
3430            if p + 4 > trun_end {
3431                return;
3432            }
3433            let c = if version == 0 {
3434                u32::from_be_bytes(match data[p..p + 4].try_into() {
3435                    Ok(b) => b,
3436                    Err(_) => return,
3437                }) as i32
3438            } else {
3439                i32::from_be_bytes(match data[p..p + 4].try_into() {
3440                    Ok(b) => b,
3441                    Err(_) => return,
3442                })
3443            };
3444            p += 4;
3445            c
3446        } else {
3447            0
3448        };
3449
3450        if sz > 0 {
3451            samples.push(FragSample {
3452                offset: current_offset,
3453                size: sz,
3454                pts_ticks: accumulated_pts.saturating_add(cto as i64),
3455                duration_ticks: dur,
3456            });
3457        }
3458        current_offset = current_offset.saturating_add(sz as u64);
3459        *accumulated_pts = accumulated_pts.saturating_add(dur as i64);
3460    }
3461}
3462
3463pub(crate) fn demux_mp4_streaming_init(data: &[u8]) -> Result<Mp4StreamingDemuxer> {
3464    // Same lenient pre-pass as `demux_mp4` — see comment there for
3465    // the iPhone / QuickTime `wave` atom rationale.
3466    let owned = sanitize_isobmff_box_sizes(data);
3467    let size = owned.len() as u64;
3468    // Build a probe reader against an immutable borrow first — same as
3469    // legacy `demux_mp4`. This pulls track / codec metadata before we
3470    // commit the owned buffer to the cursor that backs the streaming
3471    // reader.
3472    let probe = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
3473        .context("reading MP4 header")?;
3474
3475    let video_track = probe
3476        .tracks()
3477        .values()
3478        .find(|t| t.track_type().ok() == Some(mp4::TrackType::Video))
3479        .context("no video track in MP4")?;
3480
3481    let track_id = video_track.track_id();
3482    let codec_from_mp4 = format_codec(video_track);
3483    let codec = if codec_from_mp4 == "unknown" && has_av01_sample_entry(&owned) {
3484        "av1".to_string()
3485    } else if codec_from_mp4 == "unknown" && hevc_sample_entry_fourcc(&owned).is_some() {
3486        "h265".to_string()
3487    } else if codec_from_mp4 == "unknown" && prores_sample_entry_fourcc(&owned).is_some() {
3488        "prores".to_string()
3489    } else {
3490        codec_from_mp4
3491    };
3492    let width = video_track.width() as u32;
3493    let height = video_track.height() as u32;
3494    let sample_count = video_track.sample_count();
3495    let duration = video_track.duration().as_secs_f64();
3496    let video_track_timescale = video_track.timescale();
3497    let frame_rate = if duration > 0.0 {
3498        sample_count as f64 / duration
3499    } else {
3500        30.0
3501    };
3502    let bitrate = video_track.bitrate() as u64;
3503
3504    let mp4_color = extract_mp4_visual_color_metadata(&owned);
3505    let initial_color_metadata = ColorMetadata {
3506        mastering_display: mp4_color.mastering_display,
3507        content_light_level: mp4_color.content_light_level,
3508        ..Default::default()
3509    };
3510
3511    let mut info = StreamInfo {
3512        codec: codec.clone(),
3513        width,
3514        height,
3515        frame_rate,
3516        duration,
3517        pixel_format: PixelFormat::Yuv420p,
3518        color_space: ColorSpace::Bt709,
3519        total_frames: sample_count as u64,
3520        bitrate,
3521        color_metadata: initial_color_metadata,
3522    };
3523
3524    let needs_annexb = matches!(codec.as_str(), "h264" | "h265");
3525    let (sps_pps, length_size) = if needs_annexb {
3526        if codec == "h264" {
3527            match extract_avc_config(&owned) {
3528                Some(cfg) => (cfg.parameter_sets, cfg.length_size),
3529                None => (extract_sps_pps(&probe, track_id), 4u8),
3530            }
3531        } else {
3532            match extract_hevc_config(&owned) {
3533                Some(cfg) => (cfg.parameter_sets, cfg.length_size),
3534                None => (Vec::new(), 4u8),
3535            }
3536        }
3537    } else {
3538        (Vec::new(), 4u8)
3539    };
3540
3541    // Pixel format detection requires the first sample's bitstream.
3542    // Pull just that one sample (without consuming the streaming
3543    // cursor) via a temporary reader, then refine info.pixel_format
3544    // before handing the StreamInfo to the caller.
3545    if sample_count > 0 {
3546        let mut probe_for_pf = Mp4Reader::read_header(Cursor::new(owned.as_slice()), size)
3547            .context("re-reading MP4 for pixel-format probe")?;
3548        if let Ok(Some(s)) = probe_for_pf.read_sample(track_id, 1) {
3549            let first_sample = s.bytes.to_vec();
3550            let detected_pf = codec::pixel_format::detect(&codec, &[first_sample]);
3551            info.pixel_format = detected_pf;
3552        }
3553    }
3554
3555    drop(probe);
3556
3557    let audio = extract_mp4_audio(&owned);
3558
3559    // Build the streaming reader against an owned cursor.
3560    let reader_cursor = Cursor::new(owned.clone());
3561    let reader =
3562        Mp4Reader::read_header(reader_cursor, size).context("opening MP4 streaming reader")?;
3563
3564    let tracker = if needs_annexb {
3565        Some(ParamSetTracker::new(if codec == "h264" {
3566            NaluCodec::Avc
3567        } else {
3568            NaluCodec::Hevc
3569        }))
3570    } else {
3571        None
3572    };
3573
3574    let _ = needs_annexb; // tracker presence reflects this
3575
3576    // Detect fragmented MP4 + build a sample table from moof/traf/trun
3577    // when applicable. The mp4 crate's `read_sample` returns garbage
3578    // (typically the bytes of an adjacent moof box header) for any
3579    // fragmented track regardless of kind, so for fragmented input
3580    // we bypass `read_sample` entirely and read sample bytes directly
3581    // from `owned` at the offsets in this table. `extract_mp4_audio`
3582    // does the same against its own `data` slice.
3583    let fragmented_samples = build_fragmented_sample_table(&owned, track_id, 0, 0).map(|table| {
3584        tracing::info!(
3585            track_id,
3586            sample_count = table.len(),
3587            "fragmented MP4 detected; built sample table from moof/traf/trun"
3588        );
3589        table
3590    });
3591    let final_sample_count = match &fragmented_samples {
3592        Some(table) => table.len() as u32,
3593        None => sample_count,
3594    };
3595
3596    // Recompute frame_rate + duration from fragmented sample timestamps
3597    // when (a) we built a fragmented sample table AND (b) the static
3598    // moov sample table was empty or had a zero duration. Pure
3599    // fragmented MP4 — common from web recorders, screen capture
3600    // tools, and modern phone exports — leaves moov with no static
3601    // samples + tkhd.duration=0; the previous fallback was the 30.0
3602    // sentinel, which silently encoded a 24-fps VFR source as 30-fps
3603    // CFR and produced ~20% short output. The fragmented sample
3604    // table's actual duration_ticks (from moof.traf.trun per-sample
3605    // duration entries) carries the truth. Trust the static table
3606    // when it's populated — that path was correct already.
3607    if let Some(table) = fragmented_samples.as_ref() {
3608        if !table.is_empty() && (sample_count == 0 || duration <= 0.0) && video_track_timescale > 0
3609        {
3610            let total_ticks: u64 = table.iter().map(|s| s.duration_ticks as u64).sum();
3611            if total_ticks > 0 {
3612                let total_seconds = total_ticks as f64 / video_track_timescale as f64;
3613                if total_seconds > 0.0 {
3614                    let avg_fps = table.len() as f64 / total_seconds;
3615                    info.frame_rate = avg_fps.clamp(1.0, 240.0);
3616                    info.duration = total_seconds;
3617                    info.total_frames = table.len() as u64;
3618                    tracing::info!(
3619                        track_id,
3620                        avg_fps,
3621                        total_seconds,
3622                        sample_count = table.len(),
3623                        timescale = video_track_timescale,
3624                        "fragmented MP4: recomputed frame_rate + duration from \
3625                         moof/traf/trun timestamps (static moov sample table \
3626                         was empty)"
3627                    );
3628                }
3629            }
3630        }
3631    }
3632    Ok(Mp4StreamingDemuxer {
3633        data: owned,
3634        reader,
3635        header: DemuxHeader { codec, info },
3636        audio,
3637        track_id,
3638        sample_count: final_sample_count,
3639        next_idx: 1,
3640        sps_pps,
3641        length_size,
3642        tracker,
3643        fragmented_samples,
3644    })
3645}
3646
3647impl StreamingDemuxer for Mp4StreamingDemuxer {
3648    fn header(&self) -> &DemuxHeader {
3649        &self.header
3650    }
3651
3652    fn next_video_sample(&mut self) -> Result<Option<Sample>> {
3653        // Fragmented MP4 path: pull bytes directly from the input buffer
3654        // at the offsets we resolved at init time.
3655        if let Some(table) = self.fragmented_samples.as_ref() {
3656            let idx_zero_based = (self.next_idx - 1) as usize;
3657            if idx_zero_based >= table.len() {
3658                return Ok(None);
3659            }
3660            self.next_idx += 1;
3661            let entry = table[idx_zero_based];
3662            let off = entry.offset as usize;
3663            let end = off.saturating_add(entry.size as usize);
3664            if end > self.data.len() {
3665                tracing::warn!(
3666                    idx = idx_zero_based + 1,
3667                    offset = entry.offset,
3668                    size = entry.size,
3669                    data_len = self.data.len(),
3670                    "fragmented sample reaches past EOF; stopping at the previous frame"
3671                );
3672                return Ok(None);
3673            }
3674            let raw = self.data[off..end].to_vec();
3675            let data = if let Some(tracker) = self.tracker.as_mut() {
3676                length_prefixed_to_annexb_tracked(&raw, self.length_size, tracker, &self.sps_pps)
3677            } else {
3678                raw
3679            };
3680            return Ok(Some(Sample {
3681                data,
3682                pts_ticks: entry.pts_ticks,
3683                duration_ticks: entry.duration_ticks,
3684            }));
3685        }
3686        loop {
3687            if self.next_idx > self.sample_count {
3688                return Ok(None);
3689            }
3690            let idx = self.next_idx;
3691            self.next_idx += 1;
3692            // Mirror the audio-track tolerance in `extract_mp4_audio`:
3693            // when a mid-track read_sample fails on a fragmented MP4
3694            // with a truncated `traf.trun` index — the typical iPhone /
3695            // Android broken-recording shape — surface a warn and
3696            // signal soft EOF to the encode loop. The frames that DID
3697            // demux upstream still flow through, the encoder produces
3698            // an AV1 sequence header from the first one, and the CMAF
3699            // muxer's `finalize` writes a valid (truncated) init
3700            // segment. Without this, a single missing trun entry
3701            // halfway through a clip would propagate as `TranscodeFailure`
3702            // for the whole job — the symptom we hit 2026-05-08.
3703            let s = match self.reader.read_sample(self.track_id, idx) {
3704                Ok(s) => s,
3705                Err(e) => {
3706                    tracing::warn!(
3707                        track_id = self.track_id,
3708                        idx,
3709                        emitted = idx.saturating_sub(1),
3710                        sample_count = self.sample_count,
3711                        error = %e,
3712                        "video stream: read_sample error mid-track; \
3713                         stopping at sample {} of {} (truncated source — \
3714                         iPhone fragmented MP4 with a missing trun entry \
3715                         is the typical cause)",
3716                        idx.saturating_sub(1),
3717                        self.sample_count,
3718                    );
3719                    return Ok(None);
3720                }
3721            };
3722            let Some(sample) = s else { continue };
3723            let pts_ticks = sample.start_time as i64;
3724            let duration_ticks = sample.duration;
3725            let raw = sample.bytes.to_vec();
3726            let data = if let Some(tracker) = self.tracker.as_mut() {
3727                length_prefixed_to_annexb_tracked(&raw, self.length_size, tracker, &self.sps_pps)
3728            } else {
3729                raw
3730            };
3731            return Ok(Some(Sample {
3732                data,
3733                pts_ticks,
3734                duration_ticks,
3735            }));
3736        }
3737    }
3738
3739    fn audio(&self) -> Option<&AudioTrack> {
3740        self.audio.as_ref()
3741    }
3742}
3743
3744impl Mp4StreamingDemuxer {
3745    /// For tests + the legacy `demux()` adapter: reach back at the
3746    /// owned input bytes (e.g. for an opt-in re-probe).
3747    #[allow(dead_code)]
3748    pub(crate) fn raw_bytes(&self) -> &[u8] {
3749        &self.data
3750    }
3751}
3752
3753/// MKV / WebM streaming demuxer. Wraps `MatroskaFile` whose `next_frame`
3754/// API is already pull-shaped, so the streaming impl is a thin wrapper:
3755/// pull next frame, filter to the video track, AVCC→Annex-B convert if
3756/// AVC/HEVC, surface as a `Sample`.
3757pub struct MkvStreamingDemuxer {
3758    mkv: MatroskaFile<Cursor<Vec<u8>>>,
3759    header: DemuxHeader,
3760    audio: Option<AudioTrack>,
3761    track_number: u64,
3762    timestamp_scale: u64,
3763    annexb_prepend: Vec<Vec<u8>>,
3764    length_size: u8,
3765    tracker: Option<ParamSetTracker>,
3766    /// Default-duration in ns from the track header — used as the
3767    /// fallback per-sample duration when the Block doesn't carry one.
3768    default_duration_ns: Option<u64>,
3769    /// Lazily set on the first `next_video_sample()` call by running
3770    /// `pixel_format::detect` against the first emitted sample.
3771    /// `header.info.pixel_format` is then patched in place. Subsequent
3772    /// calls skip the probe (codec sequence headers don't change
3773    /// mid-stream for the codecs we support).
3774    pixel_format_detected: bool,
3775}
3776
3777pub(crate) fn demux_mkv_streaming_init(data: &[u8]) -> Result<MkvStreamingDemuxer> {
3778    let owned = data.to_vec();
3779    // First pass: open with a borrow to harvest header metadata without
3780    // consuming the buffer that backs the streaming reader.
3781    let cursor = Cursor::new(owned.as_slice());
3782    let probe =
3783        MatroskaFile::open(cursor).map_err(|e| anyhow::anyhow!("reading MKV header: {e}"))?;
3784
3785    let (
3786        track_number,
3787        track_uid,
3788        codec_id,
3789        width,
3790        height,
3791        annexb_prepend,
3792        length_size,
3793        color_space,
3794        mut color_metadata,
3795        mut color_info,
3796        track_default_duration_ns,
3797    ) = {
3798        let track_info = probe
3799            .tracks()
3800            .iter()
3801            .find(|t| t.track_type() == MkvTrackType::Video)
3802            .context("no video track in MKV")?;
3803
3804        let track_number = track_info.track_number().get();
3805        let track_uid = track_info.track_uid().get();
3806        let codec_id = track_info.codec_id().to_string();
3807        let default_duration_ns = track_info.default_duration().map(|d| d.get());
3808
3809        let (annexb_prepend, length_size): (Vec<Vec<u8>>, u8) = if codec_id == "V_MPEG4/ISO/AVC" {
3810            let priv_bytes = track_info
3811                .codec_private()
3812                .context("V_MPEG4/ISO/AVC CodecPrivate missing")?;
3813            let cfg = parse_avcc(priv_bytes).context("V_MPEG4/ISO/AVC CodecPrivate malformed")?;
3814            (cfg.parameter_sets, cfg.length_size)
3815        } else if codec_id == "V_MPEGH/ISO/HEVC" {
3816            let priv_bytes = track_info
3817                .codec_private()
3818                .context("V_MPEGH/ISO/HEVC CodecPrivate missing")?;
3819            let cfg = parse_hvcc(priv_bytes).context("V_MPEGH/ISO/HEVC CodecPrivate malformed")?;
3820            (cfg.parameter_sets, cfg.length_size)
3821        } else {
3822            (Vec::new(), 4)
3823        };
3824
3825        if mkv_codec_needs_annexb(&codec_id) && annexb_prepend.is_empty() {
3826            bail!("AVC/HEVC MKV CodecPrivate missing or empty — no parameter sets to prepend");
3827        }
3828
3829        let video = track_info
3830            .video()
3831            .context("video track missing Video element")?;
3832        let w = video.pixel_width().get() as u32;
3833        let h = video.pixel_height().get() as u32;
3834
3835        let (color_space, color_metadata, color_info) = match video.colour() {
3836            Some(colour) => colour_to_pipeline(colour),
3837            None => (
3838                ColorSpace::Bt709,
3839                ColorMetadata::default(),
3840                MkvColorInfo::default(),
3841            ),
3842        };
3843
3844        (
3845            track_number,
3846            track_uid,
3847            codec_id,
3848            w,
3849            h,
3850            annexb_prepend,
3851            length_size,
3852            color_space,
3853            color_metadata,
3854            color_info,
3855            default_duration_ns,
3856        )
3857    };
3858
3859    // Apply the matroska-demuxer 0.7 raw-scan workarounds — same as the
3860    // legacy demux_mkv path.
3861    color_info.max_cll = None;
3862    color_info.max_fall = None;
3863    color_metadata.content_light_level = None;
3864    if let Some(md) = color_metadata.mastering_display.as_mut() {
3865        md.primaries_r_y = 0;
3866        md.primaries_g_y = 0;
3867        md.primaries_b_y = 0;
3868    }
3869    if let Some(local) = color_info.mastering.as_mut() {
3870        local.primary_r_chromaticity_y = None;
3871        local.primary_g_chromaticity_y = None;
3872        local.primary_b_chromaticity_y = None;
3873    }
3874    if let Some(fix) = scan_mkv_colour_raw(&owned) {
3875        color_info.max_cll = fix.max_cll;
3876        color_info.max_fall = fix.max_fall;
3877        if fix.max_cll.is_some() || fix.max_fall.is_some() {
3878            color_metadata.content_light_level = Some(ContentLightLevel {
3879                max_cll: fix.max_cll.unwrap_or(0).min(u16::MAX as u32) as u16,
3880                max_fall: fix.max_fall.unwrap_or(0).min(u16::MAX as u32) as u16,
3881            });
3882        }
3883        let chrom = |v: f64| (v * 50_000.0).round().clamp(0.0, u16::MAX as f64) as u16;
3884        if let Some(md) = color_metadata.mastering_display.as_mut() {
3885            if let Some(y) = fix.primary_r_chromaticity_y {
3886                md.primaries_r_y = chrom(y);
3887            }
3888            if let Some(y) = fix.primary_g_chromaticity_y {
3889                md.primaries_g_y = chrom(y);
3890            }
3891            if let Some(y) = fix.primary_b_chromaticity_y {
3892                md.primaries_b_y = chrom(y);
3893            }
3894        }
3895        if let Some(local) = color_info.mastering.as_mut() {
3896            if fix.primary_r_chromaticity_y.is_some() {
3897                local.primary_r_chromaticity_y = fix.primary_r_chromaticity_y;
3898            }
3899            if fix.primary_g_chromaticity_y.is_some() {
3900                local.primary_g_chromaticity_y = fix.primary_g_chromaticity_y;
3901            }
3902            if fix.primary_b_chromaticity_y.is_some() {
3903                local.primary_b_chromaticity_y = fix.primary_b_chromaticity_y;
3904            }
3905        }
3906    }
3907
3908    let needs_annexb = mkv_codec_needs_annexb(&codec_id);
3909    let codec = match codec_id.as_str() {
3910        "V_VP9" => "vp9".to_string(),
3911        "V_VP8" => "vp8".to_string(),
3912        "V_AV1" => "av1".to_string(),
3913        "V_MPEG4/ISO/AVC" => "h264".to_string(),
3914        "V_MPEGH/ISO/HEVC" => "h265".to_string(),
3915        other => other.to_lowercase(),
3916    };
3917
3918    let timestamp_scale = probe.info().timestamp_scale().get();
3919    let duration_ticks = probe.info().duration().unwrap_or(0.0);
3920    let duration = duration_ticks * (timestamp_scale as f64) / 1_000_000_000.0;
3921    let tag_bitrate = probe
3922        .tags()
3923        .and_then(|tags| bitrate_from_tags(tags, track_uid));
3924    if color_info != MkvColorInfo::default() {
3925        tracing::info!(
3926            bits_per_channel = ?color_info.bits_per_channel,
3927            max_cll = ?color_info.max_cll,
3928            max_fall = ?color_info.max_fall,
3929            mastering = ?color_info.mastering,
3930            "MKV Colour: parsed HDR-adjacent metadata"
3931        );
3932    }
3933
3934    drop(probe);
3935
3936    // Audio: extract from the owned bytes via a separate MatroskaFile
3937    // open (same as legacy demux_mkv). The video reader below needs its
3938    // own clean cursor.
3939    let audio = extract_mkv_audio(&owned);
3940
3941    // Build the streaming MKV reader against the owned buffer.
3942    let mkv = MatroskaFile::open(Cursor::new(owned.clone()))
3943        .map_err(|e| anyhow::anyhow!("opening MKV streaming reader: {e}"))?;
3944
3945    // Bitrate / frame_rate / pixel_format are best-effort at construction
3946    // time. Bitrate falls back to 0 (unknown) if no tag exists; the
3947    // legacy path computes it by summing sample bytes which is fine for
3948    // Vec-materialized output but blows the streaming budget. We surface
3949    // the tag bitrate when present and 0 otherwise — pipeline already
3950    // tolerates 0 (matches the AVI / TS behaviour).
3951    let bitrate = tag_bitrate.unwrap_or(0);
3952
3953    // For frame_rate we apply the Squad-32 fallback chain as far as it
3954    // goes without the materialized sample count. samples/duration is
3955    // unknowable in streaming, so use DefaultDuration first then 30.0.
3956    let frame_rate = if let Some(dd_ns) = track_default_duration_ns.filter(|n| *n > 0) {
3957        1_000_000_000.0 / dd_ns as f64
3958    } else if duration > 0.0 {
3959        // duration-only fallback: assume 30 fps × duration as the floor.
3960        // This matches what the legacy path produced when sample count
3961        // was tiny; for normal media DefaultDuration is virtually always
3962        // present.
3963        30.0
3964    } else {
3965        30.0
3966    };
3967
3968    // Pixel format detection requires a sample. For the streaming
3969    // demuxer's StreamInfo we keep the codec-defaulted Yuv420p — the
3970    // actual decoded format is whatever the decoder produces.
3971    // (The legacy `demux_mkv()` adapter re-runs `pixel_format::detect`
3972    // on the materialized samples after the drain.)
3973    let pixel_format = PixelFormat::Yuv420p;
3974
3975    let info = StreamInfo {
3976        codec: codec.clone(),
3977        width,
3978        height,
3979        frame_rate,
3980        duration,
3981        pixel_format,
3982        color_space,
3983        total_frames: 0, // unknown until drained
3984        bitrate,
3985        color_metadata,
3986    };
3987
3988    let tracker = if needs_annexb {
3989        Some(ParamSetTracker::new(if codec_id == "V_MPEG4/ISO/AVC" {
3990            NaluCodec::Avc
3991        } else {
3992            NaluCodec::Hevc
3993        }))
3994    } else {
3995        None
3996    };
3997
3998    let _ = needs_annexb; // tracker presence reflects this
3999    Ok(MkvStreamingDemuxer {
4000        mkv,
4001        header: DemuxHeader { codec, info },
4002        audio,
4003        track_number,
4004        timestamp_scale,
4005        annexb_prepend,
4006        length_size,
4007        tracker,
4008        default_duration_ns: track_default_duration_ns,
4009        pixel_format_detected: false,
4010    })
4011}
4012
4013impl StreamingDemuxer for MkvStreamingDemuxer {
4014    fn header(&self) -> &DemuxHeader {
4015        &self.header
4016    }
4017
4018    fn next_video_sample(&mut self) -> Result<Option<Sample>> {
4019        let mut frame = MkvFrame::default();
4020        loop {
4021            match self.mkv.next_frame(&mut frame) {
4022                Ok(true) => {
4023                    if frame.track != self.track_number {
4024                        continue;
4025                    }
4026                    let raw = std::mem::take(&mut frame.data);
4027                    let data = if let Some(tracker) = self.tracker.as_mut() {
4028                        length_prefixed_to_annexb_tracked(
4029                            &raw,
4030                            self.length_size,
4031                            tracker,
4032                            &self.annexb_prepend,
4033                        )
4034                    } else {
4035                        raw
4036                    };
4037                    // Lazy pixel-format detection on the first sample.
4038                    // `pixel_format::detect` only ever reads `samples[0]`,
4039                    // so a one-shot probe against the first emitted sample
4040                    // matches the legacy `demux_mkv()` behaviour without
4041                    // requiring the full Vec to be materialised first.
4042                    if !self.pixel_format_detected {
4043                        let detected = codec::pixel_format::detect(
4044                            &self.header.codec,
4045                            std::slice::from_ref(&data),
4046                        );
4047                        self.header.info.pixel_format = detected;
4048                        self.pixel_format_detected = true;
4049                    }
4050                    let pts_ticks = frame.timestamp.saturating_mul(self.timestamp_scale) as i64;
4051                    let duration_ticks = frame
4052                        .duration
4053                        .or(self.default_duration_ns)
4054                        .map(|ns| ns.min(u32::MAX as u64) as u32)
4055                        .unwrap_or(0);
4056                    return Ok(Some(Sample {
4057                        data,
4058                        pts_ticks,
4059                        duration_ticks,
4060                    }));
4061                }
4062                Ok(false) => return Ok(None),
4063                Err(e) => bail!("MKV frame read error: {e}"),
4064            }
4065        }
4066    }
4067
4068    fn audio(&self) -> Option<&AudioTrack> {
4069        self.audio.as_ref()
4070    }
4071}
4072
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixture helpers ----------------------------------------------

    /// Serialises one ISO-BMFF box: big-endian u32 size, fourcc, payload.
    fn mkbox(fourcc: &[u8; 4], payload: &[u8]) -> Vec<u8> {
        let total = 8 + payload.len();
        let mut boxed = Vec::with_capacity(total);
        boxed.extend_from_slice(&(total as u32).to_be_bytes());
        boxed.extend_from_slice(fourcc);
        boxed.extend_from_slice(payload);
        boxed
    }

    /// Builds a minimal `moov/trak/mdia/minf/stbl/stsd` tree holding a
    /// single sample entry with the given fourcc. The entry payload is all
    /// zeros, which is enough for detectors that only inspect the fourcc.
    fn mov_with_sample_entry(fourcc: &[u8; 4]) -> Vec<u8> {
        // stsd payload: version/flags (4 B) + entry_count (4 B) + the entry.
        let mut stsd_payload = vec![0u8; 8];
        stsd_payload.extend(mkbox(fourcc, &[0u8; 8]));
        // Wrap innermost-first.
        [b"stsd", b"stbl", b"minf", b"mdia", b"trak", b"moov"]
            .iter()
            .fold(stsd_payload, |inner, container| mkbox(container, &inner))
    }

    // ---- MKV Annex-B guard --------------------------------------------

    #[test]
    fn mkv_annexb_guard_flags_avc_and_hevc() {
        for codec_id in ["V_MPEG4/ISO/AVC", "V_MPEGH/ISO/HEVC"] {
            assert!(
                mkv_codec_needs_annexb(codec_id),
                "{codec_id} should need Annex-B conversion"
            );
        }
    }

    #[test]
    fn mkv_annexb_guard_passes_self_contained_codecs() {
        for codec_id in ["V_VP9", "V_VP8", "V_AV1", "V_UNKNOWN"] {
            assert!(
                !mkv_codec_needs_annexb(codec_id),
                "{codec_id} should not need Annex-B conversion"
            );
        }
    }

    // ---- avcC parsing -------------------------------------------------

    #[test]
    fn parse_avcc_extracts_sps_and_pps() {
        // A record with exactly one 6-byte SPS and one 4-byte PPS and no
        // extension fields.
        let sps: [u8; 6] = [0x67, 0x42, 0x00, 0x1e, 0xab, 0x40];
        let pps: [u8; 4] = [0x68, 0xce, 0x3c, 0x80];
        let mut record = vec![
            0x01, // configurationVersion
            0x42, // AVCProfileIndication = 66 (Baseline)
            0x00, // profile_compatibility
            0x1e, // AVCLevelIndication = 3.0
            0xff, // reserved(6)=1|lengthSizeMinusOne(2)=3
            0xe1, // reserved(3)=7|numOfSequenceParameterSets(5)=1
        ];
        record.extend_from_slice(&(sps.len() as u16).to_be_bytes());
        record.extend_from_slice(&sps);
        record.push(0x01); // numOfPictureParameterSets = 1
        record.extend_from_slice(&(pps.len() as u16).to_be_bytes());
        record.extend_from_slice(&pps);

        let sets = parse_avcc_param_sets(&record);
        assert_eq!(sets.len(), 2, "expected SPS + PPS");
        assert_eq!(&sets[0], &sps);
        assert_eq!(&sets[1], &pps);
    }

    #[test]
    fn parse_avcc_truncated_returns_partial() {
        // The record announces one SPS but ends before its bytes; parsing
        // must not panic and yields only what was fully read (nothing).
        let record: [u8; 6] = [0x01, 0x42, 0x00, 0x1e, 0xff, 0xe1];
        assert!(parse_avcc_param_sets(&record).is_empty());
    }

    #[test]
    fn parse_avcc_empty_record_returns_empty() {
        assert!(parse_avcc_param_sets(&[]).is_empty());
        assert!(parse_avcc_param_sets(&[0x01]).is_empty());
    }

    // ---- AV1 sample-entry detector ------------------------------------

    #[test]
    fn av01_detector_finds_sample_entry_in_nested_stsd() {
        // A 16-byte av01 entry nested under moov/trak/mdia/minf/stbl/stsd.
        assert!(has_av01_sample_entry(&mov_with_sample_entry(b"av01")));
    }

    #[test]
    fn av01_detector_ignores_av01_in_wrong_place() {
        // The bytes "av01" inside an mdat payload are not a sample entry.
        let mdat = mkbox(b"mdat", b"...av01... garbage");
        assert!(!has_av01_sample_entry(&mdat));
    }

    #[test]
    fn av01_detector_returns_false_for_avc1_sample_entry() {
        assert!(!has_av01_sample_entry(&mov_with_sample_entry(b"avc1")));
    }

    // ---- EBML variable-length integers --------------------------------

    #[test]
    fn read_size_vint_8_byte_encoding() {
        // The MKV test builder's size_vint_8 form: `(1 << 56) | size` as
        // eight big-endian bytes, so the first byte is 0x01.
        let encoded = ((1u64 << 56) | 1000).to_be_bytes();
        let (value, width) = read_size_vint(&encoded).expect("parse 8-byte size");
        assert_eq!(width, 8);
        assert_eq!(value, 1000);
    }

    #[test]
    fn read_size_vint_1_byte_encoding() {
        // 0x81 is the one-byte VInt encoding of the value 1.
        let (value, width) = read_size_vint(&[0x81]).expect("1-byte size");
        assert_eq!(width, 1);
        assert_eq!(value, 1);
    }

    #[test]
    fn read_id_vint_parses_matroska_ids() {
        // IDs keep their length-marker bits; trailing bytes are ignored.
        assert_eq!(read_id_vint(&[0xAE]), Some((0xAE, 1)));
        assert_eq!(read_id_vint(&[0x55, 0xB0, 0xFF]), Some((0x55B0, 2)));
        assert_eq!(
            read_id_vint(&[0x1A, 0x45, 0xDF, 0xA3, 0xFF]),
            Some((0x1A45DFA3, 4))
        );
    }

    // ---- ProRes sample-entry detector ---------------------------------

    #[test]
    fn prores_detector_finds_all_six_fourccs() {
        for fcc in [b"apco", b"apcs", b"apcn", b"apch", b"ap4h", b"ap4x"] {
            let tree = mov_with_sample_entry(fcc);
            let Some(detected) = prores_sample_entry_fourcc(&tree) else {
                panic!("did not detect ProRes fourcc {fcc:?}")
            };
            assert_eq!(&detected, fcc, "fourcc round-trip for {fcc:?}");
        }
    }

    #[test]
    fn prores_detector_ignores_non_prores_fourccs() {
        // Correctly nested sample entries for other codecs must not be
        // reported as ProRes.
        for fcc in [b"avc1", b"hvc1", b"av01", b"vp09", b"mp4v"] {
            let tree = mov_with_sample_entry(fcc);
            assert!(
                prores_sample_entry_fourcc(&tree).is_none(),
                "false positive on fourcc {fcc:?}"
            );
        }
    }

    #[test]
    fn prores_detector_returns_none_when_no_stsd() {
        // A bare moov without any stsd path yields None and never panics.
        let bare_moov = mkbox(b"moov", &[0u8; 4]);
        assert!(prores_sample_entry_fourcc(&bare_moov).is_none());
    }

    // ---- container sniffing -------------------------------------------

    #[test]
    fn detect_container_recognises_mpeg_ts_sync_pattern() {
        // 400 bytes so offsets 188 and 376 exist: 0xFF filler in the first
        // 12 bytes, zeros after, and a 0x47 sync byte at each probe offset.
        let mut buf = vec![0x00u8; 400];
        buf[..12].fill(0xFF);
        for sync_offset in [0usize, 188, 376] {
            buf[sync_offset] = 0x47;
        }
        assert_eq!(detect_container(&buf), "ts");
    }

    #[test]
    fn detect_container_rejects_lone_0x47_byte() {
        // One 0x47 is not proof of MPEG-TS — random payloads contain it all
        // the time. Offset 188 stays 0x00, so the second probe misses.
        let mut buf = vec![0u8; 400];
        buf[0] = 0x47;
        assert_ne!(detect_container(&buf), "ts");
    }

    #[test]
    fn detect_container_recognises_avi_riff_signature() {
        // "RIFF" + 4-byte size + "AVI " form type + 32 bytes of padding.
        let parts: [&[u8]; 4] = [b"RIFF", &[0u8; 4], b"AVI ", &[0u8; 32]];
        let buf = parts.concat();
        assert_eq!(detect_container(&buf), "avi");
    }
}