Skip to main content

container/
cmaf.rs

1//! Fragmented MP4 / CMAF box writers.
2//!
3//! Produces ISO/IEC 14496-12 §8.8 movie-fragment boxes (`moof` / `mfhd` /
4//! `traf` / `tfhd` / `tfdt` / `trun`) and the corresponding `mvex` /
5//! `mehd` / `trex` declarations that go inside a CMAF init segment's
6//! `moov`. CMAF (ISO/IEC 23000-19) constrains the general 14496-12 model:
7//! exactly one track per fragment (one `traf` per `moof`), exactly one
8//! track per init segment, and a small set of mandatory boxes.
9//!
10//! This module is the box-level primitive layer. Higher-level callers
11//! (`init_segment_video`, `media_segment_video`, etc. in subsequent
12//! commits) compose these into init + media segments. The split lets us
13//! unit-test each box's byte layout against the spec without having to
14//! drive a full encode + segment pipeline.
15//!
16//! Spec citations are given by section number in the relevant box's doc
17//! comment so future readers can cross-check against the standard.
18//!
19//! # CMAF brand
20//!
//! Init segments for video tracks declare the `cmfc` brand (CMAF
//! constraints, per CMAF §7.3.4). Audio tracks use `cmfa` instead. Each
//! brand is listed in `compatible_brands` alongside `iso6` / `iso2` /
//! `mp42` (plus the codec brand, e.g. `av01`, for video) so non-CMAF-aware
//! tools that consume the same boxes (e.g. an old ffprobe) can still demux them.
26//!
27//! # Sample-flags packing
28//!
29//! `default_sample_flags` (in `trex` / `tfhd`) and `first_sample_flags`
30//! / per-sample flags (in `trun`) are packed per ISO/IEC 14496-12
31//! §8.8.3.1. The 32 bits are laid out:
32//!
33//! ```text
//!   reserved[4]      = 0
35//!   is_leading[2]    = 0
36//!   sample_depends_on[2]
37//!   sample_is_depended_on[2]
38//!   sample_has_redundancy[2]
39//!   sample_padding_value[3] = 0
40//!   sample_is_non_sync_sample[1]
41//!   sample_degradation_priority[16] = 0
42//! ```
43//!
44//! For AV1 / AAC the meaningful values are `sample_depends_on = 1`
45//! (this sample depends on others — i.e. P / B / non-IDR) or `2`
46//! (independent — i.e. IDR / sync), and `sample_is_non_sync_sample = 1`
47//! for non-key frames, `0` for keyframes. The helper
48//! [`SampleFlags::pack`] handles this; callers shouldn't compose the
49//! u32 by hand.
50
51use anyhow::{Context, Result};
52use codec::frame::{ColorMetadata, VideoCodec};
53use std::fs::{self, File};
54use std::io::{BufWriter, Write};
55use std::path::{Path, PathBuf};
56
57use crate::AudioInfo;
58use crate::mux::{
59    BoxBuilder, build_audio_stsd, build_av01, build_avc1, build_avcc, build_hvc1, build_hvcc,
60    write_unity_matrix,
61};
62use crate::nal_mux::{NalMuxCodec, NalSampleWriter};
63
/// Four-character CMAF brand codes listed in `ftyp.compatible_brands`.
pub mod brand {
    /// Brand for CMAF audio constraints (CMAF §7.3.5).
    pub const CMFA: &[u8; 4] = b"cmfa";
    /// Brand for CMAF video constraints (CMAF §7.3.4).
    pub const CMFC: &[u8; 4] = b"cmfc";
}
71
/// Which kind of track a CMAF init segment / fragment carries.
///
/// CMAF places a single track in each init segment and fragment, so
/// higher-level orchestration uses this enum to choose a codec dispatch
/// path. The init / segment writers expose type-specific entry points
/// rather than accepting this enum, which is why it keeps
/// `#[allow(dead_code)]` until the pipeline orchestrator (Phase 4) wires
/// it through.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CmafTrackKind {
    /// A video track.
    Video,
    /// An audio track.
    Audio,
}
84
/// Sample flags as packed in `default_sample_flags` / `first_sample_flags` /
/// per-sample `sample_flags` in `trun`. ISO/IEC 14496-12 §8.8.3.1.
///
/// Only the sync / non-sync distinction is modelled. Use
/// [`SampleFlags::keyframe`] for IDR / key samples and
/// [`SampleFlags::delta_frame`] for samples that depend on others (e.g. an
/// AV1 P-frame: depends-on=1, non-sync=1). The remaining fields aren't
/// meaningful for our pipeline (no DRM / leading samples / temporal layers
/// past Annex H), so [`SampleFlags::pack`] always leaves them at zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SampleFlags {
    /// `true` ⇔ keyframe / IDR (sync sample). Note this is the INVERSE of
    /// the wire-format `sample_is_non_sync_sample` bit: `is_sync == true`
    /// packs that bit as 0, `is_sync == false` packs it as 1.
    pub is_sync: bool,
}

impl SampleFlags {
    /// Bit position of the 2-bit `sample_depends_on` field (bits 24-25).
    const SAMPLE_DEPENDS_ON_SHIFT: u32 = 24;
    /// Mask of the 1-bit `sample_is_non_sync_sample` field (bit 16).
    const SAMPLE_IS_NON_SYNC: u32 = 1 << 16;

    /// Pack into the wire-format u32. See module docs for bit layout.
    ///
    /// * sync sample: `sample_depends_on = 2` (independent — no other
    ///   samples needed to decode), `sample_is_non_sync_sample = 0`
    ///   → `0x0200_0000`.
    /// * non-sync sample: `sample_depends_on = 1` (depends on prior
    ///   samples), `sample_is_non_sync_sample = 1` → `0x0101_0000`.
    #[must_use]
    pub fn pack(self) -> u32 {
        if self.is_sync {
            2 << Self::SAMPLE_DEPENDS_ON_SHIFT
        } else {
            (1 << Self::SAMPLE_DEPENDS_ON_SHIFT) | Self::SAMPLE_IS_NON_SYNC
        }
    }

    /// Flags for a sync sample (keyframe / IDR).
    #[must_use]
    pub fn keyframe() -> Self {
        Self { is_sync: true }
    }

    /// Flags for a non-sync sample (one that depends on earlier samples).
    #[must_use]
    pub fn delta_frame() -> Self {
        Self { is_sync: false }
    }
}
121
122/// Per-sample fields written into `trun`. Each entry produces one row
123/// of (duration, size, flags) in the fragment's sample table.
124#[derive(Debug, Clone, Copy)]
125pub struct CmafSample {
126    /// Sample duration in track timescale ticks.
127    pub duration: u32,
128    /// Encoded sample size in bytes.
129    pub size: u32,
130    /// Sample flags (sync / non-sync). The very FIRST sample in a fragment
131    /// uses `first_sample_flags` instead — see `build_trun_video`.
132    pub flags: SampleFlags,
133}
134
135// =====================================================================
136// Box writers
137// =====================================================================
138
139/// `mfhd` — Movie Fragment Header (14496-12 §8.8.5).
140///
141/// Carries the per-fragment sequence number. CMAF requires
142/// `sequence_number` to be monotonic and start at 1 for the first
143/// fragment of each track.
144///
145/// Wire layout (16 bytes total):
146/// ```text
147///   size:u32          = 16
148///   type:'mfhd'
149///   version:u8        = 0
150///   flags:u24         = 0
151///   sequence_number:u32
152/// ```
153pub fn build_mfhd(sequence_number: u32) -> Vec<u8> {
154    let mut b = BoxBuilder::new(b"mfhd");
155    b.u8(0); // version
156    b.extend(&[0, 0, 0]); // flags
157    b.u32(sequence_number);
158    b.finish()
159}
160
161/// `tfhd` — Track Fragment Header (14496-12 §8.8.7).
162///
163/// We always set the `default-base-is-moof` flag (`0x020000`) — required
164/// by CMAF §7.3.2.1. With this flag, sample data offsets in `trun`
165/// become relative to the start of the enclosing `moof`, which is
166/// exactly what HLS-CMAF expects. We avoid emitting `base_data_offset`
167/// (an absolute file offset that breaks segment portability).
168///
169/// Optional fields are emitted based on the bitwise combination of
170/// `tf_flags`:
171///   0x000001 base_data_offset            (NOT emitted; we use default-base-is-moof)
172///   0x000002 sample_description_index    (only if non-default needed)
173///   0x000008 default_sample_duration     (emitted when `default_duration.is_some()`)
174///   0x000010 default_sample_size         (emitted when `default_size.is_some()`)
175///   0x000020 default_sample_flags        (emitted when `default_flags.is_some()`)
176///   0x010000 duration-is-empty
177///   0x020000 default-base-is-moof        (always emitted)
178pub fn build_tfhd(
179    track_id: u32,
180    default_duration: Option<u32>,
181    default_size: Option<u32>,
182    default_flags: Option<u32>,
183) -> Vec<u8> {
184    let mut tf_flags: u32 = 0x020000; // default-base-is-moof
185    if default_duration.is_some() {
186        tf_flags |= 0x000008;
187    }
188    if default_size.is_some() {
189        tf_flags |= 0x000010;
190    }
191    if default_flags.is_some() {
192        tf_flags |= 0x000020;
193    }
194
195    let mut b = BoxBuilder::new(b"tfhd");
196    b.u8(0); // version
197    let flag_bytes = tf_flags.to_be_bytes();
198    b.extend(&flag_bytes[1..]); // 24-bit flags (drop high byte)
199    b.u32(track_id);
200    if let Some(d) = default_duration {
201        b.u32(d);
202    }
203    if let Some(s) = default_size {
204        b.u32(s);
205    }
206    if let Some(f) = default_flags {
207        b.u32(f);
208    }
209    b.finish()
210}
211
212/// `tfdt` — Track Fragment Decode Time (14496-12 §8.8.12).
213///
214/// Carries the absolute decode time of the first sample in this
215/// fragment, in track timescale ticks, accumulated from the start of
216/// the track (NOT from the start of the fragment). Required by CMAF
217/// §7.3.2.1.
218///
219/// We always emit version 1 (u64 decode time). Version 0's u32 wraps
220/// at ~24h for a 48 kHz audio track; version 1 covers >12 million
221/// years at the same rate. The 4 extra bytes are immaterial.
222///
223/// Wire layout (20 bytes total):
224/// ```text
225///   size:u32          = 20
226///   type:'tfdt'
227///   version:u8        = 1
228///   flags:u24         = 0
229///   base_media_decode_time:u64
230/// ```
231pub fn build_tfdt(base_media_decode_time: u64) -> Vec<u8> {
232    let mut b = BoxBuilder::new(b"tfdt");
233    b.u8(1); // version 1
234    b.extend(&[0, 0, 0]); // flags
235    b.u64(base_media_decode_time);
236    b.finish()
237}
238
239/// `trun` — Track Run (14496-12 §8.8.8) for a video fragment.
240///
241/// Encodes the per-sample table for the fragment's run of samples.
242/// CMAF allows multiple `trun`s per `traf` but we always emit exactly
243/// one (cleaner manifest, no functional difference).
244///
245/// Flag bits we always set:
246///   0x000001 data-offset-present       (offset from moof start to mdat data)
247///   0x000004 first-sample-flags-present (override of default for sample 0)
248///   0x000100 sample-duration-present
249///   0x000200 sample-size-present
250///
251/// We don't emit per-sample-flags (0x000400) because all non-first
252/// samples in a video fragment share the default (P-frame), and we
253/// don't emit sample-composition-time-offsets (0x000800) because
254/// AV1 has no B-frame reordering in our pipeline (PTS == DTS).
255///
256/// `data_offset` is the byte offset from the START of the enclosing
257/// `moof` to the first byte of the fragment's `mdat` payload. It
258/// CANNOT be filled in until the full `moof` size is known, so this
259/// builder leaves it as 0 and returns the byte position to be patched.
260/// See [`MoofData::patch_data_offset`].
261fn build_trun_video(samples: &[CmafSample]) -> (Vec<u8>, usize) {
262    let mut b = BoxBuilder::new(b"trun");
263    b.u8(0); // version
264    // Flags: data-offset (1) | first-sample-flags (4) | duration (0x100) | size (0x200)
265    let flags: u32 = 0x000001 | 0x000004 | 0x000100 | 0x000200;
266    let flag_bytes = flags.to_be_bytes();
267    b.extend(&flag_bytes[1..]);
268    b.u32(samples.len() as u32);
269    // data_offset placeholder — final value patched in once moof size is
270    // known. We track its absolute position WITHIN this trun box (header
271    // 8 + version 1 + flags 3 + sample_count 4 = 16) so the caller can
272    // translate to a position-within-moof later.
273    let data_offset_pos_within_trun = b.current_len();
274    b.u32(0); // placeholder
275
276    // first_sample_flags: the spec's standard pattern is to mark sample
277    // 0 explicitly (almost always a sync sample for the first fragment;
278    // for subsequent fragments the first sample is whatever the GOP
279    // boundary produced — typically also sync since CMAF segments must
280    // start with a sync sample per §7.3.2.1).
281    if let Some(first) = samples.first() {
282        b.u32(first.flags.pack());
283    } else {
284        b.u32(0);
285    }
286
287    for s in samples {
288        b.u32(s.duration);
289        b.u32(s.size);
290    }
291
292    let bytes = b.finish();
293    (bytes, data_offset_pos_within_trun)
294}
295
296/// `trun` for an audio fragment. Same shape as video but no sync-flags
297/// distinction (every audio sample is independently decodable in
298/// AAC-LC / Opus / AC-3 / E-AC-3), so we don't emit first-sample-flags
299/// — the default in `trex` / `tfhd` covers them all.
300fn build_trun_audio(samples: &[CmafSample]) -> (Vec<u8>, usize) {
301    let mut b = BoxBuilder::new(b"trun");
302    b.u8(0); // version
303    // Flags: data-offset (1) | duration (0x100) | size (0x200)
304    let flags: u32 = 0x000001 | 0x000100 | 0x000200;
305    let flag_bytes = flags.to_be_bytes();
306    b.extend(&flag_bytes[1..]);
307    b.u32(samples.len() as u32);
308    let data_offset_pos_within_trun = b.current_len();
309    b.u32(0); // placeholder
310
311    for s in samples {
312        b.u32(s.duration);
313        b.u32(s.size);
314    }
315
316    let bytes = b.finish();
317    (bytes, data_offset_pos_within_trun)
318}
319
320/// `mehd` — Movie Extends Header (14496-12 §8.8.2).
321///
322/// Carries the total fragment duration of the longest track, in
323/// movie timescale ticks. CMAF treats this as informational; players
324/// derive actual duration from the sum of per-fragment `trun` rows.
325/// We emit it for spec completeness.
326///
327/// Version 1 (u64 fragment_duration) — same rationale as `tfdt`.
328///
329/// Wire layout (20 bytes total):
330/// ```text
331///   size:u32          = 20
332///   type:'mehd'
333///   version:u8        = 1
334///   flags:u24         = 0
335///   fragment_duration:u64
336/// ```
337pub fn build_mehd(fragment_duration: u64) -> Vec<u8> {
338    let mut b = BoxBuilder::new(b"mehd");
339    b.u8(1); // version 1
340    b.extend(&[0, 0, 0]); // flags
341    b.u64(fragment_duration);
342    b.finish()
343}
344
345/// `trex` — Track Extends (14496-12 §8.8.3).
346///
347/// Per-track defaults that apply to every `trun` in every `moof`
348/// unless overridden via `tfhd`'s default-* fields or per-sample
349/// values in `trun`. The point of `trex` is to keep `moof` boxes
350/// small: if every sample has the same duration / size / flags, the
351/// `trun` can omit them and just inherit from `trex`.
352///
353/// In practice we override `default_sample_duration` / `_size` per
354/// fragment (durations vary slightly with rounding; sizes vary per
355/// sample) so most of these fields just hold spec-zero values. We do
356/// set `default_sample_description_index = 1` since every sample in
357/// our pipeline references the single `stsd` entry built in the
358/// init segment.
359///
360/// Wire layout (32 bytes total):
361/// ```text
362///   size:u32          = 32
363///   type:'trex'
364///   version:u8        = 0
365///   flags:u24         = 0
366///   track_id:u32
367///   default_sample_description_index:u32 = 1
368///   default_sample_duration:u32          = 0
369///   default_sample_size:u32              = 0
370///   default_sample_flags:u32             = 0 (or non-sync default)
371/// ```
372pub fn build_trex(track_id: u32, default_sample_flags: u32) -> Vec<u8> {
373    let mut b = BoxBuilder::new(b"trex");
374    b.u8(0); // version
375    b.extend(&[0, 0, 0]); // flags
376    b.u32(track_id);
377    b.u32(1); // default_sample_description_index
378    b.u32(0); // default_sample_duration (overridden per-fragment)
379    b.u32(0); // default_sample_size (overridden per-sample)
380    b.u32(default_sample_flags);
381    b.finish()
382}
383
384/// `mvex` — Movie Extends container (14496-12 §8.8.1).
385///
386/// Goes inside `moov`. Wraps a single `mehd` plus one `trex` per
387/// track. Presence of `mvex` is what tells a parser this MP4 is
388/// fragmented (i.e. there will be `moof`s following).
389pub fn build_mvex(mehd: &[u8], trexes: &[Vec<u8>]) -> Vec<u8> {
390    let mut b = BoxBuilder::new(b"mvex");
391    b.extend(mehd);
392    for trex in trexes {
393        b.extend(trex);
394    }
395    b.finish()
396}
397
398/// `traf` — Track Fragment (14496-12 §8.8.6).
399///
400/// Wraps `tfhd` + `tfdt` + `trun` for one track inside one `moof`.
401/// CMAF mandates exactly one `traf` per `moof` (§7.3.2.1: "Each CMAF
402/// Fragment SHALL contain exactly one Track Fragment Box.").
403fn build_traf(tfhd: &[u8], tfdt: &[u8], trun: &[u8]) -> Vec<u8> {
404    let mut b = BoxBuilder::new(b"traf");
405    b.extend(tfhd);
406    b.extend(tfdt);
407    b.extend(trun);
408    b.finish()
409}
410
/// Serialized `moof` box plus the location of its `trun.data_offset` field.
///
/// Returned by [`build_moof_video`] and [`build_moof_audio`]. Those
/// builders leave `data_offset` as a zero placeholder; the caller MUST
/// patch it before writing the segment, either with
/// [`Self::patch_default_no_gap`] (mdat starts right after this moof in the
/// file) or with [`Self::patch_data_offset`] when intervening bytes are
/// written between the moof and the mdat payload.
///
/// The no-gap `data_offset` is `bytes.len() + 8`: full moof size plus the
/// 8-byte mdat header. That's the standard "moof immediately followed by
/// mdat" CMAF layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoofData {
    /// The complete serialized `moof` box.
    pub bytes: Vec<u8>,
    /// Byte position WITHIN `bytes` of the 4-byte big-endian
    /// `data_offset` field inside `trun`. Use [`Self::patch_data_offset`]
    /// to overwrite it.
    pub data_offset_pos: usize,
}

impl MoofData {
    /// Patch the `trun.data_offset` field in place with `data_offset`
    /// (written big-endian): the byte offset from the START of the moof to
    /// the START of the mdat payload (i.e. moof_size + 8 for a no-gap
    /// layout).
    ///
    /// # Panics
    ///
    /// Panics if `data_offset_pos + 4` exceeds `bytes.len()`.
    pub fn patch_data_offset(&mut self, data_offset: u32) {
        let field = self.data_offset_pos..self.data_offset_pos + 4;
        self.bytes[field].copy_from_slice(&data_offset.to_be_bytes());
    }

    /// Convenience: patch with the default no-gap offset (moof
    /// immediately followed by mdat). Use this in the common case
    /// where moof + mdat are written contiguously.
    ///
    /// # Panics
    ///
    /// Panics if `bytes.len() + 8` does not fit in a `u32` (previously the
    /// value was silently truncated, which would yield a corrupt offset),
    /// or under the same condition as [`Self::patch_data_offset`].
    pub fn patch_default_no_gap(&mut self) {
        const MDAT_HEADER_LEN: usize = 8;
        let offset = self.bytes.len() + MDAT_HEADER_LEN;
        assert!(
            offset <= u32::MAX as usize,
            "moof size + mdat header ({} bytes) does not fit in trun.data_offset",
            offset
        );
        self.patch_data_offset(offset as u32);
    }
}
447
448/// Build a video `moof` for one CMAF fragment.
449///
450/// Composes `mfhd` + `traf{tfhd, tfdt, trun}` and tracks the byte
451/// position of `trun.data_offset` so the caller can patch it once
452/// the moof's final size is known (or accept the default no-gap
453/// layout via [`MoofData::patch_default_no_gap`]).
454pub fn build_moof_video(
455    sequence_number: u32,
456    track_id: u32,
457    base_media_decode_time: u64,
458    samples: &[CmafSample],
459) -> MoofData {
460    let mfhd = build_mfhd(sequence_number);
461    // Default duration/size omitted — they'll vary per-sample, so
462    // emitting them as defaults would be wrong. Default flags set to
463    // delta-frame so per-sample flags are needed only on the first
464    // (sync) sample, which we override via first_sample_flags in trun.
465    let tfhd = build_tfhd(
466        track_id,
467        None,
468        None,
469        Some(SampleFlags::delta_frame().pack()),
470    );
471    let tfdt = build_tfdt(base_media_decode_time);
472    let (trun, data_offset_pos_within_trun) = build_trun_video(samples);
473
474    // Compute where `data_offset` lives within the eventual moof.
475    // moof_header(8) + mfhd(16) + traf_header(8) + tfhd_len + tfdt(20) +
476    //   data_offset_pos_within_trun.
477    let moof_header = 8usize;
478    let traf_header = 8usize;
479    let pos_in_moof = moof_header
480        + mfhd.len()
481        + traf_header
482        + tfhd.len()
483        + tfdt.len()
484        + data_offset_pos_within_trun;
485
486    let traf = build_traf(&tfhd, &tfdt, &trun);
487    let mut b = BoxBuilder::new(b"moof");
488    b.extend(&mfhd);
489    b.extend(&traf);
490    let bytes = b.finish();
491
492    MoofData {
493        bytes,
494        data_offset_pos: pos_in_moof,
495    }
496}
497
498/// Build an audio `moof`. Same composition as video but without
499/// first-sample-flags differentiation in `trun` (every audio sample
500/// is independently decodable).
501pub fn build_moof_audio(
502    sequence_number: u32,
503    track_id: u32,
504    base_media_decode_time: u64,
505    samples: &[CmafSample],
506) -> MoofData {
507    let mfhd = build_mfhd(sequence_number);
508    // Audio default-flags: every sample is independently decodable,
509    // so default to sync.
510    let tfhd = build_tfhd(track_id, None, None, Some(SampleFlags::keyframe().pack()));
511    let tfdt = build_tfdt(base_media_decode_time);
512    let (trun, data_offset_pos_within_trun) = build_trun_audio(samples);
513
514    let moof_header = 8usize;
515    let traf_header = 8usize;
516    let pos_in_moof = moof_header
517        + mfhd.len()
518        + traf_header
519        + tfhd.len()
520        + tfdt.len()
521        + data_offset_pos_within_trun;
522
523    let traf = build_traf(&tfhd, &tfdt, &trun);
524    let mut b = BoxBuilder::new(b"moof");
525    b.extend(&mfhd);
526    b.extend(&traf);
527    let bytes = b.finish();
528
529    MoofData {
530        bytes,
531        data_offset_pos: pos_in_moof,
532    }
533}
534
535// =====================================================================
536// Init segment writers (Phase 1.2)
537// =====================================================================
538//
539// CMAF init segments carry `ftyp + moov` only — no sample data. The
540// `moov.trak.mdia.minf.stbl` has a populated `stsd` (the sample
541// description) but EMPTY `stts/stsc/stsz/stco`. That's how the parser
542// knows samples will arrive in subsequent `moof` boxes via the
543// `mvex/trex` defaults set in this same `moov`.
544//
545// The track is one-per-init per CMAF §7.3.2.1 (each video init carries
546// only the video track, each audio init only the audio track).
547// `track_id = 1` in both cases since each init's `moov` is independent.
548
549/// Build a CMAF video init segment for an AV1 track.
550///
551/// `config_obus` is the LOB-formatted OBU sequence header (with
552/// `obu_has_size_field=1`) — call [`crate::mux::extract_sequence_header`]
553/// against the first encoded packet to get this. `timescale` is the
554/// track's mdhd/mvhd timescale in ticks per second; we recommend
555/// `frame_rate × 1000` rounded to a clean number (e.g. 30000 for 30fps,
556/// 24000 for 24fps) so per-frame durations divide evenly. The fragment
557/// duration in `mehd` is left at 0 (informational; players derive
558/// actual duration from `trun`).
559pub fn build_init_segment_video(
560    width: u32,
561    height: u32,
562    timescale: u32,
563    config_obus: &[u8],
564    color_metadata: &ColorMetadata,
565) -> Vec<u8> {
566    let av01 = build_av01(width, height, config_obus, color_metadata);
567    build_init_segment_video_with_entry(width, height, timescale, &av01, b"av01")
568}
569
570/// Build a CMAF video init segment from a pre-built **visual sample entry**
571/// (`av01` / `avc1` / `avc3` / `hvc1` / `hev1`, with its config box + colr
572/// already inside) and the `ftyp` codec brand. Codec-agnostic — the caller
573/// constructs the sample entry for AV1 / H.264 / H.265.
574pub fn build_init_segment_video_with_entry(
575    width: u32,
576    height: u32,
577    timescale: u32,
578    sample_entry: &[u8],
579    codec_brand: &[u8; 4],
580) -> Vec<u8> {
581    let track_id = 1u32;
582
583    let ftyp = build_ftyp_video(codec_brand);
584
585    // moov children
586    let mvhd = build_mvhd(timescale, /* duration */ 0, /* next_track_id */ 2);
587    let trak = build_video_trak(width, height, timescale, track_id, sample_entry);
588    let mvex_blob = {
589        let mehd = build_mehd(0);
590        // For video, default sample flags are delta-frame (most samples
591        // in a fragment are P-frames); the IDR opening each fragment
592        // overrides via trun's first_sample_flags. This matches what the
593        // moof writer sets in tfhd.
594        let trex = build_trex(track_id, SampleFlags::delta_frame().pack());
595        build_mvex(&mehd, &[trex])
596    };
597
598    let mut moov = BoxBuilder::new(b"moov");
599    moov.extend(&mvhd);
600    moov.extend(&trak);
601    moov.extend(&mvex_blob);
602    let moov = moov.finish();
603
604    let mut out = Vec::with_capacity(ftyp.len() + moov.len());
605    out.extend_from_slice(&ftyp);
606    out.extend_from_slice(&moov);
607    out
608}
609
610/// Build a CMAF audio init segment.
611///
612/// `audio_info` carries codec / sample_rate / channels / asc_bytes (or
613/// codec_private for Opus / AC-3 / E-AC-3). Same struct the existing
614/// non-fragmented muxer's `with_audio` accepts — see crate::AudioInfo.
615pub fn build_init_segment_audio(audio_info: &AudioInfo) -> Vec<u8> {
616    let track_id = 1u32;
617
618    let ftyp = build_ftyp_audio();
619
620    let mvhd = build_mvhd(
621        audio_info.timescale,
622        /* duration */ 0,
623        /* next_track_id */ 2,
624    );
625    let trak = build_audio_trak(audio_info, track_id);
626    let mvex_blob = {
627        let mehd = build_mehd(0);
628        // Every audio sample is independently decodable — sync default.
629        let trex = build_trex(track_id, SampleFlags::keyframe().pack());
630        build_mvex(&mehd, &[trex])
631    };
632
633    let mut moov = BoxBuilder::new(b"moov");
634    moov.extend(&mvhd);
635    moov.extend(&trak);
636    moov.extend(&mvex_blob);
637    let moov = moov.finish();
638
639    let mut out = Vec::with_capacity(ftyp.len() + moov.len());
640    out.extend_from_slice(&ftyp);
641    out.extend_from_slice(&moov);
642    out
643}
644
645/// `ftyp` for a video init segment. Brands declare `cmfc` (CMAF video
646/// constraints), the codec brand (`av01` / `avc1` / `hvc1`), plus `iso6` /
647/// `mp42` / `iso2` for broad parser compatibility. Major brand is `iso6` (CMAF /
648/// 14496-12 edition 6) — Apple's player and ffmpeg both honour it.
649fn build_ftyp_video(codec_brand: &[u8; 4]) -> Vec<u8> {
650    let mut b = BoxBuilder::new(b"ftyp");
651    b.extend(b"iso6"); // major_brand
652    b.u32(0); // minor_version
653    b.extend(b"iso6");
654    b.extend(b"iso2");
655    b.extend(b"mp42");
656    b.extend(brand::CMFC);
657    b.extend(codec_brand);
658    b.finish()
659}
660
661/// `ftyp` for an audio init segment. Same as video but `cmfa` brand
662/// instead of `cmfc`, and no `av01` (irrelevant for an audio-only
663/// segment).
664fn build_ftyp_audio() -> Vec<u8> {
665    let mut b = BoxBuilder::new(b"ftyp");
666    b.extend(b"iso6"); // major_brand
667    b.u32(0); // minor_version
668    b.extend(b"iso6");
669    b.extend(b"iso2");
670    b.extend(b"mp42");
671    b.extend(brand::CMFA);
672    b.finish()
673}
674
675/// `mvhd` (14496-12 §8.2.2) — movie header. Same layout as the existing
676/// non-fragmented muxer; reimplemented here because we need a slightly
677/// different `next_track_id` (single-track init segments).
678fn build_mvhd(timescale: u32, duration: u64, next_track_id: u32) -> Vec<u8> {
679    let mut b = BoxBuilder::new(b"mvhd");
680    b.u8(0);
681    b.extend(&[0, 0, 0]);
682    b.u32(0); // creation_time
683    b.u32(0); // modification_time
684    b.u32(timescale);
685    b.u32(duration as u32);
686    b.u32(0x00010000); // rate 1.0
687    b.u16(0x0100); // volume 1.0
688    b.u16(0); // reserved
689    b.u32(0);
690    b.u32(0);
691    write_unity_matrix(&mut b);
692    for _ in 0..6 {
693        b.u32(0);
694    } // pre_defined
695    b.u32(next_track_id);
696    b.finish()
697}
698
699fn build_video_trak(
700    width: u32,
701    height: u32,
702    timescale: u32,
703    track_id: u32,
704    sample_entry: &[u8],
705) -> Vec<u8> {
706    let tkhd = build_video_tkhd(width, height, track_id);
707    let mdia = build_video_mdia(timescale, sample_entry);
708    let mut b = BoxBuilder::new(b"trak");
709    b.extend(&tkhd);
710    b.extend(&mdia);
711    b.finish()
712}
713
714fn build_video_tkhd(width: u32, height: u32, track_id: u32) -> Vec<u8> {
715    let mut b = BoxBuilder::new(b"tkhd");
716    b.u8(0);
717    // flags = 0x000003 (track_enabled | track_in_movie). We don't set
718    // 0x000004 (track_in_preview) — that's a QuickTime-flavored bit and
719    // streaming players ignore it.
720    b.extend(&[0, 0, 0x03]);
721    b.u32(0); // creation_time
722    b.u32(0); // modification_time
723    b.u32(track_id);
724    b.u32(0); // reserved
725    b.u32(0); // duration (movie timescale; fragment muxer leaves this 0)
726    b.u32(0);
727    b.u32(0);
728    b.u16(0); // layer
729    b.u16(0); // alternate_group
730    b.u16(0); // volume = 0 for video
731    b.u16(0); // reserved
732    write_unity_matrix(&mut b);
733    b.u32(width << 16); // width 16.16
734    b.u32(height << 16);
735    b.finish()
736}
737
738fn build_video_mdia(timescale: u32, sample_entry: &[u8]) -> Vec<u8> {
739    let mdhd = build_mdhd(timescale, 0);
740    let hdlr = build_hdlr(b"vide", "VideoHandler\0");
741    let minf = build_video_minf(sample_entry);
742    let mut b = BoxBuilder::new(b"mdia");
743    b.extend(&mdhd);
744    b.extend(&hdlr);
745    b.extend(&minf);
746    b.finish()
747}
748
749fn build_mdhd(timescale: u32, duration: u64) -> Vec<u8> {
750    let mut b = BoxBuilder::new(b"mdhd");
751    b.u8(0);
752    b.extend(&[0, 0, 0]);
753    b.u32(0); // creation_time
754    b.u32(0); // modification_time
755    b.u32(timescale);
756    b.u32(duration as u32);
757    b.u16(0x55c4); // language 'und'
758    b.u16(0); // pre_defined
759    b.finish()
760}
761
762/// Generic handler box — `'vide'` for video, `'soun'` for audio. The
763/// human-readable name string (with trailing NUL) is purely
764/// informational; ffprobe surfaces it but no playback path consumes it.
765fn build_hdlr(handler_type: &[u8; 4], name: &str) -> Vec<u8> {
766    let mut b = BoxBuilder::new(b"hdlr");
767    b.u8(0);
768    b.extend(&[0, 0, 0]);
769    b.u32(0); // pre_defined
770    b.extend(handler_type);
771    b.u32(0);
772    b.u32(0);
773    b.u32(0); // reserved[3]
774    b.extend(name.as_bytes());
775    b.finish()
776}
777
778fn build_video_minf(sample_entry: &[u8]) -> Vec<u8> {
779    let vmhd = build_vmhd();
780    let dinf = build_dinf();
781    let stbl = build_video_stbl_empty(sample_entry);
782    let mut b = BoxBuilder::new(b"minf");
783    b.extend(&vmhd);
784    b.extend(&dinf);
785    b.extend(&stbl);
786    b.finish()
787}
788
789fn build_vmhd() -> Vec<u8> {
790    let mut b = BoxBuilder::new(b"vmhd");
791    b.u8(0);
792    b.extend(&[0, 0, 0x01]); // flags = 1 per spec
793    b.u16(0); // graphicsmode (0 = copy)
794    b.u16(0);
795    b.u16(0);
796    b.u16(0); // opcolor[3] (RGB, 0,0,0)
797    b.finish()
798}
799
800fn build_smhd() -> Vec<u8> {
801    let mut b = BoxBuilder::new(b"smhd");
802    b.u8(0);
803    b.extend(&[0, 0, 0]);
804    b.u16(0); // balance (0 = center)
805    b.u16(0); // reserved
806    b.finish()
807}
808
809/// `dinf` containing a minimal `dref` with one `url ` self-reference.
810/// Required by 14496-12 even when sample data is in the same file.
811fn build_dinf() -> Vec<u8> {
812    let url = {
813        let mut b = BoxBuilder::new(b"url ");
814        b.u8(0); // version
815        b.extend(&[0, 0, 0x01]); // flags = 1 (data is in the same file)
816        b.finish()
817    };
818    let dref = {
819        let mut b = BoxBuilder::new(b"dref");
820        b.u8(0);
821        b.extend(&[0, 0, 0]);
822        b.u32(1); // entry_count
823        b.extend(&url);
824        b.finish()
825    };
826    let mut b = BoxBuilder::new(b"dinf");
827    b.extend(&dref);
828    b.finish()
829}
830
831/// Empty sample tables for a CMAF video init: `stsd` has the av01
832/// sample entry (with av1C, colr, optional mdcv/clli) and the rest of
833/// the tables are empty boxes (entry_count=0).
834fn build_video_stbl_empty(sample_entry: &[u8]) -> Vec<u8> {
835    let stsd = {
836        let mut b = BoxBuilder::new(b"stsd");
837        b.u8(0);
838        b.extend(&[0, 0, 0]);
839        b.u32(1); // entry_count
840        b.extend(sample_entry);
841        b.finish()
842    };
843    let stts = build_empty_full_box(b"stts");
844    let stsc = build_empty_full_box(b"stsc");
845    let stsz = {
846        let mut b = BoxBuilder::new(b"stsz");
847        b.u8(0);
848        b.extend(&[0, 0, 0]);
849        b.u32(0); // sample_size = 0 → variable, per stsz (then sample_count must be 0 too)
850        b.u32(0); // sample_count = 0
851        b.finish()
852    };
853    let stco = build_empty_full_box(b"stco");
854
855    let mut b = BoxBuilder::new(b"stbl");
856    b.extend(&stsd);
857    b.extend(&stts);
858    b.extend(&stsc);
859    b.extend(&stsz);
860    b.extend(&stco);
861    b.finish()
862}
863
864fn build_audio_trak(info: &AudioInfo, track_id: u32) -> Vec<u8> {
865    let tkhd = build_audio_tkhd(track_id);
866    let mdia = build_audio_mdia(info);
867    let mut b = BoxBuilder::new(b"trak");
868    b.extend(&tkhd);
869    b.extend(&mdia);
870    b.finish()
871}
872
873fn build_audio_tkhd(track_id: u32) -> Vec<u8> {
874    let mut b = BoxBuilder::new(b"tkhd");
875    b.u8(0);
876    b.extend(&[0, 0, 0x03]);
877    b.u32(0);
878    b.u32(0);
879    b.u32(track_id);
880    b.u32(0);
881    b.u32(0);
882    b.u32(0);
883    b.u32(0);
884    b.u16(0); // layer
885    b.u16(0); // alternate_group (audio init has only one track; 0 fine)
886    b.u16(0x0100); // volume 1.0
887    b.u16(0); // reserved
888    write_unity_matrix(&mut b);
889    b.u32(0);
890    b.u32(0); // width / height = 0
891    b.finish()
892}
893
894fn build_audio_mdia(info: &AudioInfo) -> Vec<u8> {
895    let mdhd = build_mdhd(info.timescale, 0);
896    let hdlr = build_hdlr(b"soun", "SoundHandler\0");
897    let minf = build_audio_minf(info);
898    let mut b = BoxBuilder::new(b"mdia");
899    b.extend(&mdhd);
900    b.extend(&hdlr);
901    b.extend(&minf);
902    b.finish()
903}
904
905fn build_audio_minf(info: &AudioInfo) -> Vec<u8> {
906    let smhd = build_smhd();
907    let dinf = build_dinf();
908    let stbl = build_audio_stbl_empty(info);
909    let mut b = BoxBuilder::new(b"minf");
910    b.extend(&smhd);
911    b.extend(&dinf);
912    b.extend(&stbl);
913    b.finish()
914}
915
916fn build_audio_stbl_empty(info: &AudioInfo) -> Vec<u8> {
917    let stsd = build_audio_stsd(info);
918    let stts = build_empty_full_box(b"stts");
919    let stsc = build_empty_full_box(b"stsc");
920    let stsz = {
921        let mut b = BoxBuilder::new(b"stsz");
922        b.u8(0);
923        b.extend(&[0, 0, 0]);
924        b.u32(0);
925        b.u32(0);
926        b.finish()
927    };
928    let stco = build_empty_full_box(b"stco");
929
930    let mut b = BoxBuilder::new(b"stbl");
931    b.extend(&stsd);
932    b.extend(&stts);
933    b.extend(&stsc);
934    b.extend(&stsz);
935    b.extend(&stco);
936    b.finish()
937}
938
939/// Empty FullBox with version 0 + flags 0 + entry_count 0. Layout:
940///   size:u32 = 16 | type | version:u8 = 0 | flags:u24 = 0 | entry_count:u32 = 0
941fn build_empty_full_box(box_type: &[u8; 4]) -> Vec<u8> {
942    let mut b = BoxBuilder::new(box_type);
943    b.u8(0);
944    b.extend(&[0, 0, 0]);
945    b.u32(0);
946    b.finish()
947}
948
// =====================================================================
// Stateful per-rendition segmenter (Phase 1.3 + 1.4)
// =====================================================================
//
// Each `CmafVideoMuxer` (one per video rendition) and `CmafAudioMuxer`
// (one per audio rendition; usually a single instance per asset)
// accumulates encoded packets in memory and flushes them to disk as
// CMAF media segments (`seg-NNNNN.m4s` = `moof + mdat`) on demand.
//
// Memory ceiling: at most one segment's worth of payload bytes is
// held at a time (the caller flushes at every keyframe boundary that
// crosses the segment-duration target). For a 4-second 1080p AV1
// segment at ~3 Mbps that's ~1.5 MB; not a concern at the per-job
// 4 GiB ceiling.
//
// The init segment (`init.mp4`) is written LAZILY by both muxers: on
// the first `flush_segment` call, or at `finalize` if nothing was
// flushed before. Video has to wait because the sample entry needs
// data from the first packet (the AV1 OBU sequence header for `av1C`,
// or SPS/PPS(/VPS) for H.264/H.265). Audio has everything it needs at
// construction, but `CmafAudioMuxer::new` does not write the file
// either; it goes through the same lazy path.
968
/// Metadata describing one media segment that has been written to disk.
///
/// Returned by [`CmafVideoMuxer::flush_segment`] and
/// [`CmafAudioMuxer::flush_segment`]. The records feed the HLS playlist
/// writer (Phase 3) and the segment-alignment validator (Phase 5).
#[derive(Debug, Clone)]
pub struct SegmentInfo {
    /// Sequence number of the segment within its track; 1-based and
    /// monotonically increasing.
    pub sequence_number: u32,
    /// On-disk location of the `seg-NNNNN.m4s` file.
    pub path: PathBuf,
    /// Size of the whole file in bytes: moof + mdat header + payload.
    pub byte_size: u64,
    /// Sum of the per-sample durations, in track-timescale ticks. Divide
    /// by the timescale to get the value for the HLS `EXTINF` line.
    pub duration_ticks: u64,
}
985
986/// Output of a finalized track muxer: where the init segment lives,
987/// the ordered list of media segments, and the timescale needed to
988/// convert `duration_ticks` to seconds.
989#[derive(Debug, Clone)]
990pub struct CmafTrackManifest {
991    pub init_path: PathBuf,
992    pub segments: Vec<SegmentInfo>,
993    pub timescale: u32,
994}
995
996impl CmafTrackManifest {
997    /// Total duration across all segments, in seconds.
998    pub fn duration_seconds(&self) -> f64 {
999        let total_ticks: u64 = self.segments.iter().map(|s| s.duration_ticks).sum();
1000        total_ticks as f64 / self.timescale as f64
1001    }
1002}
1003
/// A video sample buffered by the muxer until the next segment flush.
struct PendingVideoSample {
    /// Sample bytes exactly as they will be copied into `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
    /// Whether the sample is a sync sample.
    is_keyframe: bool,
}
1010
/// An audio sample buffered by the muxer until the next segment flush.
struct PendingAudioSample {
    /// Sample bytes exactly as they will be copied into `mdat`.
    payload: Vec<u8>,
    /// Sample duration in track-timescale ticks.
    duration: u32,
}
1016
1017/// Stateful CMAF video segmenter for one AV1 rendition.
1018///
1019/// Driven by the pipeline:
1020/// 1. Construct with rendition dimensions + output dir + timescale.
1021/// 2. Call `add_packet` for each encoded packet from the encoder.
1022///    The first packet's OBU stream MUST contain a sequence header;
1023///    the muxer extracts it and uses it for `av1C` in the init.mp4
1024///    (written lazily on the first `flush_segment` call).
1025/// 3. Call `flush_segment` whenever a CMAF fragment boundary is
1026///    reached (the orchestrator decides when based on accumulated
1027///    duration + the segment_duration knob).
1028/// 4. After the last packet is added and flushed, call `finalize`
1029///    to consume the muxer and get the [`CmafTrackManifest`].
1030///
1031/// Segment files are named `seg-00001.m4s`, `seg-00002.m4s`, ...
1032/// in the output dir.
1033pub struct CmafVideoMuxer {
1034    output_dir: PathBuf,
1035    width: u32,
1036    height: u32,
1037    timescale: u32,
1038    color_metadata: ColorMetadata,
1039    track_id: u32,
1040    /// Output codec. `Av1` stores OBUs verbatim + builds `av01`/`av1C`;
1041    /// `H264`/`H265` repackage Annex-B → length-prefixed via `nal_writer` and
1042    /// build `avc3`/`hev1` init segments with inline parameter sets.
1043    codec: VideoCodec,
1044    /// AV1 only: the OBU sequence header captured from the first packet.
1045    config_obus: Option<Vec<u8>>,
1046    /// H.264/H.265 only: Annex-B → length-prefixed repackaging + SPS/PPS(/VPS)
1047    /// capture (inline mode — each segment self-describes; `avc3`/`hev1`).
1048    nal_writer: Option<NalSampleWriter>,
1049    init_path: PathBuf,
1050    init_written: bool,
1051    sequence_number: u32,
1052    base_decode_time: u64,
1053    pending: Vec<PendingVideoSample>,
1054    segments: Vec<SegmentInfo>,
1055}
1056
/// Optional construction parameters for [`CmafVideoMuxer`].
///
/// The defaults reproduce the original 5-arg `new()` behaviour: write
/// init.mp4, start segment numbering at 1, decode-time at 0.
///
/// Non-default values serve the multi-GPU helper-task path (see the
/// `pipeline::cmaf` helper variant). When several muxers share one
/// per-rung output directory, each helper's muxer starts at a non-1
/// `first_segment_index` with the matching decode-time offset, and only
/// the primary writes `init.mp4`.
#[derive(Debug, Clone)]
pub struct CmafVideoMuxerOptions {
    /// 1-based index of the segment produced by the muxer's first
    /// `flush_segment()`; the file is named
    /// `seg-{first_segment_index:05}.m4s`. Defaults to `1` (the
    /// primary's first segment).
    pub first_segment_index: u32,
    /// Decode time, in track-timescale ticks, of the first sample of the
    /// muxer's first segment. Should equal
    /// `(first_segment_index - 1) * segment_duration_ticks` so that
    /// `tfdt` is byte-identical to what the primary would produce for
    /// the same segment index. Defaults to `0`.
    pub first_segment_base_decode_time: u64,
    /// When `false`, `flush_segment()` and `finalize()` skip writing
    /// `init.mp4`. Use this when a sibling muxer (typically the primary)
    /// owns the init segment and helpers must not race against it.
    /// Defaults to `true`.
    pub write_init_segment: bool,
}

impl Default for CmafVideoMuxerOptions {
    /// Options for a stand-alone primary muxer: segment 1, decode time 0,
    /// init segment written.
    fn default() -> Self {
        CmafVideoMuxerOptions {
            write_init_segment: true,
            first_segment_base_decode_time: 0,
            first_segment_index: 1,
        }
    }
}
1094
1095impl CmafVideoMuxer {
1096    /// Construct a new video muxer that writes init.mp4 + segments to
1097    /// `output_dir`. Creates the directory if it doesn't exist.
1098    ///
1099    /// Equivalent to `new_with_options(..., CmafVideoMuxerOptions::default())`.
1100    pub fn new(
1101        output_dir: impl AsRef<Path>,
1102        width: u32,
1103        height: u32,
1104        timescale: u32,
1105        color_metadata: ColorMetadata,
1106    ) -> Result<Self> {
1107        Self::new_with_options(
1108            output_dir,
1109            width,
1110            height,
1111            timescale,
1112            color_metadata,
1113            CmafVideoMuxerOptions::default(),
1114        )
1115    }
1116
1117    /// Construct a muxer with non-default options. See
1118    /// [`CmafVideoMuxerOptions`].
1119    ///
1120    /// The helper-task path uses this to attach to an in-progress rung:
1121    /// the helper's muxer starts numbering segments at the helper's
1122    /// claim range start, advances `tfdt` to the corresponding decode
1123    /// time, and skips the init segment write that the primary owns.
1124    pub fn new_with_options(
1125        output_dir: impl AsRef<Path>,
1126        width: u32,
1127        height: u32,
1128        timescale: u32,
1129        color_metadata: ColorMetadata,
1130        options: CmafVideoMuxerOptions,
1131    ) -> Result<Self> {
1132        Self::new_with_codec_options(
1133            output_dir,
1134            width,
1135            height,
1136            timescale,
1137            color_metadata,
1138            VideoCodec::Av1,
1139            options,
1140        )
1141    }
1142
1143    /// Codec-aware constructor. `Av1` matches the legacy behaviour; `H264` /
1144    /// `H265` build `avc3` / `hev1` init segments and repackage the encoder's
1145    /// Annex-B packets into length-prefixed samples with inline parameter sets
1146    /// (each segment self-describes — robust across the multi-GPU helper path).
1147    pub fn new_with_codec_options(
1148        output_dir: impl AsRef<Path>,
1149        width: u32,
1150        height: u32,
1151        timescale: u32,
1152        color_metadata: ColorMetadata,
1153        codec: VideoCodec,
1154        options: CmafVideoMuxerOptions,
1155    ) -> Result<Self> {
1156        assert!(
1157            options.first_segment_index >= 1,
1158            "first_segment_index is 1-based; got {}",
1159            options.first_segment_index,
1160        );
1161        let output_dir = output_dir.as_ref().to_path_buf();
1162        fs::create_dir_all(&output_dir)
1163            .with_context(|| format!("creating CMAF video output dir: {}", output_dir.display()))?;
1164        let init_path = output_dir.join("init.mp4");
1165        // H.264/H.265 use inline parameter sets (avc3/hev1) so each segment —
1166        // and each independently-encoded multi-GPU chunk — self-describes.
1167        let nal_writer = match codec {
1168            VideoCodec::Av1 => None,
1169            VideoCodec::H264 => Some(NalSampleWriter::new_inline(NalMuxCodec::H264)),
1170            VideoCodec::H265 => Some(NalSampleWriter::new_inline(NalMuxCodec::H265)),
1171        };
1172        Ok(Self {
1173            output_dir,
1174            width,
1175            height,
1176            timescale,
1177            color_metadata,
1178            track_id: 1,
1179            codec,
1180            config_obus: None,
1181            nal_writer,
1182            init_path,
1183            // When write_init_segment is false, mark init as already
1184            // written so `ensure_init_written` is a no-op. The primary
1185            // is expected to have written (or will write) init.mp4
1186            // separately.
1187            init_written: !options.write_init_segment,
1188            // `flush_segment` pre-increments `sequence_number` before
1189            // writing, so the on-disk segment number equals
1190            // `sequence_number` AFTER the increment. To produce
1191            // `seg-{first_segment_index:05}.m4s` as the first output,
1192            // start at `first_segment_index - 1`.
1193            sequence_number: options.first_segment_index - 1,
1194            base_decode_time: options.first_segment_base_decode_time,
1195            pending: Vec::new(),
1196            segments: Vec::new(),
1197        })
1198    }
1199
1200    /// Add one encoded video packet to the current pending segment.
1201    /// `duration` is in track-timescale ticks. `is_keyframe` must be
1202    /// true for IDR / sync-sample packets — the muxer doesn't peek
1203    /// into the OBU stream to figure that out, and a wrong value
1204    /// will produce a CMAF segment that doesn't decode (the spec
1205    /// requires every segment to start with a sync sample).
1206    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32, is_keyframe: bool) -> Result<()> {
1207        match &mut self.nal_writer {
1208            None => {
1209                // AV1: capture the OBU sequence header once; store OBUs verbatim.
1210                if self.config_obus.is_none() {
1211                    self.config_obus = Some(crate::mux::extract_sequence_header(&payload).context(
1212                        "extracting AV1 sequence header from first packet for av1C config record",
1213                    )?);
1214                }
1215                self.pending.push(PendingVideoSample {
1216                    payload,
1217                    duration,
1218                    is_keyframe,
1219                });
1220            }
1221            Some(writer) => {
1222                // H.264/H.265: split the Annex-B packet into access units (one
1223                // per frame); each becomes a length-prefixed sample carrying its
1224                // own inline SPS/PPS. Per-AU keyframe (IDR) detection comes from
1225                // the bitstream, not the caller's flag. Each frame keeps the
1226                // full per-frame `duration` (a packet may hold several frames).
1227                for au in writer.push_packet(&payload) {
1228                    self.pending.push(PendingVideoSample {
1229                        payload: au.data,
1230                        duration,
1231                        is_keyframe: au.is_keyframe,
1232                    });
1233                }
1234            }
1235        }
1236        Ok(())
1237    }
1238
1239    /// Whether the muxer is ready to flush a segment that starts on a
1240    /// sync sample. The first sample in `pending` must be a keyframe.
1241    /// CMAF requires every segment to begin with a sync sample
1242    /// (§7.3.2.1), so the orchestrator should ensure this invariant
1243    /// before calling `flush_segment`.
1244    pub fn first_pending_is_keyframe(&self) -> bool {
1245        self.pending.first().is_some_and(|s| s.is_keyframe)
1246    }
1247
1248    /// Total duration of pending samples in track-timescale ticks. The
1249    /// orchestrator uses this to decide when a segment has reached
1250    /// its target duration.
1251    pub fn pending_duration_ticks(&self) -> u64 {
1252        self.pending.iter().map(|s| s.duration as u64).sum()
1253    }
1254
1255    /// View of segments already flushed to disk. Each entry's
1256    /// `sequence_number` is the segment's 1-based index; `path` is
1257    /// the on-disk location. The helper-task path
1258    /// (`pipeline::cmaf::cmaf_transcode_rung_slice`) reads this
1259    /// between `add_packet` calls to detect "did the last add
1260    /// trigger an auto-flush?" — when `segments().len()` grows, the
1261    /// last entry is the newly-flushed segment.
1262    pub fn segments(&self) -> &[SegmentInfo] {
1263        &self.segments
1264    }
1265
1266    /// Drop every sample currently in the pending buffer without
1267    /// writing them to disk. Used by the helper-task path when its
1268    /// claim has been shrunk by an `attach_helper` and the encoder's
1269    /// lookahead would otherwise produce a segment that conflicts
1270    /// with whichever helper now owns that range.
1271    ///
1272    /// Specifically: when a primary's claim is shrunk from `[0..N)`
1273    /// to `[0..K)`, the primary's encoder has already received
1274    /// frames `K*KI..K*KI+lookahead` by the time the claim-shrink
1275    /// is observed at the segment boundary. Those frames belong to
1276    /// the helper that took `[K..N)`. Discarding the muxer pending
1277    /// + dropping the encoder is the cleanest way to ensure no
1278    /// stale segment file is written for the helper's territory.
1279    pub fn clear_pending(&mut self) {
1280        self.pending.clear();
1281    }
1282
1283    /// Flush pending samples to a new media segment file. Writes
1284    /// `init.mp4` first if it hasn't been written yet (the av1C config
1285    /// record needs the first packet's sequence header). Returns the
1286    /// segment's metadata and clears the pending buffer.
1287    ///
1288    /// No-op if `pending` is empty.
1289    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
1290        if self.pending.is_empty() {
1291            return Ok(None);
1292        }
1293        if !self.first_pending_is_keyframe() {
1294            anyhow::bail!(
1295                "CMAF segment must start with a sync sample; first pending sample is not a keyframe \
1296                 (segment_number={}, pending_count={})",
1297                self.sequence_number + 1,
1298                self.pending.len()
1299            );
1300        }
1301        self.ensure_init_written()?;
1302
1303        self.sequence_number += 1;
1304        let seq = self.sequence_number;
1305        let samples_meta: Vec<CmafSample> = self
1306            .pending
1307            .iter()
1308            .map(|s| CmafSample {
1309                duration: s.duration,
1310                size: s.payload.len() as u32,
1311                flags: if s.is_keyframe {
1312                    SampleFlags::keyframe()
1313                } else {
1314                    SampleFlags::delta_frame()
1315                },
1316            })
1317            .collect();
1318        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
1319
1320        let mut moof = build_moof_video(seq, self.track_id, self.base_decode_time, &samples_meta);
1321        moof.patch_default_no_gap();
1322
1323        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
1324        let mdat_box_size: u64 = 8 + payload_total;
1325        if mdat_box_size > u32::MAX as u64 {
1326            // Above u32::MAX we'd need a `largesize` mdat (16-byte header).
1327            // For 4-second segments at sane bitrates this is impossible; if
1328            // we ever hit it, bail with a clear error rather than silently
1329            // overflowing.
1330            anyhow::bail!(
1331                "CMAF media segment payload {} bytes exceeds 32-bit mdat size limit",
1332                payload_total
1333            );
1334        }
1335
1336        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
1337        let file = File::create(&path)
1338            .with_context(|| format!("creating CMAF segment file: {}", path.display()))?;
1339        let mut writer = BufWriter::new(file);
1340        writer.write_all(&moof.bytes).context("writing moof")?;
1341        writer
1342            .write_all(&(mdat_box_size as u32).to_be_bytes())
1343            .context("writing mdat size")?;
1344        writer.write_all(b"mdat").context("writing mdat type")?;
1345        for sample in &self.pending {
1346            writer
1347                .write_all(&sample.payload)
1348                .context("writing mdat payload")?;
1349        }
1350        writer.flush().context("flushing CMAF segment writer")?;
1351        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
1352
1353        self.base_decode_time += segment_duration;
1354        self.pending.clear();
1355
1356        let info = SegmentInfo {
1357            sequence_number: seq,
1358            path,
1359            byte_size,
1360            duration_ticks: segment_duration,
1361        };
1362        self.segments.push(info.clone());
1363        Ok(Some(info))
1364    }
1365
1366    /// Finalize the muxer: ensures the init segment is on disk (covers
1367    /// the edge case where add_packet was called but flush_segment
1368    /// never was — e.g. an empty source), drops any non-flushed
1369    /// pending samples (caller should have flushed them), and returns
1370    /// the manifest.
1371    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
1372        if !self.pending.is_empty() {
1373            // Flush whatever's left. The caller should have done this
1374            // explicitly; we cover them defensively.
1375            self.flush_segment()?;
1376        }
1377        self.ensure_init_written()?;
1378        Ok(CmafTrackManifest {
1379            init_path: self.init_path,
1380            segments: self.segments,
1381            timescale: self.timescale,
1382        })
1383    }
1384
1385    fn ensure_init_written(&mut self) -> Result<()> {
1386        if self.init_written {
1387            return Ok(());
1388        }
1389        let init = match self.codec {
1390            VideoCodec::Av1 => {
1391                let config = self.config_obus.as_ref().ok_or_else(|| {
1392                    anyhow::anyhow!(
1393                        "cannot write CMAF video init segment: no AV1 sequence header has been \
1394                         observed yet (call add_packet before flush_segment / finalize)"
1395                    )
1396                })?;
1397                build_init_segment_video(
1398                    self.width,
1399                    self.height,
1400                    self.timescale,
1401                    config,
1402                    &self.color_metadata,
1403                )
1404            }
1405            VideoCodec::H264 => {
1406                let w = self.nal_writer.as_ref().context("H.264 CMAF nal writer missing")?;
1407                if !w.has_param_sets() {
1408                    anyhow::bail!("cannot write CMAF H.264 init segment: no SPS/PPS observed yet");
1409                }
1410                let avcc = build_avcc(&w.sps, &w.pps);
1411                // avc3 sample entry (in-band parameter sets); avc1 ftyp brand.
1412                let entry = build_avc1(self.width, self.height, &avcc, &self.color_metadata, b"avc3");
1413                build_init_segment_video_with_entry(
1414                    self.width,
1415                    self.height,
1416                    self.timescale,
1417                    &entry,
1418                    b"avc1",
1419                )
1420            }
1421            VideoCodec::H265 => {
1422                let w = self.nal_writer.as_ref().context("H.265 CMAF nal writer missing")?;
1423                if !w.has_param_sets() {
1424                    anyhow::bail!(
1425                        "cannot write CMAF H.265 init segment: no VPS/SPS/PPS observed yet"
1426                    );
1427                }
1428                let hvcc = build_hvcc(&w.vps, &w.sps, &w.pps);
1429                // hev1 sample entry (in-band parameter sets); hvc1 ftyp brand.
1430                let entry = build_hvc1(self.width, self.height, &hvcc, &self.color_metadata, b"hev1");
1431                build_init_segment_video_with_entry(
1432                    self.width,
1433                    self.height,
1434                    self.timescale,
1435                    &entry,
1436                    b"hvc1",
1437                )
1438            }
1439        };
1440        let mut file = File::create(&self.init_path).with_context(|| {
1441            format!(
1442                "creating CMAF video init segment: {}",
1443                self.init_path.display()
1444            )
1445        })?;
1446        file.write_all(&init)
1447            .context("writing CMAF video init segment bytes")?;
1448        file.flush().context("flushing CMAF video init segment")?;
1449        self.init_written = true;
1450        Ok(())
1451    }
1452}
1453
1454/// Stateful CMAF audio segmenter. Same model as the video muxer but
1455/// simpler — every audio sample is independently decodable, so there's
1456/// no first-sample-flags / sync-boundary requirement.
1457pub struct CmafAudioMuxer {
1458    output_dir: PathBuf,
1459    info: AudioInfo,
1460    track_id: u32,
1461    init_path: PathBuf,
1462    init_written: bool,
1463    sequence_number: u32,
1464    base_decode_time: u64,
1465    pending: Vec<PendingAudioSample>,
1466    segments: Vec<SegmentInfo>,
1467}
1468
1469impl CmafAudioMuxer {
1470    pub fn new(output_dir: impl AsRef<Path>, info: AudioInfo) -> Result<Self> {
1471        let output_dir = output_dir.as_ref().to_path_buf();
1472        fs::create_dir_all(&output_dir)
1473            .with_context(|| format!("creating CMAF audio output dir: {}", output_dir.display()))?;
1474        let init_path = output_dir.join("init.mp4");
1475        Ok(Self {
1476            output_dir,
1477            info,
1478            track_id: 1,
1479            init_path,
1480            init_written: false,
1481            sequence_number: 0,
1482            base_decode_time: 0,
1483            pending: Vec::new(),
1484            segments: Vec::new(),
1485        })
1486    }
1487
1488    pub fn add_packet(&mut self, payload: Vec<u8>, duration: u32) -> Result<()> {
1489        self.pending.push(PendingAudioSample { payload, duration });
1490        Ok(())
1491    }
1492
1493    pub fn pending_duration_ticks(&self) -> u64 {
1494        self.pending.iter().map(|s| s.duration as u64).sum()
1495    }
1496
1497    pub fn flush_segment(&mut self) -> Result<Option<SegmentInfo>> {
1498        if self.pending.is_empty() {
1499            return Ok(None);
1500        }
1501        self.ensure_init_written()?;
1502
1503        self.sequence_number += 1;
1504        let seq = self.sequence_number;
1505        let samples_meta: Vec<CmafSample> = self
1506            .pending
1507            .iter()
1508            .map(|s| CmafSample {
1509                duration: s.duration,
1510                size: s.payload.len() as u32,
1511                flags: SampleFlags::keyframe(),
1512            })
1513            .collect();
1514        let segment_duration: u64 = samples_meta.iter().map(|s| s.duration as u64).sum();
1515
1516        let mut moof = build_moof_audio(seq, self.track_id, self.base_decode_time, &samples_meta);
1517        moof.patch_default_no_gap();
1518
1519        let payload_total: u64 = self.pending.iter().map(|s| s.payload.len() as u64).sum();
1520        let mdat_box_size: u64 = 8 + payload_total;
1521        if mdat_box_size > u32::MAX as u64 {
1522            anyhow::bail!(
1523                "CMAF audio media segment payload {} bytes exceeds 32-bit mdat size limit",
1524                payload_total
1525            );
1526        }
1527
1528        let path = self.output_dir.join(format!("seg-{:05}.m4s", seq));
1529        let file = File::create(&path)
1530            .with_context(|| format!("creating CMAF audio segment file: {}", path.display()))?;
1531        let mut writer = BufWriter::new(file);
1532        writer
1533            .write_all(&moof.bytes)
1534            .context("writing audio moof")?;
1535        writer
1536            .write_all(&(mdat_box_size as u32).to_be_bytes())
1537            .context("writing audio mdat size")?;
1538        writer
1539            .write_all(b"mdat")
1540            .context("writing audio mdat type")?;
1541        for sample in &self.pending {
1542            writer
1543                .write_all(&sample.payload)
1544                .context("writing audio mdat payload")?;
1545        }
1546        writer
1547            .flush()
1548            .context("flushing CMAF audio segment writer")?;
1549        let byte_size = moof.bytes.len() as u64 + mdat_box_size;
1550
1551        self.base_decode_time += segment_duration;
1552        self.pending.clear();
1553
1554        let info = SegmentInfo {
1555            sequence_number: seq,
1556            path,
1557            byte_size,
1558            duration_ticks: segment_duration,
1559        };
1560        self.segments.push(info.clone());
1561        Ok(Some(info))
1562    }
1563
1564    pub fn finalize(mut self) -> Result<CmafTrackManifest> {
1565        if !self.pending.is_empty() {
1566            self.flush_segment()?;
1567        }
1568        self.ensure_init_written()?;
1569        let timescale = self.info.timescale;
1570        Ok(CmafTrackManifest {
1571            init_path: self.init_path,
1572            segments: self.segments,
1573            timescale,
1574        })
1575    }
1576
1577    fn ensure_init_written(&mut self) -> Result<()> {
1578        if self.init_written {
1579            return Ok(());
1580        }
1581        let init = build_init_segment_audio(&self.info);
1582        let mut file = File::create(&self.init_path).with_context(|| {
1583            format!(
1584                "creating CMAF audio init segment: {}",
1585                self.init_path.display()
1586            )
1587        })?;
1588        file.write_all(&init)
1589            .context("writing CMAF audio init segment bytes")?;
1590        file.flush().context("flushing CMAF audio init segment")?;
1591        self.init_written = true;
1592        Ok(())
1593    }
1594}
1595
1596#[cfg(test)]
1597mod tests {
1598    use super::*;
1599
/// Read the big-endian `u32` stored at `buf[pos..pos + 4]`.
/// Panics if that range is out of bounds.
fn read_be_u32(buf: &[u8], pos: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[pos..pos + 4]);
    u32::from_be_bytes(raw)
}
1603
/// Read the big-endian `u64` stored at `buf[pos..pos + 8]`.
/// Panics if that range is out of bounds.
fn read_be_u64(buf: &[u8], pos: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&buf[pos..pos + 8]);
    u64::from_be_bytes(raw)
}
1607
/// Split a box header into its declared size (big-endian `u32` in bytes
/// 0..4) and its four-byte type (bytes 4..8).
/// Panics if `buf` is shorter than 8 bytes.
fn box_size_and_type(buf: &[u8]) -> (u32, &[u8]) {
    let mut size_bytes = [0u8; 4];
    size_bytes.copy_from_slice(&buf[0..4]);
    (u32::from_be_bytes(size_bytes), &buf[4..8])
}
1613
/// `mfhd` is a 16-byte FullBox: 8-byte header, version, 24-bit flags and
/// the sequence number.
#[test]
fn mfhd_layout_is_16_bytes_with_sequence_number() {
    let mfhd = build_mfhd(42);
    assert_eq!(mfhd.len(), 16);

    let (declared_size, fourcc) = box_size_and_type(&mfhd);
    assert_eq!(declared_size, 16);
    assert_eq!(fourcc, b"mfhd");

    // FullBox prefix: version byte, then three flag bytes, all zero.
    assert_eq!(mfhd[8], 0);
    assert_eq!(&mfhd[9..12], &[0, 0, 0]);

    // Payload: the sequence number passed in.
    assert_eq!(read_be_u32(&mfhd, 12), 42);
}
1625
1626    #[test]
1627    fn tfhd_minimal_track_id_only_is_16_bytes() {
1628        let bytes = build_tfhd(1, None, None, None);
1629        // 8 (header) + 1 (version) + 3 (flags) + 4 (track_id) = 16.
1630        assert_eq!(bytes.len(), 16);
1631        let (size, kind) = box_size_and_type(&bytes);
1632        assert_eq!(size, 16);
1633        assert_eq!(kind, b"tfhd");
1634        // tf_flags should ONLY have default-base-is-moof (0x020000) set.
1635        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1636        let flags = u32::from_be_bytes(flag_bytes);
1637        assert_eq!(flags, 0x020000);
1638        assert_eq!(read_be_u32(&bytes, 12), 1);
1639    }
1640
1641    #[test]
1642    fn tfhd_with_default_flags_only_packs_correct_bits() {
1643        let bytes = build_tfhd(1, None, None, Some(SampleFlags::delta_frame().pack()));
1644        // 8 header + 1 version + 3 flags + 4 track_id + 4 default_sample_flags = 20.
1645        assert_eq!(bytes.len(), 20);
1646        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1647        let flags = u32::from_be_bytes(flag_bytes);
1648        // default-base-is-moof (0x020000) | default-sample-flags (0x000020).
1649        assert_eq!(flags, 0x020020);
1650        assert_eq!(read_be_u32(&bytes, 12), 1);
1651        assert_eq!(read_be_u32(&bytes, 16), SampleFlags::delta_frame().pack());
1652    }
1653
1654    #[test]
1655    fn tfhd_with_all_defaults_packs_in_spec_order() {
1656        let bytes = build_tfhd(1, Some(1024), Some(2048), Some(0x01010000));
1657        // 8 + 1 + 3 + 4 + 4 + 4 + 4 = 28.
1658        assert_eq!(bytes.len(), 28);
1659        let flag_bytes = [0u8, bytes[9], bytes[10], bytes[11]];
1660        let flags = u32::from_be_bytes(flag_bytes);
1661        // default-base-is-moof (0x020000) | dur (0x000008) | size (0x000010) | flags (0x000020).
1662        assert_eq!(flags, 0x020038);
1663        assert_eq!(read_be_u32(&bytes, 12), 1);
1664        assert_eq!(read_be_u32(&bytes, 16), 1024); // duration
1665        assert_eq!(read_be_u32(&bytes, 20), 2048); // size
1666        assert_eq!(read_be_u32(&bytes, 24), 0x01010000); // flags
1667    }
1668
1669    #[test]
1670    fn tfdt_v1_carries_u64_decode_time() {
1671        let bytes = build_tfdt(0x0123_4567_89AB_CDEF);
1672        // 8 header + 1 version + 3 flags + 8 decode_time = 20.
1673        assert_eq!(bytes.len(), 20);
1674        assert_eq!(box_size_and_type(&bytes), (20, b"tfdt".as_slice()));
1675        assert_eq!(bytes[8], 1); // version 1
1676        assert_eq!(read_be_u64(&bytes, 12), 0x0123_4567_89AB_CDEF);
1677    }
1678
1679    #[test]
1680    fn mehd_v1_carries_u64_fragment_duration() {
1681        let bytes = build_mehd(1_000_000);
1682        assert_eq!(bytes.len(), 20);
1683        assert_eq!(box_size_and_type(&bytes), (20, b"mehd".as_slice()));
1684        assert_eq!(bytes[8], 1);
1685        assert_eq!(read_be_u64(&bytes, 12), 1_000_000);
1686    }
1687
1688    #[test]
1689    fn trex_layout_is_32_bytes_with_track_id_and_flags() {
1690        let default_flags = SampleFlags::delta_frame().pack();
1691        let bytes = build_trex(2, default_flags);
1692        // 8 + 1 + 3 + 4 + 4 + 4 + 4 + 4 = 32.
1693        assert_eq!(bytes.len(), 32);
1694        assert_eq!(box_size_and_type(&bytes), (32, b"trex".as_slice()));
1695        assert_eq!(read_be_u32(&bytes, 12), 2); // track_id
1696        assert_eq!(read_be_u32(&bytes, 16), 1); // default_sample_description_index
1697        assert_eq!(read_be_u32(&bytes, 20), 0); // default_sample_duration
1698        assert_eq!(read_be_u32(&bytes, 24), 0); // default_sample_size
1699        assert_eq!(read_be_u32(&bytes, 28), default_flags);
1700    }
1701
1702    #[test]
1703    fn sample_flags_pack_distinguishes_sync_from_delta() {
1704        let sync = SampleFlags::keyframe().pack();
1705        let delta = SampleFlags::delta_frame().pack();
1706        assert_ne!(sync, delta);
1707        // Sync: depends_on=2 in bits 24-25, is_non_sync=0 in bit 16.
1708        assert_eq!(sync, 0x02_00_00_00);
1709        // Delta: depends_on=1, is_non_sync=1.
1710        assert_eq!(delta, 0x01_01_00_00);
1711    }
1712
1713    #[test]
1714    fn moof_video_one_keyframe_sample_round_trip() {
1715        let samples = vec![CmafSample {
1716            duration: 1500,
1717            size: 4096,
1718            flags: SampleFlags::keyframe(),
1719        }];
1720        let mut moof = build_moof_video(1, 1, 0, &samples);
1721        moof.patch_default_no_gap();
1722
1723        let (size, kind) = box_size_and_type(&moof.bytes);
1724        assert_eq!(size as usize, moof.bytes.len());
1725        assert_eq!(kind, b"moof");
1726
1727        // mfhd starts at offset 8 (after moof header).
1728        let (mfhd_size, mfhd_kind) = box_size_and_type(&moof.bytes[8..]);
1729        assert_eq!(mfhd_size, 16);
1730        assert_eq!(mfhd_kind, b"mfhd");
1731        assert_eq!(read_be_u32(&moof.bytes, 8 + 12), 1); // sequence_number
1732
1733        // traf starts after mfhd.
1734        let traf_start = 8 + mfhd_size as usize;
1735        let (_, traf_kind) = box_size_and_type(&moof.bytes[traf_start..]);
1736        assert_eq!(traf_kind, b"traf");
1737
1738        // The patched data_offset should equal moof.len() + 8.
1739        let patched = read_be_u32(&moof.bytes, moof.data_offset_pos);
1740        assert_eq!(patched as usize, moof.bytes.len() + 8);
1741
1742        // The first_sample_flags slot in trun should equal the keyframe flags.
1743        // It sits 4 bytes after the data_offset field per the trun layout.
1744        let first_flags = read_be_u32(&moof.bytes, moof.data_offset_pos + 4);
1745        assert_eq!(first_flags, SampleFlags::keyframe().pack());
1746    }
1747
1748    #[test]
1749    fn moof_video_three_samples_records_per_sample_dur_and_size() {
1750        let samples = vec![
1751            CmafSample {
1752                duration: 1500,
1753                size: 4096,
1754                flags: SampleFlags::keyframe(),
1755            },
1756            CmafSample {
1757                duration: 1500,
1758                size: 1024,
1759                flags: SampleFlags::delta_frame(),
1760            },
1761            CmafSample {
1762                duration: 1500,
1763                size: 1024,
1764                flags: SampleFlags::delta_frame(),
1765            },
1766        ];
1767        let mut moof = build_moof_video(2, 1, 6000, &samples);
1768        moof.patch_default_no_gap();
1769
1770        // Walk into trun and read sample_count.
1771        // moof header(8) + mfhd(16) + traf header(8) = 32.
1772        // Then tfhd: 8 + 1 + 3 + 4 + 4 = 20 bytes (track_id + default_flags).
1773        // Then tfdt v1: 20 bytes.
1774        // trun starts at 32 + 20 + 20 = 72.
1775        let trun_start = 8 + 16 + 8 + 20 + 20;
1776        let (_, trun_kind) = box_size_and_type(&moof.bytes[trun_start..]);
1777        assert_eq!(trun_kind, b"trun");
1778        let sample_count = read_be_u32(&moof.bytes, trun_start + 12);
1779        assert_eq!(sample_count, 3);
1780
1781        // Per-sample table starts after data_offset(4) + first_sample_flags(4):
1782        //   trun_start + 8(header) + 1(version) + 3(flags) + 4(count) +
1783        //                4(data_offset) + 4(first_sample_flags) = trun_start + 24.
1784        let table_start = trun_start + 24;
1785        // sample 0: dur=1500, size=4096
1786        assert_eq!(read_be_u32(&moof.bytes, table_start), 1500);
1787        assert_eq!(read_be_u32(&moof.bytes, table_start + 4), 4096);
1788        // sample 1: dur=1500, size=1024
1789        assert_eq!(read_be_u32(&moof.bytes, table_start + 8), 1500);
1790        assert_eq!(read_be_u32(&moof.bytes, table_start + 12), 1024);
1791        // sample 2: dur=1500, size=1024
1792        assert_eq!(read_be_u32(&moof.bytes, table_start + 16), 1500);
1793        assert_eq!(read_be_u32(&moof.bytes, table_start + 20), 1024);
1794    }
1795
1796    #[test]
1797    fn moof_audio_does_not_emit_first_sample_flags() {
1798        let samples = vec![
1799            CmafSample {
1800                duration: 1024,
1801                size: 256,
1802                flags: SampleFlags::keyframe(),
1803            },
1804            CmafSample {
1805                duration: 1024,
1806                size: 256,
1807                flags: SampleFlags::keyframe(),
1808            },
1809        ];
1810        let mut moof = build_moof_audio(1, 2, 0, &samples);
1811        moof.patch_default_no_gap();
1812
1813        // Audio trun flags = 0x000001 | 0x000100 | 0x000200 = 0x000301
1814        // (no first-sample-flags bit, no per-sample-flags bit).
1815        let trun_start = 8 + 16 + 8 + 20 + 20;
1816        let flag_bytes = [
1817            0u8,
1818            moof.bytes[trun_start + 9],
1819            moof.bytes[trun_start + 10],
1820            moof.bytes[trun_start + 11],
1821        ];
1822        let flags = u32::from_be_bytes(flag_bytes);
1823        assert_eq!(flags, 0x000001 | 0x000100 | 0x000200);
1824
1825        // Per-sample table starts after data_offset(4) only — no
1826        // first_sample_flags this time.
1827        //   trun_start + 8 + 1 + 3 + 4 + 4 = trun_start + 20.
1828        let table_start = trun_start + 20;
1829        assert_eq!(read_be_u32(&moof.bytes, table_start), 1024); // sample 0 dur
1830        assert_eq!(read_be_u32(&moof.bytes, table_start + 4), 256); // sample 0 size
1831        assert_eq!(read_be_u32(&moof.bytes, table_start + 8), 1024); // sample 1 dur
1832        assert_eq!(read_be_u32(&moof.bytes, table_start + 12), 256); // sample 1 size
1833    }
1834
1835    #[test]
1836    fn moof_data_offset_patch_is_at_correct_position() {
1837        // Keyframe-only fragment of 1 sample. Data offset is at a
1838        // computable position; verify patch_data_offset writes there.
1839        let samples = vec![CmafSample {
1840            duration: 1500,
1841            size: 1234,
1842            flags: SampleFlags::keyframe(),
1843        }];
1844        let mut moof = build_moof_video(1, 1, 0, &samples);
1845        moof.patch_data_offset(0xDEAD_BEEF);
1846        let read_back = read_be_u32(&moof.bytes, moof.data_offset_pos);
1847        assert_eq!(read_back, 0xDEAD_BEEF);
1848    }
1849
1850    // Synthetic AV1 OBU bytes that contain exactly one
1851    // OBU_SEQUENCE_HEADER (type=1, has_size=1, ext=0). This is what
1852    // `extract_sequence_header` sniffs out of the first encoded packet
1853    // to build the av1C config record. Payload is 1 byte (0xAA) — the
1854    // value is irrelevant for our shape tests; the muxer just round-
1855    // trips it as bytes inside av1C.
1856    fn synthetic_seq_header_packet() -> Vec<u8> {
1857        let header_byte: u8 = (1 << 3) | (1 << 1); // obu_type=1, has_size=1
1858        vec![header_byte, 0x01, 0xAA]
1859    }
1860
1861    fn find_box<'a>(buf: &'a [u8], box_type: &[u8; 4]) -> Option<&'a [u8]> {
1862        let mut pos = 0;
1863        while pos + 8 <= buf.len() {
1864            let size = read_be_u32(buf, pos) as usize;
1865            if size < 8 || pos + size > buf.len() {
1866                return None;
1867            }
1868            let kind = &buf[pos + 4..pos + 8];
1869            if kind == box_type {
1870                return Some(&buf[pos..pos + size]);
1871            }
1872            pos += size;
1873        }
1874        None
1875    }
1876
1877    fn ftyp_compatible_brands(ftyp: &[u8]) -> Vec<&[u8]> {
1878        // size:4 + 'ftyp' + major:4 + minor:4 = 16, then brands[]
1879        let mut brands = Vec::new();
1880        let mut p = 16;
1881        while p + 4 <= ftyp.len() {
1882            brands.push(&ftyp[p..p + 4]);
1883            p += 4;
1884        }
1885        brands
1886    }
1887
1888    #[test]
1889    fn init_segment_video_lists_cmfc_and_av01_brands() {
1890        let init = build_init_segment_video(
1891            1920,
1892            1080,
1893            30000,
1894            &synthetic_seq_header_packet(),
1895            &ColorMetadata::default(),
1896        );
1897        let ftyp = find_box(&init, b"ftyp").expect("init has ftyp");
1898        let brands = ftyp_compatible_brands(ftyp);
1899        assert!(
1900            brands.contains(&b"cmfc".as_slice()),
1901            "cmfc brand missing: {brands:?}"
1902        );
1903        assert!(
1904            brands.contains(&b"av01".as_slice()),
1905            "av01 brand missing: {brands:?}"
1906        );
1907        assert!(
1908            brands.contains(&b"iso6".as_slice()),
1909            "iso6 brand missing: {brands:?}"
1910        );
1911    }
1912
1913    #[test]
1914    fn init_segment_audio_lists_cmfa_brand() {
1915        // ASC bytes for AAC-LC: object_type=2 (LC), sample_rate_index=3 (48 kHz),
1916        // channelConfiguration=2 (stereo).
1917        let info = AudioInfo::aac_lc(48000, 2, vec![0x11, 0x90]);
1918        let init = build_init_segment_audio(&info);
1919        let ftyp = find_box(&init, b"ftyp").expect("init has ftyp");
1920        let brands = ftyp_compatible_brands(ftyp);
1921        assert!(
1922            brands.contains(&b"cmfa".as_slice()),
1923            "cmfa brand missing: {brands:?}"
1924        );
1925        assert!(
1926            !brands.contains(&b"cmfc".as_slice()),
1927            "cmfc should not appear in audio init"
1928        );
1929    }
1930
1931    #[test]
1932    fn init_segment_video_moov_contains_mvex_with_trex() {
1933        let init = build_init_segment_video(
1934            1280,
1935            720,
1936            30000,
1937            &synthetic_seq_header_packet(),
1938            &ColorMetadata::default(),
1939        );
1940        let moov = find_box(&init, b"moov").expect("init has moov");
1941        let mvex = find_box(&moov[8..], b"mvex").expect("moov has mvex");
1942        assert!(
1943            find_box(&mvex[8..], b"trex").is_some(),
1944            "mvex must contain trex"
1945        );
1946        assert!(
1947            find_box(&mvex[8..], b"mehd").is_some(),
1948            "mvex must contain mehd"
1949        );
1950    }
1951
1952    #[test]
1953    fn init_segment_video_stbl_has_empty_sample_tables() {
1954        let init = build_init_segment_video(
1955            1280,
1956            720,
1957            30000,
1958            &synthetic_seq_header_packet(),
1959            &ColorMetadata::default(),
1960        );
1961        let moov = find_box(&init, b"moov").expect("init has moov");
1962        let trak = find_box(&moov[8..], b"trak").expect("moov has trak");
1963        let mdia = find_box(&trak[8..], b"mdia").expect("trak has mdia");
1964        let minf = find_box(&mdia[8..], b"minf").expect("mdia has minf");
1965        let stbl = find_box(&minf[8..], b"stbl").expect("minf has stbl");
1966
1967        // stsz: sample_size=0 (variable), sample_count=0 (no samples in init)
1968        let stsz = find_box(&stbl[8..], b"stsz").expect("stbl has stsz");
1969        // 8 (header) + 1 (version) + 3 (flags) + 4 (sample_size) + 4 (sample_count) = 20.
1970        assert_eq!(stsz.len(), 20);
1971        assert_eq!(read_be_u32(stsz, 12), 0); // sample_size
1972        assert_eq!(read_be_u32(stsz, 16), 0); // sample_count
1973
1974        // stts/stsc/stco: entry_count=0
1975        for box_type in [b"stts", b"stsc", b"stco"] {
1976            let bx = find_box(&stbl[8..], box_type).expect("stbl has empty full box");
1977            assert_eq!(
1978                bx.len(),
1979                16,
1980                "{:?} should be 16-byte empty FullBox",
1981                std::str::from_utf8(box_type).unwrap()
1982            );
1983            assert_eq!(read_be_u32(bx, 12), 0); // entry_count
1984        }
1985
1986        // stsd has exactly one entry — the av01 sample entry.
1987        let stsd = find_box(&stbl[8..], b"stsd").expect("stbl has stsd");
1988        assert_eq!(read_be_u32(stsd, 12), 1); // entry_count
1989        // First sample entry should be av01.
1990        let av01 = &stsd[16..];
1991        assert_eq!(&av01[4..8], b"av01");
1992    }
1993
1994    #[test]
1995    fn cmaf_video_muxer_emits_init_then_segment_files() {
1996        let dir = tempfile::tempdir().unwrap();
1997        let mut muxer =
1998            CmafVideoMuxer::new(dir.path(), 1280, 720, 30000, ColorMetadata::default()).unwrap();
1999
2000        // Two-packet "fragment": one keyframe, one delta. Each "payload"
2001        // starts with the synthetic sequence header (so the muxer's
2002        // first-packet OBU sniff succeeds) but the muxer doesn't care
2003        // about the rest of the payload bytes — it just round-trips
2004        // them through mdat.
2005        let mut k = synthetic_seq_header_packet();
2006        k.extend_from_slice(&[0xDE, 0xAD]);
2007        muxer.add_packet(k, 1500, true).unwrap();
2008        muxer
2009            .add_packet(synthetic_seq_header_packet(), 1500, false)
2010            .unwrap();
2011
2012        let info = muxer
2013            .flush_segment()
2014            .unwrap()
2015            .expect("flush emits a segment");
2016        assert_eq!(info.sequence_number, 1);
2017        assert_eq!(info.duration_ticks, 3000);
2018        assert!(info.path.exists());
2019        assert_eq!(info.path.file_name().unwrap(), "seg-00001.m4s");
2020
2021        // init.mp4 was written lazily on first flush.
2022        let init_path = dir.path().join("init.mp4");
2023        assert!(init_path.exists(), "init.mp4 must exist after first flush");
2024
2025        // Segment file starts with `moof` and contains an `mdat` after.
2026        let seg_bytes = std::fs::read(&info.path).unwrap();
2027        assert_eq!(&seg_bytes[4..8], b"moof");
2028        let moof_size = read_be_u32(&seg_bytes, 0) as usize;
2029        assert_eq!(&seg_bytes[moof_size + 4..moof_size + 8], b"mdat");
2030
2031        // Manifest finalize covers the empty-pending case (we already flushed).
2032        let manifest = muxer.finalize().unwrap();
2033        assert_eq!(manifest.segments.len(), 1);
2034        assert_eq!(manifest.timescale, 30000);
2035        assert!((manifest.duration_seconds() - 0.1).abs() < 1e-6); // 3000/30000 = 0.1s
2036    }
2037
2038    #[test]
2039    fn cmaf_h264_init_segment_is_avc3_with_inline_params() {
2040        let dir = tempfile::tempdir().unwrap();
2041        let mut muxer = CmafVideoMuxer::new_with_codec_options(
2042            dir.path(),
2043            1280,
2044            720,
2045            30000,
2046            ColorMetadata::default(),
2047            VideoCodec::H264,
2048            CmafVideoMuxerOptions::default(),
2049        )
2050        .unwrap();
2051        // Synthetic Annex-B keyframe AU: SPS (7) + PPS (8) + IDR (5).
2052        let mut kf = vec![0, 0, 0, 1, 0x67, 0x42, 0x00, 0x1e, 0xAA]; // SPS
2053        kf.extend_from_slice(&[0, 0, 0, 1, 0x68, 0xCE, 0x3C]); // PPS
2054        kf.extend_from_slice(&[0, 0, 0, 1, 0x65, 0x88, 0x11, 0x22]); // IDR slice
2055        muxer.add_packet(kf, 1000, true).unwrap();
2056        muxer
2057            .add_packet(vec![0, 0, 0, 1, 0x41, 0x9a, 0x33], 1000, false) // P-slice
2058            .unwrap();
2059        let info = muxer.flush_segment().unwrap().expect("segment flushed");
2060        assert!(info.path.exists());
2061        let manifest = muxer.finalize().unwrap();
2062        assert_eq!(manifest.segments.len(), 1);
2063
2064        let has = |buf: &[u8], pat: &[u8; 4]| buf.windows(4).any(|w| w == pat);
2065        let init = std::fs::read(dir.path().join("init.mp4")).unwrap();
2066        assert!(has(&init, b"avc3"), "H.264 CMAF init must use the avc3 sample entry");
2067        assert!(has(&init, b"avcC"), "init must carry the avcC config box");
2068        assert!(!has(&init, b"av01"), "must NOT contain an av01 box");
2069        let seg = std::fs::read(&info.path).unwrap();
2070        assert!(has(&seg, b"moof") && has(&seg, b"mdat"));
2071    }
2072
2073    #[test]
2074    fn cmaf_h265_init_segment_is_hev1() {
2075        let dir = tempfile::tempdir().unwrap();
2076        let mut muxer = CmafVideoMuxer::new_with_codec_options(
2077            dir.path(),
2078            1280,
2079            720,
2080            30000,
2081            ColorMetadata::default(),
2082            VideoCodec::H265,
2083            CmafVideoMuxerOptions::default(),
2084        )
2085        .unwrap();
2086        // Synthetic HEVC keyframe AU: VPS (32) + SPS (33) + PPS (34) + IDR (19).
2087        let mut kf = vec![0, 0, 0, 1, 0x40, 0x01, 0x0c]; // VPS
2088        kf.extend_from_slice(&[0, 0, 0, 1, 0x42, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03]); // SPS
2089        kf.extend_from_slice(&[0, 0, 0, 1, 0x44, 0x01, 0xc1]); // PPS
2090        kf.extend_from_slice(&[0, 0, 0, 1, 0x26, 0x01, 0xaf]); // IDR_W_RADL slice (type 19)
2091        muxer.add_packet(kf, 1000, true).unwrap();
2092        let info = muxer.flush_segment().unwrap().expect("segment flushed");
2093        let _ = muxer.finalize().unwrap();
2094        let has = |buf: &[u8], pat: &[u8; 4]| buf.windows(4).any(|w| w == pat);
2095        let init = std::fs::read(dir.path().join("init.mp4")).unwrap();
2096        assert!(has(&init, b"hev1"), "H.265 CMAF init must use the hev1 sample entry");
2097        assert!(has(&init, b"hvcC"), "init must carry the hvcC config box");
2098        assert!(info.path.exists());
2099    }
2100
2101    #[test]
2102    fn cmaf_video_muxer_options_default_matches_legacy_new() {
2103        // Calling `new()` and `new_with_options(..., default())` must
2104        // produce byte-identical first-segment output. This is the
2105        // contract that lets every existing call site stay on `new()`
2106        // unmodified.
2107        let dir_a = tempfile::tempdir().unwrap();
2108        let dir_b = tempfile::tempdir().unwrap();
2109        let mut ma = CmafVideoMuxer::new(
2110            dir_a.path(),
2111            1280,
2112            720,
2113            30000,
2114            ColorMetadata::default(),
2115        )
2116        .unwrap();
2117        let mut mb = CmafVideoMuxer::new_with_options(
2118            dir_b.path(),
2119            1280,
2120            720,
2121            30000,
2122            ColorMetadata::default(),
2123            CmafVideoMuxerOptions::default(),
2124        )
2125        .unwrap();
2126
2127        let mut kf = synthetic_seq_header_packet();
2128        kf.extend_from_slice(&[0xDE, 0xAD]);
2129        ma.add_packet(kf.clone(), 1500, true).unwrap();
2130        mb.add_packet(kf, 1500, true).unwrap();
2131
2132        let info_a = ma.flush_segment().unwrap().unwrap();
2133        let info_b = mb.flush_segment().unwrap().unwrap();
2134        assert_eq!(info_a.sequence_number, info_b.sequence_number);
2135        assert_eq!(info_a.duration_ticks, info_b.duration_ticks);
2136        assert_eq!(
2137            info_a.path.file_name().unwrap(),
2138            info_b.path.file_name().unwrap(),
2139        );
2140        // Byte-identical moof+mdat — proves no observable difference.
2141        let bytes_a = std::fs::read(&info_a.path).unwrap();
2142        let bytes_b = std::fs::read(&info_b.path).unwrap();
2143        assert_eq!(bytes_a, bytes_b);
2144        // init.mp4 written in both cases.
2145        assert!(dir_a.path().join("init.mp4").exists());
2146        assert!(dir_b.path().join("init.mp4").exists());
2147    }
2148
2149    #[test]
2150    fn cmaf_video_muxer_first_segment_index_offset_writes_correct_filename() {
2151        // A helper muxer attached at segment 5 of an in-progress rung
2152        // must produce `seg-00005.m4s` as its first output, not 00001.
2153        let dir = tempfile::tempdir().unwrap();
2154        let mut muxer = CmafVideoMuxer::new_with_options(
2155            dir.path(),
2156            1280,
2157            720,
2158            30000,
2159            ColorMetadata::default(),
2160            CmafVideoMuxerOptions {
2161                first_segment_index: 5,
2162                first_segment_base_decode_time: 4 * 3000, // 4 prior segments × 3000-tick duration
2163                write_init_segment: true,
2164            },
2165        )
2166        .unwrap();
2167
2168        let mut kf = synthetic_seq_header_packet();
2169        kf.extend_from_slice(&[0xCA, 0xFE]);
2170        muxer.add_packet(kf, 1500, true).unwrap();
2171        muxer
2172            .add_packet(synthetic_seq_header_packet(), 1500, false)
2173            .unwrap();
2174
2175        let info = muxer.flush_segment().unwrap().unwrap();
2176        assert_eq!(
2177            info.sequence_number, 5,
2178            "first flush of an offset muxer must produce segment number 5",
2179        );
2180        assert_eq!(info.path.file_name().unwrap(), "seg-00005.m4s");
2181
2182        // Second flush continues the sequence at 6.
2183        let mut kf2 = synthetic_seq_header_packet();
2184        kf2.extend_from_slice(&[0xBE, 0xEF]);
2185        muxer.add_packet(kf2, 1500, true).unwrap();
2186        let info2 = muxer.flush_segment().unwrap().unwrap();
2187        assert_eq!(info2.sequence_number, 6);
2188        assert_eq!(info2.path.file_name().unwrap(), "seg-00006.m4s");
2189    }
2190
2191    #[test]
2192    fn cmaf_video_muxer_offset_base_decode_time_propagates_to_tfdt() {
2193        // Verifies the `tfdt` box of the offset muxer's first segment
2194        // carries the configured base_decode_time. Without this, an
2195        // HLS player would see segment 5 starting at decode-time 0,
2196        // producing a buffer underrun at the cut from primary's
2197        // segment 4 to helper's segment 5.
2198        let dir = tempfile::tempdir().unwrap();
2199        let mut muxer = CmafVideoMuxer::new_with_options(
2200            dir.path(),
2201            1280,
2202            720,
2203            30000,
2204            ColorMetadata::default(),
2205            CmafVideoMuxerOptions {
2206                first_segment_index: 5,
2207                first_segment_base_decode_time: 4 * 3000,
2208                write_init_segment: true,
2209            },
2210        )
2211        .unwrap();
2212
2213        let mut kf = synthetic_seq_header_packet();
2214        kf.extend_from_slice(&[0x01, 0x02]);
2215        muxer.add_packet(kf, 1500, true).unwrap();
2216        let info = muxer.flush_segment().unwrap().unwrap();
2217
2218        // Walk the segment bytes: moof > traf > tfdt. tfdt v1 layout:
2219        //   8 bytes box header (size + 'tfdt')
2220        //   1 byte version (=1) + 3 bytes flags
2221        //   8 bytes base_media_decode_time (u64 BE)
2222        let bytes = std::fs::read(&info.path).unwrap();
2223        let moof_size = read_be_u32(&bytes, 0) as usize;
2224        let moof = &bytes[..moof_size];
2225        let traf = find_box(&moof[8..], b"traf").expect("moof has traf");
2226        let tfdt = find_box(&traf[8..], b"tfdt").expect("traf has tfdt");
2227        let version = tfdt[8];
2228        assert_eq!(version, 1, "tfdt should be version 1 (u64 decode time)");
2229        let dt = u64::from_be_bytes([
2230            tfdt[12], tfdt[13], tfdt[14], tfdt[15], tfdt[16], tfdt[17], tfdt[18], tfdt[19],
2231        ]);
2232        assert_eq!(
2233            dt, 12000,
2234            "tfdt base_media_decode_time must equal configured offset (4×3000)",
2235        );
2236    }
2237
2238    #[test]
2239    fn cmaf_video_muxer_write_init_false_skips_init_file() {
2240        // A helper muxer must NOT write init.mp4 — the primary owns
2241        // that file. Verify that flush_segment + finalize do not
2242        // create init.mp4 in the output directory.
2243        let dir = tempfile::tempdir().unwrap();
2244        let mut muxer = CmafVideoMuxer::new_with_options(
2245            dir.path(),
2246            1280,
2247            720,
2248            30000,
2249            ColorMetadata::default(),
2250            CmafVideoMuxerOptions {
2251                first_segment_index: 5,
2252                first_segment_base_decode_time: 4 * 3000,
2253                write_init_segment: false,
2254            },
2255        )
2256        .unwrap();
2257
2258        let mut kf = synthetic_seq_header_packet();
2259        kf.extend_from_slice(&[0x03, 0x04]);
2260        muxer.add_packet(kf, 1500, true).unwrap();
2261        let info = muxer.flush_segment().unwrap().unwrap();
2262        assert!(
2263            info.path.exists(),
2264            "segment file must be written even when init is skipped",
2265        );
2266        let init_path = dir.path().join("init.mp4");
2267        assert!(
2268            !init_path.exists(),
2269            "init.mp4 must NOT be written when write_init_segment=false",
2270        );
2271
2272        // finalize must also not write init.
2273        let _ = muxer.finalize().unwrap();
2274        assert!(
2275            !init_path.exists(),
2276            "finalize must not retroactively write init.mp4 when disabled",
2277        );
2278    }
2279
2280    #[test]
2281    fn cmaf_video_muxer_two_writers_share_output_dir_with_distinct_indices() {
2282        // The actual helper-task contract: primary writes segments
2283        // 1..3 + init.mp4 into dir/. Helper writes segments 3..5 into
2284        // the same dir with write_init_segment=false. After both
2285        // finalize, all 4 segment files plus init.mp4 exist.
2286        let dir = tempfile::tempdir().unwrap();
2287
2288        let mut primary = CmafVideoMuxer::new(
2289            dir.path(),
2290            1280,
2291            720,
2292            30000,
2293            ColorMetadata::default(),
2294        )
2295        .unwrap();
2296        let mut helper = CmafVideoMuxer::new_with_options(
2297            dir.path(),
2298            1280,
2299            720,
2300            30000,
2301            ColorMetadata::default(),
2302            CmafVideoMuxerOptions {
2303                first_segment_index: 3,
2304                first_segment_base_decode_time: 2 * 3000,
2305                write_init_segment: false,
2306            },
2307        )
2308        .unwrap();
2309
2310        // Primary writes segments 1 and 2.
2311        for _ in 0..2 {
2312            let mut kf = synthetic_seq_header_packet();
2313            kf.extend_from_slice(&[0xAA, 0xBB]);
2314            primary.add_packet(kf, 1500, true).unwrap();
2315            primary
2316                .add_packet(synthetic_seq_header_packet(), 1500, false)
2317                .unwrap();
2318            primary.flush_segment().unwrap().unwrap();
2319        }
2320        // Helper writes segments 3 and 4.
2321        for _ in 0..2 {
2322            let mut kf = synthetic_seq_header_packet();
2323            kf.extend_from_slice(&[0xCC, 0xDD]);
2324            helper.add_packet(kf, 1500, true).unwrap();
2325            helper
2326                .add_packet(synthetic_seq_header_packet(), 1500, false)
2327                .unwrap();
2328            helper.flush_segment().unwrap().unwrap();
2329        }
2330
2331        primary.finalize().unwrap();
2332        helper.finalize().unwrap();
2333
2334        // All four segments + one init.mp4 present.
2335        for seg_idx in 1..=4 {
2336            let p = dir.path().join(format!("seg-{seg_idx:05}.m4s"));
2337            assert!(p.exists(), "segment {seg_idx} missing at {}", p.display());
2338        }
2339        let init_path = dir.path().join("init.mp4");
2340        assert!(init_path.exists(), "primary's init.mp4 must be present");
2341    }
2342
2343    #[test]
2344    #[should_panic(expected = "first_segment_index is 1-based")]
2345    fn cmaf_video_muxer_first_segment_index_zero_panics() {
2346        let dir = tempfile::tempdir().unwrap();
2347        let _ = CmafVideoMuxer::new_with_options(
2348            dir.path(),
2349            1280,
2350            720,
2351            30000,
2352            ColorMetadata::default(),
2353            CmafVideoMuxerOptions {
2354                first_segment_index: 0,
2355                first_segment_base_decode_time: 0,
2356                write_init_segment: true,
2357            },
2358        );
2359    }
2360
2361    #[test]
2362    fn cmaf_video_muxer_rejects_segment_starting_on_non_keyframe() {
2363        let dir = tempfile::tempdir().unwrap();
2364        let mut muxer =
2365            CmafVideoMuxer::new(dir.path(), 640, 360, 30000, ColorMetadata::default()).unwrap();
2366        muxer
2367            .add_packet(synthetic_seq_header_packet(), 1500, false)
2368            .unwrap();
2369        let err = muxer
2370            .flush_segment()
2371            .expect_err("must fail when first sample is not sync");
2372        assert!(err.to_string().contains("must start with a sync sample"));
2373    }
2374
2375    #[test]
2376    fn cmaf_audio_muxer_emits_init_and_segments_with_correct_durations() {
2377        let info = AudioInfo {
2378            codec: "aac".into(),
2379            sample_rate: 48000,
2380            channels: 2,
2381            timescale: 48000,
2382            asc_bytes: vec![0x12, 0x10],
2383            codec_private: vec![],
2384        };
2385        let dir = tempfile::tempdir().unwrap();
2386        let mut muxer = CmafAudioMuxer::new(dir.path(), info).unwrap();
2387
2388        // 5 AAC frames at 1024 samples each = 5120 ticks @ 48 kHz =
2389        // ~107 ms total.
2390        for _ in 0..5 {
2391            muxer.add_packet(vec![0xDE; 256], 1024).unwrap();
2392        }
2393        let seg = muxer
2394            .flush_segment()
2395            .unwrap()
2396            .expect("audio segment emitted");
2397        assert_eq!(seg.duration_ticks, 5 * 1024);
2398        assert!(seg.path.exists());
2399        let init_path = dir.path().join("init.mp4");
2400        assert!(init_path.exists());
2401
2402        // Audio segment moof should NOT contain a first_sample_flags
2403        // slot — the trun layout for audio omits that flag bit. We
2404        // already cover this in `moof_audio_does_not_emit_first_sample_flags`;
2405        // here we just verify the file shape is valid.
2406        let bytes = std::fs::read(&seg.path).unwrap();
2407        assert_eq!(&bytes[4..8], b"moof");
2408
2409        let manifest = muxer.finalize().unwrap();
2410        assert_eq!(manifest.timescale, 48000);
2411        assert!((manifest.duration_seconds() - (5.0 * 1024.0 / 48000.0)).abs() < 1e-6);
2412    }
2413
2414    #[test]
2415    fn mvex_wraps_mehd_and_one_or_more_trex_in_order() {
2416        let mehd = build_mehd(10_000);
2417        let trex_v = build_trex(1, SampleFlags::delta_frame().pack());
2418        let trex_a = build_trex(2, SampleFlags::keyframe().pack());
2419        let mvex = build_mvex(&mehd, &[trex_v.clone(), trex_a.clone()]);
2420        let (size, kind) = box_size_and_type(&mvex);
2421        assert_eq!(size as usize, mvex.len());
2422        assert_eq!(kind, b"mvex");
2423        // 8 (header) + mehd(20) + trex(32) + trex(32) = 92.
2424        assert_eq!(mvex.len(), 8 + mehd.len() + trex_v.len() + trex_a.len());
2425        // First child is mehd.
2426        let (_, child0_kind) = box_size_and_type(&mvex[8..]);
2427        assert_eq!(child0_kind, b"mehd");
2428        // Second child is the first trex.
2429        let (_, child1_kind) = box_size_and_type(&mvex[8 + mehd.len()..]);
2430        assert_eq!(child1_kind, b"trex");
2431    }
2432}